i386.c (237021) i386.c (251212)
1/* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING. If not, write to
19the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA. */
21
22/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 237021 2012-06-13 20:21:08Z pfg $ */
22/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 251212 2013-06-01 01:02:24Z pfg $ */
23
24#include "config.h"
25#include "system.h"
26#include "coretypes.h"
27#include "tm.h"
28#include "rtl.h"
29#include "tree.h"
30#include "tm_p.h"
31#include "regs.h"
32#include "hard-reg-set.h"
33#include "real.h"
34#include "insn-config.h"
35#include "conditions.h"
36#include "output.h"
37#include "insn-codes.h"
38#include "insn-attr.h"
39#include "flags.h"
40#include "except.h"
41#include "function.h"
42#include "recog.h"
43#include "expr.h"
44#include "optabs.h"
45#include "toplev.h"
46#include "basic-block.h"
47#include "ggc.h"
48#include "target.h"
49#include "target-def.h"
50#include "langhooks.h"
51#include "cgraph.h"
52#include "tree-gimple.h"
53#include "dwarf2.h"
54#include "tm-constrs.h"
55
56#ifndef CHECK_STACK_LIMIT
57#define CHECK_STACK_LIMIT (-1)
58#endif
59
60/* Return index of given mode in mult and division cost tables. */
61#define MODE_INDEX(mode) \
62 ((mode) == QImode ? 0 \
63 : (mode) == HImode ? 1 \
64 : (mode) == SImode ? 2 \
65 : (mode) == DImode ? 3 \
66 : 4)
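/* Illustrative note (added for exposition; not part of the upstream source):
   the per-mode cost tables below (the mult_init[] and divide[] members of
   struct processor_costs) are meant to be indexed through this macro,
   roughly as
     ix86_cost->divide[MODE_INDEX (mode)]
   with any mode other than QImode/HImode/SImode/DImode falling into the
   trailing "other" slot at index 4.  */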
67
68/* Processor costs (relative to an add) */
69/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
70#define COSTS_N_BYTES(N) ((N) * 2)
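/* Illustrative arithmetic (added for exposition; not part of the upstream
   source): under the assumption stated above, COSTS_N_INSNS (N) expands to
   (N) * 4, so a plain addition -- one insn, assumed to be 2 bytes long --
   costs the same on either scale:
     COSTS_N_INSNS (1) == 4   and   COSTS_N_BYTES (2) == 4.
   That is why size_cost below expresses everything in bytes while the other
   tables are expressed in instruction units.  */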
71
72static const
73struct processor_costs size_cost = { /* costs for tuning for size */
74 COSTS_N_BYTES (2), /* cost of an add instruction */
75 COSTS_N_BYTES (3), /* cost of a lea instruction */
76 COSTS_N_BYTES (2), /* variable shift costs */
77 COSTS_N_BYTES (3), /* constant shift costs */
78 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
79 COSTS_N_BYTES (3), /* HI */
80 COSTS_N_BYTES (3), /* SI */
81 COSTS_N_BYTES (3), /* DI */
82 COSTS_N_BYTES (5)}, /* other */
83 0, /* cost of multiply per each bit set */
84 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 COSTS_N_BYTES (3), /* cost of movsx */
90 COSTS_N_BYTES (3), /* cost of movzx */
91 0, /* "large" insn */
92 2, /* MOVE_RATIO */
93 2, /* cost for loading QImode using movzbl */
94 {2, 2, 2}, /* cost of loading integer registers
95 in QImode, HImode and SImode.
96 Relative to reg-reg move (2). */
97 {2, 2, 2}, /* cost of storing integer registers */
98 2, /* cost of reg,reg fld/fst */
99 {2, 2, 2}, /* cost of loading fp registers
100 in SFmode, DFmode and XFmode */
101 {2, 2, 2}, /* cost of storing fp registers
102 in SFmode, DFmode and XFmode */
103 3, /* cost of moving MMX register */
104 {3, 3}, /* cost of loading MMX registers
105 in SImode and DImode */
106 {3, 3}, /* cost of storing MMX registers
107 in SImode and DImode */
108 3, /* cost of moving SSE register */
109 {3, 3, 3}, /* cost of loading SSE registers
110 in SImode, DImode and TImode */
111 {3, 3, 3}, /* cost of storing SSE registers
112 in SImode, DImode and TImode */
113 3, /* MMX or SSE register to integer */
114 0, /* size of prefetch block */
115 0, /* number of parallel prefetches */
116 2, /* Branch cost */
117 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
118 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
119 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
120 COSTS_N_BYTES (2), /* cost of FABS instruction. */
121 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
122 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
123};
124
125/* Processor costs (relative to an add) */
126static const
127struct processor_costs i386_cost = { /* 386 specific costs */
128 COSTS_N_INSNS (1), /* cost of an add instruction */
129 COSTS_N_INSNS (1), /* cost of a lea instruction */
130 COSTS_N_INSNS (3), /* variable shift costs */
131 COSTS_N_INSNS (2), /* constant shift costs */
132 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
133 COSTS_N_INSNS (6), /* HI */
134 COSTS_N_INSNS (6), /* SI */
135 COSTS_N_INSNS (6), /* DI */
136 COSTS_N_INSNS (6)}, /* other */
137 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
138 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
139 COSTS_N_INSNS (23), /* HI */
140 COSTS_N_INSNS (23), /* SI */
141 COSTS_N_INSNS (23), /* DI */
142 COSTS_N_INSNS (23)}, /* other */
143 COSTS_N_INSNS (3), /* cost of movsx */
144 COSTS_N_INSNS (2), /* cost of movzx */
145 15, /* "large" insn */
146 3, /* MOVE_RATIO */
147 4, /* cost for loading QImode using movzbl */
148 {2, 4, 2}, /* cost of loading integer registers
149 in QImode, HImode and SImode.
150 Relative to reg-reg move (2). */
151 {2, 4, 2}, /* cost of storing integer registers */
152 2, /* cost of reg,reg fld/fst */
153 {8, 8, 8}, /* cost of loading fp registers
154 in SFmode, DFmode and XFmode */
155 {8, 8, 8}, /* cost of storing fp registers
156 in SFmode, DFmode and XFmode */
157 2, /* cost of moving MMX register */
158 {4, 8}, /* cost of loading MMX registers
159 in SImode and DImode */
160 {4, 8}, /* cost of storing MMX registers
161 in SImode and DImode */
162 2, /* cost of moving SSE register */
163 {4, 8, 16}, /* cost of loading SSE registers
164 in SImode, DImode and TImode */
165 {4, 8, 16}, /* cost of storing SSE registers
166 in SImode, DImode and TImode */
167 3, /* MMX or SSE register to integer */
168 0, /* size of prefetch block */
169 0, /* number of parallel prefetches */
170 1, /* Branch cost */
171 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
172 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
173 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
174 COSTS_N_INSNS (22), /* cost of FABS instruction. */
175 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
176 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
177};
178
179static const
180struct processor_costs i486_cost = { /* 486 specific costs */
181 COSTS_N_INSNS (1), /* cost of an add instruction */
182 COSTS_N_INSNS (1), /* cost of a lea instruction */
183 COSTS_N_INSNS (3), /* variable shift costs */
184 COSTS_N_INSNS (2), /* constant shift costs */
185 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
186 COSTS_N_INSNS (12), /* HI */
187 COSTS_N_INSNS (12), /* SI */
188 COSTS_N_INSNS (12), /* DI */
189 COSTS_N_INSNS (12)}, /* other */
190 1, /* cost of multiply per each bit set */
191 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
192 COSTS_N_INSNS (40), /* HI */
193 COSTS_N_INSNS (40), /* SI */
194 COSTS_N_INSNS (40), /* DI */
195 COSTS_N_INSNS (40)}, /* other */
196 COSTS_N_INSNS (3), /* cost of movsx */
197 COSTS_N_INSNS (2), /* cost of movzx */
198 15, /* "large" insn */
199 3, /* MOVE_RATIO */
200 4, /* cost for loading QImode using movzbl */
201 {2, 4, 2}, /* cost of loading integer registers
202 in QImode, HImode and SImode.
203 Relative to reg-reg move (2). */
204 {2, 4, 2}, /* cost of storing integer registers */
205 2, /* cost of reg,reg fld/fst */
206 {8, 8, 8}, /* cost of loading fp registers
207 in SFmode, DFmode and XFmode */
208 {8, 8, 8}, /* cost of storing fp registers
209 in SFmode, DFmode and XFmode */
210 2, /* cost of moving MMX register */
211 {4, 8}, /* cost of loading MMX registers
212 in SImode and DImode */
213 {4, 8}, /* cost of storing MMX registers
214 in SImode and DImode */
215 2, /* cost of moving SSE register */
216 {4, 8, 16}, /* cost of loading SSE registers
217 in SImode, DImode and TImode */
218 {4, 8, 16}, /* cost of storing SSE registers
219 in SImode, DImode and TImode */
220 3, /* MMX or SSE register to integer */
221 0, /* size of prefetch block */
222 0, /* number of parallel prefetches */
223 1, /* Branch cost */
224 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
225 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
226 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
227 COSTS_N_INSNS (3), /* cost of FABS instruction. */
228 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
229 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
230};
231
232static const
233struct processor_costs pentium_cost = {
234 COSTS_N_INSNS (1), /* cost of an add instruction */
235 COSTS_N_INSNS (1), /* cost of a lea instruction */
236 COSTS_N_INSNS (4), /* variable shift costs */
237 COSTS_N_INSNS (1), /* constant shift costs */
238 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
239 COSTS_N_INSNS (11), /* HI */
240 COSTS_N_INSNS (11), /* SI */
241 COSTS_N_INSNS (11), /* DI */
242 COSTS_N_INSNS (11)}, /* other */
243 0, /* cost of multiply per each bit set */
244 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
245 COSTS_N_INSNS (25), /* HI */
246 COSTS_N_INSNS (25), /* SI */
247 COSTS_N_INSNS (25), /* DI */
248 COSTS_N_INSNS (25)}, /* other */
249 COSTS_N_INSNS (3), /* cost of movsx */
250 COSTS_N_INSNS (2), /* cost of movzx */
251 8, /* "large" insn */
252 6, /* MOVE_RATIO */
253 6, /* cost for loading QImode using movzbl */
254 {2, 4, 2}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 4, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of storing fp registers
262 in SFmode, DFmode and XFmode */
263 8, /* cost of moving MMX register */
264 {8, 8}, /* cost of loading MMX registers
265 in SImode and DImode */
266 {8, 8}, /* cost of storing MMX registers
267 in SImode and DImode */
268 2, /* cost of moving SSE register */
269 {4, 8, 16}, /* cost of loading SSE registers
270 in SImode, DImode and TImode */
271 {4, 8, 16}, /* cost of storing SSE registers
272 in SImode, DImode and TImode */
273 3, /* MMX or SSE register to integer */
274 0, /* size of prefetch block */
275 0, /* number of parallel prefetches */
276 2, /* Branch cost */
277 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
278 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
279 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
280 COSTS_N_INSNS (1), /* cost of FABS instruction. */
281 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
282 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
283};
284
285static const
286struct processor_costs pentiumpro_cost = {
287 COSTS_N_INSNS (1), /* cost of an add instruction */
288 COSTS_N_INSNS (1), /* cost of a lea instruction */
289 COSTS_N_INSNS (1), /* variable shift costs */
290 COSTS_N_INSNS (1), /* constant shift costs */
291 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
292 COSTS_N_INSNS (4), /* HI */
293 COSTS_N_INSNS (4), /* SI */
294 COSTS_N_INSNS (4), /* DI */
295 COSTS_N_INSNS (4)}, /* other */
296 0, /* cost of multiply per each bit set */
297 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
298 COSTS_N_INSNS (17), /* HI */
299 COSTS_N_INSNS (17), /* SI */
300 COSTS_N_INSNS (17), /* DI */
301 COSTS_N_INSNS (17)}, /* other */
302 COSTS_N_INSNS (1), /* cost of movsx */
303 COSTS_N_INSNS (1), /* cost of movzx */
304 8, /* "large" insn */
305 6, /* MOVE_RATIO */
306 2, /* cost for loading QImode using movzbl */
307 {4, 4, 4}, /* cost of loading integer registers
308 in QImode, HImode and SImode.
309 Relative to reg-reg move (2). */
310 {2, 2, 2}, /* cost of storing integer registers */
311 2, /* cost of reg,reg fld/fst */
312 {2, 2, 6}, /* cost of loading fp registers
313 in SFmode, DFmode and XFmode */
314 {4, 4, 6}, /* cost of storing fp registers
315 in SFmode, DFmode and XFmode */
316 2, /* cost of moving MMX register */
317 {2, 2}, /* cost of loading MMX registers
318 in SImode and DImode */
319 {2, 2}, /* cost of storing MMX registers
320 in SImode and DImode */
321 2, /* cost of moving SSE register */
322 {2, 2, 8}, /* cost of loading SSE registers
323 in SImode, DImode and TImode */
324 {2, 2, 8}, /* cost of storing SSE registers
325 in SImode, DImode and TImode */
326 3, /* MMX or SSE register to integer */
327 32, /* size of prefetch block */
328 6, /* number of parallel prefetches */
329 2, /* Branch cost */
330 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
331 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
332 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
333 COSTS_N_INSNS (2), /* cost of FABS instruction. */
334 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
335 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
336};
337
338static const
339struct processor_costs geode_cost = {
340 COSTS_N_INSNS (1), /* cost of an add instruction */
341 COSTS_N_INSNS (1), /* cost of a lea instruction */
342 COSTS_N_INSNS (2), /* variable shift costs */
343 COSTS_N_INSNS (1), /* constant shift costs */
344 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
345 COSTS_N_INSNS (4), /* HI */
346 COSTS_N_INSNS (7), /* SI */
347 COSTS_N_INSNS (7), /* DI */
348 COSTS_N_INSNS (7)}, /* other */
349 0, /* cost of multiply per each bit set */
350 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
351 COSTS_N_INSNS (23), /* HI */
352 COSTS_N_INSNS (39), /* SI */
353 COSTS_N_INSNS (39), /* DI */
354 COSTS_N_INSNS (39)}, /* other */
355 COSTS_N_INSNS (1), /* cost of movsx */
356 COSTS_N_INSNS (1), /* cost of movzx */
357 8, /* "large" insn */
358 4, /* MOVE_RATIO */
359 1, /* cost for loading QImode using movzbl */
360 {1, 1, 1}, /* cost of loading integer registers
361 in QImode, HImode and SImode.
362 Relative to reg-reg move (2). */
363 {1, 1, 1}, /* cost of storing integer registers */
364 1, /* cost of reg,reg fld/fst */
365 {1, 1, 1}, /* cost of loading fp registers
366 in SFmode, DFmode and XFmode */
367 {4, 6, 6}, /* cost of storing fp registers
368 in SFmode, DFmode and XFmode */
369
370 1, /* cost of moving MMX register */
371 {1, 1}, /* cost of loading MMX registers
372 in SImode and DImode */
373 {1, 1}, /* cost of storing MMX registers
374 in SImode and DImode */
375 1, /* cost of moving SSE register */
376 {1, 1, 1}, /* cost of loading SSE registers
377 in SImode, DImode and TImode */
378 {1, 1, 1}, /* cost of storing SSE registers
379 in SImode, DImode and TImode */
380 1, /* MMX or SSE register to integer */
381 32, /* size of prefetch block */
382 1, /* number of parallel prefetches */
383 1, /* Branch cost */
384 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
385 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
386 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
387 COSTS_N_INSNS (1), /* cost of FABS instruction. */
388 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
389 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
390};
391
392static const
393struct processor_costs k6_cost = {
394 COSTS_N_INSNS (1), /* cost of an add instruction */
395 COSTS_N_INSNS (2), /* cost of a lea instruction */
396 COSTS_N_INSNS (1), /* variable shift costs */
397 COSTS_N_INSNS (1), /* constant shift costs */
398 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
399 COSTS_N_INSNS (3), /* HI */
400 COSTS_N_INSNS (3), /* SI */
401 COSTS_N_INSNS (3), /* DI */
402 COSTS_N_INSNS (3)}, /* other */
403 0, /* cost of multiply per each bit set */
404 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
405 COSTS_N_INSNS (18), /* HI */
406 COSTS_N_INSNS (18), /* SI */
407 COSTS_N_INSNS (18), /* DI */
408 COSTS_N_INSNS (18)}, /* other */
409 COSTS_N_INSNS (2), /* cost of movsx */
410 COSTS_N_INSNS (2), /* cost of movzx */
411 8, /* "large" insn */
412 4, /* MOVE_RATIO */
413 3, /* cost for loading QImode using movzbl */
414 {4, 5, 4}, /* cost of loading integer registers
415 in QImode, HImode and SImode.
416 Relative to reg-reg move (2). */
417 {2, 3, 2}, /* cost of storing integer registers */
418 4, /* cost of reg,reg fld/fst */
419 {6, 6, 6}, /* cost of loading fp registers
420 in SFmode, DFmode and XFmode */
421 {4, 4, 4}, /* cost of storing fp registers
422 in SFmode, DFmode and XFmode */
423 2, /* cost of moving MMX register */
424 {2, 2}, /* cost of loading MMX registers
425 in SImode and DImode */
426 {2, 2}, /* cost of storing MMX registers
427 in SImode and DImode */
428 2, /* cost of moving SSE register */
429 {2, 2, 8}, /* cost of loading SSE registers
430 in SImode, DImode and TImode */
431 {2, 2, 8}, /* cost of storing SSE registers
432 in SImode, DImode and TImode */
433 6, /* MMX or SSE register to integer */
434 32, /* size of prefetch block */
435 1, /* number of parallel prefetches */
436 1, /* Branch cost */
437 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
438 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
439 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
440 COSTS_N_INSNS (2), /* cost of FABS instruction. */
441 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
442 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
443};
444
445static const
446struct processor_costs athlon_cost = {
447 COSTS_N_INSNS (1), /* cost of an add instruction */
448 COSTS_N_INSNS (2), /* cost of a lea instruction */
449 COSTS_N_INSNS (1), /* variable shift costs */
450 COSTS_N_INSNS (1), /* constant shift costs */
451 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
452 COSTS_N_INSNS (5), /* HI */
453 COSTS_N_INSNS (5), /* SI */
454 COSTS_N_INSNS (5), /* DI */
455 COSTS_N_INSNS (5)}, /* other */
456 0, /* cost of multiply per each bit set */
457 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
458 COSTS_N_INSNS (26), /* HI */
459 COSTS_N_INSNS (42), /* SI */
460 COSTS_N_INSNS (74), /* DI */
461 COSTS_N_INSNS (74)}, /* other */
462 COSTS_N_INSNS (1), /* cost of movsx */
463 COSTS_N_INSNS (1), /* cost of movzx */
464 8, /* "large" insn */
465 9, /* MOVE_RATIO */
466 4, /* cost for loading QImode using movzbl */
467 {3, 4, 3}, /* cost of loading integer registers
468 in QImode, HImode and SImode.
469 Relative to reg-reg move (2). */
470 {3, 4, 3}, /* cost of storing integer registers */
471 4, /* cost of reg,reg fld/fst */
472 {4, 4, 12}, /* cost of loading fp registers
473 in SFmode, DFmode and XFmode */
474 {6, 6, 8}, /* cost of storing fp registers
475 in SFmode, DFmode and XFmode */
476 2, /* cost of moving MMX register */
477 {4, 4}, /* cost of loading MMX registers
478 in SImode and DImode */
479 {4, 4}, /* cost of storing MMX registers
480 in SImode and DImode */
481 2, /* cost of moving SSE register */
482 {4, 4, 6}, /* cost of loading SSE registers
483 in SImode, DImode and TImode */
484 {4, 4, 5}, /* cost of storing SSE registers
485 in SImode, DImode and TImode */
486 5, /* MMX or SSE register to integer */
487 64, /* size of prefetch block */
488 6, /* number of parallel prefetches */
489 5, /* Branch cost */
490 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
491 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
492 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
493 COSTS_N_INSNS (2), /* cost of FABS instruction. */
494 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
495 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
496};
497
498static const
499struct processor_costs k8_cost = {
500 COSTS_N_INSNS (1), /* cost of an add instruction */
501 COSTS_N_INSNS (2), /* cost of a lea instruction */
502 COSTS_N_INSNS (1), /* variable shift costs */
503 COSTS_N_INSNS (1), /* constant shift costs */
504 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
505 COSTS_N_INSNS (4), /* HI */
506 COSTS_N_INSNS (3), /* SI */
507 COSTS_N_INSNS (4), /* DI */
508 COSTS_N_INSNS (5)}, /* other */
509 0, /* cost of multiply per each bit set */
510 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
511 COSTS_N_INSNS (26), /* HI */
512 COSTS_N_INSNS (42), /* SI */
513 COSTS_N_INSNS (74), /* DI */
514 COSTS_N_INSNS (74)}, /* other */
515 COSTS_N_INSNS (1), /* cost of movsx */
516 COSTS_N_INSNS (1), /* cost of movzx */
517 8, /* "large" insn */
518 9, /* MOVE_RATIO */
519 4, /* cost for loading QImode using movzbl */
520 {3, 4, 3}, /* cost of loading integer registers
521 in QImode, HImode and SImode.
522 Relative to reg-reg move (2). */
523 {3, 4, 3}, /* cost of storing integer registers */
524 4, /* cost of reg,reg fld/fst */
525 {4, 4, 12}, /* cost of loading fp registers
526 in SFmode, DFmode and XFmode */
527 {6, 6, 8}, /* cost of storing fp registers
528 in SFmode, DFmode and XFmode */
529 2, /* cost of moving MMX register */
530 {3, 3}, /* cost of loading MMX registers
531 in SImode and DImode */
532 {4, 4}, /* cost of storing MMX registers
533 in SImode and DImode */
534 2, /* cost of moving SSE register */
535 {4, 3, 6}, /* cost of loading SSE registers
536 in SImode, DImode and TImode */
537 {4, 4, 5}, /* cost of storing SSE registers
538 in SImode, DImode and TImode */
539 5, /* MMX or SSE register to integer */
540 64, /* size of prefetch block */
541 6, /* number of parallel prefetches */
542 5, /* Branch cost */
543 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
544 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
545 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
546 COSTS_N_INSNS (2), /* cost of FABS instruction. */
547 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
548 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
549};
550
551struct processor_costs amdfam10_cost = {
552 COSTS_N_INSNS (1), /* cost of an add instruction */
553 COSTS_N_INSNS (2), /* cost of a lea instruction */
554 COSTS_N_INSNS (1), /* variable shift costs */
555 COSTS_N_INSNS (1), /* constant shift costs */
556 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
557 COSTS_N_INSNS (4), /* HI */
558 COSTS_N_INSNS (3), /* SI */
559 COSTS_N_INSNS (4), /* DI */
560 COSTS_N_INSNS (5)}, /* other */
561 0, /* cost of multiply per each bit set */
562 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
563 COSTS_N_INSNS (35), /* HI */
564 COSTS_N_INSNS (51), /* SI */
565 COSTS_N_INSNS (83), /* DI */
566 COSTS_N_INSNS (83)}, /* other */
567 COSTS_N_INSNS (1), /* cost of movsx */
568 COSTS_N_INSNS (1), /* cost of movzx */
569 8, /* "large" insn */
570 9, /* MOVE_RATIO */
571 4, /* cost for loading QImode using movzbl */
572 {3, 4, 3}, /* cost of loading integer registers
573 in QImode, HImode and SImode.
574 Relative to reg-reg move (2). */
575 {3, 4, 3}, /* cost of storing integer registers */
576 4, /* cost of reg,reg fld/fst */
577 {4, 4, 12}, /* cost of loading fp registers
578 in SFmode, DFmode and XFmode */
579 {6, 6, 8}, /* cost of storing fp registers
580 in SFmode, DFmode and XFmode */
581 2, /* cost of moving MMX register */
582 {3, 3}, /* cost of loading MMX registers
583 in SImode and DImode */
584 {4, 4}, /* cost of storing MMX registers
585 in SImode and DImode */
586 2, /* cost of moving SSE register */
587 {4, 4, 3}, /* cost of loading SSE registers
588 in SImode, DImode and TImode */
589 {4, 4, 5}, /* cost of storing SSE registers
590 in SImode, DImode and TImode */
591 3, /* MMX or SSE register to integer */
592 /* On K8
593 MOVD reg64, xmmreg Double FSTORE 4
594 MOVD reg32, xmmreg Double FSTORE 4
595 On AMDFAM10
596 MOVD reg64, xmmreg Double FADD 3
597 1/1 1/1
598 MOVD reg32, xmmreg Double FADD 3
599 1/1 1/1 */
600 64, /* size of prefetch block */
 601 /* New AMD processors never drop prefetches; if they cannot be performed
 602 immediately, they are queued. We set the number of simultaneous prefetches
 603 to a large constant to reflect this (it is probably not a good idea to
 604 leave the number of prefetches completely unlimited, as their execution
 605 also takes some time). */
606 100, /* number of parallel prefetches */
607 5, /* Branch cost */
608 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
609 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
610 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
611 COSTS_N_INSNS (2), /* cost of FABS instruction. */
612 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
613 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
614};
615
551static const
552struct processor_costs pentium4_cost = {
553 COSTS_N_INSNS (1), /* cost of an add instruction */
554 COSTS_N_INSNS (3), /* cost of a lea instruction */
555 COSTS_N_INSNS (4), /* variable shift costs */
556 COSTS_N_INSNS (4), /* constant shift costs */
557 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
558 COSTS_N_INSNS (15), /* HI */
559 COSTS_N_INSNS (15), /* SI */
560 COSTS_N_INSNS (15), /* DI */
561 COSTS_N_INSNS (15)}, /* other */
562 0, /* cost of multiply per each bit set */
563 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
564 COSTS_N_INSNS (56), /* HI */
565 COSTS_N_INSNS (56), /* SI */
566 COSTS_N_INSNS (56), /* DI */
567 COSTS_N_INSNS (56)}, /* other */
568 COSTS_N_INSNS (1), /* cost of movsx */
569 COSTS_N_INSNS (1), /* cost of movzx */
570 16, /* "large" insn */
571 6, /* MOVE_RATIO */
572 2, /* cost for loading QImode using movzbl */
573 {4, 5, 4}, /* cost of loading integer registers
574 in QImode, HImode and SImode.
575 Relative to reg-reg move (2). */
576 {2, 3, 2}, /* cost of storing integer registers */
577 2, /* cost of reg,reg fld/fst */
578 {2, 2, 6}, /* cost of loading fp registers
579 in SFmode, DFmode and XFmode */
580 {4, 4, 6}, /* cost of storing fp registers
581 in SFmode, DFmode and XFmode */
582 2, /* cost of moving MMX register */
583 {2, 2}, /* cost of loading MMX registers
584 in SImode and DImode */
585 {2, 2}, /* cost of storing MMX registers
586 in SImode and DImode */
587 12, /* cost of moving SSE register */
588 {12, 12, 12}, /* cost of loading SSE registers
589 in SImode, DImode and TImode */
590 {2, 2, 8}, /* cost of storing SSE registers
591 in SImode, DImode and TImode */
592 10, /* MMX or SSE register to integer */
593 64, /* size of prefetch block */
594 6, /* number of parallel prefetches */
595 2, /* Branch cost */
596 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
597 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
598 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
599 COSTS_N_INSNS (2), /* cost of FABS instruction. */
600 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
601 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
602};
603
604static const
605struct processor_costs nocona_cost = {
606 COSTS_N_INSNS (1), /* cost of an add instruction */
607 COSTS_N_INSNS (1), /* cost of a lea instruction */
608 COSTS_N_INSNS (1), /* variable shift costs */
609 COSTS_N_INSNS (1), /* constant shift costs */
610 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
611 COSTS_N_INSNS (10), /* HI */
612 COSTS_N_INSNS (10), /* SI */
613 COSTS_N_INSNS (10), /* DI */
614 COSTS_N_INSNS (10)}, /* other */
615 0, /* cost of multiply per each bit set */
616 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
617 COSTS_N_INSNS (66), /* HI */
618 COSTS_N_INSNS (66), /* SI */
619 COSTS_N_INSNS (66), /* DI */
620 COSTS_N_INSNS (66)}, /* other */
621 COSTS_N_INSNS (1), /* cost of movsx */
622 COSTS_N_INSNS (1), /* cost of movzx */
623 16, /* "large" insn */
624 17, /* MOVE_RATIO */
625 4, /* cost for loading QImode using movzbl */
626 {4, 4, 4}, /* cost of loading integer registers
627 in QImode, HImode and SImode.
628 Relative to reg-reg move (2). */
629 {4, 4, 4}, /* cost of storing integer registers */
630 3, /* cost of reg,reg fld/fst */
631 {12, 12, 12}, /* cost of loading fp registers
632 in SFmode, DFmode and XFmode */
633 {4, 4, 4}, /* cost of storing fp registers
634 in SFmode, DFmode and XFmode */
635 6, /* cost of moving MMX register */
636 {12, 12}, /* cost of loading MMX registers
637 in SImode and DImode */
638 {12, 12}, /* cost of storing MMX registers
639 in SImode and DImode */
640 6, /* cost of moving SSE register */
641 {12, 12, 12}, /* cost of loading SSE registers
642 in SImode, DImode and TImode */
643 {12, 12, 12}, /* cost of storing SSE registers
644 in SImode, DImode and TImode */
645 8, /* MMX or SSE register to integer */
646 128, /* size of prefetch block */
647 8, /* number of parallel prefetches */
648 1, /* Branch cost */
649 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
650 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
651 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
652 COSTS_N_INSNS (3), /* cost of FABS instruction. */
653 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
654 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
655};
656
657static const
658struct processor_costs core2_cost = {
659 COSTS_N_INSNS (1), /* cost of an add instruction */
660 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
661 COSTS_N_INSNS (1), /* variable shift costs */
662 COSTS_N_INSNS (1), /* constant shift costs */
663 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
664 COSTS_N_INSNS (3), /* HI */
665 COSTS_N_INSNS (3), /* SI */
666 COSTS_N_INSNS (3), /* DI */
667 COSTS_N_INSNS (3)}, /* other */
668 0, /* cost of multiply per each bit set */
669 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
670 COSTS_N_INSNS (22), /* HI */
671 COSTS_N_INSNS (22), /* SI */
672 COSTS_N_INSNS (22), /* DI */
673 COSTS_N_INSNS (22)}, /* other */
674 COSTS_N_INSNS (1), /* cost of movsx */
675 COSTS_N_INSNS (1), /* cost of movzx */
676 8, /* "large" insn */
677 16, /* MOVE_RATIO */
678 2, /* cost for loading QImode using movzbl */
679 {6, 6, 6}, /* cost of loading integer registers
680 in QImode, HImode and SImode.
681 Relative to reg-reg move (2). */
682 {4, 4, 4}, /* cost of storing integer registers */
683 2, /* cost of reg,reg fld/fst */
684 {6, 6, 6}, /* cost of loading fp registers
685 in SFmode, DFmode and XFmode */
 686 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
687 2, /* cost of moving MMX register */
688 {6, 6}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {6, 6, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 4}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 2, /* MMX or SSE register to integer */
698 128, /* size of prefetch block */
699 8, /* number of parallel prefetches */
700 3, /* Branch cost */
701 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (1), /* cost of FABS instruction. */
705 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
707};
708
709/* Generic64 should produce code tuned for Nocona and K8. */
710static const
711struct processor_costs generic64_cost = {
712 COSTS_N_INSNS (1), /* cost of an add instruction */
713 /* On all chips taken into consideration lea is 2 cycles and more. With
714 this cost however our current implementation of synth_mult results in
715 use of unnecessary temporary registers causing regression on several
716 SPECfp benchmarks. */
717 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
718 COSTS_N_INSNS (1), /* variable shift costs */
719 COSTS_N_INSNS (1), /* constant shift costs */
720 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
721 COSTS_N_INSNS (4), /* HI */
722 COSTS_N_INSNS (3), /* SI */
723 COSTS_N_INSNS (4), /* DI */
724 COSTS_N_INSNS (2)}, /* other */
725 0, /* cost of multiply per each bit set */
726 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
727 COSTS_N_INSNS (26), /* HI */
728 COSTS_N_INSNS (42), /* SI */
729 COSTS_N_INSNS (74), /* DI */
730 COSTS_N_INSNS (74)}, /* other */
731 COSTS_N_INSNS (1), /* cost of movsx */
732 COSTS_N_INSNS (1), /* cost of movzx */
733 8, /* "large" insn */
734 17, /* MOVE_RATIO */
735 4, /* cost for loading QImode using movzbl */
736 {4, 4, 4}, /* cost of loading integer registers
737 in QImode, HImode and SImode.
738 Relative to reg-reg move (2). */
739 {4, 4, 4}, /* cost of storing integer registers */
740 4, /* cost of reg,reg fld/fst */
741 {12, 12, 12}, /* cost of loading fp registers
742 in SFmode, DFmode and XFmode */
743 {6, 6, 8}, /* cost of storing fp registers
744 in SFmode, DFmode and XFmode */
745 2, /* cost of moving MMX register */
746 {8, 8}, /* cost of loading MMX registers
747 in SImode and DImode */
748 {8, 8}, /* cost of storing MMX registers
749 in SImode and DImode */
750 2, /* cost of moving SSE register */
751 {8, 8, 8}, /* cost of loading SSE registers
752 in SImode, DImode and TImode */
753 {8, 8, 8}, /* cost of storing SSE registers
754 in SImode, DImode and TImode */
755 5, /* MMX or SSE register to integer */
756 64, /* size of prefetch block */
757 6, /* number of parallel prefetches */
 758 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
 759 is increased to the perhaps more appropriate value of 5. */
760 3, /* Branch cost */
761 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
762 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
763 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
764 COSTS_N_INSNS (8), /* cost of FABS instruction. */
765 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
766 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
767};
768
769/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
770static const
771struct processor_costs generic32_cost = {
772 COSTS_N_INSNS (1), /* cost of an add instruction */
773 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
774 COSTS_N_INSNS (1), /* variable shift costs */
775 COSTS_N_INSNS (1), /* constant shift costs */
776 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
777 COSTS_N_INSNS (4), /* HI */
778 COSTS_N_INSNS (3), /* SI */
779 COSTS_N_INSNS (4), /* DI */
780 COSTS_N_INSNS (2)}, /* other */
781 0, /* cost of multiply per each bit set */
782 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
783 COSTS_N_INSNS (26), /* HI */
784 COSTS_N_INSNS (42), /* SI */
785 COSTS_N_INSNS (74), /* DI */
786 COSTS_N_INSNS (74)}, /* other */
787 COSTS_N_INSNS (1), /* cost of movsx */
788 COSTS_N_INSNS (1), /* cost of movzx */
789 8, /* "large" insn */
790 17, /* MOVE_RATIO */
791 4, /* cost for loading QImode using movzbl */
792 {4, 4, 4}, /* cost of loading integer registers
793 in QImode, HImode and SImode.
794 Relative to reg-reg move (2). */
795 {4, 4, 4}, /* cost of storing integer registers */
796 4, /* cost of reg,reg fld/fst */
797 {12, 12, 12}, /* cost of loading fp registers
798 in SFmode, DFmode and XFmode */
799 {6, 6, 8}, /* cost of storing fp registers
800 in SFmode, DFmode and XFmode */
801 2, /* cost of moving MMX register */
802 {8, 8}, /* cost of loading MMX registers
803 in SImode and DImode */
804 {8, 8}, /* cost of storing MMX registers
805 in SImode and DImode */
806 2, /* cost of moving SSE register */
807 {8, 8, 8}, /* cost of loading SSE registers
808 in SImode, DImode and TImode */
809 {8, 8, 8}, /* cost of storing SSE registers
810 in SImode, DImode and TImode */
811 5, /* MMX or SSE register to integer */
812 64, /* size of prefetch block */
813 6, /* number of parallel prefetches */
814 3, /* Branch cost */
815 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
816 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
817 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
818 COSTS_N_INSNS (8), /* cost of FABS instruction. */
819 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
820 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
821};
822
823const struct processor_costs *ix86_cost = &pentium_cost;
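/* A rough sketch of how the tables above are consumed (an assumption based on
   the usual GCC layout, not something this revision changes): the active table
   is reached through ix86_cost, and ix86_rtx_costs charges, for example, a
   multiply as roughly

     *total = ix86_cost->mult_init[MODE_INDEX (mode)]
              + nbits * ix86_cost->mult_bit;

   so every COSTS_N_INSNS entry stays comparable to the cost of a single add. */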
824
825/* Processor feature/optimization bitmasks. */
826#define m_386 (1<<PROCESSOR_I386)
827#define m_486 (1<<PROCESSOR_I486)
828#define m_PENT (1<<PROCESSOR_PENTIUM)
829#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
830#define m_GEODE (1<<PROCESSOR_GEODE)
831#define m_K6_GEODE (m_K6 | m_GEODE)
832#define m_K6 (1<<PROCESSOR_K6)
833#define m_ATHLON (1<<PROCESSOR_ATHLON)
834#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
835#define m_K8 (1<<PROCESSOR_K8)
836#define m_ATHLON_K8 (m_K8 | m_ATHLON)
616static const
617struct processor_costs pentium4_cost = {
618 COSTS_N_INSNS (1), /* cost of an add instruction */
619 COSTS_N_INSNS (3), /* cost of a lea instruction */
620 COSTS_N_INSNS (4), /* variable shift costs */
621 COSTS_N_INSNS (4), /* constant shift costs */
622 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
623 COSTS_N_INSNS (15), /* HI */
624 COSTS_N_INSNS (15), /* SI */
625 COSTS_N_INSNS (15), /* DI */
626 COSTS_N_INSNS (15)}, /* other */
627 0, /* cost of multiply per each bit set */
628 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
629 COSTS_N_INSNS (56), /* HI */
630 COSTS_N_INSNS (56), /* SI */
631 COSTS_N_INSNS (56), /* DI */
632 COSTS_N_INSNS (56)}, /* other */
633 COSTS_N_INSNS (1), /* cost of movsx */
634 COSTS_N_INSNS (1), /* cost of movzx */
635 16, /* "large" insn */
636 6, /* MOVE_RATIO */
637 2, /* cost for loading QImode using movzbl */
638 {4, 5, 4}, /* cost of loading integer registers
639 in QImode, HImode and SImode.
640 Relative to reg-reg move (2). */
641 {2, 3, 2}, /* cost of storing integer registers */
642 2, /* cost of reg,reg fld/fst */
643 {2, 2, 6}, /* cost of loading fp registers
644 in SFmode, DFmode and XFmode */
645 {4, 4, 6}, /* cost of storing fp registers
646 in SFmode, DFmode and XFmode */
647 2, /* cost of moving MMX register */
648 {2, 2}, /* cost of loading MMX registers
649 in SImode and DImode */
650 {2, 2}, /* cost of storing MMX registers
651 in SImode and DImode */
652 12, /* cost of moving SSE register */
653 {12, 12, 12}, /* cost of loading SSE registers
654 in SImode, DImode and TImode */
655 {2, 2, 8}, /* cost of storing SSE registers
656 in SImode, DImode and TImode */
657 10, /* MMX or SSE register to integer */
658 64, /* size of prefetch block */
659 6, /* number of parallel prefetches */
660 2, /* Branch cost */
661 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
662 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
663 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
664 COSTS_N_INSNS (2), /* cost of FABS instruction. */
665 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
666 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
667};
668
669static const
670struct processor_costs nocona_cost = {
671 COSTS_N_INSNS (1), /* cost of an add instruction */
672 COSTS_N_INSNS (1), /* cost of a lea instruction */
673 COSTS_N_INSNS (1), /* variable shift costs */
674 COSTS_N_INSNS (1), /* constant shift costs */
675 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
676 COSTS_N_INSNS (10), /* HI */
677 COSTS_N_INSNS (10), /* SI */
678 COSTS_N_INSNS (10), /* DI */
679 COSTS_N_INSNS (10)}, /* other */
680 0, /* cost of multiply per each bit set */
681 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
682 COSTS_N_INSNS (66), /* HI */
683 COSTS_N_INSNS (66), /* SI */
684 COSTS_N_INSNS (66), /* DI */
685 COSTS_N_INSNS (66)}, /* other */
686 COSTS_N_INSNS (1), /* cost of movsx */
687 COSTS_N_INSNS (1), /* cost of movzx */
688 16, /* "large" insn */
689 17, /* MOVE_RATIO */
690 4, /* cost for loading QImode using movzbl */
691 {4, 4, 4}, /* cost of loading integer registers
692 in QImode, HImode and SImode.
693 Relative to reg-reg move (2). */
694 {4, 4, 4}, /* cost of storing integer registers */
695 3, /* cost of reg,reg fld/fst */
696 {12, 12, 12}, /* cost of loading fp registers
697 in SFmode, DFmode and XFmode */
698 {4, 4, 4}, /* cost of storing fp registers
699 in SFmode, DFmode and XFmode */
700 6, /* cost of moving MMX register */
701 {12, 12}, /* cost of loading MMX registers
702 in SImode and DImode */
703 {12, 12}, /* cost of storing MMX registers
704 in SImode and DImode */
705 6, /* cost of moving SSE register */
706 {12, 12, 12}, /* cost of loading SSE registers
707 in SImode, DImode and TImode */
708 {12, 12, 12}, /* cost of storing SSE registers
709 in SImode, DImode and TImode */
710 8, /* MMX or SSE register to integer */
711 128, /* size of prefetch block */
712 8, /* number of parallel prefetches */
713 1, /* Branch cost */
714 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
715 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
716 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
717 COSTS_N_INSNS (3), /* cost of FABS instruction. */
718 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
719 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
720};
721
722static const
723struct processor_costs core2_cost = {
724 COSTS_N_INSNS (1), /* cost of an add instruction */
725 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
726 COSTS_N_INSNS (1), /* variable shift costs */
727 COSTS_N_INSNS (1), /* constant shift costs */
728 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
729 COSTS_N_INSNS (3), /* HI */
730 COSTS_N_INSNS (3), /* SI */
731 COSTS_N_INSNS (3), /* DI */
732 COSTS_N_INSNS (3)}, /* other */
733 0, /* cost of multiply per each bit set */
734 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
735 COSTS_N_INSNS (22), /* HI */
736 COSTS_N_INSNS (22), /* SI */
737 COSTS_N_INSNS (22), /* DI */
738 COSTS_N_INSNS (22)}, /* other */
739 COSTS_N_INSNS (1), /* cost of movsx */
740 COSTS_N_INSNS (1), /* cost of movzx */
741 8, /* "large" insn */
742 16, /* MOVE_RATIO */
743 2, /* cost for loading QImode using movzbl */
744 {6, 6, 6}, /* cost of loading integer registers
745 in QImode, HImode and SImode.
746 Relative to reg-reg move (2). */
747 {4, 4, 4}, /* cost of storing integer registers */
748 2, /* cost of reg,reg fld/fst */
749 {6, 6, 6}, /* cost of loading fp registers
750 in SFmode, DFmode and XFmode */
 751 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
752 2, /* cost of moving MMX register */
753 {6, 6}, /* cost of loading MMX registers
754 in SImode and DImode */
755 {4, 4}, /* cost of storing MMX registers
756 in SImode and DImode */
757 2, /* cost of moving SSE register */
758 {6, 6, 6}, /* cost of loading SSE registers
759 in SImode, DImode and TImode */
760 {4, 4, 4}, /* cost of storing SSE registers
761 in SImode, DImode and TImode */
762 2, /* MMX or SSE register to integer */
763 128, /* size of prefetch block */
764 8, /* number of parallel prefetches */
765 3, /* Branch cost */
766 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
767 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
768 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
769 COSTS_N_INSNS (1), /* cost of FABS instruction. */
770 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
771 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
772};
773
774/* Generic64 should produce code tuned for Nocona and K8. */
775static const
776struct processor_costs generic64_cost = {
777 COSTS_N_INSNS (1), /* cost of an add instruction */
 778 /* On all chips taken into consideration, lea takes 2 cycles or more. With
 779    this cost, however, our current implementation of synth_mult results in
 780    the use of unnecessary temporary registers, causing regressions on several
 781    SPECfp benchmarks. */
782 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
783 COSTS_N_INSNS (1), /* variable shift costs */
784 COSTS_N_INSNS (1), /* constant shift costs */
785 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
786 COSTS_N_INSNS (4), /* HI */
787 COSTS_N_INSNS (3), /* SI */
788 COSTS_N_INSNS (4), /* DI */
789 COSTS_N_INSNS (2)}, /* other */
790 0, /* cost of multiply per each bit set */
791 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
792 COSTS_N_INSNS (26), /* HI */
793 COSTS_N_INSNS (42), /* SI */
794 COSTS_N_INSNS (74), /* DI */
795 COSTS_N_INSNS (74)}, /* other */
796 COSTS_N_INSNS (1), /* cost of movsx */
797 COSTS_N_INSNS (1), /* cost of movzx */
798 8, /* "large" insn */
799 17, /* MOVE_RATIO */
800 4, /* cost for loading QImode using movzbl */
801 {4, 4, 4}, /* cost of loading integer registers
802 in QImode, HImode and SImode.
803 Relative to reg-reg move (2). */
804 {4, 4, 4}, /* cost of storing integer registers */
805 4, /* cost of reg,reg fld/fst */
806 {12, 12, 12}, /* cost of loading fp registers
807 in SFmode, DFmode and XFmode */
808 {6, 6, 8}, /* cost of storing fp registers
809 in SFmode, DFmode and XFmode */
810 2, /* cost of moving MMX register */
811 {8, 8}, /* cost of loading MMX registers
812 in SImode and DImode */
813 {8, 8}, /* cost of storing MMX registers
814 in SImode and DImode */
815 2, /* cost of moving SSE register */
816 {8, 8, 8}, /* cost of loading SSE registers
817 in SImode, DImode and TImode */
818 {8, 8, 8}, /* cost of storing SSE registers
819 in SImode, DImode and TImode */
820 5, /* MMX or SSE register to integer */
821 64, /* size of prefetch block */
822 6, /* number of parallel prefetches */
 823 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
 824    is increased to the perhaps more appropriate value of 5. */
825 3, /* Branch cost */
826 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
827 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
828 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
829 COSTS_N_INSNS (8), /* cost of FABS instruction. */
830 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
831 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
832};
833
834/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
835static const
836struct processor_costs generic32_cost = {
837 COSTS_N_INSNS (1), /* cost of an add instruction */
838 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
839 COSTS_N_INSNS (1), /* variable shift costs */
840 COSTS_N_INSNS (1), /* constant shift costs */
841 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
842 COSTS_N_INSNS (4), /* HI */
843 COSTS_N_INSNS (3), /* SI */
844 COSTS_N_INSNS (4), /* DI */
845 COSTS_N_INSNS (2)}, /* other */
846 0, /* cost of multiply per each bit set */
847 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
848 COSTS_N_INSNS (26), /* HI */
849 COSTS_N_INSNS (42), /* SI */
850 COSTS_N_INSNS (74), /* DI */
851 COSTS_N_INSNS (74)}, /* other */
852 COSTS_N_INSNS (1), /* cost of movsx */
853 COSTS_N_INSNS (1), /* cost of movzx */
854 8, /* "large" insn */
855 17, /* MOVE_RATIO */
856 4, /* cost for loading QImode using movzbl */
857 {4, 4, 4}, /* cost of loading integer registers
858 in QImode, HImode and SImode.
859 Relative to reg-reg move (2). */
860 {4, 4, 4}, /* cost of storing integer registers */
861 4, /* cost of reg,reg fld/fst */
862 {12, 12, 12}, /* cost of loading fp registers
863 in SFmode, DFmode and XFmode */
864 {6, 6, 8}, /* cost of storing fp registers
865 in SFmode, DFmode and XFmode */
866 2, /* cost of moving MMX register */
867 {8, 8}, /* cost of loading MMX registers
868 in SImode and DImode */
869 {8, 8}, /* cost of storing MMX registers
870 in SImode and DImode */
871 2, /* cost of moving SSE register */
872 {8, 8, 8}, /* cost of loading SSE registers
873 in SImode, DImode and TImode */
874 {8, 8, 8}, /* cost of storing SSE registers
875 in SImode, DImode and TImode */
876 5, /* MMX or SSE register to integer */
877 64, /* size of prefetch block */
878 6, /* number of parallel prefetches */
879 3, /* Branch cost */
880 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
881 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
882 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
883 COSTS_N_INSNS (8), /* cost of FABS instruction. */
884 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
885 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
886};
887
888const struct processor_costs *ix86_cost = &pentium_cost;
889
890/* Processor feature/optimization bitmasks. */
891#define m_386 (1<<PROCESSOR_I386)
892#define m_486 (1<<PROCESSOR_I486)
893#define m_PENT (1<<PROCESSOR_PENTIUM)
894#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
895#define m_GEODE (1<<PROCESSOR_GEODE)
896#define m_K6_GEODE (m_K6 | m_GEODE)
897#define m_K6 (1<<PROCESSOR_K6)
898#define m_ATHLON (1<<PROCESSOR_ATHLON)
899#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
900#define m_K8 (1<<PROCESSOR_K8)
901#define m_ATHLON_K8 (m_K8 | m_ATHLON)
902#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
837#define m_NOCONA (1<<PROCESSOR_NOCONA)
838#define m_CORE2 (1<<PROCESSOR_CORE2)
839#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
840#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
841#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
903#define m_NOCONA (1<<PROCESSOR_NOCONA)
904#define m_CORE2 (1<<PROCESSOR_CORE2)
905#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
906#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
907#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
908#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
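/* A sketch of how these masks are tested (an assumption; the real macros live
   in i386.h, not in this file):

     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   so widening a mask from m_ATHLON_K8 to m_ATHLON_K8_AMDFAM10 turns the
   corresponding tuning flag on for -mtune=amdfam10 as well. */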
842
843/* Generic instruction choice should be a common subset of the supported CPUs
844   (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
845
846/* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
847   Generic64 seems like a good code size tradeoff. We can't enable it for 32bit
848   generic because it does not work well with PPro based chips. */
909
910/* Generic instruction choice should be a common subset of the supported CPUs
911   (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
912
913/* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
914   Generic64 seems like a good code size tradeoff. We can't enable it for 32bit
915   generic because it does not work well with PPro based chips. */
849const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_CORE2 | m_GENERIC64;
850const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
916const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
917 | m_GENERIC64;
918const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
919 | m_NOCONA | m_CORE2 | m_GENERIC;
851const int x86_zero_extend_with_and = m_486 | m_PENT;
920const int x86_zero_extend_with_and = m_486 | m_PENT;
852const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
921/* Enable to zero extend integer registers to avoid partial dependencies */
922const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
923 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
853const int x86_double_with_add = ~m_386;
854const int x86_use_bit_test = m_386;
924const int x86_double_with_add = ~m_386;
925const int x86_use_bit_test = m_386;
855const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC;
856const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
857const int x86_3dnow_a = m_ATHLON_K8;
858const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
926const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
927 | m_K6 | m_CORE2 | m_GENERIC;
928const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
929 | m_NOCONA;
930const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
931const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
932 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
859/* Branch hints were put in P4 based on simulation results. But
860   after P4 was made, no performance benefit was observed with
861   branch hints; they also increase the code size. As a result,
862   icc never generates branch hints. */
863const int x86_branch_hints = 0;
933/* Branch hints were put in P4 based on simulation results. But
934   after P4 was made, no performance benefit was observed with
935   branch hints; they also increase the code size. As a result,
936   icc never generates branch hints. */
937const int x86_branch_hints = 0;
864const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
938const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
939 /*m_GENERIC | m_ATHLON_K8 ? */
865/* We probably ought to watch for partial register stalls in the Generic32
866   compilation setting as well. However, in the current implementation the
867   partial register stalls are not eliminated very well - they can
868   be introduced via subregs synthesized by combine and can happen
869   in caller/callee saving sequences.
870   Because this option pays back little on PPro based chips and conflicts
871   with the partial reg. dependencies used by Athlon/P4 based chips, it is better
872   to leave it off for generic32 for now. */
873const int x86_partial_reg_stall = m_PPRO;
874const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
875const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
940/* We probably ought to watch for partial register stalls in the Generic32
941   compilation setting as well. However, in the current implementation the
942   partial register stalls are not eliminated very well - they can
943   be introduced via subregs synthesized by combine and can happen
944   in caller/callee saving sequences.
945   Because this option pays back little on PPro based chips and conflicts
946   with the partial reg. dependencies used by Athlon/P4 based chips, it is better
947   to leave it off for generic32 for now. */
948const int x86_partial_reg_stall = m_PPRO;
949const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
950const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
876const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC);
951const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
952 | m_CORE2 | m_GENERIC);
877const int x86_use_mov0 = m_K6;
878const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
879const int x86_read_modify_write = ~m_PENT;
880const int x86_read_modify = ~(m_PENT | m_PPRO);
881const int x86_split_long_moves = m_PPRO;
953const int x86_use_mov0 = m_K6;
954const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
955const int x86_read_modify_write = ~m_PENT;
956const int x86_read_modify = ~(m_PENT | m_PPRO);
957const int x86_split_long_moves = m_PPRO;
882const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */
958const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
959 | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
960 /* m_PENT4 ? */
883const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
884const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
885const int x86_qimode_math = ~(0);
886const int x86_promote_qi_regs = 0;
887/* On PPro this flag is meant to avoid partial register stalls. Just like
888 the x86_partial_reg_stall this option might be considered for Generic32
889 if our scheme for avoiding partial stalls was more effective. */
890const int x86_himode_math = ~(m_PPRO);
891const int x86_promote_hi_regs = m_PPRO;
961const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
962const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
963const int x86_qimode_math = ~(0);
964const int x86_promote_qi_regs = 0;
965/* On PPro this flag is meant to avoid partial register stalls. Just like
966 the x86_partial_reg_stall this option might be considered for Generic32
967 if our scheme for avoiding partial stalls was more effective. */
968const int x86_himode_math = ~(m_PPRO);
969const int x86_promote_hi_regs = m_PPRO;
892const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
893const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
894const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
895const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
896const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
897const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
898const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
899const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
970/* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
971const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
972 | m_CORE2 | m_GENERIC;
973const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
974 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
975const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
976 | m_CORE2 | m_GENERIC;
977const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
978 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
979/* Enable if integer moves are preferred for DFmode copies */
980const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
981 | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
982const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
983 | m_CORE2 | m_GENERIC;
984const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
985 | m_CORE2 | m_GENERIC;
986/* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
987 for outgoing arguments will be computed and placed into the variable
988 `current_function_outgoing_args_size'. No space will be pushed onto the stack
989 for each call; instead, the function prologue should increase the stack frame
990 size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
991 not proper. */
992const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
993 | m_NOCONA | m_PPRO | m_CORE2
994 | m_GENERIC;
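/* Illustration of the tradeoff (a sketch, not taken from this revision): with
   ACCUMULATE_OUTGOING_ARGS the prologue grows the frame once by
   current_function_outgoing_args_size and each call site stores its arguments
   with plain moves into that area; without it every call pushes its arguments
   and the stack pointer moves around each call. */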
900const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
901const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
902const int x86_shift1 = ~m_486;
995const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
996const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
997const int x86_shift1 = ~m_486;
903const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
998const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
999 | m_ATHLON_K8_AMDFAM10 | m_PENT4
1000 | m_NOCONA | m_CORE2 | m_GENERIC;
904/* In the Generic model we have a conflict here between PPro/Pentium4 based chips
905   that treat 128bit SSE registers as single units and K8 based chips that
906   divide SSE registers into two 64bit halves.
907   x86_sse_partial_reg_dependency promotes all store destinations to 128bit
908   to allow register renaming on 128bit SSE units, but usually results in one
909   extra micro-op on 64bit SSE units. Experimental results show that disabling
910   this option on P4 brings over a 20% SPECfp regression, while enabling it on
911   K8 brings roughly a 2.4% regression that can be partly masked by careful
912   scheduling of moves. */
1001/* In the Generic model we have a conflict here between PPro/Pentium4 based chips
1002   that treat 128bit SSE registers as single units and K8 based chips that
1003   divide SSE registers into two 64bit halves.
1004   x86_sse_partial_reg_dependency promotes all store destinations to 128bit
1005   to allow register renaming on 128bit SSE units, but usually results in one
1006   extra micro-op on 64bit SSE units. Experimental results show that disabling
1007   this option on P4 brings over a 20% SPECfp regression, while enabling it on
1008   K8 brings roughly a 2.4% regression that can be partly masked by careful
1009   scheduling of moves. */
913const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1010const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1011 | m_GENERIC | m_AMDFAM10;
914/* Set for machines where the type and dependencies are resolved on SSE
915 register parts instead of whole registers, so we may maintain just
916 lower part of scalar values in proper format leaving the upper part
917 undefined. */
918const int x86_sse_split_regs = m_ATHLON_K8;
1012/* Set for machines where the type and dependencies are resolved on SSE
1013 register parts instead of whole registers, so we may maintain just
1014 lower part of scalar values in proper format leaving the upper part
1015 undefined. */
1016const int x86_sse_split_regs = m_ATHLON_K8;
919const int x86_sse_typeless_stores = m_ATHLON_K8;
1017/* Code generation for scalar reg-reg moves of single and double precision data:
1018 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
1019 movaps reg, reg
1020 else
1021 movss reg, reg
1022 if (x86_sse_partial_reg_dependency == true)
1023 movapd reg, reg
1024 else
1025 movsd reg, reg
1026
1027 Code generation for scalar loads of double precision data:
1028 if (x86_sse_split_regs == true)
1029 movlpd mem, reg (gas syntax)
1030 else
1031 movsd mem, reg
1032
1033 Code generation for unaligned packed loads of single precision data
1034 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
1035 if (x86_sse_unaligned_move_optimal)
1036 movups mem, reg
1037
1038 if (x86_sse_partial_reg_dependency == true)
1039 {
1040 xorps reg, reg
1041 movlps mem, reg
1042 movhps mem+8, reg
1043 }
1044 else
1045 {
1046 movlps mem, reg
1047 movhps mem+8, reg
1048 }
1049
1050 Code generation for unaligned packed loads of double precision data
1051 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
1052 if (x86_sse_unaligned_move_optimal)
1053 movupd mem, reg
1054
1055 if (x86_sse_split_regs == true)
1056 {
1057 movlpd mem, reg
1058 movhpd mem+8, reg
1059 }
1060 else
1061 {
1062 movsd mem, reg
1063 movhpd mem+8, reg
1064 }
1065 */
1066const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
1067const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
920const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
1068const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
921const int x86_use_ffreep = m_ATHLON_K8;
1069const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
922const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2;
923const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
924
925/* ??? Allowing interunit moves makes it all too easy for the compiler to put
926 integer data in xmm registers. Which results in pretty abysmal code. */
927const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
928
1070const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2;
1071const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
1072
1073/* ??? Allowing interunit moves makes it all too easy for the compiler to put
1074 integer data in xmm registers. Which results in pretty abysmal code. */
1075const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
1076
929const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4 | m_NOCONA | m_CORE2 | m_PPRO | m_GENERIC32;
1077const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
1078 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
930/* Some CPU cores are not able to predict more than 4 branch instructions in
931 the 16 byte window. */
1079/* Some CPU cores are not able to predict more than 4 branch instructions in
1080 the 16 byte window. */
932const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
933const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC;
934const int x86_use_bt = m_ATHLON_K8;
1081const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1082 | m_NOCONA | m_CORE2 | m_GENERIC;
1083const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
1084 | m_CORE2 | m_GENERIC;
1085const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
935/* Compare and exchange was added for 80486. */
936const int x86_cmpxchg = ~m_386;
937/* Compare and exchange 8 bytes was added for pentium. */
938const int x86_cmpxchg8b = ~(m_386 | m_486);
1086/* Compare and exchange was added for 80486. */
1087const int x86_cmpxchg = ~m_386;
1088/* Compare and exchange 8 bytes was added for pentium. */
1089const int x86_cmpxchg8b = ~(m_386 | m_486);
939/* Compare and exchange 16 bytes was added for nocona. */
940const int x86_cmpxchg16b = m_NOCONA | m_CORE2;
941/* Exchange and add was added for 80486. */
942const int x86_xadd = ~m_386;
1090/* Exchange and add was added for 80486. */
1091const int x86_xadd = ~m_386;
943const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC;
1092const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
944
945/* In case the average insn count for a single function invocation is
946   lower than this constant, emit fast (but longer) prologue and
947   epilogue code. */
948#define FAST_PROLOGUE_INSN_COUNT 20
949
950/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
951static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
952static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
953static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
954
955/* Array of the smallest class containing reg number REGNO, indexed by
956 REGNO. Used by REGNO_REG_CLASS in i386.h. */
957
958enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
959{
960 /* ax, dx, cx, bx */
961 AREG, DREG, CREG, BREG,
962 /* si, di, bp, sp */
963 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
964 /* FP registers */
965 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
966 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
967 /* arg pointer */
968 NON_Q_REGS,
969 /* flags, fpsr, dirflag, frame */
970 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
971 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
972 SSE_REGS, SSE_REGS,
973 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
974 MMX_REGS, MMX_REGS,
975 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
976 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
977 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
978 SSE_REGS, SSE_REGS,
979};
980
981/* The "default" register map used in 32bit mode. */
982
983int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
984{
985 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
986 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
987 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
988 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
989 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
990 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
991 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
992};
993
994static int const x86_64_int_parameter_registers[6] =
995{
996 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
997 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
998};
999
1000static int const x86_64_int_return_registers[4] =
1001{
1002  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1003};
1004
1005/* The "default" register map used in 64bit mode. */
1006int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1007{
1008 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1009 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1010 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
1011 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1012 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1013 8,9,10,11,12,13,14,15, /* extended integer registers */
1014 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1015};
1016
1017/* Define the register numbers to be used in Dwarf debugging information.
1018 The SVR4 reference port C compiler uses the following register numbers
1019 in its Dwarf output code:
1020 0 for %eax (gcc regno = 0)
1021 1 for %ecx (gcc regno = 2)
1022 2 for %edx (gcc regno = 1)
1023 3 for %ebx (gcc regno = 3)
1024 4 for %esp (gcc regno = 7)
1025 5 for %ebp (gcc regno = 6)
1026 6 for %esi (gcc regno = 4)
1027 7 for %edi (gcc regno = 5)
1028 The following three DWARF register numbers are never generated by
1029 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1030 believes these numbers have these meanings.
1031 8 for %eip (no gcc equivalent)
1032 9 for %eflags (gcc regno = 17)
1033 10 for %trapno (no gcc equivalent)
1034 It is not at all clear how we should number the FP stack registers
1035 for the x86 architecture. If the version of SDB on x86/svr4 were
1036 a bit less brain dead with respect to floating-point then we would
1037 have a precedent to follow with respect to DWARF register numbers
1038 for x86 FP registers, but the SDB on x86/svr4 is so completely
1039 broken with respect to FP registers that it is hardly worth thinking
1040 of it as something to strive for compatibility with.
1041 The version of x86/svr4 SDB I have at the moment does (partially)
1042 seem to believe that DWARF register number 11 is associated with
1043 the x86 register %st(0), but that's about all. Higher DWARF
1044 register numbers don't seem to be associated with anything in
1045 particular, and even for DWARF regno 11, SDB only seems to under-
1046 stand that it should say that a variable lives in %st(0) (when
1047 asked via an `=' command) if we said it was in DWARF regno 11,
1048 but SDB still prints garbage when asked for the value of the
1049 variable in question (via a `/' command).
1050 (Also note that the labels SDB prints for various FP stack regs
1051 when doing an `x' command are all wrong.)
1052 Note that these problems generally don't affect the native SVR4
1053 C compiler because it doesn't allow the use of -O with -g and
1054 because when it is *not* optimizing, it allocates a memory
1055 location for each floating-point variable, and the memory
1056 location is what gets described in the DWARF AT_location
1057 attribute for the variable in question.
1058 Regardless of the severe mental illness of the x86/svr4 SDB, we
1059 do something sensible here and we use the following DWARF
1060 register numbers. Note that these are all stack-top-relative
1061 numbers.
1062 11 for %st(0) (gcc regno = 8)
1063 12 for %st(1) (gcc regno = 9)
1064 13 for %st(2) (gcc regno = 10)
1065 14 for %st(3) (gcc regno = 11)
1066 15 for %st(4) (gcc regno = 12)
1067 16 for %st(5) (gcc regno = 13)
1068 17 for %st(6) (gcc regno = 14)
1069 18 for %st(7) (gcc regno = 15)
1070*/
1071int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1072{
1073 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1074 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1075 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
1076 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1077 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1078 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1079 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1080};
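/* Reading the table above (illustration): gcc regno 6 is %ebp, and
   svr4_dbx_register_map[6] == 5, matching the "5 for %ebp" rule listed in the
   comment; likewise regno 7 (%esp) maps to DWARF register 4. */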
1081
1082/* Test and compare insns in i386.md store the information needed to
1083 generate branch and scc insns here. */
1084
1085rtx ix86_compare_op0 = NULL_RTX;
1086rtx ix86_compare_op1 = NULL_RTX;
1087rtx ix86_compare_emitted = NULL_RTX;
1088
1089/* Size of the register save area. */
1090#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
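/* Worked example, assuming the usual 64-bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8 (all defined elsewhere):
   6 * 8 + 8 * 16 == 176 bytes of register save area. */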
1091
1092/* Define the structure for the machine field in struct function. */
1093
1094struct stack_local_entry GTY(())
1095{
1096 unsigned short mode;
1097 unsigned short n;
1098 rtx rtl;
1099 struct stack_local_entry *next;
1100};
1101
1102/* Structure describing stack frame layout.
1103 Stack grows downward:
1104
1105 [arguments]
1106 <- ARG_POINTER
1107 saved pc
1108
1109 saved frame pointer if frame_pointer_needed
1110 <- HARD_FRAME_POINTER
1111 [saved regs]
1112
1113 [padding1] \
1114 )
1115 [va_arg registers] (
1116 > to_allocate <- FRAME_POINTER
1117 [frame] (
1118 )
1119 [padding2] /
1120 */
1121struct ix86_frame
1122{
1123 int nregs;
1124 int padding1;
1125 int va_arg_size;
1126 HOST_WIDE_INT frame;
1127 int padding2;
1128 int outgoing_arguments_size;
1129 int red_zone_size;
1130
1131 HOST_WIDE_INT to_allocate;
1132 /* The offsets relative to ARG_POINTER. */
1133 HOST_WIDE_INT frame_pointer_offset;
1134 HOST_WIDE_INT hard_frame_pointer_offset;
1135 HOST_WIDE_INT stack_pointer_offset;
1136
1137 /* When save_regs_using_mov is set, emit prologue using
1138 move instead of push instructions. */
1139 bool save_regs_using_mov;
1140};
1141
1142/* Code model option. */
1143enum cmodel ix86_cmodel;
1144/* Asm dialect. */
1145enum asm_dialect ix86_asm_dialect = ASM_ATT;
1146/* TLS dialects. */
1147enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1148
1149/* Which unit we are generating floating point math for. */
1150enum fpmath_unit ix86_fpmath;
1151
1152/* Which CPU we are scheduling for. */
1153enum processor_type ix86_tune;
1154/* Which instruction set architecture to use. */
1155enum processor_type ix86_arch;
1156
1157/* true if sse prefetch instruction is not NOOP. */
1158int x86_prefetch_sse;
1159
1093
1094/* In case the average insn count for a single function invocation is
1095   lower than this constant, emit fast (but longer) prologue and
1096   epilogue code. */
1097#define FAST_PROLOGUE_INSN_COUNT 20
1098
1099/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1100static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1101static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1102static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1103
1104/* Array of the smallest class containing reg number REGNO, indexed by
1105 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1106
1107enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1108{
1109 /* ax, dx, cx, bx */
1110 AREG, DREG, CREG, BREG,
1111 /* si, di, bp, sp */
1112 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1113 /* FP registers */
1114 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1115 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1116 /* arg pointer */
1117 NON_Q_REGS,
1118 /* flags, fpsr, dirflag, frame */
1119 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1120 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1121 SSE_REGS, SSE_REGS,
1122 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1123 MMX_REGS, MMX_REGS,
1124 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1125 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1126 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1127 SSE_REGS, SSE_REGS,
1128};
1129
1130/* The "default" register map used in 32bit mode. */
1131
1132int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1133{
1134 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1135 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1136 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
1137 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1138 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1139 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1140 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1141};
1142
1143static int const x86_64_int_parameter_registers[6] =
1144{
1145 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1146 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1147};
1148
1149static int const x86_64_int_return_registers[4] =
1150{
1151  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1152};
1153
1154/* The "default" register map used in 64bit mode. */
1155int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1156{
1157 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1158 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1159 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
1160 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1161 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1162 8,9,10,11,12,13,14,15, /* extended integer registers */
1163 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1164};
1165
1166/* Define the register numbers to be used in Dwarf debugging information.
1167 The SVR4 reference port C compiler uses the following register numbers
1168 in its Dwarf output code:
1169 0 for %eax (gcc regno = 0)
1170 1 for %ecx (gcc regno = 2)
1171 2 for %edx (gcc regno = 1)
1172 3 for %ebx (gcc regno = 3)
1173 4 for %esp (gcc regno = 7)
1174 5 for %ebp (gcc regno = 6)
1175 6 for %esi (gcc regno = 4)
1176 7 for %edi (gcc regno = 5)
1177 The following three DWARF register numbers are never generated by
1178 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1179 believes these numbers have these meanings.
1180 8 for %eip (no gcc equivalent)
1181 9 for %eflags (gcc regno = 17)
1182 10 for %trapno (no gcc equivalent)
1183 It is not at all clear how we should number the FP stack registers
1184 for the x86 architecture. If the version of SDB on x86/svr4 were
1185 a bit less brain dead with respect to floating-point then we would
1186 have a precedent to follow with respect to DWARF register numbers
1187 for x86 FP registers, but the SDB on x86/svr4 is so completely
1188 broken with respect to FP registers that it is hardly worth thinking
1189 of it as something to strive for compatibility with.
1190 The version of x86/svr4 SDB I have at the moment does (partially)
1191 seem to believe that DWARF register number 11 is associated with
1192 the x86 register %st(0), but that's about all. Higher DWARF
1193 register numbers don't seem to be associated with anything in
1194 particular, and even for DWARF regno 11, SDB only seems to under-
1195 stand that it should say that a variable lives in %st(0) (when
1196 asked via an `=' command) if we said it was in DWARF regno 11,
1197 but SDB still prints garbage when asked for the value of the
1198 variable in question (via a `/' command).
1199 (Also note that the labels SDB prints for various FP stack regs
1200 when doing an `x' command are all wrong.)
1201 Note that these problems generally don't affect the native SVR4
1202 C compiler because it doesn't allow the use of -O with -g and
1203 because when it is *not* optimizing, it allocates a memory
1204 location for each floating-point variable, and the memory
1205 location is what gets described in the DWARF AT_location
1206 attribute for the variable in question.
1207 Regardless of the severe mental illness of the x86/svr4 SDB, we
1208 do something sensible here and we use the following DWARF
1209 register numbers. Note that these are all stack-top-relative
1210 numbers.
1211 11 for %st(0) (gcc regno = 8)
1212 12 for %st(1) (gcc regno = 9)
1213 13 for %st(2) (gcc regno = 10)
1214 14 for %st(3) (gcc regno = 11)
1215 15 for %st(4) (gcc regno = 12)
1216 16 for %st(5) (gcc regno = 13)
1217 17 for %st(6) (gcc regno = 14)
1218 18 for %st(7) (gcc regno = 15)
1219*/
1220int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1221{
1222 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1223 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1224 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
1225 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1226 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1227 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1228 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1229};
1230
1231/* Test and compare insns in i386.md store the information needed to
1232 generate branch and scc insns here. */
1233
1234rtx ix86_compare_op0 = NULL_RTX;
1235rtx ix86_compare_op1 = NULL_RTX;
1236rtx ix86_compare_emitted = NULL_RTX;
1237
1238/* Size of the register save area. */
1239#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1240
1241/* Define the structure for the machine field in struct function. */
1242
1243struct stack_local_entry GTY(())
1244{
1245 unsigned short mode;
1246 unsigned short n;
1247 rtx rtl;
1248 struct stack_local_entry *next;
1249};
1250
1251/* Structure describing stack frame layout.
1252 Stack grows downward:
1253
1254 [arguments]
1255 <- ARG_POINTER
1256 saved pc
1257
1258 saved frame pointer if frame_pointer_needed
1259 <- HARD_FRAME_POINTER
1260 [saved regs]
1261
1262 [padding1] \
1263 )
1264 [va_arg registers] (
1265 > to_allocate <- FRAME_POINTER
1266 [frame] (
1267 )
1268 [padding2] /
1269 */
1270struct ix86_frame
1271{
1272 int nregs;
1273 int padding1;
1274 int va_arg_size;
1275 HOST_WIDE_INT frame;
1276 int padding2;
1277 int outgoing_arguments_size;
1278 int red_zone_size;
1279
1280 HOST_WIDE_INT to_allocate;
1281 /* The offsets relative to ARG_POINTER. */
1282 HOST_WIDE_INT frame_pointer_offset;
1283 HOST_WIDE_INT hard_frame_pointer_offset;
1284 HOST_WIDE_INT stack_pointer_offset;
1285
1286 /* When save_regs_using_mov is set, emit prologue using
1287 move instead of push instructions. */
1288 bool save_regs_using_mov;
1289};
1290
1291/* Code model option. */
1292enum cmodel ix86_cmodel;
1293/* Asm dialect. */
1294enum asm_dialect ix86_asm_dialect = ASM_ATT;
1295/* TLS dialects. */
1296enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1297
1298/* Which unit we are generating floating point math for. */
1299enum fpmath_unit ix86_fpmath;
1300
1302/* Which CPU we are scheduling for. */
1302enum processor_type ix86_tune;
1303/* Which instruction set architecture to use. */
1304enum processor_type ix86_arch;
1305
1306/* true if sse prefetch instruction is not NOOP. */
1307int x86_prefetch_sse;
1308
1309/* true if cmpxchg16b is supported. */
1310int x86_cmpxchg16b;
1311
1160/* ix86_regparm_string as a number */
1161static int ix86_regparm;
1162
1163/* -mstackrealign option */
1164extern int ix86_force_align_arg_pointer;
1165static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1166
1167/* Preferred alignment for stack boundary in bits. */
1168unsigned int ix86_preferred_stack_boundary;
1169
1170/* Values 1-5: see jump.c */
1171int ix86_branch_cost;
1172
1173/* Variables which are this size or smaller are put in the data/bss
1174 or ldata/lbss sections. */
1175
1176int ix86_section_threshold = 65536;
1177
1178/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1179char internal_label_prefix[16];
1180int internal_label_prefix_len;
1181
1182static bool ix86_handle_option (size_t, const char *, int);
1183static void output_pic_addr_const (FILE *, rtx, int);
1184static void put_condition_code (enum rtx_code, enum machine_mode,
1185 int, int, FILE *);
1186static const char *get_some_local_dynamic_name (void);
1187static int get_some_local_dynamic_name_1 (rtx *, void *);
1188static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1189static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1190 rtx *);
1191static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1192static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1193 enum machine_mode);
1194static rtx get_thread_pointer (int);
1195static rtx legitimize_tls_address (rtx, enum tls_model, int);
1196static void get_pc_thunk_name (char [32], unsigned int);
1197static rtx gen_push (rtx);
1198static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1199static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1200static struct machine_function * ix86_init_machine_status (void);
1201static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1202static int ix86_nsaved_regs (void);
1203static void ix86_emit_save_regs (void);
1204static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1205static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1206static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1207static HOST_WIDE_INT ix86_GOT_alias_set (void);
1208static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1209static rtx ix86_expand_aligntest (rtx, int);
1210static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1211static int ix86_issue_rate (void);
1212static int ix86_adjust_cost (rtx, rtx, rtx, int);
1213static int ia32_multipass_dfa_lookahead (void);
1214static void ix86_init_mmx_sse_builtins (void);
1215static rtx x86_this_parameter (tree);
1216static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1217 HOST_WIDE_INT, tree);
1218static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1219static void x86_file_start (void);
1220static void ix86_reorg (void);
1221static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1222static tree ix86_build_builtin_va_list (void);
1223static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1224 tree, int *, int);
1225static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1226static bool ix86_scalar_mode_supported_p (enum machine_mode);
1227static bool ix86_vector_mode_supported_p (enum machine_mode);
1228
1229static int ix86_address_cost (rtx);
1230static bool ix86_cannot_force_const_mem (rtx);
1231static rtx ix86_delegitimize_address (rtx);
1232
1233static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1234
1235struct builtin_description;
1236static rtx ix86_expand_sse_comi (const struct builtin_description *,
1237 tree, rtx);
1238static rtx ix86_expand_sse_compare (const struct builtin_description *,
1239 tree, rtx);
1240static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1241static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1242static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1243static rtx ix86_expand_store_builtin (enum insn_code, tree);
1244static rtx safe_vector_operand (rtx, enum machine_mode);
1245static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1246static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1247static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1248static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1249static int ix86_fp_comparison_cost (enum rtx_code code);
1250static unsigned int ix86_select_alt_pic_regnum (void);
1251static int ix86_save_reg (unsigned int, int);
1252static void ix86_compute_frame_layout (struct ix86_frame *);
1253static int ix86_comp_type_attributes (tree, tree);
1254static int ix86_function_regparm (tree, tree);
1255const struct attribute_spec ix86_attribute_table[];
1256static bool ix86_function_ok_for_sibcall (tree, tree);
1257static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1258static int ix86_value_regno (enum machine_mode, tree, tree);
1259static bool contains_128bit_aligned_vector_p (tree);
1260static rtx ix86_struct_value_rtx (tree, int);
1261static bool ix86_ms_bitfield_layout_p (tree);
1262static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1263static int extended_reg_mentioned_1 (rtx *, void *);
1264static bool ix86_rtx_costs (rtx, int, int, int *);
1265static int min_insn_size (rtx);
1266static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1267static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1268static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1269 tree, bool);
1270static void ix86_init_builtins (void);
1271static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1272static const char *ix86_mangle_fundamental_type (tree);
1273static tree ix86_stack_protect_fail (void);
1274static rtx ix86_internal_arg_pointer (void);
1275static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1276
1277/* This function is only used on Solaris. */
1278static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1279 ATTRIBUTE_UNUSED;
1280
1281/* Register class used for passing a given 64bit part of the argument.
1282   These represent classes as documented by the psABI, with the exception
1283   of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1284   uses an SF or DFmode move instead of DImode to avoid reformatting penalties.
1285
1286   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1287   whenever possible (the upper half does contain padding).
1288 */
1289enum x86_64_reg_class
1290 {
1291 X86_64_NO_CLASS,
1292 X86_64_INTEGER_CLASS,
1293 X86_64_INTEGERSI_CLASS,
1294 X86_64_SSE_CLASS,
1295 X86_64_SSESF_CLASS,
1296 X86_64_SSEDF_CLASS,
1297 X86_64_SSEUP_CLASS,
1298 X86_64_X87_CLASS,
1299 X86_64_X87UP_CLASS,
1300 X86_64_COMPLEX_X87_CLASS,
1301 X86_64_MEMORY_CLASS
1302 };
1303static const char * const x86_64_reg_class_name[] = {
1304 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1305 "sseup", "x87", "x87up", "cplx87", "no"
1306};
1307
1308#define MAX_CLASSES 4
1309
1310/* Table of constants used by fldpi, fldln2, etc.... */
1311static REAL_VALUE_TYPE ext_80387_constants_table [5];
1312static bool ext_80387_constants_init = 0;
1313static void init_ext_80387_constants (void);
1314static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1315static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1316static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1317static section *x86_64_elf_select_section (tree decl, int reloc,
1318 unsigned HOST_WIDE_INT align)
1319 ATTRIBUTE_UNUSED;
1320
1321/* Initialize the GCC target structure. */
1322#undef TARGET_ATTRIBUTE_TABLE
1323#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1324#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1325# undef TARGET_MERGE_DECL_ATTRIBUTES
1326# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1327#endif
1328
1329#undef TARGET_COMP_TYPE_ATTRIBUTES
1330#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1331
1332#undef TARGET_INIT_BUILTINS
1333#define TARGET_INIT_BUILTINS ix86_init_builtins
1334#undef TARGET_EXPAND_BUILTIN
1335#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1336
1337#undef TARGET_ASM_FUNCTION_EPILOGUE
1338#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1339
1340#undef TARGET_ENCODE_SECTION_INFO
1341#ifndef SUBTARGET_ENCODE_SECTION_INFO
1342#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1343#else
1344#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1345#endif
1346
1347#undef TARGET_ASM_OPEN_PAREN
1348#define TARGET_ASM_OPEN_PAREN ""
1349#undef TARGET_ASM_CLOSE_PAREN
1350#define TARGET_ASM_CLOSE_PAREN ""
1351
1352#undef TARGET_ASM_ALIGNED_HI_OP
1353#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1354#undef TARGET_ASM_ALIGNED_SI_OP
1355#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1356#ifdef ASM_QUAD
1357#undef TARGET_ASM_ALIGNED_DI_OP
1358#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1359#endif
1360
1361#undef TARGET_ASM_UNALIGNED_HI_OP
1362#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1363#undef TARGET_ASM_UNALIGNED_SI_OP
1364#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1365#undef TARGET_ASM_UNALIGNED_DI_OP
1366#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1367
1368#undef TARGET_SCHED_ADJUST_COST
1369#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1370#undef TARGET_SCHED_ISSUE_RATE
1371#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1372#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1373#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1374 ia32_multipass_dfa_lookahead
1375
1376#undef TARGET_FUNCTION_OK_FOR_SIBCALL
1377#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1378
1379#ifdef HAVE_AS_TLS
1380#undef TARGET_HAVE_TLS
1381#define TARGET_HAVE_TLS true
1382#endif
1383#undef TARGET_CANNOT_FORCE_CONST_MEM
1384#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1385#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1386#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1387
1388#undef TARGET_DELEGITIMIZE_ADDRESS
1389#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1390
1391#undef TARGET_MS_BITFIELD_LAYOUT_P
1392#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1393
1394#if TARGET_MACHO
1395#undef TARGET_BINDS_LOCAL_P
1396#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1397#endif
1398
1399#undef TARGET_ASM_OUTPUT_MI_THUNK
1400#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1401#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1402#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1403
1404#undef TARGET_ASM_FILE_START
1405#define TARGET_ASM_FILE_START x86_file_start
1406
1407#undef TARGET_DEFAULT_TARGET_FLAGS
1408#define TARGET_DEFAULT_TARGET_FLAGS \
1409 (TARGET_DEFAULT \
1410 | TARGET_64BIT_DEFAULT \
1411 | TARGET_SUBTARGET_DEFAULT \
1412 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1413
1414#undef TARGET_HANDLE_OPTION
1415#define TARGET_HANDLE_OPTION ix86_handle_option
1416
1417#undef TARGET_RTX_COSTS
1418#define TARGET_RTX_COSTS ix86_rtx_costs
1419#undef TARGET_ADDRESS_COST
1420#define TARGET_ADDRESS_COST ix86_address_cost
1421
1422#undef TARGET_FIXED_CONDITION_CODE_REGS
1423#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1424#undef TARGET_CC_MODES_COMPATIBLE
1425#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1426
1427#undef TARGET_MACHINE_DEPENDENT_REORG
1428#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1429
1430#undef TARGET_BUILD_BUILTIN_VA_LIST
1431#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1432
1433#undef TARGET_MD_ASM_CLOBBERS
1434#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1435
1436#undef TARGET_PROMOTE_PROTOTYPES
1437#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1438#undef TARGET_STRUCT_VALUE_RTX
1439#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1440#undef TARGET_SETUP_INCOMING_VARARGS
1441#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1442#undef TARGET_MUST_PASS_IN_STACK
1443#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1444#undef TARGET_PASS_BY_REFERENCE
1445#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1446#undef TARGET_INTERNAL_ARG_POINTER
1447#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1448#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1449#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1450
1451#undef TARGET_GIMPLIFY_VA_ARG_EXPR
1452#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1453
1454#undef TARGET_SCALAR_MODE_SUPPORTED_P
1455#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1456
1457#undef TARGET_VECTOR_MODE_SUPPORTED_P
1458#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1459
1460#ifdef HAVE_AS_TLS
1461#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1462#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1463#endif
1464
1465#ifdef SUBTARGET_INSERT_ATTRIBUTES
1466#undef TARGET_INSERT_ATTRIBUTES
1467#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1468#endif
1469
1470#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1471#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1472
1473#undef TARGET_STACK_PROTECT_FAIL
1474#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1475
1476#undef TARGET_FUNCTION_VALUE
1477#define TARGET_FUNCTION_VALUE ix86_function_value
1478
1479struct gcc_target targetm = TARGET_INITIALIZER;
1480
1481
1482/* The svr4 ABI for the i386 says that records and unions are returned
1483 in memory. */
1484#ifndef DEFAULT_PCC_STRUCT_RETURN
1485#define DEFAULT_PCC_STRUCT_RETURN 1
1486#endif
1487
1488/* Implement TARGET_HANDLE_OPTION. */
1489
1490static bool
1491ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1492{
1493 switch (code)
1494 {
1495 case OPT_m3dnow:
1496 if (!value)
1497 {
1498 target_flags &= ~MASK_3DNOW_A;
1499 target_flags_explicit |= MASK_3DNOW_A;
1500 }
1501 return true;
1502
1503 case OPT_mmmx:
1504 if (!value)
1505 {
1506 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1507 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1508 }
1509 return true;
1510
1511 case OPT_msse:
1512 if (!value)
1513 {
1312/* ix86_regparm_string as a number */
1313static int ix86_regparm;
1314
1315/* -mstackrealign option */
1316extern int ix86_force_align_arg_pointer;
1317static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1318
1319/* Preferred alignment for stack boundary in bits. */
1320unsigned int ix86_preferred_stack_boundary;
1321
1322/* Values 1-5: see jump.c */
1323int ix86_branch_cost;
1324
1325/* Variables which are this size or smaller are put in the data/bss
1326 or ldata/lbss sections. */
1327
1328int ix86_section_threshold = 65536;
1329
1330/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1331char internal_label_prefix[16];
1332int internal_label_prefix_len;
1333
1334static bool ix86_handle_option (size_t, const char *, int);
1335static void output_pic_addr_const (FILE *, rtx, int);
1336static void put_condition_code (enum rtx_code, enum machine_mode,
1337 int, int, FILE *);
1338static const char *get_some_local_dynamic_name (void);
1339static int get_some_local_dynamic_name_1 (rtx *, void *);
1340static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1341static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1342 rtx *);
1343static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1344static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1345 enum machine_mode);
1346static rtx get_thread_pointer (int);
1347static rtx legitimize_tls_address (rtx, enum tls_model, int);
1348static void get_pc_thunk_name (char [32], unsigned int);
1349static rtx gen_push (rtx);
1350static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1351static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1352static struct machine_function * ix86_init_machine_status (void);
1353static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1354static int ix86_nsaved_regs (void);
1355static void ix86_emit_save_regs (void);
1356static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1357static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1358static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1359static HOST_WIDE_INT ix86_GOT_alias_set (void);
1360static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1361static rtx ix86_expand_aligntest (rtx, int);
1362static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1363static int ix86_issue_rate (void);
1364static int ix86_adjust_cost (rtx, rtx, rtx, int);
1365static int ia32_multipass_dfa_lookahead (void);
1366static void ix86_init_mmx_sse_builtins (void);
1367static rtx x86_this_parameter (tree);
1368static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1369 HOST_WIDE_INT, tree);
1370static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1371static void x86_file_start (void);
1372static void ix86_reorg (void);
1373static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1374static tree ix86_build_builtin_va_list (void);
1375static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1376 tree, int *, int);
1377static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1378static bool ix86_scalar_mode_supported_p (enum machine_mode);
1379static bool ix86_vector_mode_supported_p (enum machine_mode);
1380
1381static int ix86_address_cost (rtx);
1382static bool ix86_cannot_force_const_mem (rtx);
1383static rtx ix86_delegitimize_address (rtx);
1384
1385static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1386
1387struct builtin_description;
1388static rtx ix86_expand_sse_comi (const struct builtin_description *,
1389 tree, rtx);
1390static rtx ix86_expand_sse_compare (const struct builtin_description *,
1391 tree, rtx);
1392static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1393static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1394static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1395static rtx ix86_expand_store_builtin (enum insn_code, tree);
1396static rtx safe_vector_operand (rtx, enum machine_mode);
1397static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1398static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1399static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1400static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1401static int ix86_fp_comparison_cost (enum rtx_code code);
1402static unsigned int ix86_select_alt_pic_regnum (void);
1403static int ix86_save_reg (unsigned int, int);
1404static void ix86_compute_frame_layout (struct ix86_frame *);
1405static int ix86_comp_type_attributes (tree, tree);
1406static int ix86_function_regparm (tree, tree);
1407const struct attribute_spec ix86_attribute_table[];
1408static bool ix86_function_ok_for_sibcall (tree, tree);
1409static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1410static int ix86_value_regno (enum machine_mode, tree, tree);
1411static bool contains_128bit_aligned_vector_p (tree);
1412static rtx ix86_struct_value_rtx (tree, int);
1413static bool ix86_ms_bitfield_layout_p (tree);
1414static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1415static int extended_reg_mentioned_1 (rtx *, void *);
1416static bool ix86_rtx_costs (rtx, int, int, int *);
1417static int min_insn_size (rtx);
1418static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1419static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1420static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1421 tree, bool);
1422static void ix86_init_builtins (void);
1423static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1424static const char *ix86_mangle_fundamental_type (tree);
1425static tree ix86_stack_protect_fail (void);
1426static rtx ix86_internal_arg_pointer (void);
1427static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1428
1429/* This function is only used on Solaris. */
1430static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1431 ATTRIBUTE_UNUSED;
1432
1433/* Register class used for passing a given 64-bit part of an argument.
1434   These represent the classes documented by the PS ABI, with the exception
1435   of the SSESF and SSEDF classes, which are basically the SSE class; gcc
1436   just uses an SF or DFmode move instead of DImode to avoid reformatting penalties.
1437
1438   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1439   whenever possible (the upper half contains only padding).
1440 */
1441enum x86_64_reg_class
1442 {
1443 X86_64_NO_CLASS,
1444 X86_64_INTEGER_CLASS,
1445 X86_64_INTEGERSI_CLASS,
1446 X86_64_SSE_CLASS,
1447 X86_64_SSESF_CLASS,
1448 X86_64_SSEDF_CLASS,
1449 X86_64_SSEUP_CLASS,
1450 X86_64_X87_CLASS,
1451 X86_64_X87UP_CLASS,
1452 X86_64_COMPLEX_X87_CLASS,
1453 X86_64_MEMORY_CLASS
1454 };
1455static const char * const x86_64_reg_class_name[] = {
1456 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1457 "sseup", "x87", "x87up", "cplx87", "no"
1458};
1459
1460#define MAX_CLASSES 4
1461
1462/* Table of constants used by fldpi, fldln2, etc.... */
1463static REAL_VALUE_TYPE ext_80387_constants_table [5];
1464static bool ext_80387_constants_init = 0;
1465static void init_ext_80387_constants (void);
1466static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1467static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1468static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1469static section *x86_64_elf_select_section (tree decl, int reloc,
1470 unsigned HOST_WIDE_INT align)
1471 ATTRIBUTE_UNUSED;
1472
1473/* Initialize the GCC target structure. */
1474#undef TARGET_ATTRIBUTE_TABLE
1475#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1476#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1477# undef TARGET_MERGE_DECL_ATTRIBUTES
1478# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1479#endif
1480
1481#undef TARGET_COMP_TYPE_ATTRIBUTES
1482#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1483
1484#undef TARGET_INIT_BUILTINS
1485#define TARGET_INIT_BUILTINS ix86_init_builtins
1486#undef TARGET_EXPAND_BUILTIN
1487#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1488
1489#undef TARGET_ASM_FUNCTION_EPILOGUE
1490#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1491
1492#undef TARGET_ENCODE_SECTION_INFO
1493#ifndef SUBTARGET_ENCODE_SECTION_INFO
1494#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1495#else
1496#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1497#endif
1498
1499#undef TARGET_ASM_OPEN_PAREN
1500#define TARGET_ASM_OPEN_PAREN ""
1501#undef TARGET_ASM_CLOSE_PAREN
1502#define TARGET_ASM_CLOSE_PAREN ""
1503
1504#undef TARGET_ASM_ALIGNED_HI_OP
1505#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1506#undef TARGET_ASM_ALIGNED_SI_OP
1507#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1508#ifdef ASM_QUAD
1509#undef TARGET_ASM_ALIGNED_DI_OP
1510#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1511#endif
1512
1513#undef TARGET_ASM_UNALIGNED_HI_OP
1514#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1515#undef TARGET_ASM_UNALIGNED_SI_OP
1516#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1517#undef TARGET_ASM_UNALIGNED_DI_OP
1518#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1519
1520#undef TARGET_SCHED_ADJUST_COST
1521#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1522#undef TARGET_SCHED_ISSUE_RATE
1523#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1524#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1525#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1526 ia32_multipass_dfa_lookahead
1527
1528#undef TARGET_FUNCTION_OK_FOR_SIBCALL
1529#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1530
1531#ifdef HAVE_AS_TLS
1532#undef TARGET_HAVE_TLS
1533#define TARGET_HAVE_TLS true
1534#endif
1535#undef TARGET_CANNOT_FORCE_CONST_MEM
1536#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1537#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1538#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1539
1540#undef TARGET_DELEGITIMIZE_ADDRESS
1541#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1542
1543#undef TARGET_MS_BITFIELD_LAYOUT_P
1544#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1545
1546#if TARGET_MACHO
1547#undef TARGET_BINDS_LOCAL_P
1548#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1549#endif
1550
1551#undef TARGET_ASM_OUTPUT_MI_THUNK
1552#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1553#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1554#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1555
1556#undef TARGET_ASM_FILE_START
1557#define TARGET_ASM_FILE_START x86_file_start
1558
1559#undef TARGET_DEFAULT_TARGET_FLAGS
1560#define TARGET_DEFAULT_TARGET_FLAGS \
1561 (TARGET_DEFAULT \
1562 | TARGET_64BIT_DEFAULT \
1563 | TARGET_SUBTARGET_DEFAULT \
1564 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1565
1566#undef TARGET_HANDLE_OPTION
1567#define TARGET_HANDLE_OPTION ix86_handle_option
1568
1569#undef TARGET_RTX_COSTS
1570#define TARGET_RTX_COSTS ix86_rtx_costs
1571#undef TARGET_ADDRESS_COST
1572#define TARGET_ADDRESS_COST ix86_address_cost
1573
1574#undef TARGET_FIXED_CONDITION_CODE_REGS
1575#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1576#undef TARGET_CC_MODES_COMPATIBLE
1577#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1578
1579#undef TARGET_MACHINE_DEPENDENT_REORG
1580#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1581
1582#undef TARGET_BUILD_BUILTIN_VA_LIST
1583#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1584
1585#undef TARGET_MD_ASM_CLOBBERS
1586#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1587
1588#undef TARGET_PROMOTE_PROTOTYPES
1589#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1590#undef TARGET_STRUCT_VALUE_RTX
1591#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1592#undef TARGET_SETUP_INCOMING_VARARGS
1593#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1594#undef TARGET_MUST_PASS_IN_STACK
1595#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1596#undef TARGET_PASS_BY_REFERENCE
1597#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1598#undef TARGET_INTERNAL_ARG_POINTER
1599#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1600#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1601#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1602
1603#undef TARGET_GIMPLIFY_VA_ARG_EXPR
1604#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1605
1606#undef TARGET_SCALAR_MODE_SUPPORTED_P
1607#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1608
1609#undef TARGET_VECTOR_MODE_SUPPORTED_P
1610#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1611
1612#ifdef HAVE_AS_TLS
1613#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1614#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1615#endif
1616
1617#ifdef SUBTARGET_INSERT_ATTRIBUTES
1618#undef TARGET_INSERT_ATTRIBUTES
1619#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1620#endif
1621
1622#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1623#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1624
1625#undef TARGET_STACK_PROTECT_FAIL
1626#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1627
1628#undef TARGET_FUNCTION_VALUE
1629#define TARGET_FUNCTION_VALUE ix86_function_value
1630
1631struct gcc_target targetm = TARGET_INITIALIZER;
1632
1633
1634/* The svr4 ABI for the i386 says that records and unions are returned
1635 in memory. */
1636#ifndef DEFAULT_PCC_STRUCT_RETURN
1637#define DEFAULT_PCC_STRUCT_RETURN 1
1638#endif
1639
1640/* Implement TARGET_HANDLE_OPTION. */
1641
1642static bool
1643ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1644{
1645 switch (code)
1646 {
1647 case OPT_m3dnow:
1648 if (!value)
1649 {
1650 target_flags &= ~MASK_3DNOW_A;
1651 target_flags_explicit |= MASK_3DNOW_A;
1652 }
1653 return true;
1654
1655 case OPT_mmmx:
1656 if (!value)
1657 {
1658 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1659 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1660 }
1661 return true;
1662
1663 case OPT_msse:
1664 if (!value)
1665 {
1514 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3);
1515 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSSE3;
1666 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1667 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1516 }
1517 return true;
1518
1519 case OPT_msse2:
1520 if (!value)
1521 {
1668 }
1669 return true;
1670
1671 case OPT_msse2:
1672 if (!value)
1673 {
1522 target_flags &= ~(MASK_SSE3 | MASK_SSSE3);
1523 target_flags_explicit |= MASK_SSE3 | MASK_SSSE3;
1674 target_flags &= ~(MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1675 target_flags_explicit |= MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1524 }
1525 return true;
1526
1527 case OPT_msse3:
1528 if (!value)
1529 {
1676 }
1677 return true;
1678
1679 case OPT_msse3:
1680 if (!value)
1681 {
1530 target_flags &= ~MASK_SSSE3;
1531 target_flags_explicit |= MASK_SSSE3;
1682 target_flags &= ~(MASK_SSSE3 | MASK_SSE4A);
1683 target_flags_explicit |= MASK_SSSE3 | MASK_SSE4A;
1532 }
1533 return true;
1534
1535 default:
1536 return true;
1537 }
1538}
1539
1540/* Sometimes certain combinations of command options do not make
1541 sense on a particular target machine. You can define a macro
1542 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1543 defined, is executed once just after all the command options have
1544 been parsed.
1545
1546 Don't use this macro to turn on various extra optimizations for
1547 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1548
1549void
1550override_options (void)
1551{
1552 int i;
1553 int ix86_tune_defaulted = 0;
1554
1555 /* Comes from final.c -- no real reason to change it. */
1556#define MAX_CODE_ALIGN 16
1557
1558 static struct ptt
1559 {
1560 const struct processor_costs *cost; /* Processor costs */
1561 const int target_enable; /* Target flags to enable. */
1562 const int target_disable; /* Target flags to disable. */
1563 const int align_loop; /* Default alignments. */
1564 const int align_loop_max_skip;
1565 const int align_jump;
1566 const int align_jump_max_skip;
1567 const int align_func;
1568 }
1569 const processor_target_table[PROCESSOR_max] =
1570 {
1571 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1572 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1573 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1574 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1575 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1576 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1577 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1578 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1579 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1580 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1581 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1582 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1684 }
1685 return true;
1686
1687 default:
1688 return true;
1689 }
1690}
1691
1692/* Sometimes certain combinations of command options do not make
1693 sense on a particular target machine. You can define a macro
1694 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1695 defined, is executed once just after all the command options have
1696 been parsed.
1697
1698 Don't use this macro to turn on various extra optimizations for
1699 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1700
1701void
1702override_options (void)
1703{
1704 int i;
1705 int ix86_tune_defaulted = 0;
1706
1707 /* Comes from final.c -- no real reason to change it. */
1708#define MAX_CODE_ALIGN 16
1709
1710 static struct ptt
1711 {
1712 const struct processor_costs *cost; /* Processor costs */
1713 const int target_enable; /* Target flags to enable. */
1714 const int target_disable; /* Target flags to disable. */
1715 const int align_loop; /* Default alignments. */
1716 const int align_loop_max_skip;
1717 const int align_jump;
1718 const int align_jump_max_skip;
1719 const int align_func;
1720 }
1721 const processor_target_table[PROCESSOR_max] =
1722 {
1723 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1724 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1725 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1726 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1727 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1728 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1729 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1730 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1731 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1732 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1733 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1734 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1583 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1735 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1736 {&amdfam10_cost, 0, 0, 32, 24, 32, 7, 32}
1584 };
1585
1586 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1587 static struct pta
1588 {
1589 const char *const name; /* processor name or nickname. */
1590 const enum processor_type processor;
1591 const enum pta_flags
1592 {
1593 PTA_SSE = 1,
1594 PTA_SSE2 = 2,
1595 PTA_SSE3 = 4,
1596 PTA_MMX = 8,
1597 PTA_PREFETCH_SSE = 16,
1598 PTA_3DNOW = 32,
1599 PTA_3DNOW_A = 64,
1600 PTA_64BIT = 128,
1737 };
1738
1739 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1740 static struct pta
1741 {
1742 const char *const name; /* processor name or nickname. */
1743 const enum processor_type processor;
1744 const enum pta_flags
1745 {
1746 PTA_SSE = 1,
1747 PTA_SSE2 = 2,
1748 PTA_SSE3 = 4,
1749 PTA_MMX = 8,
1750 PTA_PREFETCH_SSE = 16,
1751 PTA_3DNOW = 32,
1752 PTA_3DNOW_A = 64,
1753 PTA_64BIT = 128,
1601 PTA_SSSE3 = 256
1754 PTA_SSSE3 = 256,
1755 PTA_CX16 = 512,
1756 PTA_POPCNT = 1024,
1757 PTA_ABM = 2048,
1758 PTA_SSE4A = 4096
1602 } flags;
1603 }
1604 const processor_alias_table[] =
1605 {
1606 {"i386", PROCESSOR_I386, 0},
1607 {"i486", PROCESSOR_I486, 0},
1608 {"i586", PROCESSOR_PENTIUM, 0},
1609 {"pentium", PROCESSOR_PENTIUM, 0},
1610 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1611 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1612 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1613 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1614 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1615 {"i686", PROCESSOR_PENTIUMPRO, 0},
1616 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1617 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1618 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1619 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1620 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1621 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1622 | PTA_MMX | PTA_PREFETCH_SSE},
1623 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1624 | PTA_MMX | PTA_PREFETCH_SSE},
1625 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1626 | PTA_MMX | PTA_PREFETCH_SSE},
1627 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1759 } flags;
1760 }
1761 const processor_alias_table[] =
1762 {
1763 {"i386", PROCESSOR_I386, 0},
1764 {"i486", PROCESSOR_I486, 0},
1765 {"i586", PROCESSOR_PENTIUM, 0},
1766 {"pentium", PROCESSOR_PENTIUM, 0},
1767 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1768 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1769 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1770 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1771 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1772 {"i686", PROCESSOR_PENTIUMPRO, 0},
1773 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1774 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1775 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1776 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1777 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1778 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1779 | PTA_MMX | PTA_PREFETCH_SSE},
1780 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1781 | PTA_MMX | PTA_PREFETCH_SSE},
1782 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1783 | PTA_MMX | PTA_PREFETCH_SSE},
1784 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1628 | PTA_MMX | PTA_PREFETCH_SSE},
1785 | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1629 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1630 | PTA_64BIT | PTA_MMX
1786 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1787 | PTA_64BIT | PTA_MMX
1631 | PTA_PREFETCH_SSE},
1788 | PTA_PREFETCH_SSE | PTA_CX16},
1632 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1633 | PTA_3DNOW_A},
1634 {"k6", PROCESSOR_K6, PTA_MMX},
1635 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1636 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1637 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1638 | PTA_3DNOW_A},
1639 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1640 | PTA_3DNOW | PTA_3DNOW_A},
1641 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1642 | PTA_3DNOW_A | PTA_SSE},
1643 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1644 | PTA_3DNOW_A | PTA_SSE},
1645 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1646 | PTA_3DNOW_A | PTA_SSE},
1647 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1648 | PTA_SSE | PTA_SSE2 },
1649 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1650 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1651 {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1652 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1653 | PTA_SSE3 },
1654 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1655 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1656 {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1657 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1658 | PTA_SSE3 },
1659 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1660 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1661 {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1662 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1663 | PTA_SSE3 },
1664 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1665 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1789 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1790 | PTA_3DNOW_A},
1791 {"k6", PROCESSOR_K6, PTA_MMX},
1792 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1793 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1794 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1795 | PTA_3DNOW_A},
1796 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1797 | PTA_3DNOW | PTA_3DNOW_A},
1798 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1799 | PTA_3DNOW_A | PTA_SSE},
1800 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1801 | PTA_3DNOW_A | PTA_SSE},
1802 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1803 | PTA_3DNOW_A | PTA_SSE},
1804 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1805 | PTA_SSE | PTA_SSE2 },
1806 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1807 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1808 {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1809 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1810 | PTA_SSE3 },
1811 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1812 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1813 {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1814 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1815 | PTA_SSE3 },
1816 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1817 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1818 {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1819 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1820 | PTA_SSE3 },
1821 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1822 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1823 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1824 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1825 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1826 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1827 {"barcelona", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1828 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1829 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1830 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1666 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1667 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1668 };
1669
1670 int const pta_size = ARRAY_SIZE (processor_alias_table);
1671
1672#ifdef SUBTARGET_OVERRIDE_OPTIONS
1673 SUBTARGET_OVERRIDE_OPTIONS;
1674#endif
1675
1676#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1677 SUBSUBTARGET_OVERRIDE_OPTIONS;
1678#endif
1679
1680 /* -fPIC is the default for x86_64. */
1681 if (TARGET_MACHO && TARGET_64BIT)
1682 flag_pic = 2;
1683
1684 /* Set the default values for switches whose default depends on TARGET_64BIT
1685 in case they weren't overwritten by command line options. */
1686 if (TARGET_64BIT)
1687 {
1688 /* Mach-O doesn't support omitting the frame pointer for now. */
1689 if (flag_omit_frame_pointer == 2)
1690 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1691 if (flag_asynchronous_unwind_tables == 2)
1692 flag_asynchronous_unwind_tables = 1;
1693 if (flag_pcc_struct_return == 2)
1694 flag_pcc_struct_return = 0;
1695 }
1696 else
1697 {
1698 if (flag_omit_frame_pointer == 2)
1699 flag_omit_frame_pointer = 0;
1700 if (flag_asynchronous_unwind_tables == 2)
1701 flag_asynchronous_unwind_tables = 0;
1702 if (flag_pcc_struct_return == 2)
1703 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1704 }
1705
1706 /* Need to check -mtune=generic first. */
1707 if (ix86_tune_string)
1708 {
1709 if (!strcmp (ix86_tune_string, "generic")
1710 || !strcmp (ix86_tune_string, "i686")
1711 /* As special support for cross compilers we read -mtune=native
1712 as -mtune=generic. With native compilers we won't see the
1713 -mtune=native, as it was changed by the driver. */
1714 || !strcmp (ix86_tune_string, "native"))
1715 {
1716 if (TARGET_64BIT)
1717 ix86_tune_string = "generic64";
1718 else
1719 ix86_tune_string = "generic32";
1720 }
1721 else if (!strncmp (ix86_tune_string, "generic", 7))
1722 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1723 }
1724 else
1725 {
1726 if (ix86_arch_string)
1727 ix86_tune_string = ix86_arch_string;
1728 if (!ix86_tune_string)
1729 {
1730 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1731 ix86_tune_defaulted = 1;
1732 }
1733
1734 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1735 need to use a sensible tune option. */
1736 if (!strcmp (ix86_tune_string, "generic")
1737 || !strcmp (ix86_tune_string, "x86-64")
1738 || !strcmp (ix86_tune_string, "i686"))
1739 {
1740 if (TARGET_64BIT)
1741 ix86_tune_string = "generic64";
1742 else
1743 ix86_tune_string = "generic32";
1744 }
1745 }
1746 if (!strcmp (ix86_tune_string, "x86-64"))
1747 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1748 "-mtune=generic instead as appropriate.");
1749
1750 if (!ix86_arch_string)
1751 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486";
1752 if (!strcmp (ix86_arch_string, "generic"))
1753 error ("generic CPU can be used only for -mtune= switch");
1754 if (!strncmp (ix86_arch_string, "generic", 7))
1755 error ("bad value (%s) for -march= switch", ix86_arch_string);
1756
1757 if (ix86_cmodel_string != 0)
1758 {
1759 if (!strcmp (ix86_cmodel_string, "small"))
1760 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1761 else if (!strcmp (ix86_cmodel_string, "medium"))
1762 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1763 else if (flag_pic)
1764 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1765 else if (!strcmp (ix86_cmodel_string, "32"))
1766 ix86_cmodel = CM_32;
1767 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1768 ix86_cmodel = CM_KERNEL;
1769 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1770 ix86_cmodel = CM_LARGE;
1771 else
1772 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1773 }
1774 else
1775 {
1776 ix86_cmodel = CM_32;
1777 if (TARGET_64BIT)
1778 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1779 }
1780 if (ix86_asm_string != 0)
1781 {
1782 if (! TARGET_MACHO
1783 && !strcmp (ix86_asm_string, "intel"))
1784 ix86_asm_dialect = ASM_INTEL;
1785 else if (!strcmp (ix86_asm_string, "att"))
1786 ix86_asm_dialect = ASM_ATT;
1787 else
1788 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1789 }
1790 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1791 error ("code model %qs not supported in the %s bit mode",
1792 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1793 if (ix86_cmodel == CM_LARGE)
1794 sorry ("code model %<large%> not supported yet");
1795 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1796 sorry ("%i-bit mode not compiled in",
1797 (target_flags & MASK_64BIT) ? 64 : 32);
1798
1799 for (i = 0; i < pta_size; i++)
1800 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1801 {
1802 ix86_arch = processor_alias_table[i].processor;
1803 /* Default cpu tuning to the architecture. */
1804 ix86_tune = ix86_arch;
1805 if (processor_alias_table[i].flags & PTA_MMX
1806 && !(target_flags_explicit & MASK_MMX))
1807 target_flags |= MASK_MMX;
1808 if (processor_alias_table[i].flags & PTA_3DNOW
1809 && !(target_flags_explicit & MASK_3DNOW))
1810 target_flags |= MASK_3DNOW;
1811 if (processor_alias_table[i].flags & PTA_3DNOW_A
1812 && !(target_flags_explicit & MASK_3DNOW_A))
1813 target_flags |= MASK_3DNOW_A;
1814 if (processor_alias_table[i].flags & PTA_SSE
1815 && !(target_flags_explicit & MASK_SSE))
1816 target_flags |= MASK_SSE;
1817 if (processor_alias_table[i].flags & PTA_SSE2
1818 && !(target_flags_explicit & MASK_SSE2))
1819 target_flags |= MASK_SSE2;
1820 if (processor_alias_table[i].flags & PTA_SSE3
1821 && !(target_flags_explicit & MASK_SSE3))
1822 target_flags |= MASK_SSE3;
1823 if (processor_alias_table[i].flags & PTA_SSSE3
1824 && !(target_flags_explicit & MASK_SSSE3))
1825 target_flags |= MASK_SSSE3;
1826 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1827 x86_prefetch_sse = true;
1831 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1832 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1833 };
1834
1835 int const pta_size = ARRAY_SIZE (processor_alias_table);
1836
1837#ifdef SUBTARGET_OVERRIDE_OPTIONS
1838 SUBTARGET_OVERRIDE_OPTIONS;
1839#endif
1840
1841#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1842 SUBSUBTARGET_OVERRIDE_OPTIONS;
1843#endif
1844
1845 /* -fPIC is the default for x86_64. */
1846 if (TARGET_MACHO && TARGET_64BIT)
1847 flag_pic = 2;
1848
1849 /* Set the default values for switches whose default depends on TARGET_64BIT
1850 in case they weren't overwritten by command line options. */
1851 if (TARGET_64BIT)
1852 {
1853 /* Mach-O doesn't support omitting the frame pointer for now. */
1854 if (flag_omit_frame_pointer == 2)
1855 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1856 if (flag_asynchronous_unwind_tables == 2)
1857 flag_asynchronous_unwind_tables = 1;
1858 if (flag_pcc_struct_return == 2)
1859 flag_pcc_struct_return = 0;
1860 }
1861 else
1862 {
1863 if (flag_omit_frame_pointer == 2)
1864 flag_omit_frame_pointer = 0;
1865 if (flag_asynchronous_unwind_tables == 2)
1866 flag_asynchronous_unwind_tables = 0;
1867 if (flag_pcc_struct_return == 2)
1868 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1869 }
1870
1871 /* Need to check -mtune=generic first. */
1872 if (ix86_tune_string)
1873 {
1874 if (!strcmp (ix86_tune_string, "generic")
1875 || !strcmp (ix86_tune_string, "i686")
1876 /* As special support for cross compilers we read -mtune=native
1877 as -mtune=generic. With native compilers we won't see the
1878 -mtune=native, as it was changed by the driver. */
1879 || !strcmp (ix86_tune_string, "native"))
1880 {
1881 if (TARGET_64BIT)
1882 ix86_tune_string = "generic64";
1883 else
1884 ix86_tune_string = "generic32";
1885 }
1886 else if (!strncmp (ix86_tune_string, "generic", 7))
1887 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1888 }
1889 else
1890 {
1891 if (ix86_arch_string)
1892 ix86_tune_string = ix86_arch_string;
1893 if (!ix86_tune_string)
1894 {
1895 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1896 ix86_tune_defaulted = 1;
1897 }
1898
1899 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1900 need to use a sensible tune option. */
1901 if (!strcmp (ix86_tune_string, "generic")
1902 || !strcmp (ix86_tune_string, "x86-64")
1903 || !strcmp (ix86_tune_string, "i686"))
1904 {
1905 if (TARGET_64BIT)
1906 ix86_tune_string = "generic64";
1907 else
1908 ix86_tune_string = "generic32";
1909 }
1910 }
1911 if (!strcmp (ix86_tune_string, "x86-64"))
1912 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1913 "-mtune=generic instead as appropriate.");
1914
1915 if (!ix86_arch_string)
1916 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486";
1917 if (!strcmp (ix86_arch_string, "generic"))
1918 error ("generic CPU can be used only for -mtune= switch");
1919 if (!strncmp (ix86_arch_string, "generic", 7))
1920 error ("bad value (%s) for -march= switch", ix86_arch_string);
1921
1922 if (ix86_cmodel_string != 0)
1923 {
1924 if (!strcmp (ix86_cmodel_string, "small"))
1925 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1926 else if (!strcmp (ix86_cmodel_string, "medium"))
1927 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1928 else if (flag_pic)
1929 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1930 else if (!strcmp (ix86_cmodel_string, "32"))
1931 ix86_cmodel = CM_32;
1932 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1933 ix86_cmodel = CM_KERNEL;
1934 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1935 ix86_cmodel = CM_LARGE;
1936 else
1937 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1938 }
1939 else
1940 {
1941 ix86_cmodel = CM_32;
1942 if (TARGET_64BIT)
1943 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1944 }
1945 if (ix86_asm_string != 0)
1946 {
1947 if (! TARGET_MACHO
1948 && !strcmp (ix86_asm_string, "intel"))
1949 ix86_asm_dialect = ASM_INTEL;
1950 else if (!strcmp (ix86_asm_string, "att"))
1951 ix86_asm_dialect = ASM_ATT;
1952 else
1953 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1954 }
1955 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1956 error ("code model %qs not supported in the %s bit mode",
1957 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1958 if (ix86_cmodel == CM_LARGE)
1959 sorry ("code model %<large%> not supported yet");
1960 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1961 sorry ("%i-bit mode not compiled in",
1962 (target_flags & MASK_64BIT) ? 64 : 32);
1963
1964 for (i = 0; i < pta_size; i++)
1965 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1966 {
1967 ix86_arch = processor_alias_table[i].processor;
1968 /* Default cpu tuning to the architecture. */
1969 ix86_tune = ix86_arch;
1970 if (processor_alias_table[i].flags & PTA_MMX
1971 && !(target_flags_explicit & MASK_MMX))
1972 target_flags |= MASK_MMX;
1973 if (processor_alias_table[i].flags & PTA_3DNOW
1974 && !(target_flags_explicit & MASK_3DNOW))
1975 target_flags |= MASK_3DNOW;
1976 if (processor_alias_table[i].flags & PTA_3DNOW_A
1977 && !(target_flags_explicit & MASK_3DNOW_A))
1978 target_flags |= MASK_3DNOW_A;
1979 if (processor_alias_table[i].flags & PTA_SSE
1980 && !(target_flags_explicit & MASK_SSE))
1981 target_flags |= MASK_SSE;
1982 if (processor_alias_table[i].flags & PTA_SSE2
1983 && !(target_flags_explicit & MASK_SSE2))
1984 target_flags |= MASK_SSE2;
1985 if (processor_alias_table[i].flags & PTA_SSE3
1986 && !(target_flags_explicit & MASK_SSE3))
1987 target_flags |= MASK_SSE3;
1988 if (processor_alias_table[i].flags & PTA_SSSE3
1989 && !(target_flags_explicit & MASK_SSSE3))
1990 target_flags |= MASK_SSSE3;
1991 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1992 x86_prefetch_sse = true;
1993 if (processor_alias_table[i].flags & PTA_CX16)
1994 x86_cmpxchg16b = true;
1995 if (processor_alias_table[i].flags & PTA_POPCNT
1996 && !(target_flags_explicit & MASK_POPCNT))
1997 target_flags |= MASK_POPCNT;
1998 if (processor_alias_table[i].flags & PTA_ABM
1999 && !(target_flags_explicit & MASK_ABM))
2000 target_flags |= MASK_ABM;
2001 if (processor_alias_table[i].flags & PTA_SSE4A
2002 && !(target_flags_explicit & MASK_SSE4A))
2003 target_flags |= MASK_SSE4A;
1828 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1829 error ("CPU you selected does not support x86-64 "
1830 "instruction set");
1831 break;
1832 }
1833
1834 if (i == pta_size)
1835 error ("bad value (%s) for -march= switch", ix86_arch_string);
1836
1837 for (i = 0; i < pta_size; i++)
1838 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1839 {
1840 ix86_tune = processor_alias_table[i].processor;
1841 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1842 {
1843 if (ix86_tune_defaulted)
1844 {
1845 ix86_tune_string = "x86-64";
1846 for (i = 0; i < pta_size; i++)
1847 if (! strcmp (ix86_tune_string,
1848 processor_alias_table[i].name))
1849 break;
1850 ix86_tune = processor_alias_table[i].processor;
1851 }
1852 else
1853 error ("CPU you selected does not support x86-64 "
1854 "instruction set");
1855 }
1856 /* Intel CPUs have always interpreted SSE prefetch instructions as
1857 NOPs; so, we can enable SSE prefetch instructions even when
1858 -mtune (rather than -march) points us to a processor that has them.
1859 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1860 higher processors. */
1861 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1862 x86_prefetch_sse = true;
1863 break;
1864 }
1865 if (i == pta_size)
1866 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1867
1868 if (optimize_size)
1869 ix86_cost = &size_cost;
1870 else
1871 ix86_cost = processor_target_table[ix86_tune].cost;
1872 target_flags |= processor_target_table[ix86_tune].target_enable;
1873 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1874
1875 /* Arrange to set up i386_stack_locals for all functions. */
1876 init_machine_status = ix86_init_machine_status;
1877
1878 /* Validate -mregparm= value. */
1879 if (ix86_regparm_string)
1880 {
1881 i = atoi (ix86_regparm_string);
1882 if (i < 0 || i > REGPARM_MAX)
1883 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1884 else
1885 ix86_regparm = i;
1886 }
1887 else
1888 if (TARGET_64BIT)
1889 ix86_regparm = REGPARM_MAX;
1890
1891 /* If the user has provided any of the -malign-* options,
1892 warn and use that value only if -falign-* is not set.
1893 Remove this code in GCC 3.2 or later. */
1894 if (ix86_align_loops_string)
1895 {
1896 warning (0, "-malign-loops is obsolete, use -falign-loops");
1897 if (align_loops == 0)
1898 {
1899 i = atoi (ix86_align_loops_string);
1900 if (i < 0 || i > MAX_CODE_ALIGN)
1901 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1902 else
1903 align_loops = 1 << i;
1904 }
1905 }
1906
1907 if (ix86_align_jumps_string)
1908 {
1909 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1910 if (align_jumps == 0)
1911 {
1912 i = atoi (ix86_align_jumps_string);
1913 if (i < 0 || i > MAX_CODE_ALIGN)
1914	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1915 else
1916 align_jumps = 1 << i;
1917 }
1918 }
1919
1920 if (ix86_align_funcs_string)
1921 {
1922 warning (0, "-malign-functions is obsolete, use -falign-functions");
1923 if (align_functions == 0)
1924 {
1925 i = atoi (ix86_align_funcs_string);
1926 if (i < 0 || i > MAX_CODE_ALIGN)
1927	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1928 else
1929 align_functions = 1 << i;
1930 }
1931 }
1932
1933 /* Default align_* from the processor table. */
1934 if (align_loops == 0)
1935 {
1936 align_loops = processor_target_table[ix86_tune].align_loop;
1937 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1938 }
1939 if (align_jumps == 0)
1940 {
1941 align_jumps = processor_target_table[ix86_tune].align_jump;
1942 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1943 }
1944 if (align_functions == 0)
1945 {
1946 align_functions = processor_target_table[ix86_tune].align_func;
1947 }
1948
1949 /* Validate -mbranch-cost= value, or provide default. */
1950 ix86_branch_cost = ix86_cost->branch_cost;
1951 if (ix86_branch_cost_string)
1952 {
1953 i = atoi (ix86_branch_cost_string);
1954 if (i < 0 || i > 5)
1955 error ("-mbranch-cost=%d is not between 0 and 5", i);
1956 else
1957 ix86_branch_cost = i;
1958 }
1959 if (ix86_section_threshold_string)
1960 {
1961 i = atoi (ix86_section_threshold_string);
1962 if (i < 0)
1963 error ("-mlarge-data-threshold=%d is negative", i);
1964 else
1965 ix86_section_threshold = i;
1966 }
1967
1968 if (ix86_tls_dialect_string)
1969 {
1970 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1971 ix86_tls_dialect = TLS_DIALECT_GNU;
1972 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1973 ix86_tls_dialect = TLS_DIALECT_GNU2;
1974 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1975 ix86_tls_dialect = TLS_DIALECT_SUN;
1976 else
1977 error ("bad value (%s) for -mtls-dialect= switch",
1978 ix86_tls_dialect_string);
1979 }
1980
1981 /* Keep nonleaf frame pointers. */
1982 if (flag_omit_frame_pointer)
1983 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1984 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1985 flag_omit_frame_pointer = 1;
1986
1987 /* If we're doing fast math, we don't care about comparison order
1988 wrt NaNs. This lets us use a shorter comparison sequence. */
1989 if (flag_finite_math_only)
1990 target_flags &= ~MASK_IEEE_FP;
1991
1992 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1993 since the insns won't need emulation. */
1994 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1995 target_flags &= ~MASK_NO_FANCY_MATH_387;
1996
1997 /* Likewise, if the target doesn't have a 387, or we've specified
1998 software floating point, don't use 387 inline intrinsics. */
1999 if (!TARGET_80387)
2000 target_flags |= MASK_NO_FANCY_MATH_387;
2001
2002 /* Turn on SSE3 builtins for -mssse3. */
2003 if (TARGET_SSSE3)
2004 target_flags |= MASK_SSE3;
2005
2004 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2005 error ("CPU you selected does not support x86-64 "
2006 "instruction set");
2007 break;
2008 }
2009
2010 if (i == pta_size)
2011 error ("bad value (%s) for -march= switch", ix86_arch_string);
2012
2013 for (i = 0; i < pta_size; i++)
2014 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2015 {
2016 ix86_tune = processor_alias_table[i].processor;
2017 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2018 {
2019 if (ix86_tune_defaulted)
2020 {
2021 ix86_tune_string = "x86-64";
2022 for (i = 0; i < pta_size; i++)
2023 if (! strcmp (ix86_tune_string,
2024 processor_alias_table[i].name))
2025 break;
2026 ix86_tune = processor_alias_table[i].processor;
2027 }
2028 else
2029 error ("CPU you selected does not support x86-64 "
2030 "instruction set");
2031 }
2032 /* Intel CPUs have always interpreted SSE prefetch instructions as
2033 NOPs; so, we can enable SSE prefetch instructions even when
2034 -mtune (rather than -march) points us to a processor that has them.
2035 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2036 higher processors. */
2037 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2038 x86_prefetch_sse = true;
2039 break;
2040 }
2041 if (i == pta_size)
2042 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2043
2044 if (optimize_size)
2045 ix86_cost = &size_cost;
2046 else
2047 ix86_cost = processor_target_table[ix86_tune].cost;
2048 target_flags |= processor_target_table[ix86_tune].target_enable;
2049 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2050
2051 /* Arrange to set up i386_stack_locals for all functions. */
2052 init_machine_status = ix86_init_machine_status;
2053
2054 /* Validate -mregparm= value. */
2055 if (ix86_regparm_string)
2056 {
2057 i = atoi (ix86_regparm_string);
2058 if (i < 0 || i > REGPARM_MAX)
2059 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2060 else
2061 ix86_regparm = i;
2062 }
2063 else
2064 if (TARGET_64BIT)
2065 ix86_regparm = REGPARM_MAX;
2066
2067 /* If the user has provided any of the -malign-* options,
2068 warn and use that value only if -falign-* is not set.
2069 Remove this code in GCC 3.2 or later. */
2070 if (ix86_align_loops_string)
2071 {
2072 warning (0, "-malign-loops is obsolete, use -falign-loops");
2073 if (align_loops == 0)
2074 {
2075 i = atoi (ix86_align_loops_string);
2076 if (i < 0 || i > MAX_CODE_ALIGN)
2077 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2078 else
2079 align_loops = 1 << i;
2080 }
2081 }
2082
2083 if (ix86_align_jumps_string)
2084 {
2085 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2086 if (align_jumps == 0)
2087 {
2088 i = atoi (ix86_align_jumps_string);
2089 if (i < 0 || i > MAX_CODE_ALIGN)
2090	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2091 else
2092 align_jumps = 1 << i;
2093 }
2094 }
2095
2096 if (ix86_align_funcs_string)
2097 {
2098 warning (0, "-malign-functions is obsolete, use -falign-functions");
2099 if (align_functions == 0)
2100 {
2101 i = atoi (ix86_align_funcs_string);
2102 if (i < 0 || i > MAX_CODE_ALIGN)
2103	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2104 else
2105 align_functions = 1 << i;
2106 }
2107 }
2108
2109 /* Default align_* from the processor table. */
2110 if (align_loops == 0)
2111 {
2112 align_loops = processor_target_table[ix86_tune].align_loop;
2113 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2114 }
2115 if (align_jumps == 0)
2116 {
2117 align_jumps = processor_target_table[ix86_tune].align_jump;
2118 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2119 }
2120 if (align_functions == 0)
2121 {
2122 align_functions = processor_target_table[ix86_tune].align_func;
2123 }
2124
2125 /* Validate -mbranch-cost= value, or provide default. */
2126 ix86_branch_cost = ix86_cost->branch_cost;
2127 if (ix86_branch_cost_string)
2128 {
2129 i = atoi (ix86_branch_cost_string);
2130 if (i < 0 || i > 5)
2131 error ("-mbranch-cost=%d is not between 0 and 5", i);
2132 else
2133 ix86_branch_cost = i;
2134 }
2135 if (ix86_section_threshold_string)
2136 {
2137 i = atoi (ix86_section_threshold_string);
2138 if (i < 0)
2139 error ("-mlarge-data-threshold=%d is negative", i);
2140 else
2141 ix86_section_threshold = i;
2142 }
2143
2144 if (ix86_tls_dialect_string)
2145 {
2146 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2147 ix86_tls_dialect = TLS_DIALECT_GNU;
2148 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2149 ix86_tls_dialect = TLS_DIALECT_GNU2;
2150 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2151 ix86_tls_dialect = TLS_DIALECT_SUN;
2152 else
2153 error ("bad value (%s) for -mtls-dialect= switch",
2154 ix86_tls_dialect_string);
2155 }
2156
2157 /* Keep nonleaf frame pointers. */
2158 if (flag_omit_frame_pointer)
2159 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2160 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2161 flag_omit_frame_pointer = 1;
2162
2163 /* If we're doing fast math, we don't care about comparison order
2164 wrt NaNs. This lets us use a shorter comparison sequence. */
2165 if (flag_finite_math_only)
2166 target_flags &= ~MASK_IEEE_FP;
2167
2168 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2169 since the insns won't need emulation. */
2170 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
2171 target_flags &= ~MASK_NO_FANCY_MATH_387;
2172
2173 /* Likewise, if the target doesn't have a 387, or we've specified
2174 software floating point, don't use 387 inline intrinsics. */
2175 if (!TARGET_80387)
2176 target_flags |= MASK_NO_FANCY_MATH_387;
2177
2178 /* Turn on SSE3 builtins for -mssse3. */
2179 if (TARGET_SSSE3)
2180 target_flags |= MASK_SSE3;
2181
2182 /* Turn on SSE3 builtins for -msse4a. */
2183 if (TARGET_SSE4A)
2184 target_flags |= MASK_SSE3;
2185
2006 /* Turn on SSE2 builtins for -msse3. */
2007 if (TARGET_SSE3)
2008 target_flags |= MASK_SSE2;
2009
2010 /* Turn on SSE builtins for -msse2. */
2011 if (TARGET_SSE2)
2012 target_flags |= MASK_SSE;
2013
2014 /* Turn on MMX builtins for -msse. */
2015 if (TARGET_SSE)
2016 {
2017 target_flags |= MASK_MMX & ~target_flags_explicit;
2018 x86_prefetch_sse = true;
2019 }
2020
2021 /* Turn on MMX builtins for 3Dnow. */
2022 if (TARGET_3DNOW)
2023 target_flags |= MASK_MMX;
2024
2186 /* Turn on SSE2 builtins for -msse3. */
2187 if (TARGET_SSE3)
2188 target_flags |= MASK_SSE2;
2189
2190 /* Turn on SSE builtins for -msse2. */
2191 if (TARGET_SSE2)
2192 target_flags |= MASK_SSE;
2193
2194 /* Turn on MMX builtins for -msse. */
2195 if (TARGET_SSE)
2196 {
2197 target_flags |= MASK_MMX & ~target_flags_explicit;
2198 x86_prefetch_sse = true;
2199 }
2200
2201 /* Turn on MMX builtins for 3Dnow. */
2202 if (TARGET_3DNOW)
2203 target_flags |= MASK_MMX;
2204
2205 /* Turn on POPCNT builtins for -mabm. */
2206 if (TARGET_ABM)
2207 target_flags |= MASK_POPCNT;
2208
2025 if (TARGET_64BIT)
2026 {
2027 if (TARGET_ALIGN_DOUBLE)
2028 error ("-malign-double makes no sense in the 64bit mode");
2029 if (TARGET_RTD)
2030 error ("-mrtd calling convention not supported in the 64bit mode");
2031
2032 /* Enable by default the SSE and MMX builtins. Do allow the user to
2033 explicitly disable any of these. In particular, disabling SSE and
2034 MMX for kernel code is extremely useful. */
2035 target_flags
2036 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2037 & ~target_flags_explicit);
2038 }
2039 else
2040 {
2041      /* The i386 ABI does not specify a red zone.  It still makes sense to use
2042	 one when the programmer takes care to keep the stack from being destroyed.  */
2043 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2044 target_flags |= MASK_NO_RED_ZONE;
2045 }
2046
2047 /* Validate -mpreferred-stack-boundary= value, or provide default.
2048 The default of 128 bits is for Pentium III's SSE __m128. We can't
2049 change it because of optimize_size. Otherwise, we can't mix object
2050 files compiled with -Os and -On. */
2051 ix86_preferred_stack_boundary = 128;
2052 if (ix86_preferred_stack_boundary_string)
2053 {
2054 i = atoi (ix86_preferred_stack_boundary_string);
2055 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2056 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2057 TARGET_64BIT ? 4 : 2);
2058 else
2059 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2060 }
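
Illustration only, not part of i386.c in either revision: the block above maps
-mpreferred-stack-boundary=N to an alignment of (1 << N) bytes, stored
internally in bits.  A minimal standalone C sketch of that arithmetic follows,
assuming BITS_PER_UNIT is 8 as it is on i386 and x86-64.

#include <stdio.h>

int
main (void)
{
  const int bits_per_unit = 8;	/* BITS_PER_UNIT on i386 and x86-64.  */
  int n;

  /* The accepted range above is (TARGET_64BIT ? 4 : 2) .. 12; n = 4
     reproduces the 128-bit (16-byte) default used for SSE __m128 values.  */
  for (n = 2; n <= 12; n++)
    printf ("N=%2d -> %4d bytes = %5d bits\n",
	    n, 1 << n, (1 << n) * bits_per_unit);
  return 0;
}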
2061
2062 /* Accept -msseregparm only if at least SSE support is enabled. */
2063 if (TARGET_SSEREGPARM
2064 && ! TARGET_SSE)
2065 error ("-msseregparm used without SSE enabled");
2066
2067 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2068
2069 if (ix86_fpmath_string != 0)
2070 {
2071 if (! strcmp (ix86_fpmath_string, "387"))
2072 ix86_fpmath = FPMATH_387;
2073 else if (! strcmp (ix86_fpmath_string, "sse"))
2074 {
2075 if (!TARGET_SSE)
2076 {
2077 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2078 ix86_fpmath = FPMATH_387;
2079 }
2080 else
2081 ix86_fpmath = FPMATH_SSE;
2082 }
2083 else if (! strcmp (ix86_fpmath_string, "387,sse")
2084 || ! strcmp (ix86_fpmath_string, "sse,387"))
2085 {
2086 if (!TARGET_SSE)
2087 {
2088 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2089 ix86_fpmath = FPMATH_387;
2090 }
2091 else if (!TARGET_80387)
2092 {
2093 warning (0, "387 instruction set disabled, using SSE arithmetics");
2094 ix86_fpmath = FPMATH_SSE;
2095 }
2096 else
2097 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2098 }
2099 else
2100 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2101 }
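/* Summary of the -mfpmath= values accepted above (annotation):
     -mfpmath=387      -> FPMATH_387
     -mfpmath=sse      -> FPMATH_SSE, falling back to 387 with a warning
                          when SSE is disabled
     -mfpmath=sse,387  -> FPMATH_SSE | FPMATH_387 when both units are
                          available (387,sse is accepted as a synonym)
   Any other string is rejected with an error.  */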
2102
2103 /* If the i387 is disabled, then do not return values in it. */
2104 if (!TARGET_80387)
2105 target_flags &= ~MASK_FLOAT_RETURNS;
2106
2107 if ((x86_accumulate_outgoing_args & TUNEMASK)
2108 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2109 && !optimize_size)
2110 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2111
2112 /* ??? Unwind info is not correct around the CFG unless either a frame
2113 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2114 unwind info generation to be aware of the CFG and propagating states
2115 around edges. */
2116 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2117 || flag_exceptions || flag_non_call_exceptions)
2118 && flag_omit_frame_pointer
2119 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2120 {
2121 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2122 warning (0, "unwind tables currently require either a frame pointer "
2123 "or -maccumulate-outgoing-args for correctness");
2124 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2125 }
2126
2127 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2128 {
2129 char *p;
2130 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2131 p = strchr (internal_label_prefix, 'X');
2132 internal_label_prefix_len = p - internal_label_prefix;
2133 *p = '\0';
2134 }
2135
2136 /* When scheduling description is not available, disable scheduler pass
2137 so it won't slow down the compilation and make x87 code slower. */
2138 if (!TARGET_SCHEDULE)
2139 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2140}
2141
2142/* Switch to the appropriate section for output of DECL.
2143 DECL is either a `VAR_DECL' node or a constant of some sort.
2144 RELOC indicates whether forming the initial value of DECL requires
2145 link-time relocations. */
2146
2147static section *
2148x86_64_elf_select_section (tree decl, int reloc,
2149 unsigned HOST_WIDE_INT align)
2150{
2151 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2152 && ix86_in_large_data_p (decl))
2153 {
2154 const char *sname = NULL;
2155 unsigned int flags = SECTION_WRITE;
2156 switch (categorize_decl_for_section (decl, reloc))
2157 {
2158 case SECCAT_DATA:
2159 sname = ".ldata";
2160 break;
2161 case SECCAT_DATA_REL:
2162 sname = ".ldata.rel";
2163 break;
2164 case SECCAT_DATA_REL_LOCAL:
2165 sname = ".ldata.rel.local";
2166 break;
2167 case SECCAT_DATA_REL_RO:
2168 sname = ".ldata.rel.ro";
2169 break;
2170 case SECCAT_DATA_REL_RO_LOCAL:
2171 sname = ".ldata.rel.ro.local";
2172 break;
2173 case SECCAT_BSS:
2174 sname = ".lbss";
2175 flags |= SECTION_BSS;
2176 break;
2177 case SECCAT_RODATA:
2178 case SECCAT_RODATA_MERGE_STR:
2179 case SECCAT_RODATA_MERGE_STR_INIT:
2180 case SECCAT_RODATA_MERGE_CONST:
2181 sname = ".lrodata";
2182 flags = 0;
2183 break;
2184 case SECCAT_SRODATA:
2185 case SECCAT_SDATA:
2186 case SECCAT_SBSS:
2187 gcc_unreachable ();
2188 case SECCAT_TEXT:
2189 case SECCAT_TDATA:
2190 case SECCAT_TBSS:
2191	    /* We don't split these for the medium model.  Place them into
2192	       default sections and hope for the best.  */
2193 break;
2194 }
2195 if (sname)
2196 {
2197 /* We might get called with string constants, but get_named_section
2198 doesn't like them as they are not DECLs. Also, we need to set
2199 flags in that case. */
2200 if (!DECL_P (decl))
2201 return get_section (sname, flags, NULL);
2202 return get_named_section (decl, sname, reloc);
2203 }
2204 }
2205 return default_elf_select_section (decl, reloc, align);
2206}
2207
2208/* Build up a unique section name, expressed as a
2209 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2210   RELOC indicates whether the initial value of DECL requires
2211 link-time relocations. */
2212
2213static void
2214x86_64_elf_unique_section (tree decl, int reloc)
2215{
2216 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2217 && ix86_in_large_data_p (decl))
2218 {
2219 const char *prefix = NULL;
2220 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2221 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2222
2223 switch (categorize_decl_for_section (decl, reloc))
2224 {
2225 case SECCAT_DATA:
2226 case SECCAT_DATA_REL:
2227 case SECCAT_DATA_REL_LOCAL:
2228 case SECCAT_DATA_REL_RO:
2229 case SECCAT_DATA_REL_RO_LOCAL:
2230 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2231 break;
2232 case SECCAT_BSS:
2233 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2234 break;
2235 case SECCAT_RODATA:
2236 case SECCAT_RODATA_MERGE_STR:
2237 case SECCAT_RODATA_MERGE_STR_INIT:
2238 case SECCAT_RODATA_MERGE_CONST:
2239 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2240 break;
2241 case SECCAT_SRODATA:
2242 case SECCAT_SDATA:
2243 case SECCAT_SBSS:
2244 gcc_unreachable ();
2245 case SECCAT_TEXT:
2246 case SECCAT_TDATA:
2247 case SECCAT_TBSS:
2248	    /* We don't split these for the medium model.  Place them into
2249	       default sections and hope for the best.  */
2250 break;
2251 }
2252 if (prefix)
2253 {
2254 const char *name;
2255 size_t nlen, plen;
2256 char *string;
2257 plen = strlen (prefix);
2258
2259 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2260 name = targetm.strip_name_encoding (name);
2261 nlen = strlen (name);
2262
2263 string = alloca (nlen + plen + 1);
2264 memcpy (string, prefix, plen);
2265 memcpy (string + plen, name, nlen + 1);
2266
2267 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2268 return;
2269 }
2270 }
2271 default_unique_section (decl, reloc);
2272}
2273
2274#ifdef COMMON_ASM_OP
2275/* This says how to output assembler code to declare an
2276 uninitialized external linkage data object.
2277
2278   For medium-model x86-64 we need to use the .largecomm directive for
2279   large objects.  */
2280void
2281x86_elf_aligned_common (FILE *file,
2282 const char *name, unsigned HOST_WIDE_INT size,
2283 int align)
2284{
2285 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2286 && size > (unsigned int)ix86_section_threshold)
2287 fprintf (file, ".largecomm\t");
2288 else
2289 fprintf (file, "%s", COMMON_ASM_OP);
2290 assemble_name (file, name);
2291 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2292 size, align / BITS_PER_UNIT);
2293}
2294
2295/* Utility function for targets to use in implementing
2296 ASM_OUTPUT_ALIGNED_BSS. */
2297
2298void
2299x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2300 const char *name, unsigned HOST_WIDE_INT size,
2301 int align)
2302{
2303 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2304 && size > (unsigned int)ix86_section_threshold)
2305 switch_to_section (get_named_section (decl, ".lbss", 0));
2306 else
2307 switch_to_section (bss_section);
2308 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2309#ifdef ASM_DECLARE_OBJECT_NAME
2310 last_assemble_variable_decl = decl;
2311 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2312#else
2313  /* The standard thing is just to output a label for the object.  */
2314 ASM_OUTPUT_LABEL (file, name);
2315#endif /* ASM_DECLARE_OBJECT_NAME */
2316 ASM_OUTPUT_SKIP (file, size ? size : 1);
2317}
2318#endif
2319
2320void
2321optimization_options (int level, int size ATTRIBUTE_UNUSED)
2322{
2323 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2324 make the problem with not enough registers even worse. */
2325#ifdef INSN_SCHEDULING
2326 if (level > 1)
2327 flag_schedule_insns = 0;
2328#endif
2329
2330 if (TARGET_MACHO)
2331 /* The Darwin libraries never set errno, so we might as well
2332 avoid calling them when that's the only reason we would. */
2333 flag_errno_math = 0;
2334
2335  /* The default values of these switches depend on TARGET_64BIT,
2336     which is not known at this moment.  Mark these values with 2 and
2337     let the user override them.  In case there is no command line option
2338     specifying them, we will set the defaults in override_options.  */
2339 if (optimize >= 1)
2340 flag_omit_frame_pointer = 2;
2341 flag_pcc_struct_return = 2;
2342 flag_asynchronous_unwind_tables = 2;
2343#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2344 SUBTARGET_OPTIMIZATION_OPTIONS;
2345#endif
2346}
2347
2348/* Table of valid machine attributes. */
2349const struct attribute_spec ix86_attribute_table[] =
2350{
2351 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2352 /* Stdcall attribute says callee is responsible for popping arguments
2353 if they are not variable. */
2354 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2355 /* Fastcall attribute says callee is responsible for popping arguments
2356 if they are not variable. */
2357 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2358 /* Cdecl attribute says the callee is a normal C declaration */
2359 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2360 /* Regparm attribute specifies how many integer arguments are to be
2361 passed in registers. */
2362 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2363 /* Sseregparm attribute says we are using x86_64 calling conventions
2364 for FP arguments. */
2365 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2366 /* force_align_arg_pointer says this function realigns the stack at entry. */
2367 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2368 false, true, true, ix86_handle_cconv_attribute },
2369#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2370 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2371 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2372 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2373#endif
2374 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2375 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2376#ifdef SUBTARGET_ATTRIBUTE_TABLE
2377 SUBTARGET_ATTRIBUTE_TABLE,
2378#endif
2379 { NULL, 0, 0, false, false, false, NULL }
2380};
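/* Illustrative uses of the attributes listed above (annotation, not part
   of the original sources):

     int __attribute__((stdcall))    f (int a, int b);      callee pops its args
     int __attribute__((fastcall))   g (int a, int b);      a in %ecx, b in %edx
     int __attribute__((regparm(3))) h (int a, int b, int c);
                                         a in %eax, b in %edx, c in %ecx
     int __attribute__((cdecl))      k (int a, ...);        caller pops the args  */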
2381
2382/* Decide whether we can make a sibling call to a function. DECL is the
2383 declaration of the function being targeted by the call and EXP is the
2384 CALL_EXPR representing the call. */
2385
2386static bool
2387ix86_function_ok_for_sibcall (tree decl, tree exp)
2388{
2389 tree func;
2390 rtx a, b;
2391
2392 /* If we are generating position-independent code, we cannot sibcall
2393 optimize any indirect call, or a direct call to a global function,
2394 as the PLT requires %ebx be live. */
2395 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2396 return false;
2397
2398 if (decl)
2399 func = decl;
2400 else
2401 {
2402 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2403 if (POINTER_TYPE_P (func))
2404 func = TREE_TYPE (func);
2405 }
2406
2407  /* Check that the return value locations are the same.  For example,
2408     if we are returning floats on the 80387 register stack, we cannot
2409 make a sibcall from a function that doesn't return a float to a
2410 function that does or, conversely, from a function that does return
2411 a float to a function that doesn't; the necessary stack adjustment
2412 would not be executed. This is also the place we notice
2413 differences in the return value ABI. Note that it is ok for one
2414 of the functions to have void return type as long as the return
2415 value of the other is passed in a register. */
2416 a = ix86_function_value (TREE_TYPE (exp), func, false);
2417 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2418 cfun->decl, false);
2419 if (STACK_REG_P (a) || STACK_REG_P (b))
2420 {
2421 if (!rtx_equal_p (a, b))
2422 return false;
2423 }
2424 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2425 ;
2426 else if (!rtx_equal_p (a, b))
2427 return false;
2428
2429 /* If this call is indirect, we'll need to be able to use a call-clobbered
2430 register for the address of the target function. Make sure that all
2431 such registers are not used for passing parameters. */
2432 if (!decl && !TARGET_64BIT)
2433 {
2434 tree type;
2435
2436 /* We're looking at the CALL_EXPR, we need the type of the function. */
2437 type = TREE_OPERAND (exp, 0); /* pointer expression */
2438 type = TREE_TYPE (type); /* pointer type */
2439 type = TREE_TYPE (type); /* function type */
2440
2441 if (ix86_function_regparm (type, NULL) >= 3)
2442 {
2443 /* ??? Need to count the actual number of registers to be used,
2444 not the possible number of registers. Fix later. */
2445 return false;
2446 }
2447 }
2448
2449#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2450 /* Dllimport'd functions are also called indirectly. */
2451 if (decl && DECL_DLLIMPORT_P (decl)
2452 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2453 return false;
2454#endif
2455
2456  /* If we force-aligned the stack, then sibcalling would unalign the
2457 stack, which may break the called function. */
2458 if (cfun->machine->force_align_arg_pointer)
2459 return false;
2460
2461 /* Otherwise okay. That also includes certain types of indirect calls. */
2462 return true;
2463}
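/* Illustrative case of the return-location check above (annotation): on
   ia32, a function returning "int" may not sibcall a function returning
   "double", because the callee leaves its result on the x87 register
   stack and the stack adjustment the caller would normally perform is
   skipped in a tail call.  */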
2464
2465/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2466 calling convention attributes;
2467 arguments as in struct attribute_spec.handler. */
2468
2469static tree
2470ix86_handle_cconv_attribute (tree *node, tree name,
2471 tree args,
2472 int flags ATTRIBUTE_UNUSED,
2473 bool *no_add_attrs)
2474{
2475 if (TREE_CODE (*node) != FUNCTION_TYPE
2476 && TREE_CODE (*node) != METHOD_TYPE
2477 && TREE_CODE (*node) != FIELD_DECL
2478 && TREE_CODE (*node) != TYPE_DECL)
2479 {
2480 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2481 IDENTIFIER_POINTER (name));
2482 *no_add_attrs = true;
2483 return NULL_TREE;
2484 }
2485
2486 /* Can combine regparm with all attributes but fastcall. */
2487 if (is_attribute_p ("regparm", name))
2488 {
2489 tree cst;
2490
2491 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2492 {
2493 error ("fastcall and regparm attributes are not compatible");
2494 }
2495
2496 cst = TREE_VALUE (args);
2497 if (TREE_CODE (cst) != INTEGER_CST)
2498 {
2499 warning (OPT_Wattributes,
2500 "%qs attribute requires an integer constant argument",
2501 IDENTIFIER_POINTER (name));
2502 *no_add_attrs = true;
2503 }
2504 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2505 {
2506 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2507 IDENTIFIER_POINTER (name), REGPARM_MAX);
2508 *no_add_attrs = true;
2509 }
2510
2511 if (!TARGET_64BIT
2512 && lookup_attribute (ix86_force_align_arg_pointer_string,
2513 TYPE_ATTRIBUTES (*node))
2514 && compare_tree_int (cst, REGPARM_MAX-1))
2515 {
2516 error ("%s functions limited to %d register parameters",
2517 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2518 }
2519
2520 return NULL_TREE;
2521 }
2522
2523 if (TARGET_64BIT)
2524 {
2525 warning (OPT_Wattributes, "%qs attribute ignored",
2526 IDENTIFIER_POINTER (name));
2527 *no_add_attrs = true;
2528 return NULL_TREE;
2529 }
2530
2531 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2532 if (is_attribute_p ("fastcall", name))
2533 {
2534 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2535 {
2536 error ("fastcall and cdecl attributes are not compatible");
2537 }
2538 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2539 {
2540 error ("fastcall and stdcall attributes are not compatible");
2541 }
2542 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2543 {
2544 error ("fastcall and regparm attributes are not compatible");
2545 }
2546 }
2547
2548 /* Can combine stdcall with fastcall (redundant), regparm and
2549 sseregparm. */
2550 else if (is_attribute_p ("stdcall", name))
2551 {
2552 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2553 {
2554 error ("stdcall and cdecl attributes are not compatible");
2555 }
2556 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2557 {
2558 error ("stdcall and fastcall attributes are not compatible");
2559 }
2560 }
2561
2562 /* Can combine cdecl with regparm and sseregparm. */
2563 else if (is_attribute_p ("cdecl", name))
2564 {
2565 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2566 {
2567 error ("stdcall and cdecl attributes are not compatible");
2568 }
2569 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2570 {
2571 error ("fastcall and cdecl attributes are not compatible");
2572 }
2573 }
2574
2575 /* Can combine sseregparm with all attributes. */
2576
2577 return NULL_TREE;
2578}
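/* Examples of the combinations checked above (annotation):

     void __attribute__((stdcall, regparm(2)))  ok   (int, int);   accepted
     void __attribute__((fastcall, regparm(2))) bad  (int, int);   error
     void __attribute__((fastcall, stdcall))    bad2 (void);       error

   On TARGET_64BIT the calling-convention attributes other than regparm
   are ignored with a warning, since the 64-bit ABI has a single calling
   convention.  */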
2579
2580/* Return 0 if the attributes for two types are incompatible, 1 if they
2581 are compatible, and 2 if they are nearly compatible (which causes a
2582 warning to be generated). */
2583
2584static int
2585ix86_comp_type_attributes (tree type1, tree type2)
2586{
2587 /* Check for mismatch of non-default calling convention. */
2588 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2589
2590 if (TREE_CODE (type1) != FUNCTION_TYPE)
2591 return 1;
2592
2593 /* Check for mismatched fastcall/regparm types. */
2594 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2595 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2596 || (ix86_function_regparm (type1, NULL)
2597 != ix86_function_regparm (type2, NULL)))
2598 return 0;
2599
2600 /* Check for mismatched sseregparm types. */
2601 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2602 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2603 return 0;
2604
2605 /* Check for mismatched return types (cdecl vs stdcall). */
2606 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2607 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2608 return 0;
2609
2610 return 1;
2611}
2612
2613/* Return the regparm value for a function with the indicated TYPE and DECL.
2614 DECL may be NULL when calling function indirectly
2615 or considering a libcall. */
2616
2617static int
2618ix86_function_regparm (tree type, tree decl)
2619{
2620 tree attr;
2621 int regparm = ix86_regparm;
2622 bool user_convention = false;
2623
2624 if (!TARGET_64BIT)
2625 {
2626 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2627 if (attr)
2628 {
2629 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2630 user_convention = true;
2631 }
2632
2633 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2634 {
2635 regparm = 2;
2636 user_convention = true;
2637 }
2638
2639 /* Use register calling convention for local functions when possible. */
2640 if (!TARGET_64BIT && !user_convention && decl
2641 && flag_unit_at_a_time && !profile_flag)
2642 {
2643 struct cgraph_local_info *i = cgraph_local_info (decl);
2644 if (i && i->local)
2645 {
2646 int local_regparm, globals = 0, regno;
2647
2648 /* Make sure no regparm register is taken by a global register
2649 variable. */
2650 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2651 if (global_regs[local_regparm])
2652 break;
2653	    /* We can't use regparm(3) for nested functions as these use
2654	       the static chain pointer in the third argument.  */
2655 if (local_regparm == 3
2656 && decl_function_context (decl)
2657 && !DECL_NO_STATIC_CHAIN (decl))
2658 local_regparm = 2;
2659	    /* If the function realigns its stack pointer, the
2660 prologue will clobber %ecx. If we've already
2661 generated code for the callee, the callee
2662 DECL_STRUCT_FUNCTION is gone, so we fall back to
2663 scanning the attributes for the self-realigning
2664 property. */
2665 if ((DECL_STRUCT_FUNCTION (decl)
2666 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2667 || (!DECL_STRUCT_FUNCTION (decl)
2668 && lookup_attribute (ix86_force_align_arg_pointer_string,
2669 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2670 local_regparm = 2;
2671	    /* Each global register variable increases register pressure,
2672	       so the more global reg vars there are, the less useful the
2673	       regparm optimization is, unless requested by the user explicitly.  */
2674 for (regno = 0; regno < 6; regno++)
2675 if (global_regs[regno])
2676 globals++;
2677 local_regparm
2678 = globals < local_regparm ? local_regparm - globals : 0;
2679
2680 if (local_regparm > regparm)
2681 regparm = local_regparm;
2682 }
2683 }
2684 }
2685 return regparm;
2686}
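/* Rough effect of the local-function heuristic above (annotation): with
   -funit-at-a-time and no profiling, a function that cgraph knows is
   local to the unit can have up to three integer arguments promoted into
   %eax, %edx and %ecx.  The count drops to two when %ecx is needed for
   the static chain of a nested function or is clobbered by a
   stack-realigning prologue, and global register variables reduce it
   further.  */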
2687
2688/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2689 DFmode (2) arguments in SSE registers for a function with the
2690 indicated TYPE and DECL. DECL may be NULL when calling function
2691 indirectly or considering a libcall. Otherwise return 0. */
2692
2693static int
2694ix86_function_sseregparm (tree type, tree decl)
2695{
2696 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2697 by the sseregparm attribute. */
2698 if (TARGET_SSEREGPARM
2699 || (type
2700 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2701 {
2702 if (!TARGET_SSE)
2703 {
2704 if (decl)
2705 error ("Calling %qD with attribute sseregparm without "
2706 "SSE/SSE2 enabled", decl);
2707 else
2708 error ("Calling %qT with attribute sseregparm without "
2709 "SSE/SSE2 enabled", type);
2710 return 0;
2711 }
2712
2713 return 2;
2714 }
2715
2716 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2717 (and DFmode for SSE2) arguments in SSE registers,
2718 even for 32-bit targets. */
2719 if (!TARGET_64BIT && decl
2720 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2721 {
2722 struct cgraph_local_info *i = cgraph_local_info (decl);
2723 if (i && i->local)
2724 return TARGET_SSE2 ? 2 : 1;
2725 }
2726
2727 return 0;
2728}
2729
2730/* Return true if EAX is live at the start of the function. Used by
2731 ix86_expand_prologue to determine if we need special help before
2732 calling allocate_stack_worker. */
2733
2734static bool
2735ix86_eax_live_at_start_p (void)
2736{
2737 /* Cheat. Don't bother working forward from ix86_function_regparm
2738 to the function type to whether an actual argument is located in
2739 eax. Instead just look at cfg info, which is still close enough
2740 to correct at this point. This gives false positives for broken
2741 functions that might use uninitialized data that happens to be
2742 allocated in eax, but who cares? */
2743 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2744}
2745
2746/* Value is the number of bytes of arguments automatically
2747 popped when returning from a subroutine call.
2748 FUNDECL is the declaration node of the function (as a tree),
2749 FUNTYPE is the data type of the function (as a tree),
2750 or for a library call it is an identifier node for the subroutine name.
2751 SIZE is the number of bytes of arguments passed on the stack.
2752
2753 On the 80386, the RTD insn may be used to pop them if the number
2754 of args is fixed, but if the number is variable then the caller
2755 must pop them all. RTD can't be used for library calls now
2756 because the library is compiled with the Unix compiler.
2757 Use of RTD is a selectable option, since it is incompatible with
2758 standard Unix calling sequences. If the option is not selected,
2759 the caller must always pop the args.
2760
2761 The attribute stdcall is equivalent to RTD on a per module basis. */
2762
2763int
2764ix86_return_pops_args (tree fundecl, tree funtype, int size)
2765{
2766 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2767
2768 /* Cdecl functions override -mrtd, and never pop the stack. */
2769 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2770
2771 /* Stdcall and fastcall functions will pop the stack if not
2772 variable args. */
2773 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2774 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2775 rtd = 1;
2776
2777 if (rtd
2778 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2779 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2780 == void_type_node)))
2781 return size;
2782 }
2783
2784 /* Lose any fake structure return argument if it is passed on the stack. */
2785 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2786 && !TARGET_64BIT
2787 && !KEEP_AGGREGATE_RETURN_POINTER)
2788 {
2789 int nregs = ix86_function_regparm (funtype, fundecl);
2790
2791 if (!nregs)
2792 return GET_MODE_SIZE (Pmode);
2793 }
2794
2795 return 0;
2796}
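/* Examples of the rules above (annotation, not in the original sources):

     int __attribute__((stdcall)) f (int a, int b);   f pops 8 bytes (ret $8)
     int __attribute__((stdcall)) g (int a, ...);     varargs: caller pops
     int __attribute__((cdecl))   h (int a, int b);   caller pops, even with -mrtd

   In addition, an ia32 callee returning a large aggregate pops the hidden
   return-pointer word itself unless it takes register arguments.  */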
2797
2798/* Argument support functions. */
2799
2800/* Return true when register may be used to pass function parameters. */
2801bool
2802ix86_function_arg_regno_p (int regno)
2803{
2804 int i;
2805 if (!TARGET_64BIT)
2806 {
2807 if (TARGET_MACHO)
2808 return (regno < REGPARM_MAX
2809 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
2810 else
2811 return (regno < REGPARM_MAX
2812 || (TARGET_MMX && MMX_REGNO_P (regno)
2813 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2814 || (TARGET_SSE && SSE_REGNO_P (regno)
2815 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2816 }
2817
2818 if (TARGET_MACHO)
2819 {
2820 if (SSE_REGNO_P (regno) && TARGET_SSE)
2821 return true;
2822 }
2823 else
2824 {
2825 if (TARGET_SSE && SSE_REGNO_P (regno)
2826 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2827 return true;
2828 }
2829  /* RAX is used as a hidden argument to varargs functions.  */
2830 if (!regno)
2831 return true;
2832 for (i = 0; i < REGPARM_MAX; i++)
2833 if (regno == x86_64_int_parameter_registers[i])
2834 return true;
2835 return false;
2836}
2837
2838/* Return true if we do not know how to pass TYPE solely in registers.  */
2839
2840static bool
2841ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2842{
2843 if (must_pass_in_stack_var_size_or_pad (mode, type))
2844 return true;
2845
2846 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2847 The layout_type routine is crafty and tries to trick us into passing
2848 currently unsupported vector types on the stack by using TImode. */
2849 return (!TARGET_64BIT && mode == TImode
2850 && type && TREE_CODE (type) != VECTOR_TYPE);
2851}
2852
2853/* Initialize a variable CUM of type CUMULATIVE_ARGS
2854 for a call to a function whose data type is FNTYPE.
2855 For a library call, FNTYPE is 0. */
2856
2857void
2858init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2859 tree fntype, /* tree ptr for function decl */
2860 rtx libname, /* SYMBOL_REF of library name or 0 */
2861 tree fndecl)
2862{
2863 static CUMULATIVE_ARGS zero_cum;
2864 tree param, next_param;
2865
2866 if (TARGET_DEBUG_ARG)
2867 {
2868 fprintf (stderr, "\ninit_cumulative_args (");
2869 if (fntype)
2870 fprintf (stderr, "fntype code = %s, ret code = %s",
2871 tree_code_name[(int) TREE_CODE (fntype)],
2872 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2873 else
2874 fprintf (stderr, "no fntype");
2875
2876 if (libname)
2877 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2878 }
2879
2880 *cum = zero_cum;
2881
2882 /* Set up the number of registers to use for passing arguments. */
2883 cum->nregs = ix86_regparm;
2884 if (TARGET_SSE)
2885 cum->sse_nregs = SSE_REGPARM_MAX;
2886 if (TARGET_MMX)
2887 cum->mmx_nregs = MMX_REGPARM_MAX;
2888 cum->warn_sse = true;
2889 cum->warn_mmx = true;
2890 cum->maybe_vaarg = false;
2891
2892 /* Use ecx and edx registers if function has fastcall attribute,
2893 else look for regparm information. */
2894 if (fntype && !TARGET_64BIT)
2895 {
2896 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2897 {
2898 cum->nregs = 2;
2899 cum->fastcall = 1;
2900 }
2901 else
2902 cum->nregs = ix86_function_regparm (fntype, fndecl);
2903 }
2904
2905 /* Set up the number of SSE registers used for passing SFmode
2906 and DFmode arguments. Warn for mismatching ABI. */
2907 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2908
2909 /* Determine if this function has variable arguments. This is
2910     indicated by the last argument being 'void_type_node' if there
2911 are no variable arguments. If there are variable arguments, then
2912 we won't pass anything in registers in 32-bit mode. */
2913
2914 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2915 {
2916 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2917 param != 0; param = next_param)
2918 {
2919 next_param = TREE_CHAIN (param);
2920 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2921 {
2922 if (!TARGET_64BIT)
2923 {
2924 cum->nregs = 0;
2925 cum->sse_nregs = 0;
2926 cum->mmx_nregs = 0;
2927 cum->warn_sse = 0;
2928 cum->warn_mmx = 0;
2929 cum->fastcall = 0;
2930 cum->float_in_sse = 0;
2931 }
2932 cum->maybe_vaarg = true;
2933 }
2934 }
2935 }
2936 if ((!fntype && !libname)
2937 || (fntype && !TYPE_ARG_TYPES (fntype)))
2938 cum->maybe_vaarg = true;
2939
2940 if (TARGET_DEBUG_ARG)
2941 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2942
2943 return;
2944}
2945
2946/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2947 But in the case of vector types, it is some vector mode.
2948
2949 When we have only some of our vector isa extensions enabled, then there
2950 are some modes for which vector_mode_supported_p is false. For these
2951 modes, the generic vector support in gcc will choose some non-vector mode
2952 in order to implement the type. By computing the natural mode, we'll
2953 select the proper ABI location for the operand and not depend on whatever
2954 the middle-end decides to do with these vector types. */
2955
2956static enum machine_mode
2957type_natural_mode (tree type)
2958{
2959 enum machine_mode mode = TYPE_MODE (type);
2960
2961 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2962 {
2963 HOST_WIDE_INT size = int_size_in_bytes (type);
2964 if ((size == 8 || size == 16)
2965 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2966 && TYPE_VECTOR_SUBPARTS (type) > 1)
2967 {
2968 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2969
2970 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2971 mode = MIN_MODE_VECTOR_FLOAT;
2972 else
2973 mode = MIN_MODE_VECTOR_INT;
2974
2975 /* Get the mode which has this inner mode and number of units. */
2976 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2977 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2978 && GET_MODE_INNER (mode) == innermode)
2979 return mode;
2980
2981 gcc_unreachable ();
2982 }
2983 }
2984
2985 return mode;
2986}
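/* Illustrative example (annotation): for

     typedef int v2si __attribute__ ((vector_size (8)));

   TYPE_MODE may fall back to an integer mode such as DImode when MMX is
   disabled, but the loop above still finds V2SImode (two SImode units,
   8 bytes), so the argument-passing code always sees the same "natural"
   vector mode regardless of which vector ISA extensions are enabled.  */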
2987
2988/* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2989 this may not agree with the mode that the type system has chosen for the
2990 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2991 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2992
2993static rtx
2994gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2995 unsigned int regno)
2996{
2997 rtx tmp;
2998
2999 if (orig_mode != BLKmode)
3000 tmp = gen_rtx_REG (orig_mode, regno);
3001 else
3002 {
3003 tmp = gen_rtx_REG (mode, regno);
3004 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3005 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3006 }
3007
3008 return tmp;
3009}
3010
3011/* x86-64 register passing implementation.  See the x86-64 ABI for details.
3012   The goal of this code is to classify each 8 bytes of an incoming argument
3013   by register class and assign registers accordingly.  */
3014
3015/* Return the union class of CLASS1 and CLASS2.
3016 See the x86-64 PS ABI for details. */
3017
3018static enum x86_64_reg_class
3019merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3020{
3021 /* Rule #1: If both classes are equal, this is the resulting class. */
3022 if (class1 == class2)
3023 return class1;
3024
3025 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3026 the other class. */
3027 if (class1 == X86_64_NO_CLASS)
3028 return class2;
3029 if (class2 == X86_64_NO_CLASS)
3030 return class1;
3031
3032 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3033 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3034 return X86_64_MEMORY_CLASS;
3035
3036 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3037 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3038 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3039 return X86_64_INTEGERSI_CLASS;
3040 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3041 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3042 return X86_64_INTEGER_CLASS;
3043
3044 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3045 MEMORY is used. */
3046 if (class1 == X86_64_X87_CLASS
3047 || class1 == X86_64_X87UP_CLASS
3048 || class1 == X86_64_COMPLEX_X87_CLASS
3049 || class2 == X86_64_X87_CLASS
3050 || class2 == X86_64_X87UP_CLASS
3051 || class2 == X86_64_COMPLEX_X87_CLASS)
3052 return X86_64_MEMORY_CLASS;
3053
3054 /* Rule #6: Otherwise class SSE is used. */
3055 return X86_64_SSE_CLASS;
3056}
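/* Small worked example of the merge rules above (annotation): merging
   X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS yields
   X86_64_INTEGERSI_CLASS (rule #4), while merging X86_64_X87_CLASS with
   an SSE class yields X86_64_MEMORY_CLASS (rule #5), forcing the whole
   argument onto the stack.  */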
3057
3058/* Classify the argument of type TYPE and mode MODE.
3059 CLASSES will be filled by the register class used to pass each word
3060 of the operand. The number of words is returned. In case the parameter
3061 should be passed in memory, 0 is returned. As a special case for zero
3062 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3063
3064   BIT_OFFSET is used internally for handling records and specifies the
3065   offset in bits modulo 256 to avoid overflow cases.
3066
3067 See the x86-64 PS ABI for details.
3068*/
3069
3070static int
3071classify_argument (enum machine_mode mode, tree type,
3072 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3073{
3074 HOST_WIDE_INT bytes =
3075 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3076 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3077
3078 /* Variable sized entities are always passed/returned in memory. */
3079 if (bytes < 0)
3080 return 0;
3081
3082 if (mode != VOIDmode
3083 && targetm.calls.must_pass_in_stack (mode, type))
3084 return 0;
3085
3086 if (type && AGGREGATE_TYPE_P (type))
3087 {
3088 int i;
3089 tree field;
3090 enum x86_64_reg_class subclasses[MAX_CLASSES];
3091
3092 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3093 if (bytes > 16)
3094 return 0;
3095
3096 for (i = 0; i < words; i++)
3097 classes[i] = X86_64_NO_CLASS;
3098
3099      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
3100	 signal the memory class, so handle it as a special case.  */
3101 if (!words)
3102 {
3103 classes[0] = X86_64_NO_CLASS;
3104 return 1;
3105 }
3106
3107 /* Classify each field of record and merge classes. */
3108 switch (TREE_CODE (type))
3109 {
3110 case RECORD_TYPE:
3111 /* For classes first merge in the field of the subclasses. */
3112 if (TYPE_BINFO (type))
3113 {
3114 tree binfo, base_binfo;
3115 int basenum;
3116
3117 for (binfo = TYPE_BINFO (type), basenum = 0;
3118 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
3119 {
3120 int num;
3121 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
3122 tree type = BINFO_TYPE (base_binfo);
3123
3124 num = classify_argument (TYPE_MODE (type),
3125 type, subclasses,
3126 (offset + bit_offset) % 256);
3127 if (!num)
3128 return 0;
3129 for (i = 0; i < num; i++)
3130 {
3131 int pos = (offset + (bit_offset % 64)) / 8 / 8;
3132 classes[i + pos] =
3133 merge_classes (subclasses[i], classes[i + pos]);
3134 }
3135 }
3136 }
3137 /* And now merge the fields of structure. */
3138 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3139 {
3140 if (TREE_CODE (field) == FIELD_DECL)
3141 {
3142 int num;
3143
3144 if (TREE_TYPE (field) == error_mark_node)
3145 continue;
3146
3147 /* Bitfields are always classified as integer. Handle them
3148 early, since later code would consider them to be
3149 misaligned integers. */
3150 if (DECL_BIT_FIELD (field))
3151 {
3152 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3153 i < ((int_bit_position (field) + (bit_offset % 64))
3154 + tree_low_cst (DECL_SIZE (field), 0)
3155 + 63) / 8 / 8; i++)
3156 classes[i] =
3157 merge_classes (X86_64_INTEGER_CLASS,
3158 classes[i]);
3159 }
3160 else
3161 {
3162 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3163 TREE_TYPE (field), subclasses,
3164 (int_bit_position (field)
3165 + bit_offset) % 256);
3166 if (!num)
3167 return 0;
3168 for (i = 0; i < num; i++)
3169 {
3170 int pos =
3171 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3172 classes[i + pos] =
3173 merge_classes (subclasses[i], classes[i + pos]);
3174 }
3175 }
3176 }
3177 }
3178 break;
3179
3180 case ARRAY_TYPE:
3181 /* Arrays are handled as small records. */
3182 {
3183 int num;
3184 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3185 TREE_TYPE (type), subclasses, bit_offset);
3186 if (!num)
3187 return 0;
3188
3189 /* The partial classes are now full classes. */
3190 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3191 subclasses[0] = X86_64_SSE_CLASS;
3192 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3193 subclasses[0] = X86_64_INTEGER_CLASS;
3194
3195 for (i = 0; i < words; i++)
3196 classes[i] = subclasses[i % num];
3197
3198 break;
3199 }
3200 case UNION_TYPE:
3201 case QUAL_UNION_TYPE:
3202	/* Unions are similar to RECORD_TYPE, but the offset is always 0.  */
3204
3205 /* Unions are not derived. */
3206 gcc_assert (!TYPE_BINFO (type)
3207 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3208 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3209 {
3210 if (TREE_CODE (field) == FIELD_DECL)
3211 {
3212 int num;
3213
3214 if (TREE_TYPE (field) == error_mark_node)
3215 continue;
3216
3217 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3218 TREE_TYPE (field), subclasses,
3219 bit_offset);
3220 if (!num)
3221 return 0;
3222 for (i = 0; i < num; i++)
3223 classes[i] = merge_classes (subclasses[i], classes[i]);
3224 }
3225 }
3226 break;
3227
3228 default:
3229 gcc_unreachable ();
3230 }
3231
3232 /* Final merger cleanup. */
3233 for (i = 0; i < words; i++)
3234 {
3235 /* If one class is MEMORY, everything should be passed in
3236 memory. */
3237 if (classes[i] == X86_64_MEMORY_CLASS)
3238 return 0;
3239
3240 /* The X86_64_SSEUP_CLASS should be always preceded by
3241 X86_64_SSE_CLASS. */
3242 if (classes[i] == X86_64_SSEUP_CLASS
3243 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3244 classes[i] = X86_64_SSE_CLASS;
3245
3246 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3247 if (classes[i] == X86_64_X87UP_CLASS
3248 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3249 classes[i] = X86_64_SSE_CLASS;
3250 }
3251 return words;
3252 }
3253
3254  /* Compute the alignment needed.  We align all types to their natural
3255     boundaries, with the exception of XFmode, which is aligned to 128 bits.  */
3256 if (mode != VOIDmode && mode != BLKmode)
3257 {
3258 int mode_alignment = GET_MODE_BITSIZE (mode);
3259
3260 if (mode == XFmode)
3261 mode_alignment = 128;
3262 else if (mode == XCmode)
3263 mode_alignment = 256;
3264 if (COMPLEX_MODE_P (mode))
3265 mode_alignment /= 2;
3266 /* Misaligned fields are always returned in memory. */
3267 if (bit_offset % mode_alignment)
3268 return 0;
3269 }
3270
3271  /* For V1xx modes, just use the base mode.  */
3272 if (VECTOR_MODE_P (mode)
3273 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3274 mode = GET_MODE_INNER (mode);
3275
3276 /* Classification of atomic types. */
3277 switch (mode)
3278 {
3279 case SDmode:
3280 case DDmode:
3281 classes[0] = X86_64_SSE_CLASS;
3282 return 1;
3283 case TDmode:
3284 classes[0] = X86_64_SSE_CLASS;
3285 classes[1] = X86_64_SSEUP_CLASS;
3286 return 2;
3287 case DImode:
3288 case SImode:
3289 case HImode:
3290 case QImode:
3291 case CSImode:
3292 case CHImode:
3293 case CQImode:
3294 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3295 classes[0] = X86_64_INTEGERSI_CLASS;
3296 else
3297 classes[0] = X86_64_INTEGER_CLASS;
3298 return 1;
3299 case CDImode:
3300 case TImode:
3301 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3302 return 2;
3303 case CTImode:
3304 return 0;
3305 case SFmode:
3306 if (!(bit_offset % 64))
3307 classes[0] = X86_64_SSESF_CLASS;
3308 else
3309 classes[0] = X86_64_SSE_CLASS;
3310 return 1;
3311 case DFmode:
3312 classes[0] = X86_64_SSEDF_CLASS;
3313 return 1;
3314 case XFmode:
3315 classes[0] = X86_64_X87_CLASS;
3316 classes[1] = X86_64_X87UP_CLASS;
3317 return 2;
3318 case TFmode:
3319 classes[0] = X86_64_SSE_CLASS;
3320 classes[1] = X86_64_SSEUP_CLASS;
3321 return 2;
3322 case SCmode:
3323 classes[0] = X86_64_SSE_CLASS;
3324 return 1;
3325 case DCmode:
3326 classes[0] = X86_64_SSEDF_CLASS;
3327 classes[1] = X86_64_SSEDF_CLASS;
3328 return 2;
3329 case XCmode:
3330 classes[0] = X86_64_COMPLEX_X87_CLASS;
3331 return 1;
3332 case TCmode:
3333	      /* This mode is larger than 16 bytes.  */
3334 return 0;
3335 case V4SFmode:
3336 case V4SImode:
3337 case V16QImode:
3338 case V8HImode:
3339 case V2DFmode:
3340 case V2DImode:
3341 classes[0] = X86_64_SSE_CLASS;
3342 classes[1] = X86_64_SSEUP_CLASS;
3343 return 2;
3344 case V2SFmode:
3345 case V2SImode:
3346 case V4HImode:
3347 case V8QImode:
3348 classes[0] = X86_64_SSE_CLASS;
3349 return 1;
3350 case BLKmode:
3351 case VOIDmode:
3352 return 0;
3353 default:
3354 gcc_assert (VECTOR_MODE_P (mode));
3355
3356 if (bytes > 16)
3357 return 0;
3358
3359 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3360
3361 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3362 classes[0] = X86_64_INTEGERSI_CLASS;
3363 else
3364 classes[0] = X86_64_INTEGER_CLASS;
3365 classes[1] = X86_64_INTEGER_CLASS;
3366 return 1 + (bytes > 8);
3367 }
3368}
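/* Worked example (annotation, not from the original sources): on x86-64

     struct s { double d; long l; };

   is 16 bytes, so two eightbytes are classified: the first (the double)
   becomes X86_64_SSEDF_CLASS and the second (the long) becomes
   X86_64_INTEGER_CLASS, and construct_container below builds a PARALLEL
   passing the struct in one SSE and one general-purpose register.  A
   struct larger than 16 bytes returns 0 (memory) here; a struct holding
   a "long double" classifies as X87/X87UP but is rejected for arguments
   by examine_argument below, so it also ends up on the stack.  */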
3369
3370/* Examine the argument and set the number of registers required in each
3371   class.  Return 0 iff the parameter should be passed in memory.  */
3372static int
3373examine_argument (enum machine_mode mode, tree type, int in_return,
3374 int *int_nregs, int *sse_nregs)
3375{
3376 enum x86_64_reg_class class[MAX_CLASSES];
3377 int n = classify_argument (mode, type, class, 0);
3378
3379 *int_nregs = 0;
3380 *sse_nregs = 0;
3381 if (!n)
3382 return 0;
3383 for (n--; n >= 0; n--)
3384 switch (class[n])
3385 {
3386 case X86_64_INTEGER_CLASS:
3387 case X86_64_INTEGERSI_CLASS:
3388 (*int_nregs)++;
3389 break;
3390 case X86_64_SSE_CLASS:
3391 case X86_64_SSESF_CLASS:
3392 case X86_64_SSEDF_CLASS:
3393 (*sse_nregs)++;
3394 break;
3395 case X86_64_NO_CLASS:
3396 case X86_64_SSEUP_CLASS:
3397 break;
3398 case X86_64_X87_CLASS:
3399 case X86_64_X87UP_CLASS:
3400 if (!in_return)
3401 return 0;
3402 break;
3403 case X86_64_COMPLEX_X87_CLASS:
3404 return in_return ? 2 : 0;
3405 case X86_64_MEMORY_CLASS:
3406 gcc_unreachable ();
3407 }
3408 return 1;
3409}
3410
3411/* Construct container for the argument used by GCC interface. See
3412 FUNCTION_ARG for the detailed description. */
3413
3414static rtx
3415construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3416 tree type, int in_return, int nintregs, int nsseregs,
3417 const int *intreg, int sse_regno)
3418{
3419 /* The following variables hold the static issued_error state. */
3420 static bool issued_sse_arg_error;
3421 static bool issued_sse_ret_error;
3422 static bool issued_x87_ret_error;
3423
3424 enum machine_mode tmpmode;
3425 int bytes =
3426 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3427 enum x86_64_reg_class class[MAX_CLASSES];
3428 int n;
3429 int i;
3430 int nexps = 0;
3431 int needed_sseregs, needed_intregs;
3432 rtx exp[MAX_CLASSES];
3433 rtx ret;
3434
3435 n = classify_argument (mode, type, class, 0);
3436 if (TARGET_DEBUG_ARG)
3437 {
3438 if (!n)
3439 fprintf (stderr, "Memory class\n");
3440 else
3441 {
3442 fprintf (stderr, "Classes:");
3443 for (i = 0; i < n; i++)
3444 {
3445 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3446 }
3447 fprintf (stderr, "\n");
3448 }
3449 }
3450 if (!n)
3451 return NULL;
3452 if (!examine_argument (mode, type, in_return, &needed_intregs,
3453 &needed_sseregs))
3454 return NULL;
3455 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3456 return NULL;
3457
3458 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3459 some less clueful developer tries to use floating-point anyway. */
3460 if (needed_sseregs && !TARGET_SSE)
3461 {
3462 if (in_return)
3463 {
3464 if (!issued_sse_ret_error)
3465 {
3466 error ("SSE register return with SSE disabled");
3467 issued_sse_ret_error = true;
3468 }
3469 }
3470 else if (!issued_sse_arg_error)
3471 {
3472 error ("SSE register argument with SSE disabled");
3473 issued_sse_arg_error = true;
3474 }
3475 return NULL;
3476 }
3477
3478 /* Likewise, error if the ABI requires us to return values in the
3479 x87 registers and the user specified -mno-80387. */
3480 if (!TARGET_80387 && in_return)
3481 for (i = 0; i < n; i++)
3482 if (class[i] == X86_64_X87_CLASS
3483 || class[i] == X86_64_X87UP_CLASS
3484 || class[i] == X86_64_COMPLEX_X87_CLASS)
3485 {
3486 if (!issued_x87_ret_error)
3487 {
3488 error ("x87 register return with x87 disabled");
3489 issued_x87_ret_error = true;
3490 }
3491 return NULL;
3492 }
3493
3494  /* First construct the simple cases.  Avoid SCmode, since we want to use a
3495     single register to pass this type.  */
3496 if (n == 1 && mode != SCmode)
3497 switch (class[0])
3498 {
3499 case X86_64_INTEGER_CLASS:
3500 case X86_64_INTEGERSI_CLASS:
3501 return gen_rtx_REG (mode, intreg[0]);
3502 case X86_64_SSE_CLASS:
3503 case X86_64_SSESF_CLASS:
3504 case X86_64_SSEDF_CLASS:
3505 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3506 case X86_64_X87_CLASS:
3507 case X86_64_COMPLEX_X87_CLASS:
3508 return gen_rtx_REG (mode, FIRST_STACK_REG);
3509 case X86_64_NO_CLASS:
3510 /* Zero sized array, struct or class. */
3511 return NULL;
3512 default:
3513 gcc_unreachable ();
3514 }
3515 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3516 && mode != BLKmode)
3517 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3518 if (n == 2
3519 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3520 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3521 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3522 && class[1] == X86_64_INTEGER_CLASS
3523 && (mode == CDImode || mode == TImode || mode == TFmode)
3524 && intreg[0] + 1 == intreg[1])
3525 return gen_rtx_REG (mode, intreg[0]);
3526
3527 /* Otherwise figure out the entries of the PARALLEL. */
3528 for (i = 0; i < n; i++)
3529 {
3530 switch (class[i])
3531 {
3532 case X86_64_NO_CLASS:
3533 break;
3534 case X86_64_INTEGER_CLASS:
3535 case X86_64_INTEGERSI_CLASS:
3536 /* Merge TImodes on aligned occasions here too. */
3537 if (i * 8 + 8 > bytes)
3538 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3539 else if (class[i] == X86_64_INTEGERSI_CLASS)
3540 tmpmode = SImode;
3541 else
3542 tmpmode = DImode;
3543	  /* We've requested 24 bytes for which we don't have a mode.  Use DImode.  */
3544 if (tmpmode == BLKmode)
3545 tmpmode = DImode;
3546 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3547 gen_rtx_REG (tmpmode, *intreg),
3548 GEN_INT (i*8));
3549 intreg++;
3550 break;
3551 case X86_64_SSESF_CLASS:
3552 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3553 gen_rtx_REG (SFmode,
3554 SSE_REGNO (sse_regno)),
3555 GEN_INT (i*8));
3556 sse_regno++;
3557 break;
3558 case X86_64_SSEDF_CLASS:
3559 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3560 gen_rtx_REG (DFmode,
3561 SSE_REGNO (sse_regno)),
3562 GEN_INT (i*8));
3563 sse_regno++;
3564 break;
3565 case X86_64_SSE_CLASS:
3566 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3567 tmpmode = TImode;
3568 else
3569 tmpmode = DImode;
3570 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3571 gen_rtx_REG (tmpmode,
3572 SSE_REGNO (sse_regno)),
3573 GEN_INT (i*8));
3574 if (tmpmode == TImode)
3575 i++;
3576 sse_regno++;
3577 break;
3578 default:
3579 gcc_unreachable ();
3580 }
3581 }
3582
3583 /* Empty aligned struct, union or class. */
3584 if (nexps == 0)
3585 return NULL;
3586
3587 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3588 for (i = 0; i < nexps; i++)
3589 XVECEXP (ret, 0, i) = exp [i];
3590 return ret;
3591}
3592
3593/* Update the data in CUM to advance over an argument
3594 of mode MODE and data type TYPE.
3595 (TYPE is null for libcalls where that information may not be available.) */
3596
3597void
3598function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3599 tree type, int named)
3600{
3601 int bytes =
3602 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3603 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3604
3605 if (type)
3606 mode = type_natural_mode (type);
3607
3608 if (TARGET_DEBUG_ARG)
3609 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3610 "mode=%s, named=%d)\n\n",
3611 words, cum->words, cum->nregs, cum->sse_nregs,
3612 GET_MODE_NAME (mode), named);
3613
3614 if (TARGET_64BIT)
3615 {
3616 int int_nregs, sse_nregs;
3617 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3618 cum->words += words;
3619 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3620 {
3621 cum->nregs -= int_nregs;
3622 cum->sse_nregs -= sse_nregs;
3623 cum->regno += int_nregs;
3624 cum->sse_regno += sse_nregs;
3625 }
3626 else
3627 cum->words += words;
3628 }
3629 else
3630 {
3631 switch (mode)
3632 {
3633 default:
3634 break;
3635
3636 case BLKmode:
3637 if (bytes < 0)
3638 break;
3639 /* FALLTHRU */
3640
3641 case DImode:
3642 case SImode:
3643 case HImode:
3644 case QImode:
3645 cum->words += words;
3646 cum->nregs -= words;
3647 cum->regno += words;
3648
3649 if (cum->nregs <= 0)
3650 {
3651 cum->nregs = 0;
3652 cum->regno = 0;
3653 }
3654 break;
3655
3656 case DFmode:
3657 if (cum->float_in_sse < 2)
3658 break;
3659 case SFmode:
3660 if (cum->float_in_sse < 1)
3661 break;
3662 /* FALLTHRU */
3663
3664 case TImode:
3665 case V16QImode:
3666 case V8HImode:
3667 case V4SImode:
3668 case V2DImode:
3669 case V4SFmode:
3670 case V2DFmode:
3671 if (!type || !AGGREGATE_TYPE_P (type))
3672 {
3673 cum->sse_words += words;
3674 cum->sse_nregs -= 1;
3675 cum->sse_regno += 1;
3676 if (cum->sse_nregs <= 0)
3677 {
3678 cum->sse_nregs = 0;
3679 cum->sse_regno = 0;
3680 }
3681 }
3682 break;
3683
3684 case V8QImode:
3685 case V4HImode:
3686 case V2SImode:
3687 case V2SFmode:
3688 if (!type || !AGGREGATE_TYPE_P (type))
3689 {
3690 cum->mmx_words += words;
3691 cum->mmx_nregs -= 1;
3692 cum->mmx_regno += 1;
3693 if (cum->mmx_nregs <= 0)
3694 {
3695 cum->mmx_nregs = 0;
3696 cum->mmx_regno = 0;
3697 }
3698 }
3699 break;
3700 }
3701 }
3702}
3703
3704/* Define where to put the arguments to a function.
3705 Value is zero to push the argument on the stack,
3706 or a hard register in which to store the argument.
3707
3708 MODE is the argument's machine mode.
3709 TYPE is the data type of the argument (as a tree).
3710 This is null for libcalls where that information may
3711 not be available.
3712 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3713 the preceding args and about the function being called.
3714 NAMED is nonzero if this argument is a named parameter
3715 (otherwise it is an extra parameter matching an ellipsis). */
3716
3717rtx
3718function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3719 tree type, int named)
3720{
3721 enum machine_mode mode = orig_mode;
3722 rtx ret = NULL_RTX;
3723 int bytes =
3724 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3725 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3726 static bool warnedsse, warnedmmx;
3727
3728 /* To simplify the code below, represent vector types with a vector mode
3729 even if MMX/SSE are not active. */
3730 if (type && TREE_CODE (type) == VECTOR_TYPE)
3731 mode = type_natural_mode (type);
3732
3733 /* Handle a hidden AL argument containing number of registers for varargs
3734 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3735 any AL settings. */
3736 if (mode == VOIDmode)
3737 {
3738 if (TARGET_64BIT)
3739 return GEN_INT (cum->maybe_vaarg
3740 ? (cum->sse_nregs < 0
3741 ? SSE_REGPARM_MAX
3742 : cum->sse_regno)
3743 : -1);
3744 else
3745 return constm1_rtx;
3746 }
3747 if (TARGET_64BIT)
3748 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3749 cum->sse_nregs,
3750 &x86_64_int_parameter_registers [cum->regno],
3751 cum->sse_regno);
3752 else
3753 switch (mode)
3754 {
3755 /* For now, pass fp/complex values on the stack. */
3756 default:
3757 break;
3758
3759 case BLKmode:
3760 if (bytes < 0)
3761 break;
3762 /* FALLTHRU */
3763 case DImode:
3764 case SImode:
3765 case HImode:
3766 case QImode:
3767 if (words <= cum->nregs)
3768 {
3769 int regno = cum->regno;
3770
3771 /* Fastcall allocates the first two DWORD (SImode) or
3772 smaller arguments to ECX and EDX. */
3773 if (cum->fastcall)
3774 {
3775 if (mode == BLKmode || mode == DImode)
3776 break;
3777
3778 /* ECX not EAX is the first allocated register. */
3779 if (regno == 0)
3780 regno = 2;
3781 }
3782 ret = gen_rtx_REG (mode, regno);
3783 }
3784 break;
3785 case DFmode:
3786 if (cum->float_in_sse < 2)
3787 break;
3788 case SFmode:
3789 if (cum->float_in_sse < 1)
3790 break;
3791 /* FALLTHRU */
3792 case TImode:
3793 case V16QImode:
3794 case V8HImode:
3795 case V4SImode:
3796 case V2DImode:
3797 case V4SFmode:
3798 case V2DFmode:
3799 if (!type || !AGGREGATE_TYPE_P (type))
3800 {
3801 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3802 {
3803 warnedsse = true;
3804 warning (0, "SSE vector argument without SSE enabled "
3805 "changes the ABI");
3806 }
3807 if (cum->sse_nregs)
3808 ret = gen_reg_or_parallel (mode, orig_mode,
3809 cum->sse_regno + FIRST_SSE_REG);
3810 }
3811 break;
3812 case V8QImode:
3813 case V4HImode:
3814 case V2SImode:
3815 case V2SFmode:
3816 if (!type || !AGGREGATE_TYPE_P (type))
3817 {
3818 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3819 {
3820 warnedmmx = true;
3821 warning (0, "MMX vector argument without MMX enabled "
3822 "changes the ABI");
3823 }
3824 if (cum->mmx_nregs)
3825 ret = gen_reg_or_parallel (mode, orig_mode,
3826 cum->mmx_regno + FIRST_MMX_REG);
3827 }
3828 break;
3829 }
3830
3831 if (TARGET_DEBUG_ARG)
3832 {
3833 fprintf (stderr,
3834 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3835 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3836
3837 if (ret)
3838 print_simple_rtl (stderr, ret);
3839 else
3840 fprintf (stderr, ", stack");
3841
3842 fprintf (stderr, " )\n");
3843 }
3844
3845 return ret;
3846}
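/* Illustrative 32-bit cases of the register selection above (annotation):

     void __attribute__((fastcall)) f (int a, int b, int c);
         a -> %ecx, b -> %edx, c -> stack (only SImode or smaller values
         go in registers for fastcall)

     void __attribute__((regparm(3))) g (int a, int b, int c);
         a -> %eax, b -> %edx, c -> %ecx

   For 64-bit targets the work is delegated to construct_container.  */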
3847
3848/* A C expression that indicates when an argument must be passed by
3849 reference. If nonzero for an argument, a copy of that argument is
3850 made in memory and a pointer to the argument is passed instead of
3851 the argument itself. The pointer is passed in whatever way is
3852 appropriate for passing a pointer to that type. */
3853
3854static bool
3855ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3856 enum machine_mode mode ATTRIBUTE_UNUSED,
3857 tree type, bool named ATTRIBUTE_UNUSED)
3858{
3859 if (!TARGET_64BIT)
3860 return 0;
3861
3862 if (type && int_size_in_bytes (type) == -1)
3863 {
3864 if (TARGET_DEBUG_ARG)
3865 fprintf (stderr, "function_arg_pass_by_reference\n");
3866 return 1;
3867 }
3868
3869 return 0;
3870}
3871
3872/* Return true when TYPE should be 128bit aligned for 32bit argument passing
3873 ABI. Only called if TARGET_SSE. */
3874static bool
3875contains_128bit_aligned_vector_p (tree type)
3876{
3877 enum machine_mode mode = TYPE_MODE (type);
3878 if (SSE_REG_MODE_P (mode)
3879 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3880 return true;
3881 if (TYPE_ALIGN (type) < 128)
3882 return false;
3883
3884 if (AGGREGATE_TYPE_P (type))
3885 {
3886 /* Walk the aggregates recursively. */
3887 switch (TREE_CODE (type))
3888 {
3889 case RECORD_TYPE:
3890 case UNION_TYPE:
3891 case QUAL_UNION_TYPE:
3892 {
3893 tree field;
3894
3895 if (TYPE_BINFO (type))
3896 {
3897 tree binfo, base_binfo;
3898 int i;
3899
3900 for (binfo = TYPE_BINFO (type), i = 0;
3901 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3902 if (contains_128bit_aligned_vector_p
3903 (BINFO_TYPE (base_binfo)))
3904 return true;
3905 }
3906 /* And now merge the fields of structure. */
3907 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3908 {
3909 if (TREE_CODE (field) == FIELD_DECL
3910 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3911 return true;
3912 }
3913 break;
3914 }
3915
3916 case ARRAY_TYPE:
3917	/* Just in case some languages pass arrays by value.  */
3918 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3919 return true;
3920 break;
3921
3922 default:
3923 gcc_unreachable ();
3924 }
3925 }
3926 return false;
3927}
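/* An illustrative example of the check above (a sketch, assuming a GCC
   vector type such as __m128 from <xmmintrin.h>): an argument of type

       struct s { int pad; __m128 v; };

   contains an SSE vector member, so contains_128bit_aligned_vector_p
   returns true for it and ix86_function_arg_boundary below gives the
   argument 128-bit alignment, while a plain struct of ints stays at the
   default 4-byte parameter alignment.  */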
3928
3929/* Gives the alignment boundary, in bits, of an argument with the
3930 specified mode and type. */
3931
3932int
3933ix86_function_arg_boundary (enum machine_mode mode, tree type)
3934{
3935 int align;
3936 if (type)
3937 align = TYPE_ALIGN (type);
3938 else
3939 align = GET_MODE_ALIGNMENT (mode);
3940 if (align < PARM_BOUNDARY)
3941 align = PARM_BOUNDARY;
3942 if (!TARGET_64BIT)
3943 {
3944 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3945 make an exception for SSE modes since these require 128bit
3946 alignment.
3947
3948 The handling here differs from field_alignment. ICC aligns MMX
3949 arguments to 4 byte boundaries, while structure fields are aligned
3950 to 8 byte boundaries. */
3951 if (!TARGET_SSE)
3952 align = PARM_BOUNDARY;
3953 else if (!type)
3954 {
3955 if (!SSE_REG_MODE_P (mode))
3956 align = PARM_BOUNDARY;
3957 }
3958 else
3959 {
3960 if (!contains_128bit_aligned_vector_p (type))
3961 align = PARM_BOUNDARY;
3962 }
3963 }
3964 if (align > 128)
3965 align = 128;
3966 return align;
3967}
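/* Illustrative results of the function above on 32-bit targets (a sketch,
   assuming the default i386 PARM_BOUNDARY of 32 bits): scalar and ordinary
   aggregate arguments all come out at 32 bits, while an argument whose type
   contains a 128-bit vector (e.g. __m128) comes out at 128 bits when SSE is
   enabled.  The result is always capped at 128 bits.  */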
3968
3969/* Return true if N is a possible register number of function value. */
3970bool
3971ix86_function_value_regno_p (int regno)
3972{
3973 if (TARGET_MACHO)
3974 {
3975 if (!TARGET_64BIT)
3976 {
3977 return ((regno) == 0
3978 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3979 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
3980 }
3981 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
3982 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
3983 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
3984 }
3985 else
3986 {
3987 if (regno == 0
3988 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3989 || (regno == FIRST_SSE_REG && TARGET_SSE))
3990 return true;
3991
3992 if (!TARGET_64BIT
3993 && (regno == FIRST_MMX_REG && TARGET_MMX))
3994 return true;
3995
3996 return false;
3997 }
3998}
3999
4000/* Define how to find the value returned by a function.
4001 VALTYPE is the data type of the value (as a tree).
4002 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4003 otherwise, FUNC is 0. */
4004rtx
4005ix86_function_value (tree valtype, tree fntype_or_decl,
4006 bool outgoing ATTRIBUTE_UNUSED)
4007{
4008 enum machine_mode natmode = type_natural_mode (valtype);
4009
4010 if (TARGET_64BIT)
4011 {
4012 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4013 1, REGPARM_MAX, SSE_REGPARM_MAX,
4014 x86_64_int_return_registers, 0);
4015      /* For zero-sized structures, construct_container returns NULL, but we
4016	 need to keep the rest of the compiler happy by returning a meaningful value.  */
4017 if (!ret)
4018 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4019 return ret;
4020 }
4021 else
4022 {
4023 tree fn = NULL_TREE, fntype;
4024 if (fntype_or_decl
4025 && DECL_P (fntype_or_decl))
4026 fn = fntype_or_decl;
4027 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4028 return gen_rtx_REG (TYPE_MODE (valtype),
4029 ix86_value_regno (natmode, fn, fntype));
4030 }
4031}
4032
4033/* Return true iff type is returned in memory. */
4034int
4035ix86_return_in_memory (tree type)
4036{
4037 int needed_intregs, needed_sseregs, size;
4038 enum machine_mode mode = type_natural_mode (type);
4039
4040 if (TARGET_64BIT)
4041 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4042
4043 if (mode == BLKmode)
4044 return 1;
4045
4046 size = int_size_in_bytes (type);
4047
4048 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4049 return 0;
4050
4051 if (VECTOR_MODE_P (mode) || mode == TImode)
4052 {
4053 /* User-created vectors small enough to fit in EAX. */
4054 if (size < 8)
4055 return 0;
4056
4057      /* MMX/3dNow values are returned in MM0,
4058	 except when MMX isn't available.  */
4059 if (size == 8)
4060 return (TARGET_MMX ? 0 : 1);
4061
4062 /* SSE values are returned in XMM0, except when it doesn't exist. */
4063 if (size == 16)
4064 return (TARGET_SSE ? 0 : 1);
4065 }
4066
4067 if (mode == XFmode)
4068 return 0;
4069
4070 if (mode == TDmode)
4071 return 1;
4072
4073 if (size > 12)
4074 return 1;
4075 return 0;
4076}
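/* Illustrative consequences of the checks above for the 32-bit ABI (a
   sketch, not an exhaustive list): a 16-byte struct of four ints is
   returned in memory via a hidden pointer; an 8-byte __m64 value is
   returned in %mm0 when MMX is enabled and in memory otherwise; a 16-byte
   __m128 value is returned in %xmm0 when SSE is enabled; and long double
   (XFmode) is returned in %st(0) even though it is larger than 12 bytes.  */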
4077
4078/* When returning SSE vector types, we have a choice of either
4079   (1) being ABI incompatible with a -march switch, or
4080 (2) generating an error.
4081 Given no good solution, I think the safest thing is one warning.
4082 The user won't be able to use -Werror, but....
4083
4084 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4085 called in response to actually generating a caller or callee that
4086 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4087 via aggregate_value_p for general type probing from tree-ssa. */
4088
4089static rtx
4090ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4091{
4092 static bool warnedsse, warnedmmx;
4093
4094 if (type)
4095 {
4096 /* Look at the return type of the function, not the function type. */
4097 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4098
4099 if (!TARGET_SSE && !warnedsse)
4100 {
4101 if (mode == TImode
4102 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4103 {
4104 warnedsse = true;
4105 warning (0, "SSE vector return without SSE enabled "
4106 "changes the ABI");
4107 }
4108 }
4109
4110 if (!TARGET_MMX && !warnedmmx)
4111 {
4112 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4113 {
4114 warnedmmx = true;
4115 warning (0, "MMX vector return without MMX enabled "
4116 "changes the ABI");
4117 }
4118 }
4119 }
4120
4121 return NULL;
4122}
4123
4124/* Define how to find the value returned by a library function
4125 assuming the value has mode MODE. */
4126rtx
4127ix86_libcall_value (enum machine_mode mode)
4128{
4129 if (TARGET_64BIT)
4130 {
4131 switch (mode)
4132 {
4133 case SFmode:
4134 case SCmode:
4135 case DFmode:
4136 case DCmode:
4137 case TFmode:
4138 case SDmode:
4139 case DDmode:
4140 case TDmode:
4141 return gen_rtx_REG (mode, FIRST_SSE_REG);
4142 case XFmode:
4143 case XCmode:
4144 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4145 case TCmode:
4146 return NULL;
4147 default:
4148 return gen_rtx_REG (mode, 0);
4149 }
4150 }
4151 else
4152 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4153}
4154
4155/* Given a mode, return the register to use for a return value. */
4156
4157static int
4158ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4159{
4160 gcc_assert (!TARGET_64BIT);
4161
4162 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4163 we normally prevent this case when mmx is not available. However
4164 some ABIs may require the result to be returned like DImode. */
4165 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4166 return TARGET_MMX ? FIRST_MMX_REG : 0;
4167
4168 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4169 we prevent this case when sse is not available. However some ABIs
4170 may require the result to be returned like integer TImode. */
4171 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4172 return TARGET_SSE ? FIRST_SSE_REG : 0;
4173
4174 /* Decimal floating point values can go in %eax, unlike other float modes. */
4175 if (DECIMAL_FLOAT_MODE_P (mode))
4176 return 0;
4177
4178 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4179 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4180 return 0;
4181
4182 /* Floating point return values in %st(0), except for local functions when
4183 SSE math is enabled or for functions with sseregparm attribute. */
4184 if ((func || fntype)
4185 && (mode == SFmode || mode == DFmode))
4186 {
4187 int sse_level = ix86_function_sseregparm (fntype, func);
4188 if ((sse_level >= 1 && mode == SFmode)
4189 || (sse_level == 2 && mode == DFmode))
4190 return FIRST_SSE_REG;
4191 }
4192
4193 return FIRST_FLOAT_REG;
4194}
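/* Summarizing the 32-bit return-register choice made above: integer and
   decimal-float values come back in %eax, 8-byte vectors in %mm0 and
   16-byte vectors in %xmm0 when the corresponding ISA is enabled, x87
   floats in %st(0), and SFmode/DFmode values in %xmm0 instead when,
   roughly, the callee uses SSE math or the sseregparm attribute.  */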
4195
4196/* Create the va_list data type. */
4197
4198static tree
4199ix86_build_builtin_va_list (void)
4200{
4201 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4202
4203 /* For i386 we use plain pointer to argument area. */
4204 if (!TARGET_64BIT)
4205 return build_pointer_type (char_type_node);
4206
4207 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4208 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4209
4210 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4211 unsigned_type_node);
4212 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4213 unsigned_type_node);
4214 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4215 ptr_type_node);
4216 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4217 ptr_type_node);
4218
4219 va_list_gpr_counter_field = f_gpr;
4220 va_list_fpr_counter_field = f_fpr;
4221
4222 DECL_FIELD_CONTEXT (f_gpr) = record;
4223 DECL_FIELD_CONTEXT (f_fpr) = record;
4224 DECL_FIELD_CONTEXT (f_ovf) = record;
4225 DECL_FIELD_CONTEXT (f_sav) = record;
4226
4227 TREE_CHAIN (record) = type_decl;
4228 TYPE_NAME (record) = type_decl;
4229 TYPE_FIELDS (record) = f_gpr;
4230 TREE_CHAIN (f_gpr) = f_fpr;
4231 TREE_CHAIN (f_fpr) = f_ovf;
4232 TREE_CHAIN (f_ovf) = f_sav;
4233
4234 layout_type (record);
4235
4236 /* The correct type is an array type of one element. */
4237 return build_array_type (record, build_index_type (size_zero_node));
4238}
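/* For reference, the record built above corresponds to the x86-64 psABI
   va_list (a sketch in C, assuming the standard field layout):

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __builtin_va_list[1];

   The one-element array type is what makes va_list decay to a pointer
   when it is passed on to functions such as vfprintf.  */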
4239
4240/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4241
4242static void
4243ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4244 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4245 int no_rtl)
4246{
4247 CUMULATIVE_ARGS next_cum;
4248 rtx save_area = NULL_RTX, mem;
4249 rtx label;
4250 rtx label_ref;
4251 rtx tmp_reg;
4252 rtx nsse_reg;
4253 int set;
4254 tree fntype;
4255 int stdarg_p;
4256 int i;
4257
4258 if (!TARGET_64BIT)
4259 return;
4260
4261 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4262 return;
4263
4264  /* Indicate that we need to allocate space on the stack for the varargs save area.  */
4265 ix86_save_varrargs_registers = 1;
4266
4267 cfun->stack_alignment_needed = 128;
4268
4269 fntype = TREE_TYPE (current_function_decl);
4270 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4271 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4272 != void_type_node));
4273
4274 /* For varargs, we do not want to skip the dummy va_dcl argument.
4275 For stdargs, we do want to skip the last named argument. */
4276 next_cum = *cum;
4277 if (stdarg_p)
4278 function_arg_advance (&next_cum, mode, type, 1);
4279
4280 if (!no_rtl)
4281 save_area = frame_pointer_rtx;
4282
4283 set = get_varargs_alias_set ();
4284
4285 for (i = next_cum.regno;
4286 i < ix86_regparm
4287 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4288 i++)
4289 {
4290 mem = gen_rtx_MEM (Pmode,
4291 plus_constant (save_area, i * UNITS_PER_WORD));
4292 MEM_NOTRAP_P (mem) = 1;
4293 set_mem_alias_set (mem, set);
4294 emit_move_insn (mem, gen_rtx_REG (Pmode,
4295 x86_64_int_parameter_registers[i]));
4296 }
4297
4298 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4299 {
4300      /* Now emit code to save SSE registers.  The AX parameter contains the
4301	 number of SSE parameter registers used to call this function.  We use
4302	 the sse_prologue_save insn template, which produces a computed jump
4303	 across the SSE saves.  Some preparation work is needed to get this working. */
4304
4305 label = gen_label_rtx ();
4306 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4307
4308      /* Compute the address to jump to:
4309         label - eax*4 + nnamed_sse_arguments*4 (matching the multipliers used below).  */
4310 tmp_reg = gen_reg_rtx (Pmode);
4311 nsse_reg = gen_reg_rtx (Pmode);
4312 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4313 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4314 gen_rtx_MULT (Pmode, nsse_reg,
4315 GEN_INT (4))));
4316 if (next_cum.sse_regno)
4317 emit_move_insn
4318 (nsse_reg,
4319 gen_rtx_CONST (DImode,
4320 gen_rtx_PLUS (DImode,
4321 label_ref,
4322 GEN_INT (next_cum.sse_regno * 4))));
4323 else
4324 emit_move_insn (nsse_reg, label_ref);
4325 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4326
4327      /* Compute the address of the memory block we save into.  We always use a
4328	 pointer pointing 127 bytes past the first byte to store, so that each
4329	 save instruction's size stays limited to 4 bytes (8-bit displacement).  */
4330 tmp_reg = gen_reg_rtx (Pmode);
4331 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4332 plus_constant (save_area,
4333 8 * REGPARM_MAX + 127)));
4334 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4335 MEM_NOTRAP_P (mem) = 1;
4336 set_mem_alias_set (mem, set);
4337 set_mem_align (mem, BITS_PER_WORD);
4338
4339 /* And finally do the dirty job! */
4340 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4341 GEN_INT (next_cum.sse_regno), label));
4342 }
4343
4344}
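/* Layout of the varargs register save area set up above (a sketch,
   assuming REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8 as on x86-64):

       save_area +   0 ... +  47 : %rdi, %rsi, %rdx, %rcx, %r8, %r9
       save_area +  48 ... + 175 : %xmm0 ... %xmm7 (16 bytes each)

   Only the SSE registers actually passed need to be stored, which is why
   the sse_prologue_save pattern uses a computed jump, based on the count
   the caller left in %al, to skip the stores for unused registers.  */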
4345
4346/* Implement va_start. */
4347
4348void
4349ix86_va_start (tree valist, rtx nextarg)
4350{
4351 HOST_WIDE_INT words, n_gpr, n_fpr;
4352 tree f_gpr, f_fpr, f_ovf, f_sav;
4353 tree gpr, fpr, ovf, sav, t;
4354 tree type;
4355
4356 /* Only 64bit target needs something special. */
4357 if (!TARGET_64BIT)
4358 {
4359 std_expand_builtin_va_start (valist, nextarg);
4360 return;
4361 }
4362
4363 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4364 f_fpr = TREE_CHAIN (f_gpr);
4365 f_ovf = TREE_CHAIN (f_fpr);
4366 f_sav = TREE_CHAIN (f_ovf);
4367
4368 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4369 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4370 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4371 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4372 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4373
4374 /* Count number of gp and fp argument registers used. */
4375 words = current_function_args_info.words;
4376 n_gpr = current_function_args_info.regno;
4377 n_fpr = current_function_args_info.sse_regno;
4378
4379 if (TARGET_DEBUG_ARG)
4380 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4381 (int) words, (int) n_gpr, (int) n_fpr);
4382
4383 if (cfun->va_list_gpr_size)
4384 {
4385 type = TREE_TYPE (gpr);
4386 t = build2 (MODIFY_EXPR, type, gpr,
4387 build_int_cst (type, n_gpr * 8));
4388 TREE_SIDE_EFFECTS (t) = 1;
4389 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4390 }
4391
4392 if (cfun->va_list_fpr_size)
4393 {
4394 type = TREE_TYPE (fpr);
4395 t = build2 (MODIFY_EXPR, type, fpr,
4396 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4397 TREE_SIDE_EFFECTS (t) = 1;
4398 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4399 }
4400
4401 /* Find the overflow area. */
4402 type = TREE_TYPE (ovf);
4403 t = make_tree (type, virtual_incoming_args_rtx);
4404 if (words != 0)
4405 t = build2 (PLUS_EXPR, type, t,
4406 build_int_cst (type, words * UNITS_PER_WORD));
4407 t = build2 (MODIFY_EXPR, type, ovf, t);
4408 TREE_SIDE_EFFECTS (t) = 1;
4409 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4410
4411 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4412 {
4413      /* Find the register save area.
4414	 The function prologue saves it right above the stack frame.  */
4415 type = TREE_TYPE (sav);
4416 t = make_tree (type, frame_pointer_rtx);
4417 t = build2 (MODIFY_EXPR, type, sav, t);
4418 TREE_SIDE_EFFECTS (t) = 1;
4419 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4420 }
4421}
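/* The offsets stored above follow the save-area layout: gp_offset is
   n_gpr * 8 (bytes already consumed by named integer arguments) and
   fp_offset is 8 * REGPARM_MAX + n_fpr * 16, i.e. the SSE slots start
   right after the six 8-byte integer slots.  overflow_arg_area points at
   the first stack-passed argument, and reg_save_area at the block saved
   by ix86_setup_incoming_varargs.  */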
4422
4423/* Implement va_arg. */
4424
4425tree
4426ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4427{
4428 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4429 tree f_gpr, f_fpr, f_ovf, f_sav;
4430 tree gpr, fpr, ovf, sav, t;
4431 int size, rsize;
4432 tree lab_false, lab_over = NULL_TREE;
4433 tree addr, t2;
4434 rtx container;
4435 int indirect_p = 0;
4436 tree ptrtype;
4437 enum machine_mode nat_mode;
4438
4439 /* Only 64bit target needs something special. */
4440 if (!TARGET_64BIT)
4441 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4442
4443 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4444 f_fpr = TREE_CHAIN (f_gpr);
4445 f_ovf = TREE_CHAIN (f_fpr);
4446 f_sav = TREE_CHAIN (f_ovf);
4447
4448 valist = build_va_arg_indirect_ref (valist);
4449 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4450 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4451 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4452 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4453
4454 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4455 if (indirect_p)
4456 type = build_pointer_type (type);
4457 size = int_size_in_bytes (type);
4458 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4459
4460 nat_mode = type_natural_mode (type);
4461 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4462 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4463
4464 /* Pull the value out of the saved registers. */
4465
4466 addr = create_tmp_var (ptr_type_node, "addr");
4467 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4468
4469 if (container)
4470 {
4471 int needed_intregs, needed_sseregs;
4472 bool need_temp;
4473 tree int_addr, sse_addr;
4474
4475 lab_false = create_artificial_label ();
4476 lab_over = create_artificial_label ();
4477
4478 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4479
4480 need_temp = (!REG_P (container)
4481 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4482 || TYPE_ALIGN (type) > 128));
4483
4484      /* In case we are passing a structure, verify that it occupies a consecutive
4485         block in the register save area.  If not, we need to do moves.  */
4486 if (!need_temp && !REG_P (container))
4487 {
4488 /* Verify that all registers are strictly consecutive */
4489 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4490 {
4491 int i;
4492
4493 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4494 {
4495 rtx slot = XVECEXP (container, 0, i);
4496 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4497 || INTVAL (XEXP (slot, 1)) != i * 16)
4498 need_temp = 1;
4499 }
4500 }
4501 else
4502 {
4503 int i;
4504
4505 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4506 {
4507 rtx slot = XVECEXP (container, 0, i);
4508 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4509 || INTVAL (XEXP (slot, 1)) != i * 8)
4510 need_temp = 1;
4511 }
4512 }
4513 }
4514 if (!need_temp)
4515 {
4516 int_addr = addr;
4517 sse_addr = addr;
4518 }
4519 else
4520 {
4521 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4522 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4523 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4524 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4525 }
4526
4527 /* First ensure that we fit completely in registers. */
4528 if (needed_intregs)
4529 {
4530 t = build_int_cst (TREE_TYPE (gpr),
4531 (REGPARM_MAX - needed_intregs + 1) * 8);
4532 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4533 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4534 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4535 gimplify_and_add (t, pre_p);
4536 }
4537 if (needed_sseregs)
4538 {
4539 t = build_int_cst (TREE_TYPE (fpr),
4540 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4541 + REGPARM_MAX * 8);
4542 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4543 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4544 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4545 gimplify_and_add (t, pre_p);
4546 }
4547
4548 /* Compute index to start of area used for integer regs. */
4549 if (needed_intregs)
4550 {
4551 /* int_addr = gpr + sav; */
4552 t = fold_convert (ptr_type_node, gpr);
4553 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4554 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4555 gimplify_and_add (t, pre_p);
4556 }
4557 if (needed_sseregs)
4558 {
4559 /* sse_addr = fpr + sav; */
4560 t = fold_convert (ptr_type_node, fpr);
4561 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4562 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4563 gimplify_and_add (t, pre_p);
4564 }
4565 if (need_temp)
4566 {
4567 int i;
4568 tree temp = create_tmp_var (type, "va_arg_tmp");
4569
4570 /* addr = &temp; */
4571 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4572 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4573 gimplify_and_add (t, pre_p);
4574
4575 for (i = 0; i < XVECLEN (container, 0); i++)
4576 {
4577 rtx slot = XVECEXP (container, 0, i);
4578 rtx reg = XEXP (slot, 0);
4579 enum machine_mode mode = GET_MODE (reg);
4580 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4581 tree addr_type = build_pointer_type (piece_type);
4582 tree src_addr, src;
4583 int src_offset;
4584 tree dest_addr, dest;
4585
4586 if (SSE_REGNO_P (REGNO (reg)))
4587 {
4588 src_addr = sse_addr;
4589 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4590 }
4591 else
4592 {
4593 src_addr = int_addr;
4594 src_offset = REGNO (reg) * 8;
4595 }
4596 src_addr = fold_convert (addr_type, src_addr);
4597 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4598 size_int (src_offset)));
4599 src = build_va_arg_indirect_ref (src_addr);
4600
4601 dest_addr = fold_convert (addr_type, addr);
4602 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4603 size_int (INTVAL (XEXP (slot, 1)))));
4604 dest = build_va_arg_indirect_ref (dest_addr);
4605
4606 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4607 gimplify_and_add (t, pre_p);
4608 }
4609 }
4610
4611 if (needed_intregs)
4612 {
4613 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4614 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4615 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4616 gimplify_and_add (t, pre_p);
4617 }
4618 if (needed_sseregs)
4619 {
4620 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4621 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4622 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4623 gimplify_and_add (t, pre_p);
4624 }
4625
4626 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4627 gimplify_and_add (t, pre_p);
4628
4629 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4630 append_to_statement_list (t, pre_p);
4631 }
4632
4633 /* ... otherwise out of the overflow area. */
4634
4635 /* Care for on-stack alignment if needed. */
4636 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4637 || integer_zerop (TYPE_SIZE (type)))
4638 t = ovf;
4639 else
4640 {
4641 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4642 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4643 build_int_cst (TREE_TYPE (ovf), align - 1));
4644 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4645 build_int_cst (TREE_TYPE (t), -align));
4646 }
4647 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4648
4649 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4650 gimplify_and_add (t2, pre_p);
4651
4652 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4653 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4654 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4655 gimplify_and_add (t, pre_p);
4656
4657 if (container)
4658 {
4659 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4660 append_to_statement_list (t, pre_p);
4661 }
4662
4663 ptrtype = build_pointer_type (type);
4664 addr = fold_convert (ptrtype, addr);
4665
4666 if (indirect_p)
4667 addr = build_va_arg_indirect_ref (addr);
4668 return build_va_arg_indirect_ref (addr);
4669}
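/* A rough sketch of what the gimplification above produces for a simple
   scalar, e.g. va_arg (ap, int) (hypothetical pseudo-C, not emitted
   verbatim):

       if (ap->gp_offset >= 6 * 8)
         goto lab_false;                        // register area exhausted
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto lab_over;
     lab_false:
       addr = ap->overflow_arg_area;            // aligned first if needed
       ap->overflow_arg_area += 8;
     lab_over:
       result = *(int *) addr;  */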
4670
4671/* Return nonzero if OPNUM's MEM should be matched
4672 in movabs* patterns. */
4673
4674int
4675ix86_check_movabs (rtx insn, int opnum)
4676{
4677 rtx set, mem;
4678
4679 set = PATTERN (insn);
4680 if (GET_CODE (set) == PARALLEL)
4681 set = XVECEXP (set, 0, 0);
4682 gcc_assert (GET_CODE (set) == SET);
4683 mem = XEXP (set, opnum);
4684 while (GET_CODE (mem) == SUBREG)
4685 mem = SUBREG_REG (mem);
4686 gcc_assert (GET_CODE (mem) == MEM);
4687 return (volatile_ok || !MEM_VOLATILE_P (mem));
4688}
4689
4690/* Initialize the table of extra 80387 mathematical constants. */
4691
4692static void
4693init_ext_80387_constants (void)
4694{
4695 static const char * cst[5] =
4696 {
4697 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4698 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4699 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4700 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4701 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4702 };
4703 int i;
4704
4705 for (i = 0; i < 5; i++)
4706 {
4707 real_from_string (&ext_80387_constants_table[i], cst[i]);
4708 /* Ensure each constant is rounded to XFmode precision. */
4709 real_convert (&ext_80387_constants_table[i],
4710 XFmode, &ext_80387_constants_table[i]);
4711 }
4712
4713 ext_80387_constants_init = 1;
4714}
4715
4716/* Return true if the constant is something that can be loaded with
4717 a special instruction. */
4718
4719int
4720standard_80387_constant_p (rtx x)
4721{
4722 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4723 return -1;
4724
4725 if (x == CONST0_RTX (GET_MODE (x)))
4726 return 1;
4727 if (x == CONST1_RTX (GET_MODE (x)))
4728 return 2;
4729
4730 /* For XFmode constants, try to find a special 80387 instruction when
4731 optimizing for size or on those CPUs that benefit from them. */
4732 if (GET_MODE (x) == XFmode
4733 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4734 {
4735 REAL_VALUE_TYPE r;
4736 int i;
4737
4738 if (! ext_80387_constants_init)
4739 init_ext_80387_constants ();
4740
4741 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4742 for (i = 0; i < 5; i++)
4743 if (real_identical (&r, &ext_80387_constants_table[i]))
4744 return i + 3;
4745 }
4746
4747 return 0;
4748}
4749
4750/* Return the opcode of the special instruction to be used to load
4751 the constant X. */
4752
4753const char *
4754standard_80387_constant_opcode (rtx x)
4755{
4756 switch (standard_80387_constant_p (x))
4757 {
4758 case 1:
4759 return "fldz";
4760 case 2:
4761 return "fld1";
4762 case 3:
4763 return "fldlg2";
4764 case 4:
4765 return "fldln2";
4766 case 5:
4767 return "fldl2e";
4768 case 6:
4769 return "fldl2t";
4770 case 7:
4771 return "fldpi";
4772 default:
4773 gcc_unreachable ();
4774 }
4775}
4776
4777/* Return the CONST_DOUBLE representing the 80387 constant that is
4778 loaded by the specified special instruction. The argument IDX
4779 matches the return value from standard_80387_constant_p. */
4780
4781rtx
4782standard_80387_constant_rtx (int idx)
4783{
4784 int i;
4785
4786 if (! ext_80387_constants_init)
4787 init_ext_80387_constants ();
4788
4789 switch (idx)
4790 {
4791 case 3:
4792 case 4:
4793 case 5:
4794 case 6:
4795 case 7:
4796 i = idx - 3;
4797 break;
4798
4799 default:
4800 gcc_unreachable ();
4801 }
4802
4803 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4804 XFmode);
4805}
4806
4807/* Return 1 if mode is a valid mode for sse. */
4808static int
4809standard_sse_mode_p (enum machine_mode mode)
4810{
4811 switch (mode)
4812 {
4813 case V16QImode:
4814 case V8HImode:
4815 case V4SImode:
4816 case V2DImode:
4817 case V4SFmode:
4818 case V2DFmode:
4819 return 1;
4820
4821 default:
4822 return 0;
4823 }
4824}
4825
4826/* Return 1 if X is an FP constant that we can load into an SSE register
4827   without using memory.  */
4828int
4829standard_sse_constant_p (rtx x)
4830{
4831 enum machine_mode mode = GET_MODE (x);
4832
4833 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4834 return 1;
4835 if (vector_all_ones_operand (x, mode)
4836 && standard_sse_mode_p (mode))
4837 return TARGET_SSE2 ? 2 : -1;
4838
4839 return 0;
4840}
4841
4842/* Return the opcode of the special instruction to be used to load
4843 the constant X. */
4844
4845const char *
4846standard_sse_constant_opcode (rtx insn, rtx x)
4847{
4848 switch (standard_sse_constant_p (x))
4849 {
4850 case 1:
4851 if (get_attr_mode (insn) == MODE_V4SF)
4852 return "xorps\t%0, %0";
4853 else if (get_attr_mode (insn) == MODE_V2DF)
4854 return "xorpd\t%0, %0";
4855 else
4856 return "pxor\t%0, %0";
4857 case 2:
4858 return "pcmpeqd\t%0, %0";
4859 }
4860 gcc_unreachable ();
4861}
4862
4863/* Returns 1 if OP contains a symbol reference */
4864
4865int
4866symbolic_reference_mentioned_p (rtx op)
4867{
4868 const char *fmt;
4869 int i;
4870
4871 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4872 return 1;
4873
4874 fmt = GET_RTX_FORMAT (GET_CODE (op));
4875 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4876 {
4877 if (fmt[i] == 'E')
4878 {
4879 int j;
4880
4881 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4882 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4883 return 1;
4884 }
4885
4886 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4887 return 1;
4888 }
4889
4890 return 0;
4891}
4892
4893/* Return 1 if it is appropriate to emit `ret' instructions in the
4894 body of a function. Do this only if the epilogue is simple, needing a
4895 couple of insns. Prior to reloading, we can't tell how many registers
4896 must be saved, so return 0 then. Return 0 if there is no frame
4897 marker to de-allocate. */
4898
4899int
4900ix86_can_use_return_insn_p (void)
4901{
4902 struct ix86_frame frame;
4903
4904 if (! reload_completed || frame_pointer_needed)
4905 return 0;
4906
4907  /* Don't allow popping more than 32K bytes of arguments, since that's all
4908     we handle with one instruction.  */
4909 if (current_function_pops_args
4910 && current_function_args_size >= 32768)
4911 return 0;
4912
4913 ix86_compute_frame_layout (&frame);
4914 return frame.to_allocate == 0 && frame.nregs == 0;
4915}
4916
4917/* Value should be nonzero if functions must have frame pointers.
4918 Zero means the frame pointer need not be set up (and parms may
4919 be accessed via the stack pointer) in functions that seem suitable. */
4920
4921int
4922ix86_frame_pointer_required (void)
4923{
4924 /* If we accessed previous frames, then the generated code expects
4925 to be able to access the saved ebp value in our frame. */
4926 if (cfun->machine->accesses_prev_frame)
4927 return 1;
4928
4929  /* Several x86 OSes need a frame pointer for other reasons,
4930     usually pertaining to setjmp.  */
4931 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4932 return 1;
4933
4934 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4935 the frame pointer by default. Turn it back on now if we've not
4936 got a leaf function. */
4937 if (TARGET_OMIT_LEAF_FRAME_POINTER
4938 && (!current_function_is_leaf
4939 || ix86_current_function_calls_tls_descriptor))
4940 return 1;
4941
4942 if (current_function_profile)
4943 return 1;
4944
4945 return 0;
4946}
4947
4948/* Record that the current function accesses previous call frames. */
4949
4950void
4951ix86_setup_frame_addresses (void)
4952{
4953 cfun->machine->accesses_prev_frame = 1;
4954}
4955
4956#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4957# define USE_HIDDEN_LINKONCE 1
4958#else
4959# define USE_HIDDEN_LINKONCE 0
4960#endif
4961
4962static int pic_labels_used;
4963
4964/* Fills in the label name that should be used for a pc thunk for
4965 the given register. */
4966
4967static void
4968get_pc_thunk_name (char name[32], unsigned int regno)
4969{
4970 gcc_assert (!TARGET_64BIT);
4971
4972 if (USE_HIDDEN_LINKONCE)
4973 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4974 else
4975 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4976}
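/* With USE_HIDDEN_LINKONCE the thunk for, say, %ebx is named
   "__i686.get_pc_thunk.bx"; otherwise an internal label built from the
   prefix "LPR" and the register number is used instead.  */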
4977
4978
4979/* This function emits the pc thunks used by -fpic code; each thunk loads
4980   its register with the return address of the caller and then returns.  */
4981
4982void
4983ix86_file_end (void)
4984{
4985 rtx xops[2];
4986 int regno;
4987
4988 for (regno = 0; regno < 8; ++regno)
4989 {
4990 char name[32];
4991
4992 if (! ((pic_labels_used >> regno) & 1))
4993 continue;
4994
4995 get_pc_thunk_name (name, regno);
4996
4997#if TARGET_MACHO
4998 if (TARGET_MACHO)
4999 {
5000 switch_to_section (darwin_sections[text_coal_section]);
5001 fputs ("\t.weak_definition\t", asm_out_file);
5002 assemble_name (asm_out_file, name);
5003 fputs ("\n\t.private_extern\t", asm_out_file);
5004 assemble_name (asm_out_file, name);
5005 fputs ("\n", asm_out_file);
5006 ASM_OUTPUT_LABEL (asm_out_file, name);
5007 }
5008 else
5009#endif
5010 if (USE_HIDDEN_LINKONCE)
5011 {
5012 tree decl;
5013
5014 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5015 error_mark_node);
5016 TREE_PUBLIC (decl) = 1;
5017 TREE_STATIC (decl) = 1;
5018 DECL_ONE_ONLY (decl) = 1;
5019
5020 (*targetm.asm_out.unique_section) (decl, 0);
5021 switch_to_section (get_named_section (decl, NULL, 0));
5022
5023 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5024 fputs ("\t.hidden\t", asm_out_file);
5025 assemble_name (asm_out_file, name);
5026 fputc ('\n', asm_out_file);
5027 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5028 }
5029 else
5030 {
5031 switch_to_section (text_section);
5032 ASM_OUTPUT_LABEL (asm_out_file, name);
5033 }
5034
5035 xops[0] = gen_rtx_REG (SImode, regno);
5036 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5037 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5038 output_asm_insn ("ret", xops);
5039 }
5040
5041 if (NEED_INDICATE_EXEC_STACK)
5042 file_end_indicate_exec_stack ();
5043}
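/* The body emitted above for each used thunk is simply (illustrative):

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   i.e. the thunk copies its own return address - the address of the
   instruction following the call - into the requested register, which the
   caller then adjusts to point at the GOT.  */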
5044
5045/* Emit code for the SET_GOT patterns. */
5046
5047const char *
5048output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5049{
5050 rtx xops[3];
5051
5052 xops[0] = dest;
5053 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5054
5055 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5056 {
5057 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5058
5059 if (!flag_pic)
5060 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5061 else
5062 output_asm_insn ("call\t%a2", xops);
5063
5064#if TARGET_MACHO
5065 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5066 is what will be referenced by the Mach-O PIC subsystem. */
5067 if (!label)
5068 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5069#endif
5070
5071 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5072 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5073
5074 if (flag_pic)
5075 output_asm_insn ("pop{l}\t%0", xops);
5076 }
5077 else
5078 {
5079 char name[32];
5080 get_pc_thunk_name (name, REGNO (dest));
5081 pic_labels_used |= 1 << REGNO (dest);
5082
5083 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5084 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5085 output_asm_insn ("call\t%X2", xops);
5086 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5087 is what will be referenced by the Mach-O PIC subsystem. */
5088#if TARGET_MACHO
5089 if (!label)
5090 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5091 else
5092 targetm.asm_out.internal_label (asm_out_file, "L",
5093 CODE_LABEL_NUMBER (label));
5094#endif
5095 }
5096
5097 if (TARGET_MACHO)
5098 return "";
5099
5100 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5101 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5102 else
5103 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5104
5105 return "";
5106}
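/* For reference (illustrative, with %ebx as the PIC register), the two
   32-bit sequences produced above are roughly:

     without TARGET_DEEP_BRANCH_PREDICTION:
         call    1f
     1:  popl    %ebx
         addl    $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx

     with it, via the pc thunk:
         call    __i686.get_pc_thunk.bx
         addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   The thunk variant keeps the CPU's return-address predictor balanced,
   since the address is obtained by a matched call/ret pair rather than a
   call immediately followed by a pop.  */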
5107
5108/* Generate a "push" pattern for input ARG.  */
5109
5110static rtx
5111gen_push (rtx arg)
5112{
5113 return gen_rtx_SET (VOIDmode,
5114 gen_rtx_MEM (Pmode,
5115 gen_rtx_PRE_DEC (Pmode,
5116 stack_pointer_rtx)),
5117 arg);
5118}
5119
5120/* Return >= 0 if there is an unused call-clobbered register available
5121 for the entire function. */
5122
5123static unsigned int
5124ix86_select_alt_pic_regnum (void)
5125{
5126 if (current_function_is_leaf && !current_function_profile
5127 && !ix86_current_function_calls_tls_descriptor)
5128 {
5129 int i;
5130 for (i = 2; i >= 0; --i)
5131 if (!regs_ever_live[i])
5132 return i;
5133 }
5134
5135 return INVALID_REGNUM;
5136}
5137
5138/* Return 1 if we need to save REGNO. */
5139static int
5140ix86_save_reg (unsigned int regno, int maybe_eh_return)
5141{
5142 if (pic_offset_table_rtx
5143 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5144 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5145 || current_function_profile
5146 || current_function_calls_eh_return
5147 || current_function_uses_const_pool))
5148 {
5149 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5150 return 0;
5151 return 1;
5152 }
5153
5154 if (current_function_calls_eh_return && maybe_eh_return)
5155 {
5156 unsigned i;
5157 for (i = 0; ; i++)
5158 {
5159 unsigned test = EH_RETURN_DATA_REGNO (i);
5160 if (test == INVALID_REGNUM)
5161 break;
5162 if (test == regno)
5163 return 1;
5164 }
5165 }
5166
5167 if (cfun->machine->force_align_arg_pointer
5168 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5169 return 1;
5170
5171 return (regs_ever_live[regno]
5172 && !call_used_regs[regno]
5173 && !fixed_regs[regno]
5174 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5175}
5176
5177/* Return number of registers to be saved on the stack. */
5178
5179static int
5180ix86_nsaved_regs (void)
5181{
5182 int nregs = 0;
5183 int regno;
5184
5185 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5186 if (ix86_save_reg (regno, true))
5187 nregs++;
5188 return nregs;
5189}
5190
5191/* Return the offset between two registers, one to be eliminated, and the other
5192 its replacement, at the start of a routine. */
5193
5194HOST_WIDE_INT
5195ix86_initial_elimination_offset (int from, int to)
5196{
5197 struct ix86_frame frame;
5198 ix86_compute_frame_layout (&frame);
5199
5200 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5201 return frame.hard_frame_pointer_offset;
5202 else if (from == FRAME_POINTER_REGNUM
5203 && to == HARD_FRAME_POINTER_REGNUM)
5204 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5205 else
5206 {
5207 gcc_assert (to == STACK_POINTER_REGNUM);
5208
5209 if (from == ARG_POINTER_REGNUM)
5210 return frame.stack_pointer_offset;
5211
5212 gcc_assert (from == FRAME_POINTER_REGNUM);
5213 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5214 }
5215}
5216
5217/* Fill structure ix86_frame about frame of currently computed function. */
5218
5219static void
5220ix86_compute_frame_layout (struct ix86_frame *frame)
5221{
5222 HOST_WIDE_INT total_size;
5223 unsigned int stack_alignment_needed;
5224 HOST_WIDE_INT offset;
5225 unsigned int preferred_alignment;
5226 HOST_WIDE_INT size = get_frame_size ();
5227
5228 frame->nregs = ix86_nsaved_regs ();
5229 total_size = size;
5230
5231 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5232 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5233
5234  /* During reload iteration the number of registers saved can change.
5235     Recompute the value as needed.  Do not recompute when the number of
5236     registers didn't change, as reload makes multiple calls to this function
5237     and does not expect the decision to change within a single iteration.  */
5238 if (!optimize_size
5239 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5240 {
5241 int count = frame->nregs;
5242
5243 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5244      /* The fast prologue uses moves instead of pushes to save registers.  This
5245         is significantly longer, but also executes faster, as modern hardware
5246         can execute the moves in parallel but cannot do so for push/pop.
5247
5248	 Be careful about choosing which prologue to emit:  When the function takes
5249	 many instructions to execute we may use the slow version, as we also do
5250	 when the function is known to be outside any hot spot (this is known with
5251	 feedback only).  Weight the size of the function by the number of registers
5252	 to save, as it is cheap to use one or two push instructions but very
5253	 slow to use many of them.  */
5254 if (count)
5255 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5256 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5257 || (flag_branch_probabilities
5258 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5259 cfun->machine->use_fast_prologue_epilogue = false;
5260 else
5261 cfun->machine->use_fast_prologue_epilogue
5262 = !expensive_function_p (count);
5263 }
5264 if (TARGET_PROLOGUE_USING_MOVE
5265 && cfun->machine->use_fast_prologue_epilogue)
5266 frame->save_regs_using_mov = true;
5267 else
5268 frame->save_regs_using_mov = false;
5269
5270
5271 /* Skip return address and saved base pointer. */
5272 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5273
5274 frame->hard_frame_pointer_offset = offset;
5275
5276  /* Do some sanity checking of stack_alignment_needed and
5277     preferred_alignment, since the i386 port is the only one using these
5278     features in ways that may break easily.  */
5279
5280 gcc_assert (!size || stack_alignment_needed);
5281 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5282 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5283 gcc_assert (stack_alignment_needed
5284 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5285
5286 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5287 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5288
5289 /* Register save area */
5290 offset += frame->nregs * UNITS_PER_WORD;
5291
5292 /* Va-arg area */
5293 if (ix86_save_varrargs_registers)
5294 {
5295 offset += X86_64_VARARGS_SIZE;
5296 frame->va_arg_size = X86_64_VARARGS_SIZE;
5297 }
5298 else
5299 frame->va_arg_size = 0;
5300
5301 /* Align start of frame for local function. */
5302 frame->padding1 = ((offset + stack_alignment_needed - 1)
5303 & -stack_alignment_needed) - offset;
5304
5305 offset += frame->padding1;
5306
5307 /* Frame pointer points here. */
5308 frame->frame_pointer_offset = offset;
5309
5310 offset += size;
5311
5312  /* Add the outgoing arguments area.  It can be skipped if we eliminated
5313     all the function calls as dead code.
5314     Skipping is however impossible when the function calls alloca: the alloca
5315     expander assumes that the last current_function_outgoing_args_size bytes
5316     of the stack frame are unused.  */
5317 if (ACCUMULATE_OUTGOING_ARGS
5318 && (!current_function_is_leaf || current_function_calls_alloca
5319 || ix86_current_function_calls_tls_descriptor))
5320 {
5321 offset += current_function_outgoing_args_size;
5322 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5323 }
5324 else
5325 frame->outgoing_arguments_size = 0;
5326
5327 /* Align stack boundary. Only needed if we're calling another function
5328 or using alloca. */
5329 if (!current_function_is_leaf || current_function_calls_alloca
5330 || ix86_current_function_calls_tls_descriptor)
5331 frame->padding2 = ((offset + preferred_alignment - 1)
5332 & -preferred_alignment) - offset;
5333 else
5334 frame->padding2 = 0;
5335
5336 offset += frame->padding2;
5337
5338 /* We've reached end of stack frame. */
5339 frame->stack_pointer_offset = offset;
5340
5341  /* Size the prologue needs to allocate.  */
5342 frame->to_allocate =
5343 (size + frame->padding1 + frame->padding2
5344 + frame->outgoing_arguments_size + frame->va_arg_size);
5345
5346 if ((!frame->to_allocate && frame->nregs <= 1)
5347 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5348 frame->save_regs_using_mov = false;
5349
5350 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5351 && current_function_is_leaf
5352 && !ix86_current_function_calls_tls_descriptor)
5353 {
5354 frame->red_zone_size = frame->to_allocate;
5355 if (frame->save_regs_using_mov)
5356 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5357 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5358 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5359 }
5360 else
5361 frame->red_zone_size = 0;
5362 frame->to_allocate -= frame->red_zone_size;
5363 frame->stack_pointer_offset -= frame->red_zone_size;
5364#if 0
5365 fprintf (stderr, "nregs: %i\n", frame->nregs);
5366 fprintf (stderr, "size: %i\n", size);
5367 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5368 fprintf (stderr, "padding1: %i\n", frame->padding1);
5369 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5370 fprintf (stderr, "padding2: %i\n", frame->padding2);
5371 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5372 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5373 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5374 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5375 frame->hard_frame_pointer_offset);
5376 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5377#endif
5378}
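/* Summary of the layout computed above, from higher to lower addresses
   (a sketch of the resulting frame):

       return address
       saved %ebp / %rbp           (if frame_pointer_needed)
                                    <- hard_frame_pointer_offset
       saved registers              (nregs * UNITS_PER_WORD)
       va-arg register save area    (64-bit varargs only)
       padding1                     (to stack_alignment_needed)
                                    <- frame_pointer_offset
       local variables              (get_frame_size ())
       outgoing argument area       (ACCUMULATE_OUTGOING_ARGS)
       padding2                     (to preferred_alignment)
                                    <- stack_pointer_offset
       red zone                     (64-bit leaf functions; its size is
                                     subtracted from to_allocate since it
                                     needs no explicit allocation)  */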
5379
5380/* Emit code to save registers in the prologue. */
5381
5382static void
5383ix86_emit_save_regs (void)
5384{
5385 unsigned int regno;
5386 rtx insn;
5387
5388 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5389 if (ix86_save_reg (regno, true))
5390 {
5391 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5392 RTX_FRAME_RELATED_P (insn) = 1;
5393 }
5394}
5395
5396/* Emit code to save registers using MOV insns. First register
5397 is restored from POINTER + OFFSET. */
5398static void
5399ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5400{
5401 unsigned int regno;
5402 rtx insn;
5403
5404 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5405 if (ix86_save_reg (regno, true))
5406 {
5407 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5408 Pmode, offset),
5409 gen_rtx_REG (Pmode, regno));
5410 RTX_FRAME_RELATED_P (insn) = 1;
5411 offset += UNITS_PER_WORD;
5412 }
5413}
5414
5415/* Expand prologue or epilogue stack adjustment.
5416   The pattern exists to put a dependency on all ebp-based memory accesses.
5417   STYLE should be negative if instructions should be marked as frame related,
5418   zero if the %r11 register is live and cannot be freely used, and positive
5419   otherwise.  */
5420
5421static void
5422pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5423{
5424 rtx insn;
5425
5426 if (! TARGET_64BIT)
5427 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5428 else if (x86_64_immediate_operand (offset, DImode))
5429 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5430 else
5431 {
5432 rtx r11;
5433 /* r11 is used by indirect sibcall return as well, set before the
5434 epilogue and used after the epilogue. ATM indirect sibcall
5435 shouldn't be used together with huge frame sizes in one
5436 function because of the frame_size check in sibcall.c. */
5437 gcc_assert (style);
5438 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5439 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5440 if (style < 0)
5441 RTX_FRAME_RELATED_P (insn) = 1;
5442 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5443 offset));
5444 }
5445 if (style < 0)
5446 RTX_FRAME_RELATED_P (insn) = 1;
5447}
5448
5449/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5450
5451static rtx
5452ix86_internal_arg_pointer (void)
5453{
5454 bool has_force_align_arg_pointer =
5455 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5456 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5457 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5458 && DECL_NAME (current_function_decl)
5459 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5460 && DECL_FILE_SCOPE_P (current_function_decl))
5461 || ix86_force_align_arg_pointer
5462 || has_force_align_arg_pointer)
5463 {
5464 /* Nested functions can't realign the stack due to a register
5465 conflict. */
5466 if (DECL_CONTEXT (current_function_decl)
5467 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5468 {
5469 if (ix86_force_align_arg_pointer)
5470 warning (0, "-mstackrealign ignored for nested functions");
5471 if (has_force_align_arg_pointer)
5472 error ("%s not supported for nested functions",
5473 ix86_force_align_arg_pointer_string);
5474 return virtual_incoming_args_rtx;
5475 }
5476 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5477 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5478 }
5479 else
5480 return virtual_incoming_args_rtx;
5481}
5482
5483/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5484 This is called from dwarf2out.c to emit call frame instructions
5485 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5486static void
5487ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5488{
5489 rtx unspec = SET_SRC (pattern);
5490 gcc_assert (GET_CODE (unspec) == UNSPEC);
5491
5492 switch (index)
5493 {
5494 case UNSPEC_REG_SAVE:
5495 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5496 SET_DEST (pattern));
5497 break;
5498 case UNSPEC_DEF_CFA:
5499 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5500 INTVAL (XVECEXP (unspec, 0, 0)));
5501 break;
5502 default:
5503 gcc_unreachable ();
5504 }
5505}
5506
5507/* Expand the prologue into a bunch of separate insns. */
5508
5509void
5510ix86_expand_prologue (void)
5511{
5512 rtx insn;
5513 bool pic_reg_used;
5514 struct ix86_frame frame;
5515 HOST_WIDE_INT allocate;
5516
5517 ix86_compute_frame_layout (&frame);
5518
5519 if (cfun->machine->force_align_arg_pointer)
5520 {
5521 rtx x, y;
5522
5523 /* Grab the argument pointer. */
5524 x = plus_constant (stack_pointer_rtx, 4);
5525 y = cfun->machine->force_align_arg_pointer;
5526 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5527 RTX_FRAME_RELATED_P (insn) = 1;
5528
5529      /* The unwind info consists of two parts: install the fafp as the cfa,
5530	 and record the fafp as the "save register" of the stack pointer.
5531	 The latter is there so that the unwinder can see where it
5532	 should restore the stack pointer across the `and' insn.  */
5533 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5534 x = gen_rtx_SET (VOIDmode, y, x);
5535 RTX_FRAME_RELATED_P (x) = 1;
5536 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5537 UNSPEC_REG_SAVE);
5538 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5539 RTX_FRAME_RELATED_P (y) = 1;
5540 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5541 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5542 REG_NOTES (insn) = x;
5543
5544 /* Align the stack. */
5545 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5546 GEN_INT (-16)));
5547
5548 /* And here we cheat like madmen with the unwind info. We force the
5549 cfa register back to sp+4, which is exactly what it was at the
5550 start of the function. Re-pushing the return address results in
5551 the return at the same spot relative to the cfa, and thus is
5552 correct wrt the unwind info. */
5553 x = cfun->machine->force_align_arg_pointer;
5554 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5555 insn = emit_insn (gen_push (x));
5556 RTX_FRAME_RELATED_P (insn) = 1;
5557
5558 x = GEN_INT (4);
5559 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5560 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5561 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5562 REG_NOTES (insn) = x;
5563 }
5564
5565 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5566 slower on all targets. Also sdb doesn't like it. */
5567
5568 if (frame_pointer_needed)
5569 {
5570 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5571 RTX_FRAME_RELATED_P (insn) = 1;
5572
5573 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5574 RTX_FRAME_RELATED_P (insn) = 1;
5575 }
5576
5577 allocate = frame.to_allocate;
5578
5579 if (!frame.save_regs_using_mov)
5580 ix86_emit_save_regs ();
5581 else
5582 allocate += frame.nregs * UNITS_PER_WORD;
5583
5584  /* When using the red zone we may start saving registers before allocating
5585     the stack frame, saving one cycle of the prologue.  */
5586 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5587 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5588 : stack_pointer_rtx,
5589 -frame.nregs * UNITS_PER_WORD);
5590
5591 if (allocate == 0)
5592 ;
5593 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5594 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5595 GEN_INT (-allocate), -1);
5596 else
5597 {
5598 /* Only valid for Win32. */
5599 rtx eax = gen_rtx_REG (SImode, 0);
5600 bool eax_live = ix86_eax_live_at_start_p ();
5601 rtx t;
5602
5603 gcc_assert (!TARGET_64BIT);
5604
5605 if (eax_live)
5606 {
5607 emit_insn (gen_push (eax));
5608 allocate -= 4;
5609 }
5610
5611 emit_move_insn (eax, GEN_INT (allocate));
5612
5613 insn = emit_insn (gen_allocate_stack_worker (eax));
5614 RTX_FRAME_RELATED_P (insn) = 1;
5615 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5616 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5617 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5618 t, REG_NOTES (insn));
5619
5620 if (eax_live)
5621 {
5622 if (frame_pointer_needed)
5623 t = plus_constant (hard_frame_pointer_rtx,
5624 allocate
5625 - frame.to_allocate
5626 - frame.nregs * UNITS_PER_WORD);
5627 else
5628 t = plus_constant (stack_pointer_rtx, allocate);
5629 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5630 }
5631 }
5632
5633 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5634 {
5635 if (!frame_pointer_needed || !frame.to_allocate)
5636 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5637 else
5638 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5639 -frame.nregs * UNITS_PER_WORD);
5640 }
5641
5642 pic_reg_used = false;
5643 if (pic_offset_table_rtx
5644 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5645 || current_function_profile))
5646 {
5647 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5648
5649 if (alt_pic_reg_used != INVALID_REGNUM)
5650 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5651
5652 pic_reg_used = true;
5653 }
5654
5655 if (pic_reg_used)
5656 {
5657 if (TARGET_64BIT)
5658 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5659 else
5660 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5661
5662      /* Even with accurate pre-reload life analysis, we can wind up
5663	 deleting all references to the pic register after reload.
5664	 Consider what happens if cross-jumping unifies two sides of a branch
5665	 controlled by a comparison against the only read from a global.
5666	 In that case, allow the set_got to be deleted, though we're
5667	 too late to do anything about the ebx save in the prologue.  */
5668 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5669 }
5670
5671  /* Prevent function calls from being scheduled before the call to mcount.
5672     In the pic_reg_used case, make sure that the GOT load isn't deleted.  */
5673 if (current_function_profile)
5674 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5675}
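/* A typical 32-bit prologue produced by the expansion above, for a
   function that needs a frame pointer, saves one register and uses PIC
   (illustrative only):

       pushl   %ebp
       movl    %esp, %ebp
       pushl   %ebx
       subl    $N, %esp
       call    __i686.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   With tunings that enable the fast prologue, the pushes of the
   call-saved registers are replaced by plain moves into the already
   allocated frame.  */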
5676
5677/* Emit code to restore saved registers using MOV insns. First register
5678 is restored from POINTER + OFFSET. */
5679static void
5680ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5681 int maybe_eh_return)
5682{
5683 int regno;
5684 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5685
5686 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5687 if (ix86_save_reg (regno, maybe_eh_return))
5688 {
5689	/* Ensure that adjust_address won't be forced to produce a pointer
5690	   outside the range allowed by the x86-64 instruction set.  */
5691 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5692 {
5693 rtx r11;
5694
5695 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5696 emit_move_insn (r11, GEN_INT (offset));
5697 emit_insn (gen_adddi3 (r11, r11, pointer));
5698 base_address = gen_rtx_MEM (Pmode, r11);
5699 offset = 0;
5700 }
5701 emit_move_insn (gen_rtx_REG (Pmode, regno),
5702 adjust_address (base_address, Pmode, offset));
5703 offset += UNITS_PER_WORD;
5704 }
5705}
5706
5707/* Restore function stack, frame, and registers. */
5708
5709void
5710ix86_expand_epilogue (int style)
5711{
5712 int regno;
5713 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5714 struct ix86_frame frame;
5715 HOST_WIDE_INT offset;
5716
5717 ix86_compute_frame_layout (&frame);
5718
5719 /* Calculate start of saved registers relative to ebp. Special care
5720 must be taken for the normal return case of a function using
5721 eh_return: the eax and edx registers are marked as saved, but not
5722 restored along this path. */
5723 offset = frame.nregs;
5724 if (current_function_calls_eh_return && style != 2)
5725 offset -= 2;
5726 offset *= -UNITS_PER_WORD;
5727
5728  /* If we're only restoring one register and sp is not valid then
5729     use a move instruction to restore the register, since it's
5730     less work than reloading sp and popping the register.
5731
5732     The default code results in a stack adjustment using an add/lea instruction,
5733     while this code results in a LEAVE instruction (or discrete equivalent),
5734     so it is profitable in some other cases as well, especially when there
5735     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
5736     and there is exactly one register to pop.  This heuristic may need some
5737     tuning in the future.  */
5738 if ((!sp_valid && frame.nregs <= 1)
5739 || (TARGET_EPILOGUE_USING_MOVE
5740 && cfun->machine->use_fast_prologue_epilogue
5741 && (frame.nregs > 1 || frame.to_allocate))
5742 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5743 || (frame_pointer_needed && TARGET_USE_LEAVE
5744 && cfun->machine->use_fast_prologue_epilogue
5745 && frame.nregs == 1)
5746 || current_function_calls_eh_return)
5747 {
5748 /* Restore registers. We can use ebp or esp to address the memory
5749 locations. If both are available, default to ebp, since offsets
5750	 are known to be small.  The only exception is esp pointing directly to the
5751	 end of the block of saved registers, where we may simplify the addressing
5752	 mode.  */
5753
5754 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5755 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5756 frame.to_allocate, style == 2);
5757 else
5758 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5759 offset, style == 2);
5760
5761 /* eh_return epilogues need %ecx added to the stack pointer. */
5762 if (style == 2)
5763 {
5764 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5765
5766 if (frame_pointer_needed)
5767 {
5768 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5769 tmp = plus_constant (tmp, UNITS_PER_WORD);
5770 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5771
5772 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5773 emit_move_insn (hard_frame_pointer_rtx, tmp);
5774
5775 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5776 const0_rtx, style);
5777 }
5778 else
5779 {
5780 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5781 tmp = plus_constant (tmp, (frame.to_allocate
5782 + frame.nregs * UNITS_PER_WORD));
5783 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5784 }
5785 }
5786 else if (!frame_pointer_needed)
5787 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5788 GEN_INT (frame.to_allocate
5789 + frame.nregs * UNITS_PER_WORD),
5790 style);
5791 /* If not an i386, mov & pop is faster than "leave". */
5792 else if (TARGET_USE_LEAVE || optimize_size
5793 || !cfun->machine->use_fast_prologue_epilogue)
5794 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5795 else
5796 {
5797 pro_epilogue_adjust_stack (stack_pointer_rtx,
5798 hard_frame_pointer_rtx,
5799 const0_rtx, style);
5800 if (TARGET_64BIT)
5801 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5802 else
5803 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5804 }
5805 }
5806 else
5807 {
5808 /* First step is to deallocate the stack frame so that we can
5809 pop the registers. */
5810 if (!sp_valid)
5811 {
5812 gcc_assert (frame_pointer_needed);
5813 pro_epilogue_adjust_stack (stack_pointer_rtx,
5814 hard_frame_pointer_rtx,
5815 GEN_INT (offset), style);
5816 }
5817 else if (frame.to_allocate)
5818 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5819 GEN_INT (frame.to_allocate), style);
5820
5821 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5822 if (ix86_save_reg (regno, false))
5823 {
5824 if (TARGET_64BIT)
5825 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5826 else
5827 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5828 }
5829 if (frame_pointer_needed)
5830 {
5831	  /* Using "leave" results in shorter dependency chains on CPUs that are
5832	     able to grok it fast.  */
5833 if (TARGET_USE_LEAVE)
5834 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5835 else if (TARGET_64BIT)
5836 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5837 else
5838 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5839 }
5840 }
5841
5842 if (cfun->machine->force_align_arg_pointer)
5843 {
5844 emit_insn (gen_addsi3 (stack_pointer_rtx,
5845 cfun->machine->force_align_arg_pointer,
5846 GEN_INT (-4)));
5847 }
5848
5849 /* Sibcall epilogues don't want a return instruction. */
5850 if (style == 0)
5851 return;
5852
5853 if (current_function_pops_args && current_function_args_size)
5854 {
5855 rtx popc = GEN_INT (current_function_pops_args);
5856
5857	      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
5858		 return address, do an explicit add, and jump indirectly to the
5859		 caller.  */
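      /* As an illustration, a function asked to pop 0x11000 (69632) bytes
	 would, on this path, produce roughly:
	     popl  %ecx              # pull the return address into %ecx
	     addl  $0x11000, %esp    # pop the arguments explicitly
	     jmp   *%ecx             # return to the caller indirectly
	 which is a sketch of what the code below emits.  */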
5860
5861 if (current_function_pops_args >= 65536)
5862 {
5863 rtx ecx = gen_rtx_REG (SImode, 2);
5864
5865	  /* There is no "pascal" calling convention in the 64bit ABI.  */
5866 gcc_assert (!TARGET_64BIT);
5867
5868 emit_insn (gen_popsi1 (ecx));
5869 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5870 emit_jump_insn (gen_return_indirect_internal (ecx));
5871 }
5872 else
5873 emit_jump_insn (gen_return_pop_internal (popc));
5874 }
5875 else
5876 emit_jump_insn (gen_return_internal ());
5877}
5878
5879/* Reset from the function's potential modifications. */
5880
5881static void
5882ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5883 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5884{
5885 if (pic_offset_table_rtx)
5886 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5887#if TARGET_MACHO
5888 /* Mach-O doesn't support labels at the end of objects, so if
5889 it looks like we might want one, insert a NOP. */
5890 {
5891 rtx insn = get_last_insn ();
5892 while (insn
5893 && NOTE_P (insn)
5894 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5895 insn = PREV_INSN (insn);
5896 if (insn
5897 && (LABEL_P (insn)
5898 || (NOTE_P (insn)
5899 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5900 fputs ("\tnop\n", file);
5901 }
5902#endif
5903
5904}
5905
5906/* Extract the parts of an RTL expression that is a valid memory address
5907 for an instruction. Return 0 if the structure of the address is
5908 grossly off. Return -1 if the address contains ASHIFT, so it is not
5909	   strictly valid, but is still used for computing the length of a lea instruction.  */
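/* For example, the address
       (plus (plus (mult (reg %ecx) (const_int 4)) (reg %ebx)) (const_int 12)),
   i.e. 12(%ebx,%ecx,4) in AT&T syntax, decomposes into base = %ebx,
   index = %ecx, scale = 4, disp = (const_int 12), seg = SEG_DEFAULT,
   and the function returns 1.  */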
5910
5911int
5912ix86_decompose_address (rtx addr, struct ix86_address *out)
5913{
5914 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5915 rtx base_reg, index_reg;
5916 HOST_WIDE_INT scale = 1;
5917 rtx scale_rtx = NULL_RTX;
5918 int retval = 1;
5919 enum ix86_address_seg seg = SEG_DEFAULT;
5920
5921 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5922 base = addr;
5923 else if (GET_CODE (addr) == PLUS)
5924 {
5925 rtx addends[4], op;
5926 int n = 0, i;
5927
5928 op = addr;
5929 do
5930 {
5931 if (n >= 4)
5932 return 0;
5933 addends[n++] = XEXP (op, 1);
5934 op = XEXP (op, 0);
5935 }
5936 while (GET_CODE (op) == PLUS);
5937 if (n >= 4)
5938 return 0;
5939 addends[n] = op;
5940
5941 for (i = n; i >= 0; --i)
5942 {
5943 op = addends[i];
5944 switch (GET_CODE (op))
5945 {
5946 case MULT:
5947 if (index)
5948 return 0;
5949 index = XEXP (op, 0);
5950 scale_rtx = XEXP (op, 1);
5951 break;
5952
5953 case UNSPEC:
5954 if (XINT (op, 1) == UNSPEC_TP
5955 && TARGET_TLS_DIRECT_SEG_REFS
5956 && seg == SEG_DEFAULT)
5957 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5958 else
5959 return 0;
5960 break;
5961
5962 case REG:
5963 case SUBREG:
5964 if (!base)
5965 base = op;
5966 else if (!index)
5967 index = op;
5968 else
5969 return 0;
5970 break;
5971
5972 case CONST:
5973 case CONST_INT:
5974 case SYMBOL_REF:
5975 case LABEL_REF:
5976 if (disp)
5977 return 0;
5978 disp = op;
5979 break;
5980
5981 default:
5982 return 0;
5983 }
5984 }
5985 }
5986 else if (GET_CODE (addr) == MULT)
5987 {
5988 index = XEXP (addr, 0); /* index*scale */
5989 scale_rtx = XEXP (addr, 1);
5990 }
5991 else if (GET_CODE (addr) == ASHIFT)
5992 {
5993 rtx tmp;
5994
5995 /* We're called for lea too, which implements ashift on occasion. */
5996 index = XEXP (addr, 0);
5997 tmp = XEXP (addr, 1);
5998 if (GET_CODE (tmp) != CONST_INT)
5999 return 0;
6000 scale = INTVAL (tmp);
6001 if ((unsigned HOST_WIDE_INT) scale > 3)
6002 return 0;
6003 scale = 1 << scale;
6004 retval = -1;
6005 }
6006 else
6007 disp = addr; /* displacement */
6008
6009 /* Extract the integral value of scale. */
6010 if (scale_rtx)
6011 {
6012 if (GET_CODE (scale_rtx) != CONST_INT)
6013 return 0;
6014 scale = INTVAL (scale_rtx);
6015 }
6016
6017 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6018 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6019
6020	  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
6021 if (base_reg && index_reg && scale == 1
6022 && (index_reg == arg_pointer_rtx
6023 || index_reg == frame_pointer_rtx
6024 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6025 {
6026 rtx tmp;
6027 tmp = base, base = index, index = tmp;
6028 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6029 }
6030
6031 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6032 if ((base_reg == hard_frame_pointer_rtx
6033 || base_reg == frame_pointer_rtx
6034 || base_reg == arg_pointer_rtx) && !disp)
6035 disp = const0_rtx;
6036
6037	  /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
6038 Avoid this by transforming to [%esi+0]. */
6039 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6040 && base_reg && !index_reg && !disp
6041 && REG_P (base_reg)
6042 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6043 disp = const0_rtx;
6044
6045 /* Special case: encode reg+reg instead of reg*2. */
6046 if (!base && index && scale && scale == 2)
6047 base = index, base_reg = index_reg, scale = 1;
6048
6049 /* Special case: scaling cannot be encoded without base or displacement. */
6050 if (!base && !disp && index && scale != 1)
6051 disp = const0_rtx;
6052
6053 out->base = base;
6054 out->index = index;
6055 out->disp = disp;
6056 out->scale = scale;
6057 out->seg = seg;
6058
6059 return retval;
6060}
6061
6062	/* Return the cost of the memory address X.
6063	   For i386, it is better to use a complex address than let gcc copy
6064	   the address into a reg and make a new pseudo.  But not if the address
6065	   requires two regs - that would mean more pseudos with longer
6066 lifetimes. */
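/* For instance, with the rules below 4(%ebx) (a hard register base plus a
   non-zero displacement) gets cost 0, plain (%ebx) gets cost 1, and the
   same addresses built on pseudo registers cost one more.  */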
6067static int
6068ix86_address_cost (rtx x)
6069{
6070 struct ix86_address parts;
6071 int cost = 1;
6072 int ok = ix86_decompose_address (x, &parts);
6073
6074 gcc_assert (ok);
6075
6076 if (parts.base && GET_CODE (parts.base) == SUBREG)
6077 parts.base = SUBREG_REG (parts.base);
6078 if (parts.index && GET_CODE (parts.index) == SUBREG)
6079 parts.index = SUBREG_REG (parts.index);
6080
6081 /* More complex memory references are better. */
6082 if (parts.disp && parts.disp != const0_rtx)
6083 cost--;
6084 if (parts.seg != SEG_DEFAULT)
6085 cost--;
6086
6087 /* Attempt to minimize number of registers in the address. */
6088 if ((parts.base
6089 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6090 || (parts.index
6091 && (!REG_P (parts.index)
6092 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6093 cost++;
6094
6095 if (parts.base
6096 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6097 && parts.index
6098 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6099 && parts.base != parts.index)
6100 cost++;
6101
6102	  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6103	     since its predecode logic can't detect the length of instructions
6104	     and decoding degenerates to vector decoding.  Increase the cost of such
6105	     addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
6106	     to split such addresses or even refuse such addresses altogether.
6107	
6108	     The following addressing modes are affected:
6109	      [base+scale*index]
6110	      [scale*index+disp]
6111	      [base+index]
6112	
6113	     The first and last case may be avoidable by explicitly coding the zero in
6114	     the memory address, but I don't have an AMD-K6 machine handy to check this
6115	     theory.  */
6116
6117 if (TARGET_K6
6118 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6119 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6120 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6121 cost += 10;
6122
6123 return cost;
6124}
6125
6126/* If X is a machine specific address (i.e. a symbol or label being
6127 referenced as a displacement from the GOT implemented using an
6128 UNSPEC), then return the base term. Otherwise return X. */
6129
6130rtx
6131ix86_find_base_term (rtx x)
6132{
6133 rtx term;
6134
6135 if (TARGET_64BIT)
6136 {
6137 if (GET_CODE (x) != CONST)
6138 return x;
6139 term = XEXP (x, 0);
6140 if (GET_CODE (term) == PLUS
6141 && (GET_CODE (XEXP (term, 1)) == CONST_INT
6142 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6143 term = XEXP (term, 0);
6144 if (GET_CODE (term) != UNSPEC
6145 || XINT (term, 1) != UNSPEC_GOTPCREL)
6146 return x;
6147
6148 term = XVECEXP (term, 0, 0);
6149
6150 if (GET_CODE (term) != SYMBOL_REF
6151 && GET_CODE (term) != LABEL_REF)
6152 return x;
6153
6154 return term;
6155 }
6156
6157 term = ix86_delegitimize_address (x);
6158
6159 if (GET_CODE (term) != SYMBOL_REF
6160 && GET_CODE (term) != LABEL_REF)
6161 return x;
6162
6163 return term;
6164}
6165
6166/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6167	   this is used to form addresses to local data when -fPIC is in
6168 use. */
6169
6170static bool
6171darwin_local_data_pic (rtx disp)
6172{
6173 if (GET_CODE (disp) == MINUS)
6174 {
6175 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6176 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6177 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6178 {
6179 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6180 if (! strcmp (sym_name, "<pic base>"))
6181 return true;
6182 }
6183 }
6184
6185 return false;
6186}
6187
6188/* Determine if a given RTX is a valid constant. We already know this
6189 satisfies CONSTANT_P. */
6190
6191bool
6192legitimate_constant_p (rtx x)
6193{
6194 switch (GET_CODE (x))
6195 {
6196 case CONST:
6197 x = XEXP (x, 0);
6198
6199 if (GET_CODE (x) == PLUS)
6200 {
6201 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6202 return false;
6203 x = XEXP (x, 0);
6204 }
6205
6206 if (TARGET_MACHO && darwin_local_data_pic (x))
6207 return true;
6208
6209 /* Only some unspecs are valid as "constants". */
6210 if (GET_CODE (x) == UNSPEC)
6211 switch (XINT (x, 1))
6212 {
6213 case UNSPEC_GOTOFF:
6214 return TARGET_64BIT;
6215 case UNSPEC_TPOFF:
6216 case UNSPEC_NTPOFF:
6217 x = XVECEXP (x, 0, 0);
6218 return (GET_CODE (x) == SYMBOL_REF
6219 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6220 case UNSPEC_DTPOFF:
6221 x = XVECEXP (x, 0, 0);
6222 return (GET_CODE (x) == SYMBOL_REF
6223 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6224 default:
6225 return false;
6226 }
6227
6228 /* We must have drilled down to a symbol. */
6229 if (GET_CODE (x) == LABEL_REF)
6230 return true;
6231 if (GET_CODE (x) != SYMBOL_REF)
6232 return false;
6233 /* FALLTHRU */
6234
6235 case SYMBOL_REF:
6236 /* TLS symbols are never valid. */
6237 if (SYMBOL_REF_TLS_MODEL (x))
6238 return false;
6239 break;
6240
6241 case CONST_DOUBLE:
6242 if (GET_MODE (x) == TImode
6243 && x != CONST0_RTX (TImode)
6244 && !TARGET_64BIT)
6245 return false;
6246 break;
6247
6248 case CONST_VECTOR:
6249 if (x == CONST0_RTX (GET_MODE (x)))
6250 return true;
6251 return false;
6252
6253 default:
6254 break;
6255 }
6256
6257 /* Otherwise we handle everything else in the move patterns. */
6258 return true;
6259}
6260
6261/* Determine if it's legal to put X into the constant pool. This
6262 is not possible for the address of thread-local symbols, which
6263 is checked above. */
6264
6265static bool
6266ix86_cannot_force_const_mem (rtx x)
6267{
6268 /* We can always put integral constants and vectors in memory. */
6269 switch (GET_CODE (x))
6270 {
6271 case CONST_INT:
6272 case CONST_DOUBLE:
6273 case CONST_VECTOR:
6274 return false;
6275
6276 default:
6277 break;
6278 }
6279 return !legitimate_constant_p (x);
6280}
6281
6282/* Determine if a given RTX is a valid constant address. */
6283
6284bool
6285constant_address_p (rtx x)
6286{
6287 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6288}
6289
6290/* Nonzero if the constant value X is a legitimate general operand
6291 when generating PIC code. It is given that flag_pic is on and
6292 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6293
6294bool
6295legitimate_pic_operand_p (rtx x)
6296{
6297 rtx inner;
6298
6299 switch (GET_CODE (x))
6300 {
6301 case CONST:
6302 inner = XEXP (x, 0);
6303 if (GET_CODE (inner) == PLUS
6304 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6305 inner = XEXP (inner, 0);
6306
6307 /* Only some unspecs are valid as "constants". */
6308 if (GET_CODE (inner) == UNSPEC)
6309 switch (XINT (inner, 1))
6310 {
6311 case UNSPEC_GOTOFF:
6312 return TARGET_64BIT;
6313 case UNSPEC_TPOFF:
6314 x = XVECEXP (inner, 0, 0);
6315 return (GET_CODE (x) == SYMBOL_REF
6316 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6317 default:
6318 return false;
6319 }
6320 /* FALLTHRU */
6321
6322 case SYMBOL_REF:
6323 case LABEL_REF:
6324 return legitimate_pic_address_disp_p (x);
6325
6326 default:
6327 return true;
6328 }
6329}
6330
6331/* Determine if a given CONST RTX is a valid memory displacement
6332 in PIC mode. */
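/* For example, in 32bit PIC code the displacement
       (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
   is accepted when "foo" is a local symbol, while TLS references are only
   accepted when wrapped in one of the TLS unspecs handled below.  */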
6333
6334int
6335legitimate_pic_address_disp_p (rtx disp)
6336{
6337 bool saw_plus;
6338
6339 /* In 64bit mode we can allow direct addresses of symbols and labels
6340 when they are not dynamic symbols. */
6341 if (TARGET_64BIT)
6342 {
6343 rtx op0 = disp, op1;
6344
6345 switch (GET_CODE (disp))
6346 {
6347 case LABEL_REF:
6348 return true;
6349
6350 case CONST:
6351 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6352 break;
6353 op0 = XEXP (XEXP (disp, 0), 0);
6354 op1 = XEXP (XEXP (disp, 0), 1);
6355 if (GET_CODE (op1) != CONST_INT
6356 || INTVAL (op1) >= 16*1024*1024
6357 || INTVAL (op1) < -16*1024*1024)
6358 break;
6359 if (GET_CODE (op0) == LABEL_REF)
6360 return true;
6361 if (GET_CODE (op0) != SYMBOL_REF)
6362 break;
6363 /* FALLTHRU */
6364
6365 case SYMBOL_REF:
6366 /* TLS references should always be enclosed in UNSPEC. */
6367 if (SYMBOL_REF_TLS_MODEL (op0))
6368 return false;
6369 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6370 return true;
6371 break;
6372
6373 default:
6374 break;
6375 }
6376 }
6377 if (GET_CODE (disp) != CONST)
6378 return 0;
6379 disp = XEXP (disp, 0);
6380
6381 if (TARGET_64BIT)
6382 {
6383	      /* It is unsafe to allow PLUS expressions here; the allowed distance
6384	         of GOT tables is limited.  We should not need these anyway.  */
6385 if (GET_CODE (disp) != UNSPEC
6386 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6387 && XINT (disp, 1) != UNSPEC_GOTOFF))
6388 return 0;
6389
6390 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6391 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6392 return 0;
6393 return 1;
6394 }
6395
6396 saw_plus = false;
6397 if (GET_CODE (disp) == PLUS)
6398 {
6399 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6400 return 0;
6401 disp = XEXP (disp, 0);
6402 saw_plus = true;
6403 }
6404
6405 if (TARGET_MACHO && darwin_local_data_pic (disp))
6406 return 1;
6407
6408 if (GET_CODE (disp) != UNSPEC)
6409 return 0;
6410
6411 switch (XINT (disp, 1))
6412 {
6413 case UNSPEC_GOT:
6414 if (saw_plus)
6415 return false;
6416 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6417 case UNSPEC_GOTOFF:
6418 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6419	 While the ABI also specifies a 32bit relocation, we don't produce it in
6420	 the small PIC model at all.  */
6421 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6422 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6423 && !TARGET_64BIT)
6424 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6425 return false;
6426 case UNSPEC_GOTTPOFF:
6427 case UNSPEC_GOTNTPOFF:
6428 case UNSPEC_INDNTPOFF:
6429 if (saw_plus)
6430 return false;
6431 disp = XVECEXP (disp, 0, 0);
6432 return (GET_CODE (disp) == SYMBOL_REF
6433 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6434 case UNSPEC_NTPOFF:
6435 disp = XVECEXP (disp, 0, 0);
6436 return (GET_CODE (disp) == SYMBOL_REF
6437 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6438 case UNSPEC_DTPOFF:
6439 disp = XVECEXP (disp, 0, 0);
6440 return (GET_CODE (disp) == SYMBOL_REF
6441 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6442 }
6443
6444 return 0;
6445}
6446
6447/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6448 memory address for an instruction. The MODE argument is the machine mode
6449 for the MEM expression that wants to use this address.
6450
6451	   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
6452 convert common non-canonical forms to canonical form so that they will
6453 be recognized. */
6454
6455int
6456legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6457{
6458 struct ix86_address parts;
6459 rtx base, index, disp;
6460 HOST_WIDE_INT scale;
6461 const char *reason = NULL;
6462 rtx reason_rtx = NULL_RTX;
6463
6464 if (TARGET_DEBUG_ADDR)
6465 {
6466 fprintf (stderr,
6467 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6468 GET_MODE_NAME (mode), strict);
6469 debug_rtx (addr);
6470 }
6471
6472 if (ix86_decompose_address (addr, &parts) <= 0)
6473 {
6474 reason = "decomposition failed";
6475 goto report_error;
6476 }
6477
6478 base = parts.base;
6479 index = parts.index;
6480 disp = parts.disp;
6481 scale = parts.scale;
6482
6483 /* Validate base register.
6484
6485	     Don't allow SUBREGs that span more than a word here.  It can lead to spill
6486	     failures when the base is one word out of a two-word structure, which is
6487	     represented internally as a DImode int.  */
6488
6489 if (base)
6490 {
6491 rtx reg;
6492 reason_rtx = base;
6493
6494 if (REG_P (base))
6495 reg = base;
6496 else if (GET_CODE (base) == SUBREG
6497 && REG_P (SUBREG_REG (base))
6498 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6499 <= UNITS_PER_WORD)
6500 reg = SUBREG_REG (base);
6501 else
6502 {
6503 reason = "base is not a register";
6504 goto report_error;
6505 }
6506
6507 if (GET_MODE (base) != Pmode)
6508 {
6509 reason = "base is not in Pmode";
6510 goto report_error;
6511 }
6512
6513 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6514 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6515 {
6516 reason = "base is not valid";
6517 goto report_error;
6518 }
6519 }
6520
6521 /* Validate index register.
6522
6523	     Don't allow SUBREGs that span more than a word here -- same as above.  */
6524
6525 if (index)
6526 {
6527 rtx reg;
6528 reason_rtx = index;
6529
6530 if (REG_P (index))
6531 reg = index;
6532 else if (GET_CODE (index) == SUBREG
6533 && REG_P (SUBREG_REG (index))
6534 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6535 <= UNITS_PER_WORD)
6536 reg = SUBREG_REG (index);
6537 else
6538 {
6539 reason = "index is not a register";
6540 goto report_error;
6541 }
6542
6543 if (GET_MODE (index) != Pmode)
6544 {
6545 reason = "index is not in Pmode";
6546 goto report_error;
6547 }
6548
6549 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6550 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6551 {
6552 reason = "index is not valid";
6553 goto report_error;
6554 }
6555 }
6556
6557 /* Validate scale factor. */
6558 if (scale != 1)
6559 {
6560 reason_rtx = GEN_INT (scale);
6561 if (!index)
6562 {
6563 reason = "scale without index";
6564 goto report_error;
6565 }
6566
6567 if (scale != 2 && scale != 4 && scale != 8)
6568 {
6569 reason = "scale is not a valid multiplier";
6570 goto report_error;
6571 }
6572 }
6573
6574 /* Validate displacement. */
6575 if (disp)
6576 {
6577 reason_rtx = disp;
6578
6579 if (GET_CODE (disp) == CONST
6580 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6581 switch (XINT (XEXP (disp, 0), 1))
6582 {
6583 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6584	         used.  While the ABI also specifies 32bit relocations, we don't produce
6585	         them at all and use IP-relative addressing instead.  */
6586 case UNSPEC_GOT:
6587 case UNSPEC_GOTOFF:
6588 gcc_assert (flag_pic);
6589 if (!TARGET_64BIT)
6590 goto is_legitimate_pic;
6591 reason = "64bit address unspec";
6592 goto report_error;
6593
6594 case UNSPEC_GOTPCREL:
6595 gcc_assert (flag_pic);
6596 goto is_legitimate_pic;
6597
6598 case UNSPEC_GOTTPOFF:
6599 case UNSPEC_GOTNTPOFF:
6600 case UNSPEC_INDNTPOFF:
6601 case UNSPEC_NTPOFF:
6602 case UNSPEC_DTPOFF:
6603 break;
6604
6605 default:
6606 reason = "invalid address unspec";
6607 goto report_error;
6608 }
6609
6610 else if (SYMBOLIC_CONST (disp)
6611 && (flag_pic
6612 || (TARGET_MACHO
6613#if TARGET_MACHO
6614 && MACHOPIC_INDIRECT
6615 && !machopic_operand_p (disp)
6616#endif
6617 )))
6618 {
6619
6620 is_legitimate_pic:
6621 if (TARGET_64BIT && (index || base))
6622 {
6623 /* foo@dtpoff(%rX) is ok. */
6624 if (GET_CODE (disp) != CONST
6625 || GET_CODE (XEXP (disp, 0)) != PLUS
6626 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6627 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6628 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6629 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6630 {
6631 reason = "non-constant pic memory reference";
6632 goto report_error;
6633 }
6634 }
6635 else if (! legitimate_pic_address_disp_p (disp))
6636 {
6637 reason = "displacement is an invalid pic construct";
6638 goto report_error;
6639 }
6640
6641 /* This code used to verify that a symbolic pic displacement
6642 includes the pic_offset_table_rtx register.
6643
6644		 While this is a good idea, unfortunately these constructs may
6645		 be created by the "adds using lea" optimization for incorrect
6646		 code like:
6647
6648 int a;
6649 int foo(int i)
6650 {
6651 return *(&a+i);
6652 }
6653
6654		 This code is nonsensical, but results in addressing the
6655		 GOT table with a pic_offset_table_rtx base.  We can't
6656		 just refuse it easily, since it gets matched by the
6657		 "addsi3" pattern, which later gets split to lea when the
6658		 output register differs from the input.  While this
6659		 could be handled by a separate addsi pattern for this case
6660		 that never results in lea, disabling this test seems to be
6661		 the easier and correct fix for the crash.  */
6662 }
6663 else if (GET_CODE (disp) != LABEL_REF
6664 && GET_CODE (disp) != CONST_INT
6665 && (GET_CODE (disp) != CONST
6666 || !legitimate_constant_p (disp))
6667 && (GET_CODE (disp) != SYMBOL_REF
6668 || !legitimate_constant_p (disp)))
6669 {
6670 reason = "displacement is not constant";
6671 goto report_error;
6672 }
6673 else if (TARGET_64BIT
6674 && !x86_64_immediate_operand (disp, VOIDmode))
6675 {
6676 reason = "displacement is out of range";
6677 goto report_error;
6678 }
6679 }
6680
6681 /* Everything looks valid. */
6682 if (TARGET_DEBUG_ADDR)
6683 fprintf (stderr, "Success.\n");
6684 return TRUE;
6685
6686 report_error:
6687 if (TARGET_DEBUG_ADDR)
6688 {
6689 fprintf (stderr, "Error: %s\n", reason);
6690 debug_rtx (reason_rtx);
6691 }
6692 return FALSE;
6693}
6694
6695/* Return a unique alias set for the GOT. */
6696
6697static HOST_WIDE_INT
6698ix86_GOT_alias_set (void)
6699{
6700 static HOST_WIDE_INT set = -1;
6701 if (set == -1)
6702 set = new_alias_set ();
6703 return set;
6704}
6705
6706/* Return a legitimate reference for ORIG (an address) using the
6707 register REG. If REG is 0, a new pseudo is generated.
6708
6709 There are two types of references that must be handled:
6710
6711 1. Global data references must load the address from the GOT, via
6712 the PIC reg. An insn is emitted to do this load, and the reg is
6713 returned.
6714
6715 2. Static data references, constant pool addresses, and code labels
6716 compute the address as an offset from the GOT, whose base is in
6717 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6718 differentiate them from global data objects. The returned
6719 address is the PIC reg + an unspec constant.
6720
6721 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6722 reg also appears in the address. */
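/* For example, in 32bit PIC code a local symbol "bar" becomes
       (plus pic_offset_table_rtx
	     (const (unspec [(symbol_ref "bar")] UNSPEC_GOTOFF)))
   while a global symbol becomes a load from the GOT:
       (mem (plus pic_offset_table_rtx (const (unspec [...] UNSPEC_GOT)))).  */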
6723
6724static rtx
6725legitimize_pic_address (rtx orig, rtx reg)
6726{
6727 rtx addr = orig;
6728 rtx new = orig;
6729 rtx base;
6730
6731#if TARGET_MACHO
6732 if (TARGET_MACHO && !TARGET_64BIT)
6733 {
6734 if (reg == 0)
6735 reg = gen_reg_rtx (Pmode);
6736 /* Use the generic Mach-O PIC machinery. */
6737 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6738 }
6739#endif
6740
6741 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6742 new = addr;
6743 else if (TARGET_64BIT
6744 && ix86_cmodel != CM_SMALL_PIC
6745 && local_symbolic_operand (addr, Pmode))
6746 {
6747 rtx tmpreg;
6748 /* This symbol may be referenced via a displacement from the PIC
6749 base address (@GOTOFF). */
6750
6751 if (reload_in_progress)
6752 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6753 if (GET_CODE (addr) == CONST)
6754 addr = XEXP (addr, 0);
6755 if (GET_CODE (addr) == PLUS)
6756 {
6757 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6758 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6759 }
6760 else
6761 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6762 new = gen_rtx_CONST (Pmode, new);
6763 if (!reg)
6764 tmpreg = gen_reg_rtx (Pmode);
6765 else
6766 tmpreg = reg;
6767 emit_move_insn (tmpreg, new);
6768
6769 if (reg != 0)
6770 {
6771 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6772 tmpreg, 1, OPTAB_DIRECT);
6773 new = reg;
6774 }
6775 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6776 }
6777 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6778 {
6779 /* This symbol may be referenced via a displacement from the PIC
6780 base address (@GOTOFF). */
6781
6782 if (reload_in_progress)
6783 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6784 if (GET_CODE (addr) == CONST)
6785 addr = XEXP (addr, 0);
6786 if (GET_CODE (addr) == PLUS)
6787 {
6788 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6789 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6790 }
6791 else
6792 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6793 new = gen_rtx_CONST (Pmode, new);
6794 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6795
6796 if (reg != 0)
6797 {
6798 emit_move_insn (reg, new);
6799 new = reg;
6800 }
6801 }
6802 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6803 {
6804 if (TARGET_64BIT)
6805 {
6806 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6807 new = gen_rtx_CONST (Pmode, new);
6808 new = gen_const_mem (Pmode, new);
6809 set_mem_alias_set (new, ix86_GOT_alias_set ());
6810
6811 if (reg == 0)
6812 reg = gen_reg_rtx (Pmode);
6813	      /* Use gen_movsi directly, otherwise the address is loaded
6814		 into a register for CSE.  We don't want to CSE these addresses,
6815		 instead we CSE addresses from the GOT table, so skip this.  */
6816 emit_insn (gen_movsi (reg, new));
6817 new = reg;
6818 }
6819 else
6820 {
6821 /* This symbol must be referenced via a load from the
6822 Global Offset Table (@GOT). */
6823
6824 if (reload_in_progress)
6825 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6826 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6827 new = gen_rtx_CONST (Pmode, new);
6828 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6829 new = gen_const_mem (Pmode, new);
6830 set_mem_alias_set (new, ix86_GOT_alias_set ());
6831
6832 if (reg == 0)
6833 reg = gen_reg_rtx (Pmode);
6834 emit_move_insn (reg, new);
6835 new = reg;
6836 }
6837 }
6838 else
6839 {
6840 if (GET_CODE (addr) == CONST_INT
6841 && !x86_64_immediate_operand (addr, VOIDmode))
6842 {
6843 if (reg)
6844 {
6845 emit_move_insn (reg, addr);
6846 new = reg;
6847 }
6848 else
6849 new = force_reg (Pmode, addr);
6850 }
6851 else if (GET_CODE (addr) == CONST)
6852 {
6853 addr = XEXP (addr, 0);
6854
6855 /* We must match stuff we generate before. Assume the only
6856 unspecs that can get here are ours. Not that we could do
6857 anything with them anyway.... */
6858 if (GET_CODE (addr) == UNSPEC
6859 || (GET_CODE (addr) == PLUS
6860 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6861 return orig;
6862 gcc_assert (GET_CODE (addr) == PLUS);
6863 }
6864 if (GET_CODE (addr) == PLUS)
6865 {
6866 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6867
6868 /* Check first to see if this is a constant offset from a @GOTOFF
6869 symbol reference. */
6870 if (local_symbolic_operand (op0, Pmode)
6871 && GET_CODE (op1) == CONST_INT)
6872 {
6873 if (!TARGET_64BIT)
6874 {
6875 if (reload_in_progress)
6876 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6877 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6878 UNSPEC_GOTOFF);
6879 new = gen_rtx_PLUS (Pmode, new, op1);
6880 new = gen_rtx_CONST (Pmode, new);
6881 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6882
6883 if (reg != 0)
6884 {
6885 emit_move_insn (reg, new);
6886 new = reg;
6887 }
6888 }
6889 else
6890 {
6891 if (INTVAL (op1) < -16*1024*1024
6892 || INTVAL (op1) >= 16*1024*1024)
6893 {
6894 if (!x86_64_immediate_operand (op1, Pmode))
6895 op1 = force_reg (Pmode, op1);
6896 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6897 }
6898 }
6899 }
6900 else
6901 {
6902 base = legitimize_pic_address (XEXP (addr, 0), reg);
6903 new = legitimize_pic_address (XEXP (addr, 1),
6904 base == reg ? NULL_RTX : reg);
6905
6906 if (GET_CODE (new) == CONST_INT)
6907 new = plus_constant (base, INTVAL (new));
6908 else
6909 {
6910 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6911 {
6912 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6913 new = XEXP (new, 1);
6914 }
6915 new = gen_rtx_PLUS (Pmode, base, new);
6916 }
6917 }
6918 }
6919 }
6920 return new;
6921}
6922
6923/* Load the thread pointer. If TO_REG is true, force it into a register. */
6924
6925static rtx
6926get_thread_pointer (int to_reg)
6927{
6928 rtx tp, reg, insn;
6929
6930 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6931 if (!to_reg)
6932 return tp;
6933
6934 reg = gen_reg_rtx (Pmode);
6935 insn = gen_rtx_SET (VOIDmode, reg, tp);
6936 insn = emit_insn (insn);
6937
6938 return reg;
6939}
6940
6941/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6942 false if we expect this to be used for a memory address and true if
6943 we expect to load the address into a register. */
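/* For example, for TLS_MODEL_LOCAL_EXEC with GNU TLS the result has the form
       (plus <thread pointer> (const (unspec [(symbol_ref "x")] UNSPEC_NTPOFF)))
   which prints as x@NTPOFF (x@TPOFF in 64bit mode) and, when
   TARGET_TLS_DIRECT_SEG_REFS is set, is addressed through the thread-pointer
   segment (%gs in 32bit mode, %fs in 64bit mode) rather than via a separate
   register holding the thread pointer.  */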
6944
6945static rtx
6946legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6947{
6948 rtx dest, base, off, pic, tp;
6949 int type;
6950
6951 switch (model)
6952 {
6953 case TLS_MODEL_GLOBAL_DYNAMIC:
6954 dest = gen_reg_rtx (Pmode);
6955 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6956
6957 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6958 {
6959 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6960
6961 start_sequence ();
6962 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6963 insns = get_insns ();
6964 end_sequence ();
6965
6966 emit_libcall_block (insns, dest, rax, x);
6967 }
6968 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6969 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6970 else
6971 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6972
6973 if (TARGET_GNU2_TLS)
6974 {
6975 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6976
6977 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6978 }
6979 break;
6980
6981 case TLS_MODEL_LOCAL_DYNAMIC:
6982 base = gen_reg_rtx (Pmode);
6983 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6984
6985 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6986 {
6987 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6988
6989 start_sequence ();
6990 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6991 insns = get_insns ();
6992 end_sequence ();
6993
6994 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6995 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6996 emit_libcall_block (insns, base, rax, note);
6997 }
6998 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6999 emit_insn (gen_tls_local_dynamic_base_64 (base));
7000 else
7001 emit_insn (gen_tls_local_dynamic_base_32 (base));
7002
7003 if (TARGET_GNU2_TLS)
7004 {
7005 rtx x = ix86_tls_module_base ();
7006
7007 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7008 gen_rtx_MINUS (Pmode, x, tp));
7009 }
7010
7011 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7012 off = gen_rtx_CONST (Pmode, off);
7013
7014 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7015
7016 if (TARGET_GNU2_TLS)
7017 {
7018 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7019
7020 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7021 }
7022
7023 break;
7024
7025 case TLS_MODEL_INITIAL_EXEC:
7026 if (TARGET_64BIT)
7027 {
7028 pic = NULL;
7029 type = UNSPEC_GOTNTPOFF;
7030 }
7031 else if (flag_pic)
7032 {
7033 if (reload_in_progress)
7034 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7035 pic = pic_offset_table_rtx;
7036 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7037 }
7038 else if (!TARGET_ANY_GNU_TLS)
7039 {
7040 pic = gen_reg_rtx (Pmode);
7041 emit_insn (gen_set_got (pic));
7042 type = UNSPEC_GOTTPOFF;
7043 }
7044 else
7045 {
7046 pic = NULL;
7047 type = UNSPEC_INDNTPOFF;
7048 }
7049
7050 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7051 off = gen_rtx_CONST (Pmode, off);
7052 if (pic)
7053 off = gen_rtx_PLUS (Pmode, pic, off);
7054 off = gen_const_mem (Pmode, off);
7055 set_mem_alias_set (off, ix86_GOT_alias_set ());
7056
7057 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7058 {
7059 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7060 off = force_reg (Pmode, off);
7061 return gen_rtx_PLUS (Pmode, base, off);
7062 }
7063 else
7064 {
7065 base = get_thread_pointer (true);
7066 dest = gen_reg_rtx (Pmode);
7067 emit_insn (gen_subsi3 (dest, base, off));
7068 }
7069 break;
7070
7071 case TLS_MODEL_LOCAL_EXEC:
7072 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7073 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7074 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7075 off = gen_rtx_CONST (Pmode, off);
7076
7077 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7078 {
7079 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7080 return gen_rtx_PLUS (Pmode, base, off);
7081 }
7082 else
7083 {
7084 base = get_thread_pointer (true);
7085 dest = gen_reg_rtx (Pmode);
7086 emit_insn (gen_subsi3 (dest, base, off));
7087 }
7088 break;
7089
7090 default:
7091 gcc_unreachable ();
7092 }
7093
7094 return dest;
7095}
7096
7097/* Try machine-dependent ways of modifying an illegitimate address
7098 to be legitimate. If we find one, return the new, valid address.
7099 This macro is used in only one place: `memory_address' in explow.c.
7100
7101 OLDX is the address as it was before break_out_memory_refs was called.
7102 In some cases it is useful to look at this to decide what needs to be done.
7103
7104 MODE and WIN are passed so that this macro can use
7105 GO_IF_LEGITIMATE_ADDRESS.
7106
7107 It is always safe for this macro to do nothing. It exists to recognize
7108 opportunities to optimize the output.
7109
7110 For the 80386, we handle X+REG by loading X into a register R and
7111 using R+REG. R will go in a general reg and indexing will be used.
7112 However, if REG is a broken-out memory address or multiplication,
7113 nothing needs to be done because REG can certainly go in a general reg.
7114
7115 When -fpic is used, special handling is needed for symbolic references.
7116 See comments by legitimize_pic_address in i386.c for details. */
7117
7118rtx
7119legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7120{
7121 int changed = 0;
7122 unsigned log;
7123
7124 if (TARGET_DEBUG_ADDR)
7125 {
7126 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7127 GET_MODE_NAME (mode));
7128 debug_rtx (x);
7129 }
7130
7131 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7132 if (log)
7133 return legitimize_tls_address (x, log, false);
7134 if (GET_CODE (x) == CONST
7135 && GET_CODE (XEXP (x, 0)) == PLUS
7136 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7137 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7138 {
7139 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7140 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7141 }
7142
7143 if (flag_pic && SYMBOLIC_CONST (x))
7144 return legitimize_pic_address (x, 0);
7145
7146	  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
7147 if (GET_CODE (x) == ASHIFT
7148 && GET_CODE (XEXP (x, 1)) == CONST_INT
7149 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7150 {
7151 changed = 1;
7152 log = INTVAL (XEXP (x, 1));
7153 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7154 GEN_INT (1 << log));
7155 }
7156
7157 if (GET_CODE (x) == PLUS)
7158 {
7159 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7160
7161 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7162 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7163 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7164 {
7165 changed = 1;
7166 log = INTVAL (XEXP (XEXP (x, 0), 1));
7167 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7168 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7169 GEN_INT (1 << log));
7170 }
7171
7172 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7173 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7174 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7175 {
7176 changed = 1;
7177 log = INTVAL (XEXP (XEXP (x, 1), 1));
7178 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7179 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7180 GEN_INT (1 << log));
7181 }
7182
7183 /* Put multiply first if it isn't already. */
7184 if (GET_CODE (XEXP (x, 1)) == MULT)
7185 {
7186 rtx tmp = XEXP (x, 0);
7187 XEXP (x, 0) = XEXP (x, 1);
7188 XEXP (x, 1) = tmp;
7189 changed = 1;
7190 }
7191
7192 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7193 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7194 created by virtual register instantiation, register elimination, and
7195 similar optimizations. */
7196 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7197 {
7198 changed = 1;
7199 x = gen_rtx_PLUS (Pmode,
7200 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7201 XEXP (XEXP (x, 1), 0)),
7202 XEXP (XEXP (x, 1), 1));
7203 }
7204
7205 /* Canonicalize
7206 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7207 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7208 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7209 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7210 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7211 && CONSTANT_P (XEXP (x, 1)))
7212 {
7213 rtx constant;
7214 rtx other = NULL_RTX;
7215
7216 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7217 {
7218 constant = XEXP (x, 1);
7219 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7220 }
7221 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7222 {
7223 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7224 other = XEXP (x, 1);
7225 }
7226 else
7227 constant = 0;
7228
7229 if (constant)
7230 {
7231 changed = 1;
7232 x = gen_rtx_PLUS (Pmode,
7233 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7234 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7235 plus_constant (other, INTVAL (constant)));
7236 }
7237 }
7238
7239 if (changed && legitimate_address_p (mode, x, FALSE))
7240 return x;
7241
7242 if (GET_CODE (XEXP (x, 0)) == MULT)
7243 {
7244 changed = 1;
7245 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7246 }
7247
7248 if (GET_CODE (XEXP (x, 1)) == MULT)
7249 {
7250 changed = 1;
7251 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7252 }
7253
7254 if (changed
7255 && GET_CODE (XEXP (x, 1)) == REG
7256 && GET_CODE (XEXP (x, 0)) == REG)
7257 return x;
7258
7259 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7260 {
7261 changed = 1;
7262 x = legitimize_pic_address (x, 0);
7263 }
7264
7265 if (changed && legitimate_address_p (mode, x, FALSE))
7266 return x;
7267
7268 if (GET_CODE (XEXP (x, 0)) == REG)
7269 {
7270 rtx temp = gen_reg_rtx (Pmode);
7271 rtx val = force_operand (XEXP (x, 1), temp);
7272 if (val != temp)
7273 emit_move_insn (temp, val);
7274
7275 XEXP (x, 1) = temp;
7276 return x;
7277 }
7278
7279 else if (GET_CODE (XEXP (x, 1)) == REG)
7280 {
7281 rtx temp = gen_reg_rtx (Pmode);
7282 rtx val = force_operand (XEXP (x, 0), temp);
7283 if (val != temp)
7284 emit_move_insn (temp, val);
7285
7286 XEXP (x, 0) = temp;
7287 return x;
7288 }
7289 }
7290
7291 return x;
7292}
7293
7294/* Print an integer constant expression in assembler syntax. Addition
7295 and subtraction are the only arithmetic that may appear in these
7296 expressions. FILE is the stdio stream to write to, X is the rtx, and
7297 CODE is the operand print code from the output string. */
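/* For example, (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) prints as
   "foo@GOTOFF", while (const (plus (symbol_ref "foo") (const_int 4))) prints
   as "4+foo", since integer constants are printed first for the benefit of
   some assemblers.  */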
7298
7299static void
7300output_pic_addr_const (FILE *file, rtx x, int code)
7301{
7302 char buf[256];
7303
7304 switch (GET_CODE (x))
7305 {
7306 case PC:
7307 gcc_assert (flag_pic);
7308 putc ('.', file);
7309 break;
7310
7311 case SYMBOL_REF:
7312 if (! TARGET_MACHO || TARGET_64BIT)
7313 output_addr_const (file, x);
7314 else
7315 {
7316 const char *name = XSTR (x, 0);
7317
7318 /* Mark the decl as referenced so that cgraph will output the function. */
7319 if (SYMBOL_REF_DECL (x))
7320 mark_decl_referenced (SYMBOL_REF_DECL (x));
7321
7322#if TARGET_MACHO
7323 if (MACHOPIC_INDIRECT
7324 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7325 name = machopic_indirection_name (x, /*stub_p=*/true);
7326#endif
7327 assemble_name (file, name);
7328 }
7329 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7330 fputs ("@PLT", file);
7331 break;
7332
7333 case LABEL_REF:
7334 x = XEXP (x, 0);
7335 /* FALLTHRU */
7336 case CODE_LABEL:
7337 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7338 assemble_name (asm_out_file, buf);
7339 break;
7340
7341 case CONST_INT:
7342 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7343 break;
7344
7345 case CONST:
7346 /* This used to output parentheses around the expression,
7347 but that does not work on the 386 (either ATT or BSD assembler). */
7348 output_pic_addr_const (file, XEXP (x, 0), code);
7349 break;
7350
7351 case CONST_DOUBLE:
7352 if (GET_MODE (x) == VOIDmode)
7353 {
7354 /* We can use %d if the number is <32 bits and positive. */
7355 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7356 fprintf (file, "0x%lx%08lx",
7357 (unsigned long) CONST_DOUBLE_HIGH (x),
7358 (unsigned long) CONST_DOUBLE_LOW (x));
7359 else
7360 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7361 }
7362 else
7363 /* We can't handle floating point constants;
7364 PRINT_OPERAND must handle them. */
7365 output_operand_lossage ("floating constant misused");
7366 break;
7367
7368 case PLUS:
7369 /* Some assemblers need integer constants to appear first. */
7370 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7371 {
7372 output_pic_addr_const (file, XEXP (x, 0), code);
7373 putc ('+', file);
7374 output_pic_addr_const (file, XEXP (x, 1), code);
7375 }
7376 else
7377 {
7378 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7379 output_pic_addr_const (file, XEXP (x, 1), code);
7380 putc ('+', file);
7381 output_pic_addr_const (file, XEXP (x, 0), code);
7382 }
7383 break;
7384
7385 case MINUS:
7386 if (!TARGET_MACHO)
7387 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7388 output_pic_addr_const (file, XEXP (x, 0), code);
7389 putc ('-', file);
7390 output_pic_addr_const (file, XEXP (x, 1), code);
7391 if (!TARGET_MACHO)
7392 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7393 break;
7394
7395 case UNSPEC:
7396 gcc_assert (XVECLEN (x, 0) == 1);
7397 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7398 switch (XINT (x, 1))
7399 {
7400 case UNSPEC_GOT:
7401 fputs ("@GOT", file);
7402 break;
7403 case UNSPEC_GOTOFF:
7404 fputs ("@GOTOFF", file);
7405 break;
7406 case UNSPEC_GOTPCREL:
7407 fputs ("@GOTPCREL(%rip)", file);
7408 break;
7409 case UNSPEC_GOTTPOFF:
7410 /* FIXME: This might be @TPOFF in Sun ld too. */
7411 fputs ("@GOTTPOFF", file);
7412 break;
7413 case UNSPEC_TPOFF:
7414 fputs ("@TPOFF", file);
7415 break;
7416 case UNSPEC_NTPOFF:
7417 if (TARGET_64BIT)
7418 fputs ("@TPOFF", file);
7419 else
7420 fputs ("@NTPOFF", file);
7421 break;
7422 case UNSPEC_DTPOFF:
7423 fputs ("@DTPOFF", file);
7424 break;
7425 case UNSPEC_GOTNTPOFF:
7426 if (TARGET_64BIT)
7427 fputs ("@GOTTPOFF(%rip)", file);
7428 else
7429 fputs ("@GOTNTPOFF", file);
7430 break;
7431 case UNSPEC_INDNTPOFF:
7432 fputs ("@INDNTPOFF", file);
7433 break;
7434 default:
7435 output_operand_lossage ("invalid UNSPEC as operand");
7436 break;
7437 }
7438 break;
7439
7440 default:
7441 output_operand_lossage ("invalid expression as operand");
7442 }
7443}
7444
7445/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7446 We need to emit DTP-relative relocations. */
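/* With the usual ".long" expansion of ASM_LONG, a SIZE of 4 emits something
   like ".long foo@DTPOFF" and a SIZE of 8 emits ".long foo@DTPOFF, 0", the
   upper half of the 8-byte value simply being zero.  */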
7447
7448static void
7449i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7450{
7451 fputs (ASM_LONG, file);
7452 output_addr_const (file, x);
7453 fputs ("@DTPOFF", file);
7454 switch (size)
7455 {
7456 case 4:
7457 break;
7458 case 8:
7459 fputs (", 0", file);
7460 break;
7461 default:
7462 gcc_unreachable ();
7463 }
7464}
7465
7466/* In the name of slightly smaller debug output, and to cater to
7467 general assembler lossage, recognize PIC+GOTOFF and turn it back
7468 into a direct symbol reference.
7469
7470 On Darwin, this is necessary to avoid a crash, because Darwin
7471 has a different PIC label for each routine but the DWARF debugging
7472 information is not associated with any particular routine, so it's
7473 necessary to remove references to the PIC label from RTL stored by
7474 the DWARF output code. */
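/* For example, (plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   is turned back into (symbol_ref "foo") when %ebx is the PIC register, and in
   64bit mode a GOT load (mem (const (unspec [...] UNSPEC_GOTPCREL))) is turned
   back into the symbol it references.  */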
7475
7476static rtx
7477ix86_delegitimize_address (rtx orig_x)
7478{
7479 rtx x = orig_x;
7480 /* reg_addend is NULL or a multiple of some register. */
7481 rtx reg_addend = NULL_RTX;
7482 /* const_addend is NULL or a const_int. */
7483 rtx const_addend = NULL_RTX;
7484 /* This is the result, or NULL. */
7485 rtx result = NULL_RTX;
7486
7487 if (GET_CODE (x) == MEM)
7488 x = XEXP (x, 0);
7489
7490 if (TARGET_64BIT)
7491 {
7492 if (GET_CODE (x) != CONST
7493 || GET_CODE (XEXP (x, 0)) != UNSPEC
7494 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7495 || GET_CODE (orig_x) != MEM)
7496 return orig_x;
7497 return XVECEXP (XEXP (x, 0), 0, 0);
7498 }
7499
7500 if (GET_CODE (x) != PLUS
7501 || GET_CODE (XEXP (x, 1)) != CONST)
7502 return orig_x;
7503
7504 if (GET_CODE (XEXP (x, 0)) == REG
7505 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7506 /* %ebx + GOT/GOTOFF */
7507 ;
7508 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7509 {
7510 /* %ebx + %reg * scale + GOT/GOTOFF */
7511 reg_addend = XEXP (x, 0);
7512 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7513 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7514 reg_addend = XEXP (reg_addend, 1);
7515 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7516 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7517 reg_addend = XEXP (reg_addend, 0);
7518 else
7519 return orig_x;
7520 if (GET_CODE (reg_addend) != REG
7521 && GET_CODE (reg_addend) != MULT
7522 && GET_CODE (reg_addend) != ASHIFT)
7523 return orig_x;
7524 }
7525 else
7526 return orig_x;
7527
7528 x = XEXP (XEXP (x, 1), 0);
7529 if (GET_CODE (x) == PLUS
7530 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7531 {
7532 const_addend = XEXP (x, 1);
7533 x = XEXP (x, 0);
7534 }
7535
7536 if (GET_CODE (x) == UNSPEC
7537 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7538 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7539 result = XVECEXP (x, 0, 0);
7540
7541 if (TARGET_MACHO && darwin_local_data_pic (x)
7542 && GET_CODE (orig_x) != MEM)
7543 result = XEXP (x, 0);
7544
7545 if (! result)
7546 return orig_x;
7547
7548 if (const_addend)
7549 result = gen_rtx_PLUS (Pmode, result, const_addend);
7550 if (reg_addend)
7551 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7552 return result;
7553}
7554
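/* Print to FILE the suffix of a set/cmov mnemonic for comparison CODE done
   in MODE.  REVERSE first reverses the condition; FP selects the spellings
   needed by fcmov on some assemblers (e.g. GTU prints "a", or "nbe" when FP;
   UNORDERED prints "p", or "u" when FP).  */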
7555static void
7556put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7557 int fp, FILE *file)
7558{
7559 const char *suffix;
7560
7561 if (mode == CCFPmode || mode == CCFPUmode)
7562 {
7563 enum rtx_code second_code, bypass_code;
7564 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7565 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7566 code = ix86_fp_compare_code_to_integer (code);
7567 mode = CCmode;
7568 }
7569 if (reverse)
7570 code = reverse_condition (code);
7571
7572 switch (code)
7573 {
7574 case EQ:
7575 suffix = "e";
7576 break;
7577 case NE:
7578 suffix = "ne";
7579 break;
7580 case GT:
7581 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7582 suffix = "g";
7583 break;
7584 case GTU:
7585 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7586 Those same assemblers have the same but opposite lossage on cmov. */
7587 gcc_assert (mode == CCmode);
7588 suffix = fp ? "nbe" : "a";
7589 break;
7590 case LT:
7591 switch (mode)
7592 {
7593 case CCNOmode:
7594 case CCGOCmode:
7595 suffix = "s";
7596 break;
7597
7598 case CCmode:
7599 case CCGCmode:
7600 suffix = "l";
7601 break;
7602
7603 default:
7604 gcc_unreachable ();
7605 }
7606 break;
7607 case LTU:
7608 gcc_assert (mode == CCmode);
7609 suffix = "b";
7610 break;
7611 case GE:
7612 switch (mode)
7613 {
7614 case CCNOmode:
7615 case CCGOCmode:
7616 suffix = "ns";
7617 break;
7618
7619 case CCmode:
7620 case CCGCmode:
7621 suffix = "ge";
7622 break;
7623
7624 default:
7625 gcc_unreachable ();
7626 }
7627 break;
7628 case GEU:
7629 /* ??? As above. */
7630 gcc_assert (mode == CCmode);
7631 suffix = fp ? "nb" : "ae";
7632 break;
7633 case LE:
7634 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7635 suffix = "le";
7636 break;
7637 case LEU:
7638 gcc_assert (mode == CCmode);
7639 suffix = "be";
7640 break;
7641 case UNORDERED:
7642 suffix = fp ? "u" : "p";
7643 break;
7644 case ORDERED:
7645 suffix = fp ? "nu" : "np";
7646 break;
7647 default:
7648 gcc_unreachable ();
7649 }
7650 fputs (suffix, file);
7651}
7652
7653/* Print the name of register X to FILE based on its machine mode and number.
7654 If CODE is 'w', pretend the mode is HImode.
7655 If CODE is 'b', pretend the mode is QImode.
7656 If CODE is 'k', pretend the mode is SImode.
7657 If CODE is 'q', pretend the mode is DImode.
7658 If CODE is 'h', pretend the reg is the 'high' byte register.
7659	   If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.  */
7660
7661void
7662print_reg (rtx x, int code, FILE *file)
7663{
7664 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7665 && REGNO (x) != FRAME_POINTER_REGNUM
7666 && REGNO (x) != FLAGS_REG
7667 && REGNO (x) != FPSR_REG);
7668
7669 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7670 putc ('%', file);
7671
7672 if (code == 'w' || MMX_REG_P (x))
7673 code = 2;
7674 else if (code == 'b')
7675 code = 1;
7676 else if (code == 'k')
7677 code = 4;
7678 else if (code == 'q')
7679 code = 8;
7680 else if (code == 'y')
7681 code = 3;
7682 else if (code == 'h')
7683 code = 0;
7684 else
7685 code = GET_MODE_SIZE (GET_MODE (x));
7686
7687	  /* Irritatingly, AMD extended registers use a different naming convention
7688	     from the normal registers.  */
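  /* For example, register %r10 prints as "r10b", "r10w", "r10d" or "r10" for
     the 1, 2, 4 and 8 byte codes below, whereas the classic registers use
     names like "al", "ax", "eax" and "rax".  */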
7689 if (REX_INT_REG_P (x))
7690 {
7691 gcc_assert (TARGET_64BIT);
7692 switch (code)
7693 {
7694 case 0:
7695 error ("extended registers have no high halves");
7696 break;
7697 case 1:
7698 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7699 break;
7700 case 2:
7701 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7702 break;
7703 case 4:
7704 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7705 break;
7706 case 8:
7707 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7708 break;
7709 default:
7710 error ("unsupported operand size for extended register");
7711 break;
7712 }
7713 return;
7714 }
7715 switch (code)
7716 {
7717 case 3:
7718 if (STACK_TOP_P (x))
7719 {
7720 fputs ("st(0)", file);
7721 break;
7722 }
7723 /* FALLTHRU */
7724 case 8:
7725 case 4:
7726 case 12:
7727 if (! ANY_FP_REG_P (x))
7728 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7729 /* FALLTHRU */
7730 case 16:
7731 case 2:
7732 normal:
7733 fputs (hi_reg_name[REGNO (x)], file);
7734 break;
7735 case 1:
7736 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7737 goto normal;
7738 fputs (qi_reg_name[REGNO (x)], file);
7739 break;
7740 case 0:
7741 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7742 goto normal;
7743 fputs (qi_high_reg_name[REGNO (x)], file);
7744 break;
7745 default:
7746 gcc_unreachable ();
7747 }
7748}
7749
7750/* Locate some local-dynamic symbol still in use by this function
7751 so that we can print its name in some tls_local_dynamic_base
7752 pattern. */
7753
7754static const char *
7755get_some_local_dynamic_name (void)
7756{
7757 rtx insn;
7758
7759 if (cfun->machine->some_ld_name)
7760 return cfun->machine->some_ld_name;
7761
7762 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7763 if (INSN_P (insn)
7764 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7765 return cfun->machine->some_ld_name;
7766
7767 gcc_unreachable ();
7768}
7769
7770static int
7771get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7772{
7773 rtx x = *px;
7774
7775 if (GET_CODE (x) == SYMBOL_REF
7776 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7777 {
7778 cfun->machine->some_ld_name = XSTR (x, 0);
7779 return 1;
7780 }
7781
7782 return 0;
7783}
7784
7785/* Meaning of CODE:
7786 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7787 C -- print opcode suffix for set/cmov insn.
7788 c -- like C, but print reversed condition
7789 F,f -- likewise, but for floating-point.
7790 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7791 otherwise nothing
7792 R -- print the prefix for register names.
7793 z -- print the opcode suffix for the size of the current operand.
7794 * -- print a star (in certain assembler syntax)
7795 A -- print an absolute memory reference.
7796 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7797   s -- print a shift double count, followed by the assembler's argument
7798        delimiter.
7799 b -- print the QImode name of the register for the indicated operand.
7800 %b0 would print %al if operands[0] is reg 0.
7801 w -- likewise, print the HImode name of the register.
7802 k -- likewise, print the SImode name of the register.
7803 q -- likewise, print the DImode name of the register.
7804 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7805 y -- print "st(0)" instead of "st" as a register.
7806 D -- print condition for SSE cmp instruction.
7807 P -- if PIC, print an @PLT suffix.
7808 X -- don't print any sort of PIC '@' suffix for a symbol.
7809 & -- print some in-use local-dynamic symbol name.
7810 H -- print a memory address offset by 8; used for sse high-parts
7811 */
7812
7813void
7814print_operand (FILE *file, rtx x, int code)
7815{
7816 if (code)
7817 {
7818 switch (code)
7819 {
7820 case '*':
7821 if (ASSEMBLER_DIALECT == ASM_ATT)
7822 putc ('*', file);
7823 return;
7824
7825 case '&':
7826 assemble_name (file, get_some_local_dynamic_name ());
7827 return;
7828
7829 case 'A':
7830 switch (ASSEMBLER_DIALECT)
7831 {
7832 case ASM_ATT:
7833 putc ('*', file);
7834 break;
7835
7836 case ASM_INTEL:
7837	  /* Intel syntax.  For absolute addresses, registers should not
7838	     be surrounded by brackets.  */
7839 if (GET_CODE (x) != REG)
7840 {
7841 putc ('[', file);
7842 PRINT_OPERAND (file, x, 0);
7843 putc (']', file);
7844 return;
7845 }
7846 break;
7847
7848 default:
7849 gcc_unreachable ();
7850 }
7851
7852 PRINT_OPERAND (file, x, 0);
7853 return;
7854
7855
7856 case 'L':
7857 if (ASSEMBLER_DIALECT == ASM_ATT)
7858 putc ('l', file);
7859 return;
7860
7861 case 'W':
7862 if (ASSEMBLER_DIALECT == ASM_ATT)
7863 putc ('w', file);
7864 return;
7865
7866 case 'B':
7867 if (ASSEMBLER_DIALECT == ASM_ATT)
7868 putc ('b', file);
7869 return;
7870
7871 case 'Q':
7872 if (ASSEMBLER_DIALECT == ASM_ATT)
7873 putc ('l', file);
7874 return;
7875
7876 case 'S':
7877 if (ASSEMBLER_DIALECT == ASM_ATT)
7878 putc ('s', file);
7879 return;
7880
7881 case 'T':
7882 if (ASSEMBLER_DIALECT == ASM_ATT)
7883 putc ('t', file);
7884 return;
7885
7886 case 'z':
7887 /* 387 opcodes don't get size suffixes if the operands are
7888 registers. */
7889 if (STACK_REG_P (x))
7890 return;
7891
7892 /* Likewise if using Intel opcodes. */
7893 if (ASSEMBLER_DIALECT == ASM_INTEL)
7894 return;
7895
7896	  /* Derive the opcode suffix from the size of the operand.  */
7897 switch (GET_MODE_SIZE (GET_MODE (x)))
7898 {
7899 case 2:
7900#ifdef HAVE_GAS_FILDS_FISTS
7901 putc ('s', file);
7902#endif
7903 return;
7904
7905 case 4:
7906 if (GET_MODE (x) == SFmode)
7907 {
7908 putc ('s', file);
7909 return;
7910 }
7911 else
7912 putc ('l', file);
7913 return;
7914
7915 case 12:
7916 case 16:
7917 putc ('t', file);
7918 return;
7919
7920 case 8:
7921 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7922 {
7923#ifdef GAS_MNEMONICS
7924 putc ('q', file);
7925#else
7926 putc ('l', file);
7927 putc ('l', file);
7928#endif
7929 }
7930 else
7931 putc ('l', file);
7932 return;
7933
7934 default:
7935 gcc_unreachable ();
7936 }
7937
7938 case 'b':
7939 case 'w':
7940 case 'k':
7941 case 'q':
7942 case 'h':
7943 case 'y':
7944 case 'X':
7945 case 'P':
7946 break;
7947
7948 case 's':
7949 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7950 {
7951 PRINT_OPERAND (file, x, 0);
7952 putc (',', file);
7953 }
7954 return;
7955
7956 case 'D':
7957	  /* A little bit of brain damage here.  The SSE compare instructions
7958	     use completely different names for the comparisons than the
7959	     fp conditional moves do.  */
7960 switch (GET_CODE (x))
7961 {
7962 case EQ:
7963 case UNEQ:
7964 fputs ("eq", file);
7965 break;
7966 case LT:
7967 case UNLT:
7968 fputs ("lt", file);
7969 break;
7970 case LE:
7971 case UNLE:
7972 fputs ("le", file);
7973 break;
7974 case UNORDERED:
7975 fputs ("unord", file);
7976 break;
7977 case NE:
7978 case LTGT:
7979 fputs ("neq", file);
7980 break;
7981 case UNGE:
7982 case GE:
7983 fputs ("nlt", file);
7984 break;
7985 case UNGT:
7986 case GT:
7987 fputs ("nle", file);
7988 break;
7989 case ORDERED:
7990 fputs ("ord", file);
7991 break;
7992 default:
7993 gcc_unreachable ();
7994 }
7995 return;
7996 case 'O':
7997#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7998 if (ASSEMBLER_DIALECT == ASM_ATT)
7999 {
8000 switch (GET_MODE (x))
8001 {
8002 case HImode: putc ('w', file); break;
8003 case SImode:
8004 case SFmode: putc ('l', file); break;
8005 case DImode:
8006 case DFmode: putc ('q', file); break;
8007 default: gcc_unreachable ();
8008 }
8009 putc ('.', file);
8010 }
8011#endif
8012 return;
8013 case 'C':
8014 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8015 return;
8016 case 'F':
8017#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8018 if (ASSEMBLER_DIALECT == ASM_ATT)
8019 putc ('.', file);
8020#endif
8021 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8022 return;
8023
8024 /* Like above, but reverse condition */
8025 case 'c':
8026 /* Check to see if argument to %c is really a constant
8027 and not a condition code which needs to be reversed. */
8028 if (!COMPARISON_P (x))
8029 {
8030 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8031 return;
8032 }
8033 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8034 return;
8035 case 'f':
8036#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8037 if (ASSEMBLER_DIALECT == ASM_ATT)
8038 putc ('.', file);
8039#endif
8040 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8041 return;
8042
8043 case 'H':
8044 /* It doesn't actually matter what mode we use here, as we're
8045 only going to use this for printing. */
8046 x = adjust_address_nv (x, DImode, 8);
8047 break;
8048
8049 case '+':
8050 {
8051 rtx x;
8052
8053 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8054 return;
8055
8056 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8057 if (x)
8058 {
8059 int pred_val = INTVAL (XEXP (x, 0));
8060
8061 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8062 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8063 {
8064 int taken = pred_val > REG_BR_PROB_BASE / 2;
8065 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8066
8067 /* Emit hints only in the case default branch prediction
8068 heuristics would fail. */
8069 if (taken != cputaken)
8070 {
8071 /* We use 3e (DS) prefix for taken branches and
8072 2e (CS) prefix for not taken branches. */
8073 if (taken)
8074 fputs ("ds ; ", file);
8075 else
8076 fputs ("cs ; ", file);
8077 }
8078 }
8079 }
8080 return;
8081 }
8082 default:
8083 output_operand_lossage ("invalid operand code '%c'", code);
8084 }
8085 }
8086
8087 if (GET_CODE (x) == REG)
8088 print_reg (x, code, file);
8089
8090 else if (GET_CODE (x) == MEM)
8091 {
8092 /* No `byte ptr' prefix for call instructions. */
8093 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8094 {
8095 const char * size;
8096 switch (GET_MODE_SIZE (GET_MODE (x)))
8097 {
8098 case 1: size = "BYTE"; break;
8099 case 2: size = "WORD"; break;
8100 case 4: size = "DWORD"; break;
8101 case 8: size = "QWORD"; break;
8102 case 12: size = "XWORD"; break;
8103 case 16: size = "XMMWORD"; break;
8104 default:
8105 gcc_unreachable ();
8106 }
8107
8108 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8109 if (code == 'b')
8110 size = "BYTE";
8111 else if (code == 'w')
8112 size = "WORD";
8113 else if (code == 'k')
8114 size = "DWORD";
8115
8116 fputs (size, file);
8117 fputs (" PTR ", file);
8118 }
8119
8120 x = XEXP (x, 0);
8121 /* Avoid (%rip) for call operands. */
8122 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8123 && GET_CODE (x) != CONST_INT)
8124 output_addr_const (file, x);
8125 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8126 output_operand_lossage ("invalid constraints for operand");
8127 else
8128 output_address (x);
8129 }
8130
8131 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8132 {
8133 REAL_VALUE_TYPE r;
8134 long l;
8135
8136 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8137 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8138
8139 if (ASSEMBLER_DIALECT == ASM_ATT)
8140 putc ('$', file);
8141 fprintf (file, "0x%08lx", l);
8142 }
8143
8144 /* These float cases don't actually occur as immediate operands. */
8145 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8146 {
8147 char dstr[30];
8148
8149 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8150 fprintf (file, "%s", dstr);
8151 }
8152
8153 else if (GET_CODE (x) == CONST_DOUBLE
8154 && GET_MODE (x) == XFmode)
8155 {
8156 char dstr[30];
8157
8158 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8159 fprintf (file, "%s", dstr);
8160 }
8161
8162 else
8163 {
8164 /* We have patterns that allow zero sets of memory, for instance.
8165 In 64-bit mode, we should probably support all 8-byte vectors,
8166 since we can in fact encode that into an immediate. */
8167 if (GET_CODE (x) == CONST_VECTOR)
8168 {
8169 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8170 x = const0_rtx;
8171 }
8172
8173 if (code != 'P')
8174 {
8175 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8176 {
8177 if (ASSEMBLER_DIALECT == ASM_ATT)
8178 putc ('$', file);
8179 }
8180 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8181 || GET_CODE (x) == LABEL_REF)
8182 {
8183 if (ASSEMBLER_DIALECT == ASM_ATT)
8184 putc ('$', file);
8185 else
8186 fputs ("OFFSET FLAT:", file);
8187 }
8188 }
8189 if (GET_CODE (x) == CONST_INT)
8190 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8191 else if (flag_pic)
8192 output_pic_addr_const (file, x, code);
8193 else
8194 output_addr_const (file, x);
8195 }
8196}
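
/* For illustration only (not compiled; function name hypothetical): the
   single-letter codes handled above double as the x86 operand modifiers
   accepted in inline asm templates, e.g. "%b0" prints the QImode name of
   operand 0.  Assuming operand 0 is allocated to %eax, the statement below
   would emit "movb %al, %ah".  */
#if 0
static int
example_operand_modifiers (int v)
{
  /* %b0 selects the QImode name, %h0 the "high" byte register.  */
  asm ("movb %b0, %h0" : "+Q" (v));
  return v;
}
#endif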
8197
8198/* Print a memory operand whose address is ADDR. */
8199
8200void
8201print_operand_address (FILE *file, rtx addr)
8202{
8203 struct ix86_address parts;
8204 rtx base, index, disp;
8205 int scale;
8206 int ok = ix86_decompose_address (addr, &parts);
8207
8208 gcc_assert (ok);
8209
8210 base = parts.base;
8211 index = parts.index;
8212 disp = parts.disp;
8213 scale = parts.scale;
8214
8215 switch (parts.seg)
8216 {
8217 case SEG_DEFAULT:
8218 break;
8219 case SEG_FS:
8220 case SEG_GS:
8221 if (USER_LABEL_PREFIX[0] == 0)
8222 putc ('%', file);
8223 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8224 break;
8225 default:
8226 gcc_unreachable ();
8227 }
8228
8229 if (!base && !index)
8230 {
8231      /* A displacement-only address requires special attention.  */
8232
8233 if (GET_CODE (disp) == CONST_INT)
8234 {
8235 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8236 {
8237 if (USER_LABEL_PREFIX[0] == 0)
8238 putc ('%', file);
8239 fputs ("ds:", file);
8240 }
8241 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8242 }
8243 else if (flag_pic)
8244 output_pic_addr_const (file, disp, 0);
8245 else
8246 output_addr_const (file, disp);
8247
8248 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8249 if (TARGET_64BIT)
8250 {
8251 if (GET_CODE (disp) == CONST
8252 && GET_CODE (XEXP (disp, 0)) == PLUS
8253 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8254 disp = XEXP (XEXP (disp, 0), 0);
8255 if (GET_CODE (disp) == LABEL_REF
8256 || (GET_CODE (disp) == SYMBOL_REF
8257 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8258 fputs ("(%rip)", file);
8259 }
8260 }
8261 else
8262 {
8263 if (ASSEMBLER_DIALECT == ASM_ATT)
8264 {
8265 if (disp)
8266 {
8267 if (flag_pic)
8268 output_pic_addr_const (file, disp, 0);
8269 else if (GET_CODE (disp) == LABEL_REF)
8270 output_asm_label (disp);
8271 else
8272 output_addr_const (file, disp);
8273 }
8274
8275 putc ('(', file);
8276 if (base)
8277 print_reg (base, 0, file);
8278 if (index)
8279 {
8280 putc (',', file);
8281 print_reg (index, 0, file);
8282 if (scale != 1)
8283 fprintf (file, ",%d", scale);
8284 }
8285 putc (')', file);
8286 }
8287 else
8288 {
8289 rtx offset = NULL_RTX;
8290
8291 if (disp)
8292 {
8293 /* Pull out the offset of a symbol; print any symbol itself. */
8294 if (GET_CODE (disp) == CONST
8295 && GET_CODE (XEXP (disp, 0)) == PLUS
8296 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8297 {
8298 offset = XEXP (XEXP (disp, 0), 1);
8299 disp = gen_rtx_CONST (VOIDmode,
8300 XEXP (XEXP (disp, 0), 0));
8301 }
8302
8303 if (flag_pic)
8304 output_pic_addr_const (file, disp, 0);
8305 else if (GET_CODE (disp) == LABEL_REF)
8306 output_asm_label (disp);
8307 else if (GET_CODE (disp) == CONST_INT)
8308 offset = disp;
8309 else
8310 output_addr_const (file, disp);
8311 }
8312
8313 putc ('[', file);
8314 if (base)
8315 {
8316 print_reg (base, 0, file);
8317 if (offset)
8318 {
8319 if (INTVAL (offset) >= 0)
8320 putc ('+', file);
8321 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8322 }
8323 }
8324 else if (offset)
8325 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8326 else
8327 putc ('0', file);
8328
8329 if (index)
8330 {
8331 putc ('+', file);
8332 print_reg (index, 0, file);
8333 if (scale != 1)
8334 fprintf (file, "*%d", scale);
8335 }
8336 putc (']', file);
8337 }
8338 }
8339}
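
/* Sketch (not compiled; helper name hypothetical) of the two dialects the
   code above emits for one and the same address, here with base %ebx,
   index %esi, scale 4 and displacement 8.  */
#if 0
static void
example_address_syntax (char *att, char *intel)
{
  strcpy (att, "8(%ebx,%esi,4)");	/* AT&T:  disp(base,index,scale) */
  strcpy (intel, "[ebx+esi*4+8]");	/* Intel: [base+index*scale+disp] */
}
#endif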
8340
8341bool
8342output_addr_const_extra (FILE *file, rtx x)
8343{
8344 rtx op;
8345
8346 if (GET_CODE (x) != UNSPEC)
8347 return false;
8348
8349 op = XVECEXP (x, 0, 0);
8350 switch (XINT (x, 1))
8351 {
8352 case UNSPEC_GOTTPOFF:
8353 output_addr_const (file, op);
8354 /* FIXME: This might be @TPOFF in Sun ld. */
8355 fputs ("@GOTTPOFF", file);
8356 break;
8357 case UNSPEC_TPOFF:
8358 output_addr_const (file, op);
8359 fputs ("@TPOFF", file);
8360 break;
8361 case UNSPEC_NTPOFF:
8362 output_addr_const (file, op);
8363 if (TARGET_64BIT)
8364 fputs ("@TPOFF", file);
8365 else
8366 fputs ("@NTPOFF", file);
8367 break;
8368 case UNSPEC_DTPOFF:
8369 output_addr_const (file, op);
8370 fputs ("@DTPOFF", file);
8371 break;
8372 case UNSPEC_GOTNTPOFF:
8373 output_addr_const (file, op);
8374 if (TARGET_64BIT)
8375 fputs ("@GOTTPOFF(%rip)", file);
8376 else
8377 fputs ("@GOTNTPOFF", file);
8378 break;
8379 case UNSPEC_INDNTPOFF:
8380 output_addr_const (file, op);
8381 fputs ("@INDNTPOFF", file);
8382 break;
8383
8384 default:
8385 return false;
8386 }
8387
8388 return true;
8389}
8390
8391/* Split one or more DImode RTL references into pairs of SImode
8392 references. The RTL can be REG, offsettable MEM, integer constant, or
8393 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8394 split and "num" is its length. lo_half and hi_half are output arrays
8395 that parallel "operands". */
8396
8397void
8398split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8399{
8400 while (num--)
8401 {
8402 rtx op = operands[num];
8403
8404	  /* simplify_subreg refuses to split volatile memory addresses,
8405	     but we still have to handle them.  */
8406 if (GET_CODE (op) == MEM)
8407 {
8408 lo_half[num] = adjust_address (op, SImode, 0);
8409 hi_half[num] = adjust_address (op, SImode, 4);
8410 }
8411 else
8412 {
8413 lo_half[num] = simplify_gen_subreg (SImode, op,
8414 GET_MODE (op) == VOIDmode
8415 ? DImode : GET_MODE (op), 0);
8416 hi_half[num] = simplify_gen_subreg (SImode, op,
8417 GET_MODE (op) == VOIDmode
8418 ? DImode : GET_MODE (op), 4);
8419 }
8420 }
8421}
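
/* Sketch only (not compiled; name hypothetical): what the split means for a
   constant on this little-endian target -- offset 0 holds the low 32 bits,
   offset 4 the high 32 bits.  */
#if 0
static void
example_split_di (unsigned long long v, unsigned int *lo, unsigned int *hi)
{
  *lo = (unsigned int) (v & 0xffffffffULL);	/* adjust_address ..., 0 */
  *hi = (unsigned int) (v >> 32);		/* adjust_address ..., 4 */
}
#endif
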
8422/* Split one or more TImode RTL references into pairs of DImode
8423 references. The RTL can be REG, offsettable MEM, integer constant, or
8424 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8425 split and "num" is its length. lo_half and hi_half are output arrays
8426 that parallel "operands". */
8427
8428void
8429split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8430{
8431 while (num--)
8432 {
8433 rtx op = operands[num];
8434
8435      /* simplify_subreg refuses to split volatile memory addresses, but we
8436	 still have to handle them.  */
8437 if (GET_CODE (op) == MEM)
8438 {
8439 lo_half[num] = adjust_address (op, DImode, 0);
8440 hi_half[num] = adjust_address (op, DImode, 8);
8441 }
8442 else
8443 {
8444 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8445 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8446 }
8447 }
8448}
8449
8450/* Output code to perform a 387 binary operation in INSN, one of PLUS,
8451 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8452 is the expression of the binary operation. The output may either be
8453 emitted here, or returned to the caller, like all output_* functions.
8454
8455 There is no guarantee that the operands are the same mode, as they
8456 might be within FLOAT or FLOAT_EXTEND expressions. */
8457
8458#ifndef SYSV386_COMPAT
8459/* Set to 1 for compatibility with brain-damaged assemblers. No-one
8460 wants to fix the assemblers because that causes incompatibility
8461 with gcc. No-one wants to fix gcc because that causes
8462 incompatibility with assemblers... You can use the option of
8463 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8464#define SYSV386_COMPAT 1
8465#endif
8466
8467const char *
8468output_387_binary_op (rtx insn, rtx *operands)
8469{
8470 static char buf[30];
8471 const char *p;
8472 const char *ssep;
8473 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8474
8475#ifdef ENABLE_CHECKING
8476 /* Even if we do not want to check the inputs, this documents input
8477 constraints. Which helps in understanding the following code. */
8478 if (STACK_REG_P (operands[0])
8479 && ((REG_P (operands[1])
8480 && REGNO (operands[0]) == REGNO (operands[1])
8481 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8482 || (REG_P (operands[2])
8483 && REGNO (operands[0]) == REGNO (operands[2])
8484 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8485 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8486 ; /* ok */
8487 else
8488 gcc_assert (is_sse);
8489#endif
8490
8491 switch (GET_CODE (operands[3]))
8492 {
8493 case PLUS:
8494 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8495 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8496 p = "fiadd";
8497 else
8498 p = "fadd";
8499 ssep = "add";
8500 break;
8501
8502 case MINUS:
8503 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8504 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8505 p = "fisub";
8506 else
8507 p = "fsub";
8508 ssep = "sub";
8509 break;
8510
8511 case MULT:
8512 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8513 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8514 p = "fimul";
8515 else
8516 p = "fmul";
8517 ssep = "mul";
8518 break;
8519
8520 case DIV:
8521 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8522 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8523 p = "fidiv";
8524 else
8525 p = "fdiv";
8526 ssep = "div";
8527 break;
8528
8529 default:
8530 gcc_unreachable ();
8531 }
8532
8533 if (is_sse)
8534 {
8535 strcpy (buf, ssep);
8536 if (GET_MODE (operands[0]) == SFmode)
8537 strcat (buf, "ss\t{%2, %0|%0, %2}");
8538 else
8539 strcat (buf, "sd\t{%2, %0|%0, %2}");
8540 return buf;
8541 }
8542 strcpy (buf, p);
8543
8544 switch (GET_CODE (operands[3]))
8545 {
8546 case MULT:
8547 case PLUS:
8548 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8549 {
8550 rtx temp = operands[2];
8551 operands[2] = operands[1];
8552 operands[1] = temp;
8553 }
8554
8555      /* We know operands[0] == operands[1].  */
8556
8557 if (GET_CODE (operands[2]) == MEM)
8558 {
8559 p = "%z2\t%2";
8560 break;
8561 }
8562
8563 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8564 {
8565 if (STACK_TOP_P (operands[0]))
8566 /* How is it that we are storing to a dead operand[2]?
8567 Well, presumably operands[1] is dead too. We can't
8568 store the result to st(0) as st(0) gets popped on this
8569 instruction. Instead store to operands[2] (which I
8570 think has to be st(1)). st(1) will be popped later.
8571 gcc <= 2.8.1 didn't have this check and generated
8572 assembly code that the Unixware assembler rejected. */
8573 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8574 else
8575 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8576 break;
8577 }
8578
8579 if (STACK_TOP_P (operands[0]))
8580 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8581 else
8582 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8583 break;
8584
8585 case MINUS:
8586 case DIV:
8587 if (GET_CODE (operands[1]) == MEM)
8588 {
8589 p = "r%z1\t%1";
8590 break;
8591 }
8592
8593 if (GET_CODE (operands[2]) == MEM)
8594 {
8595 p = "%z2\t%2";
8596 break;
8597 }
8598
8599 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8600 {
8601#if SYSV386_COMPAT
8602 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8603 derived assemblers, confusingly reverse the direction of
8604 the operation for fsub{r} and fdiv{r} when the
8605 destination register is not st(0). The Intel assembler
8606 doesn't have this brain damage. Read !SYSV386_COMPAT to
8607 figure out what the hardware really does. */
8608 if (STACK_TOP_P (operands[0]))
8609 p = "{p\t%0, %2|rp\t%2, %0}";
8610 else
8611 p = "{rp\t%2, %0|p\t%0, %2}";
8612#else
8613 if (STACK_TOP_P (operands[0]))
8614 /* As above for fmul/fadd, we can't store to st(0). */
8615 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8616 else
8617 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8618#endif
8619 break;
8620 }
8621
8622 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8623 {
8624#if SYSV386_COMPAT
8625 if (STACK_TOP_P (operands[0]))
8626 p = "{rp\t%0, %1|p\t%1, %0}";
8627 else
8628 p = "{p\t%1, %0|rp\t%0, %1}";
8629#else
8630 if (STACK_TOP_P (operands[0]))
8631 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8632 else
8633 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8634#endif
8635 break;
8636 }
8637
8638 if (STACK_TOP_P (operands[0]))
8639 {
8640 if (STACK_TOP_P (operands[1]))
8641 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8642 else
8643 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8644 break;
8645 }
8646 else if (STACK_TOP_P (operands[1]))
8647 {
8648#if SYSV386_COMPAT
8649 p = "{\t%1, %0|r\t%0, %1}";
8650#else
8651 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8652#endif
8653 }
8654 else
8655 {
8656#if SYSV386_COMPAT
8657 p = "{r\t%2, %0|\t%0, %2}";
8658#else
8659 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8660#endif
8661 }
8662 break;
8663
8664 default:
8665 gcc_unreachable ();
8666 }
8667
8668 strcat (buf, p);
8669 return buf;
8670}
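
/* Sketch (not compiled; name hypothetical) of how the template above is put
   together for an SSE SFmode add: the "{att|intel}" braces are resolved
   later by the asm output machinery according to ASSEMBLER_DIALECT.  */
#if 0
static void
example_build_sse_add_template (char *buf)
{
  strcpy (buf, "add");				/* ssep chosen for PLUS */
  strcat (buf, "ss\t{%2, %0|%0, %2}");		/* SFmode variant */
  /* buf now holds "addss\t{%2, %0|%0, %2}".  */
}
#endif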
8671
8672/* Return needed mode for entity in optimize_mode_switching pass. */
8673
8674int
8675ix86_mode_needed (int entity, rtx insn)
8676{
8677 enum attr_i387_cw mode;
8678
8679  /* The mode UNINITIALIZED is used to store the control word after a
8680     function call or ASM pattern.  The mode ANY specifies that the function
8681     has no requirements on the control word and makes no changes to the
8682     bits we are interested in.  */
8683
8684 if (CALL_P (insn)
8685 || (NONJUMP_INSN_P (insn)
8686 && (asm_noperands (PATTERN (insn)) >= 0
8687 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8688 return I387_CW_UNINITIALIZED;
8689
8690 if (recog_memoized (insn) < 0)
8691 return I387_CW_ANY;
8692
8693 mode = get_attr_i387_cw (insn);
8694
8695 switch (entity)
8696 {
8697 case I387_TRUNC:
8698 if (mode == I387_CW_TRUNC)
8699 return mode;
8700 break;
8701
8702 case I387_FLOOR:
8703 if (mode == I387_CW_FLOOR)
8704 return mode;
8705 break;
8706
8707 case I387_CEIL:
8708 if (mode == I387_CW_CEIL)
8709 return mode;
8710 break;
8711
8712 case I387_MASK_PM:
8713 if (mode == I387_CW_MASK_PM)
8714 return mode;
8715 break;
8716
8717 default:
8718 gcc_unreachable ();
8719 }
8720
8721 return I387_CW_ANY;
8722}
8723
8724/* Output code to initialize control word copies used by trunc?f?i and
8725   rounding patterns.  CURRENT_MODE is set to the current control word,
8726   while NEW_MODE is set to the new control word.  */
8727
8728void
8729emit_i387_cw_initialization (int mode)
8730{
8731 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8732 rtx new_mode;
8733
8734 int slot;
8735
8736 rtx reg = gen_reg_rtx (HImode);
8737
8738 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8739 emit_move_insn (reg, stored_mode);
8740
8741 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8742 {
8743 switch (mode)
8744 {
8745 case I387_CW_TRUNC:
8746 /* round toward zero (truncate) */
8747 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8748 slot = SLOT_CW_TRUNC;
8749 break;
8750
8751 case I387_CW_FLOOR:
8752 /* round down toward -oo */
8753 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8754 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8755 slot = SLOT_CW_FLOOR;
8756 break;
8757
8758 case I387_CW_CEIL:
8759 /* round up toward +oo */
8760 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8761 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8762 slot = SLOT_CW_CEIL;
8763 break;
8764
8765 case I387_CW_MASK_PM:
8766 /* mask precision exception for nearbyint() */
8767 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8768 slot = SLOT_CW_MASK_PM;
8769 break;
8770
8771 default:
8772 gcc_unreachable ();
8773 }
8774 }
8775 else
8776 {
8777 switch (mode)
8778 {
8779 case I387_CW_TRUNC:
8780 /* round toward zero (truncate) */
8781 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8782 slot = SLOT_CW_TRUNC;
8783 break;
8784
8785 case I387_CW_FLOOR:
8786 /* round down toward -oo */
8787 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8788 slot = SLOT_CW_FLOOR;
8789 break;
8790
8791 case I387_CW_CEIL:
8792 /* round up toward +oo */
8793 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8794 slot = SLOT_CW_CEIL;
8795 break;
8796
8797 case I387_CW_MASK_PM:
8798 /* mask precision exception for nearbyint() */
8799 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8800 slot = SLOT_CW_MASK_PM;
8801 break;
8802
8803 default:
8804 gcc_unreachable ();
8805 }
8806 }
8807
8808 gcc_assert (slot < MAX_386_STACK_LOCALS);
8809
8810 new_mode = assign_386_stack_local (HImode, slot);
8811 emit_move_insn (new_mode, reg);
8812}
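
/* Sketch (not compiled; names hypothetical) of the control word bits the
   code above adjusts: bits 11:10 form the x87 rounding-control field
   (00 nearest, 01 down, 10 up, 11 truncate) and bit 5 masks the precision
   exception.  */
#if 0
static unsigned short
example_cw_round_to_zero (unsigned short cw)
{
  return cw | 0x0c00;				/* RC = 11 -> truncate */
}

static unsigned short
example_cw_round_down (unsigned short cw)
{
  return (unsigned short) ((cw & ~0x0c00) | 0x0400);	/* RC = 01 -> toward -inf */
}
#endif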
8813
8814/* Output code for INSN to convert a float to a signed int. OPERANDS
8815 are the insn operands. The output may be [HSD]Imode and the input
8816 operand may be [SDX]Fmode. */
8817
8818const char *
8819output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8820{
8821 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8822 int dimode_p = GET_MODE (operands[0]) == DImode;
8823 int round_mode = get_attr_i387_cw (insn);
8824
8825 /* Jump through a hoop or two for DImode, since the hardware has no
8826 non-popping instruction. We used to do this a different way, but
8827 that was somewhat fragile and broke with post-reload splitters. */
8828 if ((dimode_p || fisttp) && !stack_top_dies)
8829 output_asm_insn ("fld\t%y1", operands);
8830
8831 gcc_assert (STACK_TOP_P (operands[1]));
8832 gcc_assert (GET_CODE (operands[0]) == MEM);
8833
8834 if (fisttp)
8835 output_asm_insn ("fisttp%z0\t%0", operands);
8836 else
8837 {
8838 if (round_mode != I387_CW_ANY)
8839 output_asm_insn ("fldcw\t%3", operands);
8840 if (stack_top_dies || dimode_p)
8841 output_asm_insn ("fistp%z0\t%0", operands);
8842 else
8843 output_asm_insn ("fist%z0\t%0", operands);
8844 if (round_mode != I387_CW_ANY)
8845 output_asm_insn ("fldcw\t%2", operands);
8846 }
8847
8848 return "";
8849}
8850
8851/* Output code for x87 ffreep insn. The OPNO argument, which may only
8852 have the values zero or one, indicates the ffreep insn's operand
8853 from the OPERANDS array. */
8854
8855static const char *
8856output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8857{
8858 if (TARGET_USE_FFREEP)
8859#if HAVE_AS_IX86_FFREEP
8860 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8861#else
8862 switch (REGNO (operands[opno]))
8863 {
8864 case FIRST_STACK_REG + 0: return ".word\t0xc0df";
8865 case FIRST_STACK_REG + 1: return ".word\t0xc1df";
8866 case FIRST_STACK_REG + 2: return ".word\t0xc2df";
8867 case FIRST_STACK_REG + 3: return ".word\t0xc3df";
8868 case FIRST_STACK_REG + 4: return ".word\t0xc4df";
8869 case FIRST_STACK_REG + 5: return ".word\t0xc5df";
8870 case FIRST_STACK_REG + 6: return ".word\t0xc6df";
8871 case FIRST_STACK_REG + 7: return ".word\t0xc7df";
8872 }
8873#endif
8874
8875 return opno ? "fstp\t%y1" : "fstp\t%y0";
8876}
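
/* Sketch only (not compiled; name hypothetical): the .word constants above
   are the raw encoding of "ffreep %st(i)", whose opcode bytes are DF C0+i,
   emitted as a little-endian 16-bit word.  */
#if 0
static unsigned short
example_ffreep_word (int i)
{
  return (unsigned short) (0xc0df + (i << 8));	/* i = 0 gives 0xc0df */
}
#endif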
8877
8878
8879/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8880 should be used. UNORDERED_P is true when fucom should be used. */
8881
8882const char *
8883output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8884{
8885 int stack_top_dies;
8886 rtx cmp_op0, cmp_op1;
8887 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8888
8889 if (eflags_p)
8890 {
8891 cmp_op0 = operands[0];
8892 cmp_op1 = operands[1];
8893 }
8894 else
8895 {
8896 cmp_op0 = operands[1];
8897 cmp_op1 = operands[2];
8898 }
8899
8900 if (is_sse)
8901 {
8902 if (GET_MODE (operands[0]) == SFmode)
8903 if (unordered_p)
8904 return "ucomiss\t{%1, %0|%0, %1}";
8905 else
8906 return "comiss\t{%1, %0|%0, %1}";
8907 else
8908 if (unordered_p)
8909 return "ucomisd\t{%1, %0|%0, %1}";
8910 else
8911 return "comisd\t{%1, %0|%0, %1}";
8912 }
8913
8914 gcc_assert (STACK_TOP_P (cmp_op0));
8915
8916 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8917
8918 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8919 {
8920 if (stack_top_dies)
8921 {
8922 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8923 return output_387_ffreep (operands, 1);
8924 }
8925 else
8926 return "ftst\n\tfnstsw\t%0";
8927 }
8928
8929 if (STACK_REG_P (cmp_op1)
8930 && stack_top_dies
8931 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8932 && REGNO (cmp_op1) != FIRST_STACK_REG)
8933 {
8934      /* If both the top of the 387 stack and the other operand (also a
8935	 stack register) die, then this must be a `fcompp' float
8936	 compare.  */
8937
8938 if (eflags_p)
8939 {
8940 /* There is no double popping fcomi variant. Fortunately,
8941 eflags is immune from the fstp's cc clobbering. */
8942 if (unordered_p)
8943 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8944 else
8945 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8946 return output_387_ffreep (operands, 0);
8947 }
8948 else
8949 {
8950 if (unordered_p)
8951 return "fucompp\n\tfnstsw\t%0";
8952 else
8953 return "fcompp\n\tfnstsw\t%0";
8954 }
8955 }
8956 else
8957 {
8958 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8959
8960 static const char * const alt[16] =
8961 {
8962 "fcom%z2\t%y2\n\tfnstsw\t%0",
8963 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8964 "fucom%z2\t%y2\n\tfnstsw\t%0",
8965 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8966
8967 "ficom%z2\t%y2\n\tfnstsw\t%0",
8968 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8969 NULL,
8970 NULL,
8971
8972 "fcomi\t{%y1, %0|%0, %y1}",
8973 "fcomip\t{%y1, %0|%0, %y1}",
8974 "fucomi\t{%y1, %0|%0, %y1}",
8975 "fucomip\t{%y1, %0|%0, %y1}",
8976
8977 NULL,
8978 NULL,
8979 NULL,
8980 NULL
8981 };
8982
8983 int mask;
8984 const char *ret;
8985
8986 mask = eflags_p << 3;
8987 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8988 mask |= unordered_p << 1;
8989 mask |= stack_top_dies;
8990
8991 gcc_assert (mask < 16);
8992 ret = alt[mask];
8993 gcc_assert (ret);
8994
8995 return ret;
8996 }
8997}
8998
8999void
9000ix86_output_addr_vec_elt (FILE *file, int value)
9001{
9002 const char *directive = ASM_LONG;
9003
9004#ifdef ASM_QUAD
9005 if (TARGET_64BIT)
9006 directive = ASM_QUAD;
9007#else
9008 gcc_assert (!TARGET_64BIT);
9009#endif
9010
9011 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9012}
9013
9014void
9015ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9016{
9017 if (TARGET_64BIT)
9018 fprintf (file, "%s%s%d-%s%d\n",
9019 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9020 else if (HAVE_AS_GOTOFF_IN_DATA)
9021 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9022#if TARGET_MACHO
9023 else if (TARGET_MACHO)
9024 {
9025 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9026 machopic_output_function_base_name (file);
9027 fprintf(file, "\n");
9028 }
9029#endif
9030 else
9031 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9032 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9033}
9034
9035/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9036 for the target. */
9037
9038void
9039ix86_expand_clear (rtx dest)
9040{
9041 rtx tmp;
9042
9043 /* We play register width games, which are only valid after reload. */
9044 gcc_assert (reload_completed);
9045
9046 /* Avoid HImode and its attendant prefix byte. */
9047 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9048 dest = gen_rtx_REG (SImode, REGNO (dest));
9049
9050 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9051
9052 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9053 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9054 {
9055 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9056 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9057 }
9058
9059 emit_insn (tmp);
9060}
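
/* Illustration only (not compiled; name hypothetical): the two idioms the
   expander above chooses between.  "xor" is only 2 bytes but clobbers
   EFLAGS -- hence the CLOBBER of the flags register attached to the SET --
   while "mov $0" is 5 bytes and leaves the flags untouched.  */
#if 0
static void
example_clear_eax (void)
{
  asm volatile ("xorl %%eax, %%eax" ::: "eax", "cc");	/* 31 c0 */
  asm volatile ("movl $0, %%eax" ::: "eax");		/* b8 00 00 00 00 */
}
#endif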
9061
9062/* X is an unchanging MEM. If it is a constant pool reference, return
9063 the constant pool rtx, else NULL. */
9064
9065rtx
9066maybe_get_pool_constant (rtx x)
9067{
9068 x = ix86_delegitimize_address (XEXP (x, 0));
9069
9070 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9071 return get_pool_constant (x);
9072
9073 return NULL_RTX;
9074}
9075
9076void
9077ix86_expand_move (enum machine_mode mode, rtx operands[])
9078{
9079 int strict = (reload_in_progress || reload_completed);
9080 rtx op0, op1;
9081 enum tls_model model;
9082
9083 op0 = operands[0];
9084 op1 = operands[1];
9085
9086 if (GET_CODE (op1) == SYMBOL_REF)
9087 {
9088 model = SYMBOL_REF_TLS_MODEL (op1);
9089 if (model)
9090 {
9091 op1 = legitimize_tls_address (op1, model, true);
9092 op1 = force_operand (op1, op0);
9093 if (op1 == op0)
9094 return;
9095 }
9096 }
9097 else if (GET_CODE (op1) == CONST
9098 && GET_CODE (XEXP (op1, 0)) == PLUS
9099 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9100 {
9101 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9102 if (model)
9103 {
9104 rtx addend = XEXP (XEXP (op1, 0), 1);
9105 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9106 op1 = force_operand (op1, NULL);
9107 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9108 op0, 1, OPTAB_DIRECT);
9109 if (op1 == op0)
9110 return;
9111 }
9112 }
9113
9114 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9115 {
9116 if (TARGET_MACHO && !TARGET_64BIT)
9117 {
9118#if TARGET_MACHO
9119 if (MACHOPIC_PURE)
9120 {
9121 rtx temp = ((reload_in_progress
9122 || ((op0 && GET_CODE (op0) == REG)
9123 && mode == Pmode))
9124 ? op0 : gen_reg_rtx (Pmode));
9125 op1 = machopic_indirect_data_reference (op1, temp);
9126 op1 = machopic_legitimize_pic_address (op1, mode,
9127 temp == op1 ? 0 : temp);
9128 }
9129 else if (MACHOPIC_INDIRECT)
9130 op1 = machopic_indirect_data_reference (op1, 0);
9131 if (op0 == op1)
9132 return;
9133#endif
9134 }
9135 else
9136 {
9137 if (GET_CODE (op0) == MEM)
9138 op1 = force_reg (Pmode, op1);
9139 else
9140 op1 = legitimize_address (op1, op1, Pmode);
9141 }
9142 }
9143 else
9144 {
9145 if (GET_CODE (op0) == MEM
9146 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9147 || !push_operand (op0, mode))
9148 && GET_CODE (op1) == MEM)
9149 op1 = force_reg (mode, op1);
9150
9151 if (push_operand (op0, mode)
9152 && ! general_no_elim_operand (op1, mode))
9153 op1 = copy_to_mode_reg (mode, op1);
9154
9155 /* Force large constants in 64bit compilation into register
9156 to get them CSEed. */
9157 if (TARGET_64BIT && mode == DImode
9158 && immediate_operand (op1, mode)
9159 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9160 && !register_operand (op0, mode)
9161 && optimize && !reload_completed && !reload_in_progress)
9162 op1 = copy_to_mode_reg (mode, op1);
9163
9164 if (FLOAT_MODE_P (mode))
9165 {
9166 /* If we are loading a floating point constant to a register,
9167 force the value to memory now, since we'll get better code
9168 out the back end. */
9169
9170 if (strict)
9171 ;
9172 else if (GET_CODE (op1) == CONST_DOUBLE)
9173 {
9174 op1 = validize_mem (force_const_mem (mode, op1));
9175 if (!register_operand (op0, mode))
9176 {
9177 rtx temp = gen_reg_rtx (mode);
9178 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9179 emit_move_insn (op0, temp);
9180 return;
9181 }
9182 }
9183 }
9184 }
9185
9186 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9187}
9188
9189void
9190ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9191{
9192 rtx op0 = operands[0], op1 = operands[1];
9193
9194  /* Force constants other than zero into memory.  We do not know how
9195     the instructions used to build constants modify the upper 64 bits
9196     of the register; once we have that information we may be able
9197     to handle some of them more efficiently.  */
9198 if ((reload_in_progress | reload_completed) == 0
9199 && register_operand (op0, mode)
9200 && CONSTANT_P (op1)
9201 && standard_sse_constant_p (op1) <= 0)
9202 op1 = validize_mem (force_const_mem (mode, op1));
9203
9204 /* Make operand1 a register if it isn't already. */
9205 if (!no_new_pseudos
9206 && !register_operand (op0, mode)
9207 && !register_operand (op1, mode))
9208 {
9209 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9210 return;
9211 }
9212
9213 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9214}
9215
9216/* Implement the movmisalign patterns for SSE. Non-SSE modes go
9217 straight to ix86_expand_vector_move. */
9218
9219void
9220ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9221{
9222 rtx op0, op1, m;
9223
9224 op0 = operands[0];
9225 op1 = operands[1];
9226
9227 if (MEM_P (op1))
9228 {
9229 /* If we're optimizing for size, movups is the smallest. */
9230 if (optimize_size)
9231 {
9232 op0 = gen_lowpart (V4SFmode, op0);
9233 op1 = gen_lowpart (V4SFmode, op1);
9234 emit_insn (gen_sse_movups (op0, op1));
9235 return;
9236 }
9237
9238 /* ??? If we have typed data, then it would appear that using
9239 movdqu is the only way to get unaligned data loaded with
9240 integer type. */
9241 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9242 {
9243 op0 = gen_lowpart (V16QImode, op0);
9244 op1 = gen_lowpart (V16QImode, op1);
9245 emit_insn (gen_sse2_movdqu (op0, op1));
9246 return;
9247 }
9248
9249 if (TARGET_SSE2 && mode == V2DFmode)
2209 if (TARGET_64BIT)
2210 {
2211 if (TARGET_ALIGN_DOUBLE)
2212 error ("-malign-double makes no sense in the 64bit mode");
2213 if (TARGET_RTD)
2214 error ("-mrtd calling convention not supported in the 64bit mode");
2215
2216 /* Enable by default the SSE and MMX builtins. Do allow the user to
2217 explicitly disable any of these. In particular, disabling SSE and
2218 MMX for kernel code is extremely useful. */
2219 target_flags
2220 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2221 & ~target_flags_explicit);
2222 }
2223 else
2224 {
2225      /* The i386 ABI does not specify a red zone.  It still makes sense to use
2226	 one when the programmer takes care to keep the stack from being destroyed.  */
2227 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2228 target_flags |= MASK_NO_RED_ZONE;
2229 }
2230
2231 /* Validate -mpreferred-stack-boundary= value, or provide default.
2232 The default of 128 bits is for Pentium III's SSE __m128. We can't
2233 change it because of optimize_size. Otherwise, we can't mix object
2234 files compiled with -Os and -On. */
2235 ix86_preferred_stack_boundary = 128;
2236 if (ix86_preferred_stack_boundary_string)
2237 {
2238 i = atoi (ix86_preferred_stack_boundary_string);
2239 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2240 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2241 TARGET_64BIT ? 4 : 2);
2242 else
2243 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
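	/* For example, -mpreferred-stack-boundary=4 yields (1 << 4) * 8 = 128
	   bits, i.e. the 16-byte stack alignment used as the default above.  */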
2244 }
2245
2246 /* Accept -msseregparm only if at least SSE support is enabled. */
2247 if (TARGET_SSEREGPARM
2248 && ! TARGET_SSE)
2249 error ("-msseregparm used without SSE enabled");
2250
2251 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2252
2253 if (ix86_fpmath_string != 0)
2254 {
2255 if (! strcmp (ix86_fpmath_string, "387"))
2256 ix86_fpmath = FPMATH_387;
2257 else if (! strcmp (ix86_fpmath_string, "sse"))
2258 {
2259 if (!TARGET_SSE)
2260 {
2261 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2262 ix86_fpmath = FPMATH_387;
2263 }
2264 else
2265 ix86_fpmath = FPMATH_SSE;
2266 }
2267 else if (! strcmp (ix86_fpmath_string, "387,sse")
2268 || ! strcmp (ix86_fpmath_string, "sse,387"))
2269 {
2270 if (!TARGET_SSE)
2271 {
2272 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2273 ix86_fpmath = FPMATH_387;
2274 }
2275 else if (!TARGET_80387)
2276 {
2277 warning (0, "387 instruction set disabled, using SSE arithmetics");
2278 ix86_fpmath = FPMATH_SSE;
2279 }
2280 else
2281 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2282 }
2283 else
2284 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2285 }
2286
2287 /* If the i387 is disabled, then do not return values in it. */
2288 if (!TARGET_80387)
2289 target_flags &= ~MASK_FLOAT_RETURNS;
2290
2291 if ((x86_accumulate_outgoing_args & TUNEMASK)
2292 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2293 && !optimize_size)
2294 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2295
2296 /* ??? Unwind info is not correct around the CFG unless either a frame
2297 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2298 unwind info generation to be aware of the CFG and propagating states
2299 around edges. */
2300 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2301 || flag_exceptions || flag_non_call_exceptions)
2302 && flag_omit_frame_pointer
2303 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2304 {
2305 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2306 warning (0, "unwind tables currently require either a frame pointer "
2307 "or -maccumulate-outgoing-args for correctness");
2308 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2309 }
2310
2311 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2312 {
2313 char *p;
2314 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2315 p = strchr (internal_label_prefix, 'X');
2316 internal_label_prefix_len = p - internal_label_prefix;
2317 *p = '\0';
2318 }
2319
2320  /* When the scheduling description is not available, disable the scheduler
2321     pass so it won't slow down compilation and make x87 code slower.  */
2322 if (!TARGET_SCHEDULE)
2323 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2324}
2325
2326/* Switch to the appropriate section for output of DECL.
2327 DECL is either a `VAR_DECL' node or a constant of some sort.
2328 RELOC indicates whether forming the initial value of DECL requires
2329 link-time relocations. */
2330
2331static section *
2332x86_64_elf_select_section (tree decl, int reloc,
2333 unsigned HOST_WIDE_INT align)
2334{
2335 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2336 && ix86_in_large_data_p (decl))
2337 {
2338 const char *sname = NULL;
2339 unsigned int flags = SECTION_WRITE;
2340 switch (categorize_decl_for_section (decl, reloc))
2341 {
2342 case SECCAT_DATA:
2343 sname = ".ldata";
2344 break;
2345 case SECCAT_DATA_REL:
2346 sname = ".ldata.rel";
2347 break;
2348 case SECCAT_DATA_REL_LOCAL:
2349 sname = ".ldata.rel.local";
2350 break;
2351 case SECCAT_DATA_REL_RO:
2352 sname = ".ldata.rel.ro";
2353 break;
2354 case SECCAT_DATA_REL_RO_LOCAL:
2355 sname = ".ldata.rel.ro.local";
2356 break;
2357 case SECCAT_BSS:
2358 sname = ".lbss";
2359 flags |= SECTION_BSS;
2360 break;
2361 case SECCAT_RODATA:
2362 case SECCAT_RODATA_MERGE_STR:
2363 case SECCAT_RODATA_MERGE_STR_INIT:
2364 case SECCAT_RODATA_MERGE_CONST:
2365 sname = ".lrodata";
2366 flags = 0;
2367 break;
2368 case SECCAT_SRODATA:
2369 case SECCAT_SDATA:
2370 case SECCAT_SBSS:
2371 gcc_unreachable ();
2372 case SECCAT_TEXT:
2373 case SECCAT_TDATA:
2374 case SECCAT_TBSS:
2375	  /* We don't split these for the medium model.  Place them into
2376	     the default sections and hope for the best.  */
2377 break;
2378 }
2379 if (sname)
2380 {
2381 /* We might get called with string constants, but get_named_section
2382 doesn't like them as they are not DECLs. Also, we need to set
2383 flags in that case. */
2384 if (!DECL_P (decl))
2385 return get_section (sname, flags, NULL);
2386 return get_named_section (decl, sname, reloc);
2387 }
2388 }
2389 return default_elf_select_section (decl, reloc, align);
2390}
2391
2392/* Build up a unique section name, expressed as a
2393 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2394 RELOC indicates whether the initial value of EXP requires
2395 link-time relocations. */
2396
2397static void
2398x86_64_elf_unique_section (tree decl, int reloc)
2399{
2400 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2401 && ix86_in_large_data_p (decl))
2402 {
2403 const char *prefix = NULL;
2404 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2405 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2406
2407 switch (categorize_decl_for_section (decl, reloc))
2408 {
2409 case SECCAT_DATA:
2410 case SECCAT_DATA_REL:
2411 case SECCAT_DATA_REL_LOCAL:
2412 case SECCAT_DATA_REL_RO:
2413 case SECCAT_DATA_REL_RO_LOCAL:
2414 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2415 break;
2416 case SECCAT_BSS:
2417 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2418 break;
2419 case SECCAT_RODATA:
2420 case SECCAT_RODATA_MERGE_STR:
2421 case SECCAT_RODATA_MERGE_STR_INIT:
2422 case SECCAT_RODATA_MERGE_CONST:
2423 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2424 break;
2425 case SECCAT_SRODATA:
2426 case SECCAT_SDATA:
2427 case SECCAT_SBSS:
2428 gcc_unreachable ();
2429 case SECCAT_TEXT:
2430 case SECCAT_TDATA:
2431 case SECCAT_TBSS:
2432	  /* We don't split these for the medium model.  Place them into
2433	     the default sections and hope for the best.  */
2434 break;
2435 }
2436 if (prefix)
2437 {
2438 const char *name;
2439 size_t nlen, plen;
2440 char *string;
2441 plen = strlen (prefix);
2442
2443 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2444 name = targetm.strip_name_encoding (name);
2445 nlen = strlen (name);
2446
2447 string = alloca (nlen + plen + 1);
2448 memcpy (string, prefix, plen);
2449 memcpy (string + plen, name, nlen + 1);
2450
2451 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2452 return;
2453 }
2454 }
2455 default_unique_section (decl, reloc);
2456}
2457
2458#ifdef COMMON_ASM_OP
2459/* This says how to output assembler code to declare an
2460 uninitialized external linkage data object.
2461
2462 For medium model x86-64 we need to use .largecomm opcode for
2463 large objects. */
2464void
2465x86_elf_aligned_common (FILE *file,
2466 const char *name, unsigned HOST_WIDE_INT size,
2467 int align)
2468{
2469 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2470 && size > (unsigned int)ix86_section_threshold)
2471 fprintf (file, ".largecomm\t");
2472 else
2473 fprintf (file, "%s", COMMON_ASM_OP);
2474 assemble_name (file, name);
2475 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2476 size, align / BITS_PER_UNIT);
2477}
2478
2479/* Utility function for targets to use in implementing
2480 ASM_OUTPUT_ALIGNED_BSS. */
2481
2482void
2483x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2484 const char *name, unsigned HOST_WIDE_INT size,
2485 int align)
2486{
2487 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2488 && size > (unsigned int)ix86_section_threshold)
2489 switch_to_section (get_named_section (decl, ".lbss", 0));
2490 else
2491 switch_to_section (bss_section);
2492 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2493#ifdef ASM_DECLARE_OBJECT_NAME
2494 last_assemble_variable_decl = decl;
2495 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2496#else
2497 /* Standard thing is just output label for the object. */
2498 ASM_OUTPUT_LABEL (file, name);
2499#endif /* ASM_DECLARE_OBJECT_NAME */
2500 ASM_OUTPUT_SKIP (file, size ? size : 1);
2501}
2502#endif
2503
2504void
2505optimization_options (int level, int size ATTRIBUTE_UNUSED)
2506{
2507 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2508 make the problem with not enough registers even worse. */
2509#ifdef INSN_SCHEDULING
2510 if (level > 1)
2511 flag_schedule_insns = 0;
2512#endif
2513
2514 if (TARGET_MACHO)
2515 /* The Darwin libraries never set errno, so we might as well
2516 avoid calling them when that's the only reason we would. */
2517 flag_errno_math = 0;
2518
2519  /* The default values of these switches depend on TARGET_64BIT,
2520     which is not known at this moment.  Mark these values with 2 and
2521     let the user override them.  If no command line option
2522     specifies them, we will set the defaults in override_options.  */
2523 if (optimize >= 1)
2524 flag_omit_frame_pointer = 2;
2525 flag_pcc_struct_return = 2;
2526 flag_asynchronous_unwind_tables = 2;
2527#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2528 SUBTARGET_OPTIMIZATION_OPTIONS;
2529#endif
2530}
2531
2532/* Table of valid machine attributes. */
2533const struct attribute_spec ix86_attribute_table[] =
2534{
2535 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2536 /* Stdcall attribute says callee is responsible for popping arguments
2537 if they are not variable. */
2538 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2539 /* Fastcall attribute says callee is responsible for popping arguments
2540 if they are not variable. */
2541 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2542 /* Cdecl attribute says the callee is a normal C declaration */
2543 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2544 /* Regparm attribute specifies how many integer arguments are to be
2545 passed in registers. */
2546 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2547 /* Sseregparm attribute says we are using x86_64 calling conventions
2548 for FP arguments. */
2549 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2550 /* force_align_arg_pointer says this function realigns the stack at entry. */
2551 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2552 false, true, true, ix86_handle_cconv_attribute },
2553#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2554 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2555 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2556 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2557#endif
2558 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2559 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2560#ifdef SUBTARGET_ATTRIBUTE_TABLE
2561 SUBTARGET_ATTRIBUTE_TABLE,
2562#endif
2563 { NULL, 0, 0, false, false, false, NULL }
2564};
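
/* Hypothetical usage examples (not compiled) of the calling-convention
   attributes registered in the table above, as they would appear in user
   code.  As the comments above note, stdcall and fastcall make the callee
   responsible for popping non-variadic arguments, while regparm passes up
   to the given number of integer arguments in registers.  */
#if 0
extern int example_regparm_fn (int, int, int) __attribute__ ((regparm (3)));
extern int example_fastcall_fn (int, int) __attribute__ ((fastcall));
extern int example_stdcall_fn (int, int) __attribute__ ((stdcall));
#endif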
2565
2566/* Decide whether we can make a sibling call to a function. DECL is the
2567 declaration of the function being targeted by the call and EXP is the
2568 CALL_EXPR representing the call. */
2569
2570static bool
2571ix86_function_ok_for_sibcall (tree decl, tree exp)
2572{
2573 tree func;
2574 rtx a, b;
2575
2576 /* If we are generating position-independent code, we cannot sibcall
2577 optimize any indirect call, or a direct call to a global function,
2578 as the PLT requires %ebx be live. */
2579 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2580 return false;
2581
2582 if (decl)
2583 func = decl;
2584 else
2585 {
2586 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2587 if (POINTER_TYPE_P (func))
2588 func = TREE_TYPE (func);
2589 }
2590
2591	  /* Check that the return value locations are the same.  For example,
2592	     if we are returning floats on the 80387 register stack, we cannot
2593 make a sibcall from a function that doesn't return a float to a
2594 function that does or, conversely, from a function that does return
2595 a float to a function that doesn't; the necessary stack adjustment
2596 would not be executed. This is also the place we notice
2597 differences in the return value ABI. Note that it is ok for one
2598 of the functions to have void return type as long as the return
2599 value of the other is passed in a register. */
2600 a = ix86_function_value (TREE_TYPE (exp), func, false);
2601 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2602 cfun->decl, false);
2603 if (STACK_REG_P (a) || STACK_REG_P (b))
2604 {
2605 if (!rtx_equal_p (a, b))
2606 return false;
2607 }
2608 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2609 ;
2610 else if (!rtx_equal_p (a, b))
2611 return false;
2612
2613 /* If this call is indirect, we'll need to be able to use a call-clobbered
2614 register for the address of the target function. Make sure that all
2615 such registers are not used for passing parameters. */
2616 if (!decl && !TARGET_64BIT)
2617 {
2618 tree type;
2619
2620 /* We're looking at the CALL_EXPR, we need the type of the function. */
2621 type = TREE_OPERAND (exp, 0); /* pointer expression */
2622 type = TREE_TYPE (type); /* pointer type */
2623 type = TREE_TYPE (type); /* function type */
2624
2625 if (ix86_function_regparm (type, NULL) >= 3)
2626 {
2627 /* ??? Need to count the actual number of registers to be used,
2628 not the possible number of registers. Fix later. */
2629 return false;
2630 }
2631 }
2632
2633#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2634 /* Dllimport'd functions are also called indirectly. */
2635 if (decl && DECL_DLLIMPORT_P (decl)
2636 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2637 return false;
2638#endif
2639
2640 /* If we forced aligned the stack, then sibcalling would unalign the
2641 stack, which may break the called function. */
2642 if (cfun->machine->force_align_arg_pointer)
2643 return false;
2644
2645 /* Otherwise okay. That also includes certain types of indirect calls. */
2646 return true;
2647}
2648
2649/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2650 calling convention attributes;
2651 arguments as in struct attribute_spec.handler. */
2652
2653static tree
2654ix86_handle_cconv_attribute (tree *node, tree name,
2655 tree args,
2656 int flags ATTRIBUTE_UNUSED,
2657 bool *no_add_attrs)
2658{
2659 if (TREE_CODE (*node) != FUNCTION_TYPE
2660 && TREE_CODE (*node) != METHOD_TYPE
2661 && TREE_CODE (*node) != FIELD_DECL
2662 && TREE_CODE (*node) != TYPE_DECL)
2663 {
2664 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2665 IDENTIFIER_POINTER (name));
2666 *no_add_attrs = true;
2667 return NULL_TREE;
2668 }
2669
2670 /* Can combine regparm with all attributes but fastcall. */
2671 if (is_attribute_p ("regparm", name))
2672 {
2673 tree cst;
2674
2675 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2676 {
2677 error ("fastcall and regparm attributes are not compatible");
2678 }
2679
2680 cst = TREE_VALUE (args);
2681 if (TREE_CODE (cst) != INTEGER_CST)
2682 {
2683 warning (OPT_Wattributes,
2684 "%qs attribute requires an integer constant argument",
2685 IDENTIFIER_POINTER (name));
2686 *no_add_attrs = true;
2687 }
2688 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2689 {
2690 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2691 IDENTIFIER_POINTER (name), REGPARM_MAX);
2692 *no_add_attrs = true;
2693 }
2694
2695 if (!TARGET_64BIT
2696 && lookup_attribute (ix86_force_align_arg_pointer_string,
2697 TYPE_ATTRIBUTES (*node))
2698 && compare_tree_int (cst, REGPARM_MAX-1))
2699 {
2700 error ("%s functions limited to %d register parameters",
2701 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2702 }
2703
2704 return NULL_TREE;
2705 }
2706
2707 if (TARGET_64BIT)
2708 {
2709 warning (OPT_Wattributes, "%qs attribute ignored",
2710 IDENTIFIER_POINTER (name));
2711 *no_add_attrs = true;
2712 return NULL_TREE;
2713 }
2714
2715 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2716 if (is_attribute_p ("fastcall", name))
2717 {
2718 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2719 {
2720 error ("fastcall and cdecl attributes are not compatible");
2721 }
2722 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2723 {
2724 error ("fastcall and stdcall attributes are not compatible");
2725 }
2726 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2727 {
2728 error ("fastcall and regparm attributes are not compatible");
2729 }
2730 }
2731
2732 /* Can combine stdcall with fastcall (redundant), regparm and
2733 sseregparm. */
2734 else if (is_attribute_p ("stdcall", name))
2735 {
2736 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2737 {
2738 error ("stdcall and cdecl attributes are not compatible");
2739 }
2740 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2741 {
2742 error ("stdcall and fastcall attributes are not compatible");
2743 }
2744 }
2745
2746 /* Can combine cdecl with regparm and sseregparm. */
2747 else if (is_attribute_p ("cdecl", name))
2748 {
2749 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2750 {
2751 error ("stdcall and cdecl attributes are not compatible");
2752 }
2753 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2754 {
2755 error ("fastcall and cdecl attributes are not compatible");
2756 }
2757 }
2758
2759 /* Can combine sseregparm with all attributes. */
2760
2761 return NULL_TREE;
2762}
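/* Illustrative sketch only, not part of the implementation: on a 32-bit
   target the handler above accepts or rejects calling-convention attribute
   combinations roughly as follows (function names are hypothetical):

     void __attribute__((regparm (3))) f1 (int, int, int);    accepted
     void __attribute__((fastcall, stdcall)) f2 (int);        accepted (stdcall is redundant)
     void __attribute__((stdcall, sseregparm)) f3 (double);   accepted
     void __attribute__((fastcall, regparm (2))) f4 (int);    error: fastcall and regparm attributes are not compatible
     void __attribute__((stdcall, cdecl)) f5 (int);           error: stdcall and cdecl attributes are not compatible
     void __attribute__((regparm (8))) f6 (int);              warning: argument larger than REGPARM_MAX  */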
2763
2764/* Return 0 if the attributes for two types are incompatible, 1 if they
2765 are compatible, and 2 if they are nearly compatible (which causes a
2766 warning to be generated). */
2767
2768static int
2769ix86_comp_type_attributes (tree type1, tree type2)
2770{
2771 /* Check for mismatch of non-default calling convention. */
2772 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2773
2774 if (TREE_CODE (type1) != FUNCTION_TYPE)
2775 return 1;
2776
2777 /* Check for mismatched fastcall/regparm types. */
2778 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2779 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2780 || (ix86_function_regparm (type1, NULL)
2781 != ix86_function_regparm (type2, NULL)))
2782 return 0;
2783
2784 /* Check for mismatched sseregparm types. */
2785 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2786 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2787 return 0;
2788
2789 /* Check for mismatched return types (cdecl vs stdcall). */
2790 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2791 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2792 return 0;
2793
2794 return 1;
2795}
2796
2797/* Return the regparm value for a function with the indicated TYPE and DECL.
2798 DECL may be NULL when calling function indirectly
2799 or considering a libcall. */
2800
2801static int
2802ix86_function_regparm (tree type, tree decl)
2803{
2804 tree attr;
2805 int regparm = ix86_regparm;
2806 bool user_convention = false;
2807
2808 if (!TARGET_64BIT)
2809 {
2810 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2811 if (attr)
2812 {
2813 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2814 user_convention = true;
2815 }
2816
2817 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2818 {
2819 regparm = 2;
2820 user_convention = true;
2821 }
2822
2823 /* Use register calling convention for local functions when possible. */
2824 if (!TARGET_64BIT && !user_convention && decl
2825 && flag_unit_at_a_time && !profile_flag)
2826 {
2827 struct cgraph_local_info *i = cgraph_local_info (decl);
2828 if (i && i->local)
2829 {
2830 int local_regparm, globals = 0, regno;
2831
2832 /* Make sure no regparm register is taken by a global register
2833 variable. */
2834 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2835 if (global_regs[local_regparm])
2836 break;
2837	  /* We can't use regparm(3) for nested functions as these use
2838	     the static chain pointer in the third argument.  */
2839 if (local_regparm == 3
2840 && decl_function_context (decl)
2841 && !DECL_NO_STATIC_CHAIN (decl))
2842 local_regparm = 2;
2843	  /* If the function realigns its stack pointer, the
2844 prologue will clobber %ecx. If we've already
2845 generated code for the callee, the callee
2846 DECL_STRUCT_FUNCTION is gone, so we fall back to
2847 scanning the attributes for the self-realigning
2848 property. */
2849 if ((DECL_STRUCT_FUNCTION (decl)
2850 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2851 || (!DECL_STRUCT_FUNCTION (decl)
2852 && lookup_attribute (ix86_force_align_arg_pointer_string,
2853 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2854 local_regparm = 2;
2855	  /* Each global register variable increases register pressure, so the
2856	     more global register variables there are, the less useful the regparm
2857	     optimization becomes, unless the user requests it explicitly.  */
2858 for (regno = 0; regno < 6; regno++)
2859 if (global_regs[regno])
2860 globals++;
2861 local_regparm
2862 = globals < local_regparm ? local_regparm - globals : 0;
2863
2864 if (local_regparm > regparm)
2865 regparm = local_regparm;
2866 }
2867 }
2868 }
2869 return regparm;
2870}
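/* Illustrative sketch, not part of the implementation: the value computed
   above is the number of integer registers available for parameter passing
   on a 32-bit target.  Hypothetical examples:

     int __attribute__((regparm (3))) add3 (int a, int b, int c);
         a, b and c arrive in %eax, %edx and %ecx respectively.

     static int bump (int x) { return x + 1; }
         With -funit-at-a-time and no profiling, a function local to the unit
         may be promoted to a register convention automatically by the
         cgraph-based logic above, subject to the global-register and
         stack-realignment restrictions it checks.  */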
2871
2872/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2873 DFmode (2) arguments in SSE registers for a function with the
2874 indicated TYPE and DECL. DECL may be NULL when calling function
2875 indirectly or considering a libcall. Otherwise return 0. */
2876
2877static int
2878ix86_function_sseregparm (tree type, tree decl)
2879{
2880 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2881 by the sseregparm attribute. */
2882 if (TARGET_SSEREGPARM
2883 || (type
2884 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2885 {
2886 if (!TARGET_SSE)
2887 {
2888 if (decl)
2889 error ("Calling %qD with attribute sseregparm without "
2890 "SSE/SSE2 enabled", decl);
2891 else
2892 error ("Calling %qT with attribute sseregparm without "
2893 "SSE/SSE2 enabled", type);
2894 return 0;
2895 }
2896
2897 return 2;
2898 }
2899
2900 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2901 (and DFmode for SSE2) arguments in SSE registers,
2902 even for 32-bit targets. */
2903 if (!TARGET_64BIT && decl
2904 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2905 {
2906 struct cgraph_local_info *i = cgraph_local_info (decl);
2907 if (i && i->local)
2908 return TARGET_SSE2 ? 2 : 1;
2909 }
2910
2911 return 0;
2912}
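/* Illustrative sketch, not part of the implementation: the sseregparm
   attribute asks for SSE registers for scalar float arguments on 32-bit
   targets.  Hypothetical declaration, valid only with SSE/SSE2 enabled:

     double __attribute__((sseregparm)) scale (double x);
         With -msse2, x arrives in %xmm0 instead of on the stack; without
         SSE the errors emitted above are reported instead.  */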
2913
2914/* Return true if EAX is live at the start of the function. Used by
2915 ix86_expand_prologue to determine if we need special help before
2916 calling allocate_stack_worker. */
2917
2918static bool
2919ix86_eax_live_at_start_p (void)
2920{
2921 /* Cheat. Don't bother working forward from ix86_function_regparm
2922 to the function type to whether an actual argument is located in
2923 eax. Instead just look at cfg info, which is still close enough
2924 to correct at this point. This gives false positives for broken
2925 functions that might use uninitialized data that happens to be
2926 allocated in eax, but who cares? */
2927 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2928}
2929
2930/* Value is the number of bytes of arguments automatically
2931 popped when returning from a subroutine call.
2932 FUNDECL is the declaration node of the function (as a tree),
2933 FUNTYPE is the data type of the function (as a tree),
2934 or for a library call it is an identifier node for the subroutine name.
2935 SIZE is the number of bytes of arguments passed on the stack.
2936
2937 On the 80386, the RTD insn may be used to pop them if the number
2938 of args is fixed, but if the number is variable then the caller
2939 must pop them all. RTD can't be used for library calls now
2940 because the library is compiled with the Unix compiler.
2941 Use of RTD is a selectable option, since it is incompatible with
2942 standard Unix calling sequences. If the option is not selected,
2943 the caller must always pop the args.
2944
2945 The attribute stdcall is equivalent to RTD on a per module basis. */
2946
2947int
2948ix86_return_pops_args (tree fundecl, tree funtype, int size)
2949{
2950 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2951
2952 /* Cdecl functions override -mrtd, and never pop the stack. */
2953 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2954
2955 /* Stdcall and fastcall functions will pop the stack if not
2956 variable args. */
2957 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2958 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2959 rtd = 1;
2960
2961 if (rtd
2962 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2963 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2964 == void_type_node)))
2965 return size;
2966 }
2967
2968 /* Lose any fake structure return argument if it is passed on the stack. */
2969 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2970 && !TARGET_64BIT
2971 && !KEEP_AGGREGATE_RETURN_POINTER)
2972 {
2973 int nregs = ix86_function_regparm (funtype, fundecl);
2974
2975 if (!nregs)
2976 return GET_MODE_SIZE (Pmode);
2977 }
2978
2979 return 0;
2980}
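/* Illustrative sketch, not part of the implementation: the callee-pop rule
   above in practice on a 32-bit target (hypothetical declarations):

     int __attribute__((stdcall)) f (int a, int b);   callee pops 8 bytes ("ret $8")
     int __attribute__((stdcall)) g (int a, ...);     variable arguments: caller pops
     int h (int a, int b);                            default cdecl: caller pops,
                                                      unless -mrtd is in effect  */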
2981
2982/* Argument support functions. */
2983
2984/* Return true when register may be used to pass function parameters. */
2985bool
2986ix86_function_arg_regno_p (int regno)
2987{
2988 int i;
2989 if (!TARGET_64BIT)
2990 {
2991 if (TARGET_MACHO)
2992 return (regno < REGPARM_MAX
2993 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
2994 else
2995 return (regno < REGPARM_MAX
2996 || (TARGET_MMX && MMX_REGNO_P (regno)
2997 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2998 || (TARGET_SSE && SSE_REGNO_P (regno)
2999 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3000 }
3001
3002 if (TARGET_MACHO)
3003 {
3004 if (SSE_REGNO_P (regno) && TARGET_SSE)
3005 return true;
3006 }
3007 else
3008 {
3009 if (TARGET_SSE && SSE_REGNO_P (regno)
3010 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3011 return true;
3012 }
3013 /* RAX is used as hidden argument to va_arg functions. */
3014 if (!regno)
3015 return true;
3016 for (i = 0; i < REGPARM_MAX; i++)
3017 if (regno == x86_64_int_parameter_registers[i])
3018 return true;
3019 return false;
3020}
3021
3022/* Return true if we do not know how to pass TYPE solely in registers.  */
3023
3024static bool
3025ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3026{
3027 if (must_pass_in_stack_var_size_or_pad (mode, type))
3028 return true;
3029
3030 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3031 The layout_type routine is crafty and tries to trick us into passing
3032 currently unsupported vector types on the stack by using TImode. */
3033 return (!TARGET_64BIT && mode == TImode
3034 && type && TREE_CODE (type) != VECTOR_TYPE);
3035}
3036
3037/* Initialize a variable CUM of type CUMULATIVE_ARGS
3038 for a call to a function whose data type is FNTYPE.
3039 For a library call, FNTYPE is 0. */
3040
3041void
3042init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3043 tree fntype, /* tree ptr for function decl */
3044 rtx libname, /* SYMBOL_REF of library name or 0 */
3045 tree fndecl)
3046{
3047 static CUMULATIVE_ARGS zero_cum;
3048 tree param, next_param;
3049
3050 if (TARGET_DEBUG_ARG)
3051 {
3052 fprintf (stderr, "\ninit_cumulative_args (");
3053 if (fntype)
3054 fprintf (stderr, "fntype code = %s, ret code = %s",
3055 tree_code_name[(int) TREE_CODE (fntype)],
3056 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3057 else
3058 fprintf (stderr, "no fntype");
3059
3060 if (libname)
3061 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3062 }
3063
3064 *cum = zero_cum;
3065
3066 /* Set up the number of registers to use for passing arguments. */
3067 cum->nregs = ix86_regparm;
3068 if (TARGET_SSE)
3069 cum->sse_nregs = SSE_REGPARM_MAX;
3070 if (TARGET_MMX)
3071 cum->mmx_nregs = MMX_REGPARM_MAX;
3072 cum->warn_sse = true;
3073 cum->warn_mmx = true;
3074 cum->maybe_vaarg = false;
3075
3076 /* Use ecx and edx registers if function has fastcall attribute,
3077 else look for regparm information. */
3078 if (fntype && !TARGET_64BIT)
3079 {
3080 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3081 {
3082 cum->nregs = 2;
3083 cum->fastcall = 1;
3084 }
3085 else
3086 cum->nregs = ix86_function_regparm (fntype, fndecl);
3087 }
3088
3089 /* Set up the number of SSE registers used for passing SFmode
3090 and DFmode arguments. Warn for mismatching ABI. */
3091 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3092
3093  /* Determine if this function has variable arguments.  A function has
3094     no variable arguments if the type of its last argument is
3095     'void_type_node'.  If there are variable arguments, then we won't
3096     pass anything in registers in 32-bit mode.  */
3097
3098 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3099 {
3100 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3101 param != 0; param = next_param)
3102 {
3103 next_param = TREE_CHAIN (param);
3104 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3105 {
3106 if (!TARGET_64BIT)
3107 {
3108 cum->nregs = 0;
3109 cum->sse_nregs = 0;
3110 cum->mmx_nregs = 0;
3111 cum->warn_sse = 0;
3112 cum->warn_mmx = 0;
3113 cum->fastcall = 0;
3114 cum->float_in_sse = 0;
3115 }
3116 cum->maybe_vaarg = true;
3117 }
3118 }
3119 }
3120 if ((!fntype && !libname)
3121 || (fntype && !TYPE_ARG_TYPES (fntype)))
3122 cum->maybe_vaarg = true;
3123
3124 if (TARGET_DEBUG_ARG)
3125 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3126
3127 return;
3128}
3129
3130/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3131 But in the case of vector types, it is some vector mode.
3132
3133 When we have only some of our vector isa extensions enabled, then there
3134 are some modes for which vector_mode_supported_p is false. For these
3135 modes, the generic vector support in gcc will choose some non-vector mode
3136 in order to implement the type. By computing the natural mode, we'll
3137 select the proper ABI location for the operand and not depend on whatever
3138 the middle-end decides to do with these vector types. */
3139
3140static enum machine_mode
3141type_natural_mode (tree type)
3142{
3143 enum machine_mode mode = TYPE_MODE (type);
3144
3145 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3146 {
3147 HOST_WIDE_INT size = int_size_in_bytes (type);
3148 if ((size == 8 || size == 16)
3149 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3150 && TYPE_VECTOR_SUBPARTS (type) > 1)
3151 {
3152 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3153
3154 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3155 mode = MIN_MODE_VECTOR_FLOAT;
3156 else
3157 mode = MIN_MODE_VECTOR_INT;
3158
3159 /* Get the mode which has this inner mode and number of units. */
3160 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3161 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3162 && GET_MODE_INNER (mode) == innermode)
3163 return mode;
3164
3165 gcc_unreachable ();
3166 }
3167 }
3168
3169 return mode;
3170}
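/* Illustrative sketch, not part of the implementation: a generic vector
   type keeps its "natural" vector mode for ABI purposes even when the
   matching ISA extension is disabled.  Hypothetical typedef:

     typedef int v4si __attribute__((vector_size (16)));

   With -mno-sse the middle end may lower v4si to a non-vector mode, but
   type_natural_mode still reports V4SImode, so the ABI location chosen for
   such an argument does not depend on which extensions are enabled.  */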
3171
3172/* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3173 this may not agree with the mode that the type system has chosen for the
3174 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3175 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3176
3177static rtx
3178gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3179 unsigned int regno)
3180{
3181 rtx tmp;
3182
3183 if (orig_mode != BLKmode)
3184 tmp = gen_rtx_REG (orig_mode, regno);
3185 else
3186 {
3187 tmp = gen_rtx_REG (mode, regno);
3188 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3189 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3190 }
3191
3192 return tmp;
3193}
3194
3195/* x86-64 register passing implementation.  See the x86-64 ABI for details.
3196   The goal of this code is to classify each eightbyte of the incoming argument
3197   by register class and assign registers accordingly.  */
3198
3199/* Return the union class of CLASS1 and CLASS2.
3200 See the x86-64 PS ABI for details. */
3201
3202static enum x86_64_reg_class
3203merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3204{
3205 /* Rule #1: If both classes are equal, this is the resulting class. */
3206 if (class1 == class2)
3207 return class1;
3208
3209 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3210 the other class. */
3211 if (class1 == X86_64_NO_CLASS)
3212 return class2;
3213 if (class2 == X86_64_NO_CLASS)
3214 return class1;
3215
3216 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3217 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3218 return X86_64_MEMORY_CLASS;
3219
3220 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3221 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3222 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3223 return X86_64_INTEGERSI_CLASS;
3224 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3225 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3226 return X86_64_INTEGER_CLASS;
3227
3228 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3229 MEMORY is used. */
3230 if (class1 == X86_64_X87_CLASS
3231 || class1 == X86_64_X87UP_CLASS
3232 || class1 == X86_64_COMPLEX_X87_CLASS
3233 || class2 == X86_64_X87_CLASS
3234 || class2 == X86_64_X87UP_CLASS
3235 || class2 == X86_64_COMPLEX_X87_CLASS)
3236 return X86_64_MEMORY_CLASS;
3237
3238 /* Rule #6: Otherwise class SSE is used. */
3239 return X86_64_SSE_CLASS;
3240}
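/* Worked examples, illustrative only, following the rules above:

     NO_CLASS  merged with INTEGER -> INTEGER     (rule #2)
     INTEGERSI merged with SSESF   -> INTEGERSI   (rule #4, first clause)
     SSE       merged with INTEGER -> INTEGER     (rule #4)
     X87       merged with SSE     -> MEMORY      (rule #5)
     SSESF     merged with SSEDF   -> SSE         (rule #6)  */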
3241
3242/* Classify the argument of type TYPE and mode MODE.
3243 CLASSES will be filled by the register class used to pass each word
3244 of the operand. The number of words is returned. In case the parameter
3245 should be passed in memory, 0 is returned. As a special case for zero
3246 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3247
3248   BIT_OFFSET is used internally for handling records; it specifies the
3249   offset in bits modulo 256 to avoid overflow cases.
3250
3251 See the x86-64 PS ABI for details.
3252*/
3253
3254static int
3255classify_argument (enum machine_mode mode, tree type,
3256 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3257{
3258 HOST_WIDE_INT bytes =
3259 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3260 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3261
3262 /* Variable sized entities are always passed/returned in memory. */
3263 if (bytes < 0)
3264 return 0;
3265
3266 if (mode != VOIDmode
3267 && targetm.calls.must_pass_in_stack (mode, type))
3268 return 0;
3269
3270 if (type && AGGREGATE_TYPE_P (type))
3271 {
3272 int i;
3273 tree field;
3274 enum x86_64_reg_class subclasses[MAX_CLASSES];
3275
3276 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3277 if (bytes > 16)
3278 return 0;
3279
3280 for (i = 0; i < words; i++)
3281 classes[i] = X86_64_NO_CLASS;
3282
3283      /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
3284         signal the memory class, so handle this as a special case.  */
3285 if (!words)
3286 {
3287 classes[0] = X86_64_NO_CLASS;
3288 return 1;
3289 }
3290
3291 /* Classify each field of record and merge classes. */
3292 switch (TREE_CODE (type))
3293 {
3294 case RECORD_TYPE:
3295	  /* For classes, first merge in the fields of the bases.  */
3296 if (TYPE_BINFO (type))
3297 {
3298 tree binfo, base_binfo;
3299 int basenum;
3300
3301 for (binfo = TYPE_BINFO (type), basenum = 0;
3302 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
3303 {
3304 int num;
3305 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
3306 tree type = BINFO_TYPE (base_binfo);
3307
3308 num = classify_argument (TYPE_MODE (type),
3309 type, subclasses,
3310 (offset + bit_offset) % 256);
3311 if (!num)
3312 return 0;
3313 for (i = 0; i < num; i++)
3314 {
3315 int pos = (offset + (bit_offset % 64)) / 8 / 8;
3316 classes[i + pos] =
3317 merge_classes (subclasses[i], classes[i + pos]);
3318 }
3319 }
3320 }
3321	  /* And now merge the fields of the structure.  */
3322 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3323 {
3324 if (TREE_CODE (field) == FIELD_DECL)
3325 {
3326 int num;
3327
3328 if (TREE_TYPE (field) == error_mark_node)
3329 continue;
3330
3331 /* Bitfields are always classified as integer. Handle them
3332 early, since later code would consider them to be
3333 misaligned integers. */
3334 if (DECL_BIT_FIELD (field))
3335 {
3336 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3337 i < ((int_bit_position (field) + (bit_offset % 64))
3338 + tree_low_cst (DECL_SIZE (field), 0)
3339 + 63) / 8 / 8; i++)
3340 classes[i] =
3341 merge_classes (X86_64_INTEGER_CLASS,
3342 classes[i]);
3343 }
3344 else
3345 {
3346 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3347 TREE_TYPE (field), subclasses,
3348 (int_bit_position (field)
3349 + bit_offset) % 256);
3350 if (!num)
3351 return 0;
3352 for (i = 0; i < num; i++)
3353 {
3354 int pos =
3355 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3356 classes[i + pos] =
3357 merge_classes (subclasses[i], classes[i + pos]);
3358 }
3359 }
3360 }
3361 }
3362 break;
3363
3364 case ARRAY_TYPE:
3365 /* Arrays are handled as small records. */
3366 {
3367 int num;
3368 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3369 TREE_TYPE (type), subclasses, bit_offset);
3370 if (!num)
3371 return 0;
3372
3373 /* The partial classes are now full classes. */
3374 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3375 subclasses[0] = X86_64_SSE_CLASS;
3376 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3377 subclasses[0] = X86_64_INTEGER_CLASS;
3378
3379 for (i = 0; i < words; i++)
3380 classes[i] = subclasses[i % num];
3381
3382 break;
3383 }
3384 case UNION_TYPE:
3385 case QUAL_UNION_TYPE:
3386 /* Unions are similar to RECORD_TYPE but offset is always 0.
3387 */
3388
3389 /* Unions are not derived. */
3390 gcc_assert (!TYPE_BINFO (type)
3391 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3392 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3393 {
3394 if (TREE_CODE (field) == FIELD_DECL)
3395 {
3396 int num;
3397
3398 if (TREE_TYPE (field) == error_mark_node)
3399 continue;
3400
3401 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3402 TREE_TYPE (field), subclasses,
3403 bit_offset);
3404 if (!num)
3405 return 0;
3406 for (i = 0; i < num; i++)
3407 classes[i] = merge_classes (subclasses[i], classes[i]);
3408 }
3409 }
3410 break;
3411
3412 default:
3413 gcc_unreachable ();
3414 }
3415
3416 /* Final merger cleanup. */
3417 for (i = 0; i < words; i++)
3418 {
3419 /* If one class is MEMORY, everything should be passed in
3420 memory. */
3421 if (classes[i] == X86_64_MEMORY_CLASS)
3422 return 0;
3423
3424 /* The X86_64_SSEUP_CLASS should be always preceded by
3425 X86_64_SSE_CLASS. */
3426 if (classes[i] == X86_64_SSEUP_CLASS
3427 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3428 classes[i] = X86_64_SSE_CLASS;
3429
3430 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3431 if (classes[i] == X86_64_X87UP_CLASS
3432 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3433 classes[i] = X86_64_SSE_CLASS;
3434 }
3435 return words;
3436 }
3437
3438  /* Compute the alignment needed.  We align all types to their natural
3439     boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
3440 if (mode != VOIDmode && mode != BLKmode)
3441 {
3442 int mode_alignment = GET_MODE_BITSIZE (mode);
3443
3444 if (mode == XFmode)
3445 mode_alignment = 128;
3446 else if (mode == XCmode)
3447 mode_alignment = 256;
3448 if (COMPLEX_MODE_P (mode))
3449 mode_alignment /= 2;
3450 /* Misaligned fields are always returned in memory. */
3451 if (bit_offset % mode_alignment)
3452 return 0;
3453 }
3454
3455  /* For V1xx modes, just use the base mode.  */
3456 if (VECTOR_MODE_P (mode)
3457 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3458 mode = GET_MODE_INNER (mode);
3459
3460 /* Classification of atomic types. */
3461 switch (mode)
3462 {
3463 case SDmode:
3464 case DDmode:
3465 classes[0] = X86_64_SSE_CLASS;
3466 return 1;
3467 case TDmode:
3468 classes[0] = X86_64_SSE_CLASS;
3469 classes[1] = X86_64_SSEUP_CLASS;
3470 return 2;
3471 case DImode:
3472 case SImode:
3473 case HImode:
3474 case QImode:
3475 case CSImode:
3476 case CHImode:
3477 case CQImode:
3478 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3479 classes[0] = X86_64_INTEGERSI_CLASS;
3480 else
3481 classes[0] = X86_64_INTEGER_CLASS;
3482 return 1;
3483 case CDImode:
3484 case TImode:
3485 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3486 return 2;
3487 case CTImode:
3488 return 0;
3489 case SFmode:
3490 if (!(bit_offset % 64))
3491 classes[0] = X86_64_SSESF_CLASS;
3492 else
3493 classes[0] = X86_64_SSE_CLASS;
3494 return 1;
3495 case DFmode:
3496 classes[0] = X86_64_SSEDF_CLASS;
3497 return 1;
3498 case XFmode:
3499 classes[0] = X86_64_X87_CLASS;
3500 classes[1] = X86_64_X87UP_CLASS;
3501 return 2;
3502 case TFmode:
3503 classes[0] = X86_64_SSE_CLASS;
3504 classes[1] = X86_64_SSEUP_CLASS;
3505 return 2;
3506 case SCmode:
3507 classes[0] = X86_64_SSE_CLASS;
3508 return 1;
3509 case DCmode:
3510 classes[0] = X86_64_SSEDF_CLASS;
3511 classes[1] = X86_64_SSEDF_CLASS;
3512 return 2;
3513 case XCmode:
3514 classes[0] = X86_64_COMPLEX_X87_CLASS;
3515 return 1;
3516 case TCmode:
3517      /* This mode is larger than 16 bytes.  */
3518 return 0;
3519 case V4SFmode:
3520 case V4SImode:
3521 case V16QImode:
3522 case V8HImode:
3523 case V2DFmode:
3524 case V2DImode:
3525 classes[0] = X86_64_SSE_CLASS;
3526 classes[1] = X86_64_SSEUP_CLASS;
3527 return 2;
3528 case V2SFmode:
3529 case V2SImode:
3530 case V4HImode:
3531 case V8QImode:
3532 classes[0] = X86_64_SSE_CLASS;
3533 return 1;
3534 case BLKmode:
3535 case VOIDmode:
3536 return 0;
3537 default:
3538 gcc_assert (VECTOR_MODE_P (mode));
3539
3540 if (bytes > 16)
3541 return 0;
3542
3543 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3544
3545 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3546 classes[0] = X86_64_INTEGERSI_CLASS;
3547 else
3548 classes[0] = X86_64_INTEGER_CLASS;
3549 classes[1] = X86_64_INTEGER_CLASS;
3550 return 1 + (bytes > 8);
3551 }
3552}
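/* Worked examples, illustrative only, of the classification above under the
   SysV x86-64 ABI (struct tags hypothetical):

     struct s1 { double d; long l; };
         16 bytes, two eightbytes: { SSEDF, INTEGER }; d is passed in an SSE
         register and l in an integer register.

     struct s2 { float a; float b; };
         8 bytes, one eightbyte: SSESF merged with SSE gives { SSE }; both
         floats travel in the low half of one SSE register.

     struct s3 { long double x; };
         classified { X87, X87UP }; passed in memory (see examine_argument)
         but returned in %st(0).

     struct s4 { char c[24]; };
         larger than 16 bytes, so classify_argument returns 0 and the
         argument is passed in memory.  */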
3553
3554/* Examine the argument and set the number of registers required in each
3555   class.  Return 0 iff the parameter should be passed in memory.  */
3556static int
3557examine_argument (enum machine_mode mode, tree type, int in_return,
3558 int *int_nregs, int *sse_nregs)
3559{
3560 enum x86_64_reg_class class[MAX_CLASSES];
3561 int n = classify_argument (mode, type, class, 0);
3562
3563 *int_nregs = 0;
3564 *sse_nregs = 0;
3565 if (!n)
3566 return 0;
3567 for (n--; n >= 0; n--)
3568 switch (class[n])
3569 {
3570 case X86_64_INTEGER_CLASS:
3571 case X86_64_INTEGERSI_CLASS:
3572 (*int_nregs)++;
3573 break;
3574 case X86_64_SSE_CLASS:
3575 case X86_64_SSESF_CLASS:
3576 case X86_64_SSEDF_CLASS:
3577 (*sse_nregs)++;
3578 break;
3579 case X86_64_NO_CLASS:
3580 case X86_64_SSEUP_CLASS:
3581 break;
3582 case X86_64_X87_CLASS:
3583 case X86_64_X87UP_CLASS:
3584 if (!in_return)
3585 return 0;
3586 break;
3587 case X86_64_COMPLEX_X87_CLASS:
3588 return in_return ? 2 : 0;
3589 case X86_64_MEMORY_CLASS:
3590 gcc_unreachable ();
3591 }
3592 return 1;
3593}
3594
3595/* Construct container for the argument used by GCC interface. See
3596 FUNCTION_ARG for the detailed description. */
3597
3598static rtx
3599construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3600 tree type, int in_return, int nintregs, int nsseregs,
3601 const int *intreg, int sse_regno)
3602{
3603 /* The following variables hold the static issued_error state. */
3604 static bool issued_sse_arg_error;
3605 static bool issued_sse_ret_error;
3606 static bool issued_x87_ret_error;
3607
3608 enum machine_mode tmpmode;
3609 int bytes =
3610 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3611 enum x86_64_reg_class class[MAX_CLASSES];
3612 int n;
3613 int i;
3614 int nexps = 0;
3615 int needed_sseregs, needed_intregs;
3616 rtx exp[MAX_CLASSES];
3617 rtx ret;
3618
3619 n = classify_argument (mode, type, class, 0);
3620 if (TARGET_DEBUG_ARG)
3621 {
3622 if (!n)
3623 fprintf (stderr, "Memory class\n");
3624 else
3625 {
3626 fprintf (stderr, "Classes:");
3627 for (i = 0; i < n; i++)
3628 {
3629 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3630 }
3631 fprintf (stderr, "\n");
3632 }
3633 }
3634 if (!n)
3635 return NULL;
3636 if (!examine_argument (mode, type, in_return, &needed_intregs,
3637 &needed_sseregs))
3638 return NULL;
3639 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3640 return NULL;
3641
3642 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3643 some less clueful developer tries to use floating-point anyway. */
3644 if (needed_sseregs && !TARGET_SSE)
3645 {
3646 if (in_return)
3647 {
3648 if (!issued_sse_ret_error)
3649 {
3650 error ("SSE register return with SSE disabled");
3651 issued_sse_ret_error = true;
3652 }
3653 }
3654 else if (!issued_sse_arg_error)
3655 {
3656 error ("SSE register argument with SSE disabled");
3657 issued_sse_arg_error = true;
3658 }
3659 return NULL;
3660 }
3661
3662 /* Likewise, error if the ABI requires us to return values in the
3663 x87 registers and the user specified -mno-80387. */
3664 if (!TARGET_80387 && in_return)
3665 for (i = 0; i < n; i++)
3666 if (class[i] == X86_64_X87_CLASS
3667 || class[i] == X86_64_X87UP_CLASS
3668 || class[i] == X86_64_COMPLEX_X87_CLASS)
3669 {
3670 if (!issued_x87_ret_error)
3671 {
3672 error ("x87 register return with x87 disabled");
3673 issued_x87_ret_error = true;
3674 }
3675 return NULL;
3676 }
3677
3678  /* First construct simple cases.  Avoid SCmode, since we want to use
3679     a single register to pass this type.  */
3680 if (n == 1 && mode != SCmode)
3681 switch (class[0])
3682 {
3683 case X86_64_INTEGER_CLASS:
3684 case X86_64_INTEGERSI_CLASS:
3685 return gen_rtx_REG (mode, intreg[0]);
3686 case X86_64_SSE_CLASS:
3687 case X86_64_SSESF_CLASS:
3688 case X86_64_SSEDF_CLASS:
3689 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3690 case X86_64_X87_CLASS:
3691 case X86_64_COMPLEX_X87_CLASS:
3692 return gen_rtx_REG (mode, FIRST_STACK_REG);
3693 case X86_64_NO_CLASS:
3694 /* Zero sized array, struct or class. */
3695 return NULL;
3696 default:
3697 gcc_unreachable ();
3698 }
3699 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3700 && mode != BLKmode)
3701 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3702 if (n == 2
3703 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3704 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3705 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3706 && class[1] == X86_64_INTEGER_CLASS
3707 && (mode == CDImode || mode == TImode || mode == TFmode)
3708 && intreg[0] + 1 == intreg[1])
3709 return gen_rtx_REG (mode, intreg[0]);
3710
3711 /* Otherwise figure out the entries of the PARALLEL. */
3712 for (i = 0; i < n; i++)
3713 {
3714 switch (class[i])
3715 {
3716 case X86_64_NO_CLASS:
3717 break;
3718 case X86_64_INTEGER_CLASS:
3719 case X86_64_INTEGERSI_CLASS:
3720 /* Merge TImodes on aligned occasions here too. */
3721 if (i * 8 + 8 > bytes)
3722 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3723 else if (class[i] == X86_64_INTEGERSI_CLASS)
3724 tmpmode = SImode;
3725 else
3726 tmpmode = DImode;
3727 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3728 if (tmpmode == BLKmode)
3729 tmpmode = DImode;
3730 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3731 gen_rtx_REG (tmpmode, *intreg),
3732 GEN_INT (i*8));
3733 intreg++;
3734 break;
3735 case X86_64_SSESF_CLASS:
3736 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3737 gen_rtx_REG (SFmode,
3738 SSE_REGNO (sse_regno)),
3739 GEN_INT (i*8));
3740 sse_regno++;
3741 break;
3742 case X86_64_SSEDF_CLASS:
3743 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3744 gen_rtx_REG (DFmode,
3745 SSE_REGNO (sse_regno)),
3746 GEN_INT (i*8));
3747 sse_regno++;
3748 break;
3749 case X86_64_SSE_CLASS:
3750 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3751 tmpmode = TImode;
3752 else
3753 tmpmode = DImode;
3754 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3755 gen_rtx_REG (tmpmode,
3756 SSE_REGNO (sse_regno)),
3757 GEN_INT (i*8));
3758 if (tmpmode == TImode)
3759 i++;
3760 sse_regno++;
3761 break;
3762 default:
3763 gcc_unreachable ();
3764 }
3765 }
3766
3767 /* Empty aligned struct, union or class. */
3768 if (nexps == 0)
3769 return NULL;
3770
3771 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3772 for (i = 0; i < nexps; i++)
3773 XVECEXP (ret, 0, i) = exp [i];
3774 return ret;
3775}
3776
3777/* Update the data in CUM to advance over an argument
3778 of mode MODE and data type TYPE.
3779 (TYPE is null for libcalls where that information may not be available.) */
3780
3781void
3782function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3783 tree type, int named)
3784{
3785 int bytes =
3786 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3787 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3788
3789 if (type)
3790 mode = type_natural_mode (type);
3791
3792 if (TARGET_DEBUG_ARG)
3793 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3794 "mode=%s, named=%d)\n\n",
3795 words, cum->words, cum->nregs, cum->sse_nregs,
3796 GET_MODE_NAME (mode), named);
3797
3798 if (TARGET_64BIT)
3799 {
3800 int int_nregs, sse_nregs;
3801 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3802 cum->words += words;
3803 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3804 {
3805 cum->nregs -= int_nregs;
3806 cum->sse_nregs -= sse_nregs;
3807 cum->regno += int_nregs;
3808 cum->sse_regno += sse_nregs;
3809 }
3810 else
3811 cum->words += words;
3812 }
3813 else
3814 {
3815 switch (mode)
3816 {
3817 default:
3818 break;
3819
3820 case BLKmode:
3821 if (bytes < 0)
3822 break;
3823 /* FALLTHRU */
3824
3825 case DImode:
3826 case SImode:
3827 case HImode:
3828 case QImode:
3829 cum->words += words;
3830 cum->nregs -= words;
3831 cum->regno += words;
3832
3833 if (cum->nregs <= 0)
3834 {
3835 cum->nregs = 0;
3836 cum->regno = 0;
3837 }
3838 break;
3839
3840 case DFmode:
3841 if (cum->float_in_sse < 2)
3842 break;
3843 case SFmode:
3844 if (cum->float_in_sse < 1)
3845 break;
3846 /* FALLTHRU */
3847
3848 case TImode:
3849 case V16QImode:
3850 case V8HImode:
3851 case V4SImode:
3852 case V2DImode:
3853 case V4SFmode:
3854 case V2DFmode:
3855 if (!type || !AGGREGATE_TYPE_P (type))
3856 {
3857 cum->sse_words += words;
3858 cum->sse_nregs -= 1;
3859 cum->sse_regno += 1;
3860 if (cum->sse_nregs <= 0)
3861 {
3862 cum->sse_nregs = 0;
3863 cum->sse_regno = 0;
3864 }
3865 }
3866 break;
3867
3868 case V8QImode:
3869 case V4HImode:
3870 case V2SImode:
3871 case V2SFmode:
3872 if (!type || !AGGREGATE_TYPE_P (type))
3873 {
3874 cum->mmx_words += words;
3875 cum->mmx_nregs -= 1;
3876 cum->mmx_regno += 1;
3877 if (cum->mmx_nregs <= 0)
3878 {
3879 cum->mmx_nregs = 0;
3880 cum->mmx_regno = 0;
3881 }
3882 }
3883 break;
3884 }
3885 }
3886}
3887
3888/* Define where to put the arguments to a function.
3889 Value is zero to push the argument on the stack,
3890 or a hard register in which to store the argument.
3891
3892 MODE is the argument's machine mode.
3893 TYPE is the data type of the argument (as a tree).
3894 This is null for libcalls where that information may
3895 not be available.
3896 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3897 the preceding args and about the function being called.
3898 NAMED is nonzero if this argument is a named parameter
3899 (otherwise it is an extra parameter matching an ellipsis). */
3900
3901rtx
3902function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3903 tree type, int named)
3904{
3905 enum machine_mode mode = orig_mode;
3906 rtx ret = NULL_RTX;
3907 int bytes =
3908 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3909 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3910 static bool warnedsse, warnedmmx;
3911
3912 /* To simplify the code below, represent vector types with a vector mode
3913 even if MMX/SSE are not active. */
3914 if (type && TREE_CODE (type) == VECTOR_TYPE)
3915 mode = type_natural_mode (type);
3916
3917 /* Handle a hidden AL argument containing number of registers for varargs
3918 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3919 any AL settings. */
3920 if (mode == VOIDmode)
3921 {
3922 if (TARGET_64BIT)
3923 return GEN_INT (cum->maybe_vaarg
3924 ? (cum->sse_nregs < 0
3925 ? SSE_REGPARM_MAX
3926 : cum->sse_regno)
3927 : -1);
3928 else
3929 return constm1_rtx;
3930 }
3931 if (TARGET_64BIT)
3932 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3933 cum->sse_nregs,
3934 &x86_64_int_parameter_registers [cum->regno],
3935 cum->sse_regno);
3936 else
3937 switch (mode)
3938 {
3939 /* For now, pass fp/complex values on the stack. */
3940 default:
3941 break;
3942
3943 case BLKmode:
3944 if (bytes < 0)
3945 break;
3946 /* FALLTHRU */
3947 case DImode:
3948 case SImode:
3949 case HImode:
3950 case QImode:
3951 if (words <= cum->nregs)
3952 {
3953 int regno = cum->regno;
3954
3955 /* Fastcall allocates the first two DWORD (SImode) or
3956 smaller arguments to ECX and EDX. */
3957 if (cum->fastcall)
3958 {
3959 if (mode == BLKmode || mode == DImode)
3960 break;
3961
3962	      /* ECX, not EAX, is the first allocated register.  */
3963 if (regno == 0)
3964 regno = 2;
3965 }
3966 ret = gen_rtx_REG (mode, regno);
3967 }
3968 break;
3969 case DFmode:
3970 if (cum->float_in_sse < 2)
3971 break;
3972 case SFmode:
3973 if (cum->float_in_sse < 1)
3974 break;
3975 /* FALLTHRU */
3976 case TImode:
3977 case V16QImode:
3978 case V8HImode:
3979 case V4SImode:
3980 case V2DImode:
3981 case V4SFmode:
3982 case V2DFmode:
3983 if (!type || !AGGREGATE_TYPE_P (type))
3984 {
3985 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3986 {
3987 warnedsse = true;
3988 warning (0, "SSE vector argument without SSE enabled "
3989 "changes the ABI");
3990 }
3991 if (cum->sse_nregs)
3992 ret = gen_reg_or_parallel (mode, orig_mode,
3993 cum->sse_regno + FIRST_SSE_REG);
3994 }
3995 break;
3996 case V8QImode:
3997 case V4HImode:
3998 case V2SImode:
3999 case V2SFmode:
4000 if (!type || !AGGREGATE_TYPE_P (type))
4001 {
4002 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4003 {
4004 warnedmmx = true;
4005 warning (0, "MMX vector argument without MMX enabled "
4006 "changes the ABI");
4007 }
4008 if (cum->mmx_nregs)
4009 ret = gen_reg_or_parallel (mode, orig_mode,
4010 cum->mmx_regno + FIRST_MMX_REG);
4011 }
4012 break;
4013 }
4014
4015 if (TARGET_DEBUG_ARG)
4016 {
4017 fprintf (stderr,
4018 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4019 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4020
4021 if (ret)
4022 print_simple_rtl (stderr, ret);
4023 else
4024 fprintf (stderr, ", stack");
4025
4026 fprintf (stderr, " )\n");
4027 }
4028
4029 return ret;
4030}
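/* Illustrative sketch, not part of the implementation: 32-bit fastcall
   register assignment as handled above (hypothetical declaration):

     int __attribute__((fastcall)) f (int a, int b, int c);
         a arrives in %ecx, b in %edx and c on the stack; a DImode or
         BLKmode argument would fall through to the stack instead.  */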
4031
4032/* A C expression that indicates when an argument must be passed by
4033 reference. If nonzero for an argument, a copy of that argument is
4034 made in memory and a pointer to the argument is passed instead of
4035 the argument itself. The pointer is passed in whatever way is
4036 appropriate for passing a pointer to that type. */
4037
4038static bool
4039ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4040 enum machine_mode mode ATTRIBUTE_UNUSED,
4041 tree type, bool named ATTRIBUTE_UNUSED)
4042{
4043 if (!TARGET_64BIT)
4044 return 0;
4045
4046 if (type && int_size_in_bytes (type) == -1)
4047 {
4048 if (TARGET_DEBUG_ARG)
4049 fprintf (stderr, "function_arg_pass_by_reference\n");
4050 return 1;
4051 }
4052
4053 return 0;
4054}
4055
4056/* Return true when TYPE should be 128bit aligned for 32bit argument passing
4057 ABI. Only called if TARGET_SSE. */
4058static bool
4059contains_128bit_aligned_vector_p (tree type)
4060{
4061 enum machine_mode mode = TYPE_MODE (type);
4062 if (SSE_REG_MODE_P (mode)
4063 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4064 return true;
4065 if (TYPE_ALIGN (type) < 128)
4066 return false;
4067
4068 if (AGGREGATE_TYPE_P (type))
4069 {
4070 /* Walk the aggregates recursively. */
4071 switch (TREE_CODE (type))
4072 {
4073 case RECORD_TYPE:
4074 case UNION_TYPE:
4075 case QUAL_UNION_TYPE:
4076 {
4077 tree field;
4078
4079 if (TYPE_BINFO (type))
4080 {
4081 tree binfo, base_binfo;
4082 int i;
4083
4084 for (binfo = TYPE_BINFO (type), i = 0;
4085 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
4086 if (contains_128bit_aligned_vector_p
4087 (BINFO_TYPE (base_binfo)))
4088 return true;
4089 }
4090	    /* And now check the fields of the structure.  */
4091 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4092 {
4093 if (TREE_CODE (field) == FIELD_DECL
4094 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4095 return true;
4096 }
4097 break;
4098 }
4099
4100 case ARRAY_TYPE:
4101	/* Just for use if some languages pass arrays by value.  */
4102 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4103 return true;
4104 break;
4105
4106 default:
4107 gcc_unreachable ();
4108 }
4109 }
4110 return false;
4111}
4112
4113/* Gives the alignment boundary, in bits, of an argument with the
4114 specified mode and type. */
4115
4116int
4117ix86_function_arg_boundary (enum machine_mode mode, tree type)
4118{
4119 int align;
4120 if (type)
4121 align = TYPE_ALIGN (type);
4122 else
4123 align = GET_MODE_ALIGNMENT (mode);
4124 if (align < PARM_BOUNDARY)
4125 align = PARM_BOUNDARY;
4126 if (!TARGET_64BIT)
4127 {
4128 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4129 make an exception for SSE modes since these require 128bit
4130 alignment.
4131
4132 The handling here differs from field_alignment. ICC aligns MMX
4133 arguments to 4 byte boundaries, while structure fields are aligned
4134 to 8 byte boundaries. */
4135 if (!TARGET_SSE)
4136 align = PARM_BOUNDARY;
4137 else if (!type)
4138 {
4139 if (!SSE_REG_MODE_P (mode))
4140 align = PARM_BOUNDARY;
4141 }
4142 else
4143 {
4144 if (!contains_128bit_aligned_vector_p (type))
4145 align = PARM_BOUNDARY;
4146 }
4147 }
4148 if (align > 128)
4149 align = 128;
4150 return align;
4151}
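/* Illustrative sketch, not part of the implementation: on a 32-bit target
   with SSE enabled, a 128-bit vector argument gets a 16-byte boundary while
   scalar arguments keep the 4-byte PARM_BOUNDARY.  Hypothetical typedef:

     typedef float v4sf __attribute__((vector_size (16)));
     void f (int a, v4sf b);
         a is aligned to 4 bytes on the stack, b to 16 bytes; with -mno-sse
         everything falls back to PARM_BOUNDARY.  */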
4152
4153/* Return true if N is a possible register number of function value. */
4154bool
4155ix86_function_value_regno_p (int regno)
4156{
4157 if (TARGET_MACHO)
4158 {
4159 if (!TARGET_64BIT)
4160 {
4161 return ((regno) == 0
4162 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4163 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
4164 }
4165 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
4166 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
4167 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
4168 }
4169 else
4170 {
4171 if (regno == 0
4172 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4173 || (regno == FIRST_SSE_REG && TARGET_SSE))
4174 return true;
4175
4176 if (!TARGET_64BIT
4177 && (regno == FIRST_MMX_REG && TARGET_MMX))
4178 return true;
4179
4180 return false;
4181 }
4182}
4183
4184/* Define how to find the value returned by a function.
4185 VALTYPE is the data type of the value (as a tree).
4186 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4187 otherwise, FUNC is 0. */
4188rtx
4189ix86_function_value (tree valtype, tree fntype_or_decl,
4190 bool outgoing ATTRIBUTE_UNUSED)
4191{
4192 enum machine_mode natmode = type_natural_mode (valtype);
4193
4194 if (TARGET_64BIT)
4195 {
4196 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4197 1, REGPARM_MAX, SSE_REGPARM_MAX,
4198 x86_64_int_return_registers, 0);
4199      /* For zero-sized structures, construct_container returns NULL, but we
4200	 need to keep the rest of the compiler happy by returning a meaningful value.  */
4201 if (!ret)
4202 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4203 return ret;
4204 }
4205 else
4206 {
4207 tree fn = NULL_TREE, fntype;
4208 if (fntype_or_decl
4209 && DECL_P (fntype_or_decl))
4210 fn = fntype_or_decl;
4211 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4212 return gen_rtx_REG (TYPE_MODE (valtype),
4213 ix86_value_regno (natmode, fn, fntype));
4214 }
4215}
4216
4217/* Return true iff type is returned in memory. */
4218int
4219ix86_return_in_memory (tree type)
4220{
4221 int needed_intregs, needed_sseregs, size;
4222 enum machine_mode mode = type_natural_mode (type);
4223
4224 if (TARGET_64BIT)
4225 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4226
4227 if (mode == BLKmode)
4228 return 1;
4229
4230 size = int_size_in_bytes (type);
4231
4232 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4233 return 0;
4234
4235 if (VECTOR_MODE_P (mode) || mode == TImode)
4236 {
4237 /* User-created vectors small enough to fit in EAX. */
4238 if (size < 8)
4239 return 0;
4240
4241 /* MMX/3dNow values are returned in MM0,
4242	 except when it doesn't exist.  */
4243 if (size == 8)
4244 return (TARGET_MMX ? 0 : 1);
4245
4246 /* SSE values are returned in XMM0, except when it doesn't exist. */
4247 if (size == 16)
4248 return (TARGET_SSE ? 0 : 1);
4249 }
4250
4251 if (mode == XFmode)
4252 return 0;
4253
4254 if (mode == TDmode)
4255 return 1;
4256
4257 if (size > 12)
4258 return 1;
4259 return 0;
4260}
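/* Illustrative sketch, not part of the implementation: consequences of the
   rules above on a 32-bit target (hypothetical typedefs):

     typedef float v4sf __attribute__((vector_size (16)));
         returned in %xmm0 when SSE is enabled, otherwise in memory.

     typedef int v2si __attribute__((vector_size (8)));
         returned in %mm0 when MMX is enabled, otherwise in memory.

     long double (XFmode) is never forced into memory by this function;
     it is returned in %st(0).  */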
4261
4262/* When returning SSE vector types, we have a choice of either
4263 (1) being abi incompatible with a -march switch, or
4264 (2) generating an error.
4265 Given no good solution, I think the safest thing is one warning.
4266 The user won't be able to use -Werror, but....
4267
4268 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4269 called in response to actually generating a caller or callee that
4270 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4271 via aggregate_value_p for general type probing from tree-ssa. */
4272
4273static rtx
4274ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4275{
4276 static bool warnedsse, warnedmmx;
4277
4278 if (type)
4279 {
4280 /* Look at the return type of the function, not the function type. */
4281 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4282
4283 if (!TARGET_SSE && !warnedsse)
4284 {
4285 if (mode == TImode
4286 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4287 {
4288 warnedsse = true;
4289 warning (0, "SSE vector return without SSE enabled "
4290 "changes the ABI");
4291 }
4292 }
4293
4294 if (!TARGET_MMX && !warnedmmx)
4295 {
4296 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4297 {
4298 warnedmmx = true;
4299 warning (0, "MMX vector return without MMX enabled "
4300 "changes the ABI");
4301 }
4302 }
4303 }
4304
4305 return NULL;
4306}
4307
4308/* Define how to find the value returned by a library function
4309 assuming the value has mode MODE. */
4310rtx
4311ix86_libcall_value (enum machine_mode mode)
4312{
4313 if (TARGET_64BIT)
4314 {
4315 switch (mode)
4316 {
4317 case SFmode:
4318 case SCmode:
4319 case DFmode:
4320 case DCmode:
4321 case TFmode:
4322 case SDmode:
4323 case DDmode:
4324 case TDmode:
4325 return gen_rtx_REG (mode, FIRST_SSE_REG);
4326 case XFmode:
4327 case XCmode:
4328 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4329 case TCmode:
4330 return NULL;
4331 default:
4332 return gen_rtx_REG (mode, 0);
4333 }
4334 }
4335 else
4336 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4337}
4338
4339/* Given a mode, return the register to use for a return value. */
4340
4341static int
4342ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4343{
4344 gcc_assert (!TARGET_64BIT);
4345
4346 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4347 we normally prevent this case when mmx is not available. However
4348 some ABIs may require the result to be returned like DImode. */
4349 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4350 return TARGET_MMX ? FIRST_MMX_REG : 0;
4351
4352 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4353 we prevent this case when sse is not available. However some ABIs
4354 may require the result to be returned like integer TImode. */
4355 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4356 return TARGET_SSE ? FIRST_SSE_REG : 0;
4357
4358 /* Decimal floating point values can go in %eax, unlike other float modes. */
4359 if (DECIMAL_FLOAT_MODE_P (mode))
4360 return 0;
4361
4362 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4363 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4364 return 0;
4365
4366 /* Floating point return values in %st(0), except for local functions when
4367 SSE math is enabled or for functions with sseregparm attribute. */
4368 if ((func || fntype)
4369 && (mode == SFmode || mode == DFmode))
4370 {
4371 int sse_level = ix86_function_sseregparm (fntype, func);
4372 if ((sse_level >= 1 && mode == SFmode)
4373 || (sse_level == 2 && mode == DFmode))
4374 return FIRST_SSE_REG;
4375 }
4376
4377 return FIRST_FLOAT_REG;
4378}
4379
4380/* Create the va_list data type. */
4381
4382static tree
4383ix86_build_builtin_va_list (void)
4384{
4385 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4386
4387 /* For i386 we use plain pointer to argument area. */
4388 if (!TARGET_64BIT)
4389 return build_pointer_type (char_type_node);
4390
4391 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4392 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4393
4394 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4395 unsigned_type_node);
4396 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4397 unsigned_type_node);
4398 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4399 ptr_type_node);
4400 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4401 ptr_type_node);
4402
4403 va_list_gpr_counter_field = f_gpr;
4404 va_list_fpr_counter_field = f_fpr;
4405
4406 DECL_FIELD_CONTEXT (f_gpr) = record;
4407 DECL_FIELD_CONTEXT (f_fpr) = record;
4408 DECL_FIELD_CONTEXT (f_ovf) = record;
4409 DECL_FIELD_CONTEXT (f_sav) = record;
4410
4411 TREE_CHAIN (record) = type_decl;
4412 TYPE_NAME (record) = type_decl;
4413 TYPE_FIELDS (record) = f_gpr;
4414 TREE_CHAIN (f_gpr) = f_fpr;
4415 TREE_CHAIN (f_fpr) = f_ovf;
4416 TREE_CHAIN (f_ovf) = f_sav;
4417
4418 layout_type (record);
4419
4420 /* The correct type is an array type of one element. */
4421 return build_array_type (record, build_index_type (size_zero_node));
4422}
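/* Illustrative sketch, not part of the implementation: the record built
   above corresponds to the familiar x86-64 va_list layout, roughly

     typedef struct __va_list_tag {
       unsigned int gp_offset;      offset into reg_save_area for integer regs
       unsigned int fp_offset;      offset into reg_save_area for SSE regs
       void *overflow_arg_area;     next stack-passed argument
       void *reg_save_area;         register save block set up by the prologue
     } va_list[1];

   while the 32-bit va_list stays a plain "char *".  */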
4423
4424/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4425
4426static void
4427ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4428 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4429 int no_rtl)
4430{
4431 CUMULATIVE_ARGS next_cum;
4432 rtx save_area = NULL_RTX, mem;
4433 rtx label;
4434 rtx label_ref;
4435 rtx tmp_reg;
4436 rtx nsse_reg;
4437 int set;
4438 tree fntype;
4439 int stdarg_p;
4440 int i;
4441
4442 if (!TARGET_64BIT)
4443 return;
4444
4445 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4446 return;
4447
4448 /* Indicate to allocate space on the stack for varargs save area. */
4449 ix86_save_varrargs_registers = 1;
4450
4451 cfun->stack_alignment_needed = 128;
4452
4453 fntype = TREE_TYPE (current_function_decl);
4454 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4455 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4456 != void_type_node));
4457
4458 /* For varargs, we do not want to skip the dummy va_dcl argument.
4459 For stdargs, we do want to skip the last named argument. */
4460 next_cum = *cum;
4461 if (stdarg_p)
4462 function_arg_advance (&next_cum, mode, type, 1);
4463
4464 if (!no_rtl)
4465 save_area = frame_pointer_rtx;
4466
4467 set = get_varargs_alias_set ();
4468
4469 for (i = next_cum.regno;
4470 i < ix86_regparm
4471 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4472 i++)
4473 {
4474 mem = gen_rtx_MEM (Pmode,
4475 plus_constant (save_area, i * UNITS_PER_WORD));
4476 MEM_NOTRAP_P (mem) = 1;
4477 set_mem_alias_set (mem, set);
4478 emit_move_insn (mem, gen_rtx_REG (Pmode,
4479 x86_64_int_parameter_registers[i]));
4480 }
4481
4482 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4483 {
4484      /* Now emit code to save SSE registers.  The AX parameter contains the
4485	 number of SSE parameter registers used to call this function.  We use
4486	 the sse_prologue_save insn template, which produces a computed jump
4487	 across the SSE saves.  Some preparation work is needed to get this working.  */
4488
4489 label = gen_label_rtx ();
4490 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4491
4492      /* Compute the address to jump to:
4493	 label - eax*4 + nnamed_sse_arguments*4.  */
4494 tmp_reg = gen_reg_rtx (Pmode);
4495 nsse_reg = gen_reg_rtx (Pmode);
4496 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4497 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4498 gen_rtx_MULT (Pmode, nsse_reg,
4499 GEN_INT (4))));
4500 if (next_cum.sse_regno)
4501 emit_move_insn
4502 (nsse_reg,
4503 gen_rtx_CONST (DImode,
4504 gen_rtx_PLUS (DImode,
4505 label_ref,
4506 GEN_INT (next_cum.sse_regno * 4))));
4507 else
4508 emit_move_insn (nsse_reg, label_ref);
4509 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4510
4511      /* Compute the address of the memory block we save into.  We always use
4512	 a pointer pointing 127 bytes after the first byte to store - this is
4513	 needed to keep the instruction size limited to 4 bytes.  */
4514 tmp_reg = gen_reg_rtx (Pmode);
4515 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4516 plus_constant (save_area,
4517 8 * REGPARM_MAX + 127)));
4518 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4519 MEM_NOTRAP_P (mem) = 1;
4520 set_mem_alias_set (mem, set);
4521 set_mem_align (mem, BITS_PER_WORD);
4522
4523 /* And finally do the dirty job! */
4524 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4525 GEN_INT (next_cum.sse_regno), label));
4526 }
4527
4528}
4529
4530/* Implement va_start. */
4531
4532void
4533ix86_va_start (tree valist, rtx nextarg)
4534{
4535 HOST_WIDE_INT words, n_gpr, n_fpr;
4536 tree f_gpr, f_fpr, f_ovf, f_sav;
4537 tree gpr, fpr, ovf, sav, t;
4538 tree type;
4539
4540 /* Only 64bit target needs something special. */
4541 if (!TARGET_64BIT)
4542 {
4543 std_expand_builtin_va_start (valist, nextarg);
4544 return;
4545 }
4546
4547 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4548 f_fpr = TREE_CHAIN (f_gpr);
4549 f_ovf = TREE_CHAIN (f_fpr);
4550 f_sav = TREE_CHAIN (f_ovf);
4551
4552 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4553 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4554 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4555 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4556 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4557
4558 /* Count number of gp and fp argument registers used. */
4559 words = current_function_args_info.words;
4560 n_gpr = current_function_args_info.regno;
4561 n_fpr = current_function_args_info.sse_regno;
4562
4563 if (TARGET_DEBUG_ARG)
4564 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4565 (int) words, (int) n_gpr, (int) n_fpr);
4566
4567 if (cfun->va_list_gpr_size)
4568 {
4569 type = TREE_TYPE (gpr);
4570 t = build2 (MODIFY_EXPR, type, gpr,
4571 build_int_cst (type, n_gpr * 8));
4572 TREE_SIDE_EFFECTS (t) = 1;
4573 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4574 }
4575
4576 if (cfun->va_list_fpr_size)
4577 {
4578 type = TREE_TYPE (fpr);
4579 t = build2 (MODIFY_EXPR, type, fpr,
4580 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4581 TREE_SIDE_EFFECTS (t) = 1;
4582 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4583 }
4584
4585 /* Find the overflow area. */
4586 type = TREE_TYPE (ovf);
4587 t = make_tree (type, virtual_incoming_args_rtx);
4588 if (words != 0)
4589 t = build2 (PLUS_EXPR, type, t,
4590 build_int_cst (type, words * UNITS_PER_WORD));
4591 t = build2 (MODIFY_EXPR, type, ovf, t);
4592 TREE_SIDE_EFFECTS (t) = 1;
4593 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4594
4595 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4596 {
4597 /* Find the register save area.
4598	 The function prologue saves it right above the stack frame.  */
4599 type = TREE_TYPE (sav);
4600 t = make_tree (type, frame_pointer_rtx);
4601 t = build2 (MODIFY_EXPR, type, sav, t);
4602 TREE_SIDE_EFFECTS (t) = 1;
4603 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4604 }
4605}
4606
4607/* Implement va_arg. */
4608
4609tree
4610ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4611{
4612 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4613 tree f_gpr, f_fpr, f_ovf, f_sav;
4614 tree gpr, fpr, ovf, sav, t;
4615 int size, rsize;
4616 tree lab_false, lab_over = NULL_TREE;
4617 tree addr, t2;
4618 rtx container;
4619 int indirect_p = 0;
4620 tree ptrtype;
4621 enum machine_mode nat_mode;
4622
4623 /* Only 64bit target needs something special. */
4624 if (!TARGET_64BIT)
4625 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4626
4627 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4628 f_fpr = TREE_CHAIN (f_gpr);
4629 f_ovf = TREE_CHAIN (f_fpr);
4630 f_sav = TREE_CHAIN (f_ovf);
4631
4632 valist = build_va_arg_indirect_ref (valist);
4633 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4634 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4635 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4636 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4637
4638 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4639 if (indirect_p)
4640 type = build_pointer_type (type);
4641 size = int_size_in_bytes (type);
4642 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4643
4644 nat_mode = type_natural_mode (type);
4645 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4646 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4647
4648 /* Pull the value out of the saved registers. */
4649
4650 addr = create_tmp_var (ptr_type_node, "addr");
4651 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4652
4653 if (container)
4654 {
4655 int needed_intregs, needed_sseregs;
4656 bool need_temp;
4657 tree int_addr, sse_addr;
4658
4659 lab_false = create_artificial_label ();
4660 lab_over = create_artificial_label ();
4661
4662 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4663
4664 need_temp = (!REG_P (container)
4665 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4666 || TYPE_ALIGN (type) > 128));
4667
4668      /* If we are passing a structure, verify that it occupies a consecutive
4669	 block of the register save area.  If not, we need to do moves.  */
4670 if (!need_temp && !REG_P (container))
4671 {
4672 /* Verify that all registers are strictly consecutive */
4673 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4674 {
4675 int i;
4676
4677 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4678 {
4679 rtx slot = XVECEXP (container, 0, i);
4680 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4681 || INTVAL (XEXP (slot, 1)) != i * 16)
4682 need_temp = 1;
4683 }
4684 }
4685 else
4686 {
4687 int i;
4688
4689 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4690 {
4691 rtx slot = XVECEXP (container, 0, i);
4692 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4693 || INTVAL (XEXP (slot, 1)) != i * 8)
4694 need_temp = 1;
4695 }
4696 }
4697 }
4698 if (!need_temp)
4699 {
4700 int_addr = addr;
4701 sse_addr = addr;
4702 }
4703 else
4704 {
4705 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4706 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4707 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4708 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4709 }
4710
4711 /* First ensure that we fit completely in registers. */
4712 if (needed_intregs)
4713 {
4714 t = build_int_cst (TREE_TYPE (gpr),
4715 (REGPARM_MAX - needed_intregs + 1) * 8);
4716 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4717 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4718 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4719 gimplify_and_add (t, pre_p);
4720 }
4721 if (needed_sseregs)
4722 {
4723 t = build_int_cst (TREE_TYPE (fpr),
4724 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4725 + REGPARM_MAX * 8);
4726 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4727 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4728 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4729 gimplify_and_add (t, pre_p);
4730 }
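      /* For illustration, assuming REGPARM_MAX == 6: for a type that needs
	 two integer registers, the jump to lab_false (the overflow path) is
	 taken when gpr >= (6 - 2 + 1) * 8 == 40, i.e. whenever fewer than two
	 of the six 8-byte GP slots remain unused.  The SSE test works the same
	 way with 16-byte slots that start at offset 8 * REGPARM_MAX.  */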
4731
4732 /* Compute index to start of area used for integer regs. */
4733 if (needed_intregs)
4734 {
4735 /* int_addr = gpr + sav; */
4736 t = fold_convert (ptr_type_node, gpr);
4737 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4738 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4739 gimplify_and_add (t, pre_p);
4740 }
4741 if (needed_sseregs)
4742 {
4743 /* sse_addr = fpr + sav; */
4744 t = fold_convert (ptr_type_node, fpr);
4745 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4746 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4747 gimplify_and_add (t, pre_p);
4748 }
4749 if (need_temp)
4750 {
4751 int i;
4752 tree temp = create_tmp_var (type, "va_arg_tmp");
4753
4754 /* addr = &temp; */
4755 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4756 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4757 gimplify_and_add (t, pre_p);
4758
4759 for (i = 0; i < XVECLEN (container, 0); i++)
4760 {
4761 rtx slot = XVECEXP (container, 0, i);
4762 rtx reg = XEXP (slot, 0);
4763 enum machine_mode mode = GET_MODE (reg);
4764 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4765 tree addr_type = build_pointer_type (piece_type);
4766 tree src_addr, src;
4767 int src_offset;
4768 tree dest_addr, dest;
4769
4770 if (SSE_REGNO_P (REGNO (reg)))
4771 {
4772 src_addr = sse_addr;
4773 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4774 }
4775 else
4776 {
4777 src_addr = int_addr;
4778 src_offset = REGNO (reg) * 8;
4779 }
4780 src_addr = fold_convert (addr_type, src_addr);
4781 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4782 size_int (src_offset)));
4783 src = build_va_arg_indirect_ref (src_addr);
4784
4785 dest_addr = fold_convert (addr_type, addr);
4786 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4787 size_int (INTVAL (XEXP (slot, 1)))));
4788 dest = build_va_arg_indirect_ref (dest_addr);
4789
4790 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4791 gimplify_and_add (t, pre_p);
4792 }
4793 }
4794
4795 if (needed_intregs)
4796 {
4797 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4798 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4799 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4800 gimplify_and_add (t, pre_p);
4801 }
4802 if (needed_sseregs)
4803 {
4804 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4805 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4806 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4807 gimplify_and_add (t, pre_p);
4808 }
4809
4810 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4811 gimplify_and_add (t, pre_p);
4812
4813 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4814 append_to_statement_list (t, pre_p);
4815 }
4816
4817 /* ... otherwise out of the overflow area. */
4818
4819 /* Care for on-stack alignment if needed. */
4820 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4821 || integer_zerop (TYPE_SIZE (type)))
4822 t = ovf;
4823 else
4824 {
4825 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4826 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4827 build_int_cst (TREE_TYPE (ovf), align - 1));
4828 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4829 build_int_cst (TREE_TYPE (t), -align));
4830 }
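  /* For illustration: for a 16-byte aligned type, align == 16, so an
     overflow pointer of e.g. 0x28 becomes (0x28 + 15) & -16 == 0x30.
     The mask -align has zeros only in its low log2(align) bits, so the
     AND rounds the biased value down to the next multiple of align.  */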
4831 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4832
4833 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4834 gimplify_and_add (t2, pre_p);
4835
4836 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4837 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4838 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4839 gimplify_and_add (t, pre_p);
4840
4841 if (container)
4842 {
4843 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4844 append_to_statement_list (t, pre_p);
4845 }
4846
4847 ptrtype = build_pointer_type (type);
4848 addr = fold_convert (ptrtype, addr);
4849
4850 if (indirect_p)
4851 addr = build_va_arg_indirect_ref (addr);
4852 return build_va_arg_indirect_ref (addr);
4853}
4854
4855/* Return nonzero if OPNUM's MEM should be matched
4856 in movabs* patterns. */
4857
4858int
4859ix86_check_movabs (rtx insn, int opnum)
4860{
4861 rtx set, mem;
4862
4863 set = PATTERN (insn);
4864 if (GET_CODE (set) == PARALLEL)
4865 set = XVECEXP (set, 0, 0);
4866 gcc_assert (GET_CODE (set) == SET);
4867 mem = XEXP (set, opnum);
4868 while (GET_CODE (mem) == SUBREG)
4869 mem = SUBREG_REG (mem);
4870 gcc_assert (GET_CODE (mem) == MEM);
4871 return (volatile_ok || !MEM_VOLATILE_P (mem));
4872}
4873
4874/* Initialize the table of extra 80387 mathematical constants. */
4875
4876static void
4877init_ext_80387_constants (void)
4878{
4879 static const char * cst[5] =
4880 {
4881 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4882 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4883 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4884 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4885 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4886 };
4887 int i;
4888
4889 for (i = 0; i < 5; i++)
4890 {
4891 real_from_string (&ext_80387_constants_table[i], cst[i]);
4892 /* Ensure each constant is rounded to XFmode precision. */
4893 real_convert (&ext_80387_constants_table[i],
4894 XFmode, &ext_80387_constants_table[i]);
4895 }
4896
4897 ext_80387_constants_init = 1;
4898}
4899
4900/* Return a code for the special instruction with which the constant X
4901   can be loaded, 0 if there is none, or -1 if X is not a float constant.  */
4902
4903int
4904standard_80387_constant_p (rtx x)
4905{
4906 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4907 return -1;
4908
4909 if (x == CONST0_RTX (GET_MODE (x)))
4910 return 1;
4911 if (x == CONST1_RTX (GET_MODE (x)))
4912 return 2;
4913
4914 /* For XFmode constants, try to find a special 80387 instruction when
4915 optimizing for size or on those CPUs that benefit from them. */
4916 if (GET_MODE (x) == XFmode
4917 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4918 {
4919 REAL_VALUE_TYPE r;
4920 int i;
4921
4922 if (! ext_80387_constants_init)
4923 init_ext_80387_constants ();
4924
4925 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4926 for (i = 0; i < 5; i++)
4927 if (real_identical (&r, &ext_80387_constants_table[i]))
4928 return i + 3;
4929 }
4930
4931 return 0;
4932}
4933
4934/* Return the opcode of the special instruction to be used to load
4935 the constant X. */
4936
4937const char *
4938standard_80387_constant_opcode (rtx x)
4939{
4940 switch (standard_80387_constant_p (x))
4941 {
4942 case 1:
4943 return "fldz";
4944 case 2:
4945 return "fld1";
4946 case 3:
4947 return "fldlg2";
4948 case 4:
4949 return "fldln2";
4950 case 5:
4951 return "fldl2e";
4952 case 6:
4953 return "fldl2t";
4954 case 7:
4955 return "fldpi";
4956 default:
4957 gcc_unreachable ();
4958 }
4959}
4960
4961/* Return the CONST_DOUBLE representing the 80387 constant that is
4962 loaded by the specified special instruction. The argument IDX
4963 matches the return value from standard_80387_constant_p. */
4964
4965rtx
4966standard_80387_constant_rtx (int idx)
4967{
4968 int i;
4969
4970 if (! ext_80387_constants_init)
4971 init_ext_80387_constants ();
4972
4973 switch (idx)
4974 {
4975 case 3:
4976 case 4:
4977 case 5:
4978 case 6:
4979 case 7:
4980 i = idx - 3;
4981 break;
4982
4983 default:
4984 gcc_unreachable ();
4985 }
4986
4987 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4988 XFmode);
4989}
4990
4991/* Return 1 if mode is a valid mode for sse. */
4992static int
4993standard_sse_mode_p (enum machine_mode mode)
4994{
4995 switch (mode)
4996 {
4997 case V16QImode:
4998 case V8HImode:
4999 case V4SImode:
5000 case V2DImode:
5001 case V4SFmode:
5002 case V2DFmode:
5003 return 1;
5004
5005 default:
5006 return 0;
5007 }
5008}
5009
5010/* Return nonzero if X is a constant we can load into an SSE register
5011   without going through memory.  */
5012int
5013standard_sse_constant_p (rtx x)
5014{
5015 enum machine_mode mode = GET_MODE (x);
5016
5017 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5018 return 1;
5019 if (vector_all_ones_operand (x, mode)
5020 && standard_sse_mode_p (mode))
5021 return TARGET_SSE2 ? 2 : -1;
5022
5023 return 0;
5024}
5025
5026/* Return the opcode of the special instruction to be used to load
5027 the constant X. */
5028
5029const char *
5030standard_sse_constant_opcode (rtx insn, rtx x)
5031{
5032 switch (standard_sse_constant_p (x))
5033 {
5034 case 1:
5035 if (get_attr_mode (insn) == MODE_V4SF)
5036 return "xorps\t%0, %0";
5037 else if (get_attr_mode (insn) == MODE_V2DF)
5038 return "xorpd\t%0, %0";
5039 else
5040 return "pxor\t%0, %0";
5041 case 2:
5042 return "pcmpeqd\t%0, %0";
5043 }
5044 gcc_unreachable ();
5045}
5046
5047/* Returns 1 if OP contains a symbol reference */
5048
5049int
5050symbolic_reference_mentioned_p (rtx op)
5051{
5052 const char *fmt;
5053 int i;
5054
5055 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5056 return 1;
5057
5058 fmt = GET_RTX_FORMAT (GET_CODE (op));
5059 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5060 {
5061 if (fmt[i] == 'E')
5062 {
5063 int j;
5064
5065 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5066 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5067 return 1;
5068 }
5069
5070 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5071 return 1;
5072 }
5073
5074 return 0;
5075}
5076
5077/* Return 1 if it is appropriate to emit `ret' instructions in the
5078 body of a function. Do this only if the epilogue is simple, needing a
5079 couple of insns. Prior to reloading, we can't tell how many registers
5080 must be saved, so return 0 then. Return 0 if there is no frame
5081 marker to de-allocate. */
5082
5083int
5084ix86_can_use_return_insn_p (void)
5085{
5086 struct ix86_frame frame;
5087
5088 if (! reload_completed || frame_pointer_needed)
5089 return 0;
5090
5091  /* Don't allow more than 32k bytes of args to be popped, since that's all
5092     we can do with one instruction.  */
5093 if (current_function_pops_args
5094 && current_function_args_size >= 32768)
5095 return 0;
5096
5097 ix86_compute_frame_layout (&frame);
5098 return frame.to_allocate == 0 && frame.nregs == 0;
5099}
5100
5101/* Value should be nonzero if functions must have frame pointers.
5102 Zero means the frame pointer need not be set up (and parms may
5103 be accessed via the stack pointer) in functions that seem suitable. */
5104
5105int
5106ix86_frame_pointer_required (void)
5107{
5108 /* If we accessed previous frames, then the generated code expects
5109 to be able to access the saved ebp value in our frame. */
5110 if (cfun->machine->accesses_prev_frame)
5111 return 1;
5112
5113  /* Several x86 OSes need a frame pointer for other reasons,
5114     usually pertaining to setjmp.  */
5115 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5116 return 1;
5117
5118 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5119 the frame pointer by default. Turn it back on now if we've not
5120 got a leaf function. */
5121 if (TARGET_OMIT_LEAF_FRAME_POINTER
5122 && (!current_function_is_leaf
5123 || ix86_current_function_calls_tls_descriptor))
5124 return 1;
5125
5126 if (current_function_profile)
5127 return 1;
5128
5129 return 0;
5130}
5131
5132/* Record that the current function accesses previous call frames. */
5133
5134void
5135ix86_setup_frame_addresses (void)
5136{
5137 cfun->machine->accesses_prev_frame = 1;
5138}
5139
5140#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5141# define USE_HIDDEN_LINKONCE 1
5142#else
5143# define USE_HIDDEN_LINKONCE 0
5144#endif
5145
5146static int pic_labels_used;
5147
5148/* Fills in the label name that should be used for a pc thunk for
5149 the given register. */
5150
5151static void
5152get_pc_thunk_name (char name[32], unsigned int regno)
5153{
5154 gcc_assert (!TARGET_64BIT);
5155
5156 if (USE_HIDDEN_LINKONCE)
5157 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5158 else
5159 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5160}
5161
5162
5163/* For -fpic, this emits the pc thunks: each thunk loads its register with
5164   the return address of the caller and then returns.  */
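/* For illustration, the thunk emitted below for %ebx looks like this in
   AT&T syntax (the register, and hence the suffix of the name, varies):

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   so a "call __i686.get_pc_thunk.bx" leaves the address of the insn
   following the call in %ebx.  */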
5165
5166void
5167ix86_file_end (void)
5168{
5169 rtx xops[2];
5170 int regno;
5171
5172 for (regno = 0; regno < 8; ++regno)
5173 {
5174 char name[32];
5175
5176 if (! ((pic_labels_used >> regno) & 1))
5177 continue;
5178
5179 get_pc_thunk_name (name, regno);
5180
5181#if TARGET_MACHO
5182 if (TARGET_MACHO)
5183 {
5184 switch_to_section (darwin_sections[text_coal_section]);
5185 fputs ("\t.weak_definition\t", asm_out_file);
5186 assemble_name (asm_out_file, name);
5187 fputs ("\n\t.private_extern\t", asm_out_file);
5188 assemble_name (asm_out_file, name);
5189 fputs ("\n", asm_out_file);
5190 ASM_OUTPUT_LABEL (asm_out_file, name);
5191 }
5192 else
5193#endif
5194 if (USE_HIDDEN_LINKONCE)
5195 {
5196 tree decl;
5197
5198 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5199 error_mark_node);
5200 TREE_PUBLIC (decl) = 1;
5201 TREE_STATIC (decl) = 1;
5202 DECL_ONE_ONLY (decl) = 1;
5203
5204 (*targetm.asm_out.unique_section) (decl, 0);
5205 switch_to_section (get_named_section (decl, NULL, 0));
5206
5207 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5208 fputs ("\t.hidden\t", asm_out_file);
5209 assemble_name (asm_out_file, name);
5210 fputc ('\n', asm_out_file);
5211 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5212 }
5213 else
5214 {
5215 switch_to_section (text_section);
5216 ASM_OUTPUT_LABEL (asm_out_file, name);
5217 }
5218
5219 xops[0] = gen_rtx_REG (SImode, regno);
5220 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5221 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5222 output_asm_insn ("ret", xops);
5223 }
5224
5225 if (NEED_INDICATE_EXEC_STACK)
5226 file_end_indicate_exec_stack ();
5227}
5228
5229/* Emit code for the SET_GOT patterns. */
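/* For illustration: in the !TARGET_DEEP_BRANCH_PREDICTION, flag_pic case the
   sequence produced below is roughly (AT&T syntax; label name and register
   are illustrative):

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   With deep branch prediction the call goes to the shared pc thunk instead
   and the add uses plain $_GLOBAL_OFFSET_TABLE_.  */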
5230
5231const char *
5232output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5233{
5234 rtx xops[3];
5235
5236 xops[0] = dest;
5237 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5238
5239 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5240 {
5241 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5242
5243 if (!flag_pic)
5244 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5245 else
5246 output_asm_insn ("call\t%a2", xops);
5247
5248#if TARGET_MACHO
5249 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5250 is what will be referenced by the Mach-O PIC subsystem. */
5251 if (!label)
5252 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5253#endif
5254
5255 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5256 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5257
5258 if (flag_pic)
5259 output_asm_insn ("pop{l}\t%0", xops);
5260 }
5261 else
5262 {
5263 char name[32];
5264 get_pc_thunk_name (name, REGNO (dest));
5265 pic_labels_used |= 1 << REGNO (dest);
5266
5267 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5268 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5269 output_asm_insn ("call\t%X2", xops);
5270 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5271 is what will be referenced by the Mach-O PIC subsystem. */
5272#if TARGET_MACHO
5273 if (!label)
5274 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5275 else
5276 targetm.asm_out.internal_label (asm_out_file, "L",
5277 CODE_LABEL_NUMBER (label));
5278#endif
5279 }
5280
5281 if (TARGET_MACHO)
5282 return "";
5283
5284 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5285 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5286 else
5287 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5288
5289 return "";
5290}
5291
5292/* Generate a "push" pattern for input ARG.  */
5293
5294static rtx
5295gen_push (rtx arg)
5296{
5297 return gen_rtx_SET (VOIDmode,
5298 gen_rtx_MEM (Pmode,
5299 gen_rtx_PRE_DEC (Pmode,
5300 stack_pointer_rtx)),
5301 arg);
5302}
5303
5304/* Return >= 0 if there is an unused call-clobbered register available
5305 for the entire function. */
5306
5307static unsigned int
5308ix86_select_alt_pic_regnum (void)
5309{
5310 if (current_function_is_leaf && !current_function_profile
5311 && !ix86_current_function_calls_tls_descriptor)
5312 {
5313 int i;
5314 for (i = 2; i >= 0; --i)
5315 if (!regs_ever_live[i])
5316 return i;
5317 }
5318
5319 return INVALID_REGNUM;
5320}
5321
5322/* Return 1 if we need to save REGNO. */
5323static int
5324ix86_save_reg (unsigned int regno, int maybe_eh_return)
5325{
5326 if (pic_offset_table_rtx
5327 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5328 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5329 || current_function_profile
5330 || current_function_calls_eh_return
5331 || current_function_uses_const_pool))
5332 {
5333 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5334 return 0;
5335 return 1;
5336 }
5337
5338 if (current_function_calls_eh_return && maybe_eh_return)
5339 {
5340 unsigned i;
5341 for (i = 0; ; i++)
5342 {
5343 unsigned test = EH_RETURN_DATA_REGNO (i);
5344 if (test == INVALID_REGNUM)
5345 break;
5346 if (test == regno)
5347 return 1;
5348 }
5349 }
5350
5351 if (cfun->machine->force_align_arg_pointer
5352 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5353 return 1;
5354
5355 return (regs_ever_live[regno]
5356 && !call_used_regs[regno]
5357 && !fixed_regs[regno]
5358 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5359}
5360
5361/* Return number of registers to be saved on the stack. */
5362
5363static int
5364ix86_nsaved_regs (void)
5365{
5366 int nregs = 0;
5367 int regno;
5368
5369 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5370 if (ix86_save_reg (regno, true))
5371 nregs++;
5372 return nregs;
5373}
5374
5375/* Return the offset between two registers, one to be eliminated, and the other
5376 its replacement, at the start of a routine. */
5377
5378HOST_WIDE_INT
5379ix86_initial_elimination_offset (int from, int to)
5380{
5381 struct ix86_frame frame;
5382 ix86_compute_frame_layout (&frame);
5383
5384 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5385 return frame.hard_frame_pointer_offset;
5386 else if (from == FRAME_POINTER_REGNUM
5387 && to == HARD_FRAME_POINTER_REGNUM)
5388 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5389 else
5390 {
5391 gcc_assert (to == STACK_POINTER_REGNUM);
5392
5393 if (from == ARG_POINTER_REGNUM)
5394 return frame.stack_pointer_offset;
5395
5396 gcc_assert (from == FRAME_POINTER_REGNUM);
5397 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5398 }
5399}
5400
5401/* Fill in the ix86_frame structure FRAME for the currently compiled function.  */
5402
5403static void
5404ix86_compute_frame_layout (struct ix86_frame *frame)
5405{
5406 HOST_WIDE_INT total_size;
5407 unsigned int stack_alignment_needed;
5408 HOST_WIDE_INT offset;
5409 unsigned int preferred_alignment;
5410 HOST_WIDE_INT size = get_frame_size ();
5411
5412 frame->nregs = ix86_nsaved_regs ();
5413 total_size = size;
5414
5415 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5416 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5417
5418  /* The number of registers saved can change during reload iteration.
5419     Recompute the value as needed.  Do not recompute when the number of
5420     registers has not changed, as reload calls this function multiple times
5421     and does not expect the decision to change within a single iteration.  */
5422 if (!optimize_size
5423 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5424 {
5425 int count = frame->nregs;
5426
5427 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5428      /* The fast prologue uses move instead of push to save registers.  This
5429         is significantly longer, but also executes faster, as modern hardware
5430         can execute the moves in parallel but cannot do so for push/pop.
5431
5432         Be careful about choosing which prologue to emit:  when the function
5433         takes many instructions to execute, we may as well use the slow
5434         version, and likewise when the function is known to be outside a hot
5435         spot (which is known only with profile feedback).  Weight the size of
5436         the function by the number of registers to save, as it is cheap to use
5437         one or two push instructions but very slow to use many of them.  */
5438 if (count)
5439 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5440 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5441 || (flag_branch_probabilities
5442 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5443 cfun->machine->use_fast_prologue_epilogue = false;
5444 else
5445 cfun->machine->use_fast_prologue_epilogue
5446 = !expensive_function_p (count);
5447 }
5448 if (TARGET_PROLOGUE_USING_MOVE
5449 && cfun->machine->use_fast_prologue_epilogue)
5450 frame->save_regs_using_mov = true;
5451 else
5452 frame->save_regs_using_mov = false;
5453
5454
5455 /* Skip return address and saved base pointer. */
5456 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5457
5458 frame->hard_frame_pointer_offset = offset;
5459
5460  /* Do some sanity checking of stack_alignment_needed and
5461     preferred_alignment, since the i386 port is the only one using these
5462     features, and they may break easily.  */
5463
5464 gcc_assert (!size || stack_alignment_needed);
5465 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5466 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5467 gcc_assert (stack_alignment_needed
5468 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5469
5470 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5471 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5472
5473 /* Register save area */
5474 offset += frame->nregs * UNITS_PER_WORD;
5475
5476 /* Va-arg area */
5477 if (ix86_save_varrargs_registers)
5478 {
5479 offset += X86_64_VARARGS_SIZE;
5480 frame->va_arg_size = X86_64_VARARGS_SIZE;
5481 }
5482 else
5483 frame->va_arg_size = 0;
5484
5485 /* Align start of frame for local function. */
5486 frame->padding1 = ((offset + stack_alignment_needed - 1)
5487 & -stack_alignment_needed) - offset;
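  /* For illustration: with stack_alignment_needed == 16 and offset == 20,
     padding1 = ((20 + 15) & -16) - 20 = 32 - 20 = 12, i.e. just enough
     padding to bring the start of the local frame up to the next 16-byte
     boundary.  */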
5488
5489 offset += frame->padding1;
5490
5491 /* Frame pointer points here. */
5492 frame->frame_pointer_offset = offset;
5493
5494 offset += size;
5495
5496  /* Add the outgoing arguments area.  It can be skipped if we eliminated
5497     all the function calls as dead code.
5498     Skipping is however impossible when the function calls alloca, as the
5499     alloca expander assumes that the last current_function_outgoing_args_size
5500     bytes of the stack frame are unused.  */
5501 if (ACCUMULATE_OUTGOING_ARGS
5502 && (!current_function_is_leaf || current_function_calls_alloca
5503 || ix86_current_function_calls_tls_descriptor))
5504 {
5505 offset += current_function_outgoing_args_size;
5506 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5507 }
5508 else
5509 frame->outgoing_arguments_size = 0;
5510
5511 /* Align stack boundary. Only needed if we're calling another function
5512 or using alloca. */
5513 if (!current_function_is_leaf || current_function_calls_alloca
5514 || ix86_current_function_calls_tls_descriptor)
5515 frame->padding2 = ((offset + preferred_alignment - 1)
5516 & -preferred_alignment) - offset;
5517 else
5518 frame->padding2 = 0;
5519
5520 offset += frame->padding2;
5521
5522 /* We've reached end of stack frame. */
5523 frame->stack_pointer_offset = offset;
5524
5525 /* Size prologue needs to allocate. */
5526 frame->to_allocate =
5527 (size + frame->padding1 + frame->padding2
5528 + frame->outgoing_arguments_size + frame->va_arg_size);
5529
5530 if ((!frame->to_allocate && frame->nregs <= 1)
5531 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5532 frame->save_regs_using_mov = false;
5533
5534 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5535 && current_function_is_leaf
5536 && !ix86_current_function_calls_tls_descriptor)
5537 {
5538 frame->red_zone_size = frame->to_allocate;
5539 if (frame->save_regs_using_mov)
5540 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5541 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5542 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5543 }
5544 else
5545 frame->red_zone_size = 0;
5546 frame->to_allocate -= frame->red_zone_size;
5547 frame->stack_pointer_offset -= frame->red_zone_size;
5548#if 0
5549 fprintf (stderr, "nregs: %i\n", frame->nregs);
5550 fprintf (stderr, "size: %i\n", size);
5551 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5552 fprintf (stderr, "padding1: %i\n", frame->padding1);
5553 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5554 fprintf (stderr, "padding2: %i\n", frame->padding2);
5555 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5556 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5557 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5558 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5559 frame->hard_frame_pointer_offset);
5560 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5561#endif
5562}
5563
5564/* Emit code to save registers in the prologue. */
5565
5566static void
5567ix86_emit_save_regs (void)
5568{
5569 unsigned int regno;
5570 rtx insn;
5571
5572 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5573 if (ix86_save_reg (regno, true))
5574 {
5575 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5576 RTX_FRAME_RELATED_P (insn) = 1;
5577 }
5578}
5579
5580/* Emit code to save registers using MOV insns.  The first register
5581   is saved at POINTER + OFFSET.  */
5582static void
5583ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5584{
5585 unsigned int regno;
5586 rtx insn;
5587
5588 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5589 if (ix86_save_reg (regno, true))
5590 {
5591 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5592 Pmode, offset),
5593 gen_rtx_REG (Pmode, regno));
5594 RTX_FRAME_RELATED_P (insn) = 1;
5595 offset += UNITS_PER_WORD;
5596 }
5597}
5598
5599/* Expand prologue or epilogue stack adjustment.
5600   The pattern exists to put a dependency on all ebp-based memory accesses.
5601   STYLE should be negative if instructions should be marked as frame related,
5602   zero if the %r11 register is live and cannot be freely used, and positive
5603 otherwise. */
5604
5605static void
5606pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5607{
5608 rtx insn;
5609
5610 if (! TARGET_64BIT)
5611 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5612 else if (x86_64_immediate_operand (offset, DImode))
5613 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5614 else
5615 {
5616 rtx r11;
5617 /* r11 is used by indirect sibcall return as well, set before the
5618 epilogue and used after the epilogue. ATM indirect sibcall
5619 shouldn't be used together with huge frame sizes in one
5620 function because of the frame_size check in sibcall.c. */
5621 gcc_assert (style);
5622 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5623 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5624 if (style < 0)
5625 RTX_FRAME_RELATED_P (insn) = 1;
5626 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5627 offset));
5628 }
5629 if (style < 0)
5630 RTX_FRAME_RELATED_P (insn) = 1;
5631}
5632
5633/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5634
5635static rtx
5636ix86_internal_arg_pointer (void)
5637{
5638 bool has_force_align_arg_pointer =
5639 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5640 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5641 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5642 && DECL_NAME (current_function_decl)
5643 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5644 && DECL_FILE_SCOPE_P (current_function_decl))
5645 || ix86_force_align_arg_pointer
5646 || has_force_align_arg_pointer)
5647 {
5648 /* Nested functions can't realign the stack due to a register
5649 conflict. */
5650 if (DECL_CONTEXT (current_function_decl)
5651 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5652 {
5653 if (ix86_force_align_arg_pointer)
5654 warning (0, "-mstackrealign ignored for nested functions");
5655 if (has_force_align_arg_pointer)
5656 error ("%s not supported for nested functions",
5657 ix86_force_align_arg_pointer_string);
5658 return virtual_incoming_args_rtx;
5659 }
5660 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5661 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5662 }
5663 else
5664 return virtual_incoming_args_rtx;
5665}
5666
5667/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5668 This is called from dwarf2out.c to emit call frame instructions
5669 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5670static void
5671ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5672{
5673 rtx unspec = SET_SRC (pattern);
5674 gcc_assert (GET_CODE (unspec) == UNSPEC);
5675
5676 switch (index)
5677 {
5678 case UNSPEC_REG_SAVE:
5679 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5680 SET_DEST (pattern));
5681 break;
5682 case UNSPEC_DEF_CFA:
5683 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5684 INTVAL (XVECEXP (unspec, 0, 0)));
5685 break;
5686 default:
5687 gcc_unreachable ();
5688 }
5689}
5690
5691/* Expand the prologue into a bunch of separate insns. */
5692
5693void
5694ix86_expand_prologue (void)
5695{
5696 rtx insn;
5697 bool pic_reg_used;
5698 struct ix86_frame frame;
5699 HOST_WIDE_INT allocate;
5700
5701 ix86_compute_frame_layout (&frame);
5702
5703 if (cfun->machine->force_align_arg_pointer)
5704 {
5705 rtx x, y;
5706
5707 /* Grab the argument pointer. */
5708 x = plus_constant (stack_pointer_rtx, 4);
5709 y = cfun->machine->force_align_arg_pointer;
5710 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5711 RTX_FRAME_RELATED_P (insn) = 1;
5712
5713 /* The unwind info consists of two parts: install the fafp as the cfa,
5714 and record the fafp as the "save register" of the stack pointer.
5715         The latter is there so that the unwinder can see where it
5716         should restore the stack pointer across the 'and' insn below.  */
5717 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5718 x = gen_rtx_SET (VOIDmode, y, x);
5719 RTX_FRAME_RELATED_P (x) = 1;
5720 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5721 UNSPEC_REG_SAVE);
5722 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5723 RTX_FRAME_RELATED_P (y) = 1;
5724 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5725 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5726 REG_NOTES (insn) = x;
5727
5728 /* Align the stack. */
5729 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5730 GEN_INT (-16)));
5731
5732 /* And here we cheat like madmen with the unwind info. We force the
5733 cfa register back to sp+4, which is exactly what it was at the
5734 start of the function. Re-pushing the return address results in
5735         the return address being at the same spot relative to the cfa,
5736         and thus is correct wrt the unwind info.  */
5737 x = cfun->machine->force_align_arg_pointer;
5738 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5739 insn = emit_insn (gen_push (x));
5740 RTX_FRAME_RELATED_P (insn) = 1;
5741
5742 x = GEN_INT (4);
5743 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5744 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5745 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5746 REG_NOTES (insn) = x;
5747 }
5748
5749 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5750 slower on all targets. Also sdb doesn't like it. */
5751
5752 if (frame_pointer_needed)
5753 {
5754 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5755 RTX_FRAME_RELATED_P (insn) = 1;
5756
5757 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5758 RTX_FRAME_RELATED_P (insn) = 1;
5759 }
5760
5761 allocate = frame.to_allocate;
5762
5763 if (!frame.save_regs_using_mov)
5764 ix86_emit_save_regs ();
5765 else
5766 allocate += frame.nregs * UNITS_PER_WORD;
5767
5768  /* When using the red zone we may start saving registers before allocating
5769     the stack frame, saving one cycle of the prologue.  */
5770 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5771 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5772 : stack_pointer_rtx,
5773 -frame.nregs * UNITS_PER_WORD);
5774
5775 if (allocate == 0)
5776 ;
5777 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5778 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5779 GEN_INT (-allocate), -1);
5780 else
5781 {
5782 /* Only valid for Win32. */
5783 rtx eax = gen_rtx_REG (SImode, 0);
5784 bool eax_live = ix86_eax_live_at_start_p ();
5785 rtx t;
5786
5787 gcc_assert (!TARGET_64BIT);
5788
5789 if (eax_live)
5790 {
5791 emit_insn (gen_push (eax));
5792 allocate -= 4;
5793 }
5794
5795 emit_move_insn (eax, GEN_INT (allocate));
5796
5797 insn = emit_insn (gen_allocate_stack_worker (eax));
5798 RTX_FRAME_RELATED_P (insn) = 1;
5799 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5800 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5801 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5802 t, REG_NOTES (insn));
5803
5804 if (eax_live)
5805 {
5806 if (frame_pointer_needed)
5807 t = plus_constant (hard_frame_pointer_rtx,
5808 allocate
5809 - frame.to_allocate
5810 - frame.nregs * UNITS_PER_WORD);
5811 else
5812 t = plus_constant (stack_pointer_rtx, allocate);
5813 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5814 }
5815 }
5816
5817 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5818 {
5819 if (!frame_pointer_needed || !frame.to_allocate)
5820 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5821 else
5822 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5823 -frame.nregs * UNITS_PER_WORD);
5824 }
5825
5826 pic_reg_used = false;
5827 if (pic_offset_table_rtx
5828 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5829 || current_function_profile))
5830 {
5831 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5832
5833 if (alt_pic_reg_used != INVALID_REGNUM)
5834 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5835
5836 pic_reg_used = true;
5837 }
5838
5839 if (pic_reg_used)
5840 {
5841 if (TARGET_64BIT)
5842 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5843 else
5844 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5845
5846 /* Even with accurate pre-reload life analysis, we can wind up
5847 deleting all references to the pic register after reload.
5848 Consider if cross-jumping unifies two sides of a branch
5849 controlled by a comparison vs the only read from a global.
5850 In which case, allow the set_got to be deleted, though we're
5851 too late to do anything about the ebx save in the prologue. */
5852 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5853 }
5854
5855  /* Prevent function calls from being scheduled before the call to mcount.
5856 In the pic_reg_used case, make sure that the got load isn't deleted. */
5857 if (current_function_profile)
5858 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5859}
5860
5861/* Emit code to restore saved registers using MOV insns. First register
5862 is restored from POINTER + OFFSET. */
5863static void
5864ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5865 int maybe_eh_return)
5866{
5867 int regno;
5868 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5869
5870 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5871 if (ix86_save_reg (regno, maybe_eh_return))
5872 {
5873	  /* Ensure that adjust_address won't be forced to produce a pointer
5874	     out of the range allowed by the x86-64 instruction set.  */
5875 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5876 {
5877 rtx r11;
5878
5879 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5880 emit_move_insn (r11, GEN_INT (offset));
5881 emit_insn (gen_adddi3 (r11, r11, pointer));
5882 base_address = gen_rtx_MEM (Pmode, r11);
5883 offset = 0;
5884 }
5885 emit_move_insn (gen_rtx_REG (Pmode, regno),
5886 adjust_address (base_address, Pmode, offset));
5887 offset += UNITS_PER_WORD;
5888 }
5889}
5890
5891/* Restore function stack, frame, and registers. */
5892
5893void
5894ix86_expand_epilogue (int style)
5895{
5896 int regno;
5897 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5898 struct ix86_frame frame;
5899 HOST_WIDE_INT offset;
5900
5901 ix86_compute_frame_layout (&frame);
5902
5903 /* Calculate start of saved registers relative to ebp. Special care
5904 must be taken for the normal return case of a function using
5905 eh_return: the eax and edx registers are marked as saved, but not
5906 restored along this path. */
5907 offset = frame.nregs;
5908 if (current_function_calls_eh_return && style != 2)
5909 offset -= 2;
5910 offset *= -UNITS_PER_WORD;
5911
5912  /* If we're only restoring one register and sp is not valid, then
5913     use a move instruction to restore the register, since it's
5914     less work than reloading sp and popping the register.
5915
5916     The default code results in a stack adjustment using an add/lea
5917     instruction, while this code results in a LEAVE instruction (or discrete
5918     equivalent), so it is profitable in some other cases as well, especially
5919     when there are no registers to restore.  We also use this code when
5920     TARGET_USE_LEAVE and there is exactly one register to pop.  This
5921     heuristic may need some tuning in the future.  */
5922 if ((!sp_valid && frame.nregs <= 1)
5923 || (TARGET_EPILOGUE_USING_MOVE
5924 && cfun->machine->use_fast_prologue_epilogue
5925 && (frame.nregs > 1 || frame.to_allocate))
5926 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5927 || (frame_pointer_needed && TARGET_USE_LEAVE
5928 && cfun->machine->use_fast_prologue_epilogue
5929 && frame.nregs == 1)
5930 || current_function_calls_eh_return)
5931 {
5932 /* Restore registers. We can use ebp or esp to address the memory
5933 locations. If both are available, default to ebp, since offsets
5934         are known to be small.  The only exception is esp pointing directly
5935         to the end of the block of saved registers, where we may simplify the
5936         addressing mode.  */
5937
5938 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5939 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5940 frame.to_allocate, style == 2);
5941 else
5942 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5943 offset, style == 2);
5944
5945 /* eh_return epilogues need %ecx added to the stack pointer. */
5946 if (style == 2)
5947 {
5948 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5949
5950 if (frame_pointer_needed)
5951 {
5952 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5953 tmp = plus_constant (tmp, UNITS_PER_WORD);
5954 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5955
5956 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5957 emit_move_insn (hard_frame_pointer_rtx, tmp);
5958
5959 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5960 const0_rtx, style);
5961 }
5962 else
5963 {
5964 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5965 tmp = plus_constant (tmp, (frame.to_allocate
5966 + frame.nregs * UNITS_PER_WORD));
5967 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5968 }
5969 }
5970 else if (!frame_pointer_needed)
5971 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5972 GEN_INT (frame.to_allocate
5973 + frame.nregs * UNITS_PER_WORD),
5974 style);
5975 /* If not an i386, mov & pop is faster than "leave". */
5976 else if (TARGET_USE_LEAVE || optimize_size
5977 || !cfun->machine->use_fast_prologue_epilogue)
5978 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5979 else
5980 {
5981 pro_epilogue_adjust_stack (stack_pointer_rtx,
5982 hard_frame_pointer_rtx,
5983 const0_rtx, style);
5984 if (TARGET_64BIT)
5985 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5986 else
5987 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5988 }
5989 }
5990 else
5991 {
5992 /* First step is to deallocate the stack frame so that we can
5993 pop the registers. */
5994 if (!sp_valid)
5995 {
5996 gcc_assert (frame_pointer_needed);
5997 pro_epilogue_adjust_stack (stack_pointer_rtx,
5998 hard_frame_pointer_rtx,
5999 GEN_INT (offset), style);
6000 }
6001 else if (frame.to_allocate)
6002 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6003 GEN_INT (frame.to_allocate), style);
6004
6005 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6006 if (ix86_save_reg (regno, false))
6007 {
6008 if (TARGET_64BIT)
6009 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6010 else
6011 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6012 }
6013 if (frame_pointer_needed)
6014 {
6015 /* Leave results in shorter dependency chains on CPUs that are
6016 able to grok it fast. */
6017 if (TARGET_USE_LEAVE)
6018 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6019 else if (TARGET_64BIT)
6020 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6021 else
6022 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6023 }
6024 }
6025
6026 if (cfun->machine->force_align_arg_pointer)
6027 {
6028 emit_insn (gen_addsi3 (stack_pointer_rtx,
6029 cfun->machine->force_align_arg_pointer,
6030 GEN_INT (-4)));
6031 }
6032
6033 /* Sibcall epilogues don't want a return instruction. */
6034 if (style == 0)
6035 return;
6036
6037 if (current_function_pops_args && current_function_args_size)
6038 {
6039 rtx popc = GEN_INT (current_function_pops_args);
6040
6041 /* i386 can only pop 64K bytes. If asked to pop more, pop
6042 return address, do explicit add, and jump indirectly to the
6043 caller. */
6044
6045 if (current_function_pops_args >= 65536)
6046 {
6047 rtx ecx = gen_rtx_REG (SImode, 2);
6048
6049 /* There is no "pascal" calling convention in 64bit ABI. */
6050 gcc_assert (!TARGET_64BIT);
6051
6052 emit_insn (gen_popsi1 (ecx));
6053 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6054 emit_jump_insn (gen_return_indirect_internal (ecx));
6055 }
6056 else
6057 emit_jump_insn (gen_return_pop_internal (popc));
6058 }
6059 else
6060 emit_jump_insn (gen_return_internal ());
6061}
6062
6063/* Reset state that the function's code may have modified.  */
6064
6065static void
6066ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6067 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6068{
6069 if (pic_offset_table_rtx)
6070 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6071#if TARGET_MACHO
6072 /* Mach-O doesn't support labels at the end of objects, so if
6073 it looks like we might want one, insert a NOP. */
6074 {
6075 rtx insn = get_last_insn ();
6076 while (insn
6077 && NOTE_P (insn)
6078 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6079 insn = PREV_INSN (insn);
6080 if (insn
6081 && (LABEL_P (insn)
6082 || (NOTE_P (insn)
6083 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6084 fputs ("\tnop\n", file);
6085 }
6086#endif
6087
6088}
6089
6090/* Extract the parts of an RTL expression that is a valid memory address
6091 for an instruction. Return 0 if the structure of the address is
6092   grossly off.  Return -1 if the address contains ASHIFT, so it is not
6093   strictly valid but is still usable for computing the length of a lea insn.  */
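/* For illustration: an address such as
     (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 12))
   - i.e. 12(%ebx,%eax,4) - decomposes into base = %ebx, index = %eax,
   scale = 4, disp = 12 and seg = SEG_DEFAULT, with a return value of 1.  */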
6094
6095int
6096ix86_decompose_address (rtx addr, struct ix86_address *out)
6097{
6098 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6099 rtx base_reg, index_reg;
6100 HOST_WIDE_INT scale = 1;
6101 rtx scale_rtx = NULL_RTX;
6102 int retval = 1;
6103 enum ix86_address_seg seg = SEG_DEFAULT;
6104
6105 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
6106 base = addr;
6107 else if (GET_CODE (addr) == PLUS)
6108 {
6109 rtx addends[4], op;
6110 int n = 0, i;
6111
6112 op = addr;
6113 do
6114 {
6115 if (n >= 4)
6116 return 0;
6117 addends[n++] = XEXP (op, 1);
6118 op = XEXP (op, 0);
6119 }
6120 while (GET_CODE (op) == PLUS);
6121 if (n >= 4)
6122 return 0;
6123 addends[n] = op;
6124
6125 for (i = n; i >= 0; --i)
6126 {
6127 op = addends[i];
6128 switch (GET_CODE (op))
6129 {
6130 case MULT:
6131 if (index)
6132 return 0;
6133 index = XEXP (op, 0);
6134 scale_rtx = XEXP (op, 1);
6135 break;
6136
6137 case UNSPEC:
6138 if (XINT (op, 1) == UNSPEC_TP
6139 && TARGET_TLS_DIRECT_SEG_REFS
6140 && seg == SEG_DEFAULT)
6141 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6142 else
6143 return 0;
6144 break;
6145
6146 case REG:
6147 case SUBREG:
6148 if (!base)
6149 base = op;
6150 else if (!index)
6151 index = op;
6152 else
6153 return 0;
6154 break;
6155
6156 case CONST:
6157 case CONST_INT:
6158 case SYMBOL_REF:
6159 case LABEL_REF:
6160 if (disp)
6161 return 0;
6162 disp = op;
6163 break;
6164
6165 default:
6166 return 0;
6167 }
6168 }
6169 }
6170 else if (GET_CODE (addr) == MULT)
6171 {
6172 index = XEXP (addr, 0); /* index*scale */
6173 scale_rtx = XEXP (addr, 1);
6174 }
6175 else if (GET_CODE (addr) == ASHIFT)
6176 {
6177 rtx tmp;
6178
6179 /* We're called for lea too, which implements ashift on occasion. */
6180 index = XEXP (addr, 0);
6181 tmp = XEXP (addr, 1);
6182 if (GET_CODE (tmp) != CONST_INT)
6183 return 0;
6184 scale = INTVAL (tmp);
6185 if ((unsigned HOST_WIDE_INT) scale > 3)
6186 return 0;
6187 scale = 1 << scale;
6188 retval = -1;
6189 }
6190 else
6191 disp = addr; /* displacement */
6192
6193 /* Extract the integral value of scale. */
6194 if (scale_rtx)
6195 {
6196 if (GET_CODE (scale_rtx) != CONST_INT)
6197 return 0;
6198 scale = INTVAL (scale_rtx);
6199 }
6200
6201 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6202 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6203
6204  /* Allow the arg pointer and stack pointer as index if there is no scaling.  */
6205 if (base_reg && index_reg && scale == 1
6206 && (index_reg == arg_pointer_rtx
6207 || index_reg == frame_pointer_rtx
6208 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6209 {
6210 rtx tmp;
6211 tmp = base, base = index, index = tmp;
6212 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6213 }
6214
6215 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6216 if ((base_reg == hard_frame_pointer_rtx
6217 || base_reg == frame_pointer_rtx
6218 || base_reg == arg_pointer_rtx) && !disp)
6219 disp = const0_rtx;
6220
6221  /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
6222     Avoid this by transforming it to [%esi+0].  */
6223 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6224 && base_reg && !index_reg && !disp
6225 && REG_P (base_reg)
6226 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6227 disp = const0_rtx;
6228
6229 /* Special case: encode reg+reg instead of reg*2. */
6230 if (!base && index && scale && scale == 2)
6231 base = index, base_reg = index_reg, scale = 1;
6232
6233 /* Special case: scaling cannot be encoded without base or displacement. */
6234 if (!base && !disp && index && scale != 1)
6235 disp = const0_rtx;
6236
6237 out->base = base;
6238 out->index = index;
6239 out->disp = disp;
6240 out->scale = scale;
6241 out->seg = seg;
6242
6243 return retval;
6244}
6245
6246/* Return the cost of the memory address X.
6247   For i386, it is better to use a complex address than let gcc copy
6248   the address into a reg and make a new pseudo.  But not if the address
6249   requires two regs - that would mean more pseudos with longer
6250   lifetimes.  */
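/* For illustration (ignoring the K6-specific penalty below): an address that
   is a single pseudo register costs 1 + 1 = 2, while 12(%ebx,%eax,4) built
   from hard registers costs 1 - 1 = 0, so complex hard-register addresses
   are preferred over copying the address into a fresh pseudo.  */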
6251static int
6252ix86_address_cost (rtx x)
6253{
6254 struct ix86_address parts;
6255 int cost = 1;
6256 int ok = ix86_decompose_address (x, &parts);
6257
6258 gcc_assert (ok);
6259
6260 if (parts.base && GET_CODE (parts.base) == SUBREG)
6261 parts.base = SUBREG_REG (parts.base);
6262 if (parts.index && GET_CODE (parts.index) == SUBREG)
6263 parts.index = SUBREG_REG (parts.index);
6264
6265 /* More complex memory references are better. */
6266 if (parts.disp && parts.disp != const0_rtx)
6267 cost--;
6268 if (parts.seg != SEG_DEFAULT)
6269 cost--;
6270
6271 /* Attempt to minimize number of registers in the address. */
6272 if ((parts.base
6273 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6274 || (parts.index
6275 && (!REG_P (parts.index)
6276 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6277 cost++;
6278
6279 if (parts.base
6280 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6281 && parts.index
6282 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6283 && parts.base != parts.index)
6284 cost++;
6285
6286  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6287     since its predecode logic can't detect the length of such instructions
6288     and they degenerate to vector decoded.  Increase the cost of such
6289     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
6290     to split such addresses or even refuse them altogether.
6291
6292     The following addressing modes are affected:
6293      [base+scale*index]
6294      [scale*index+disp]
6295      [base+index]
6296
6297     The first and last cases may be avoidable by explicitly coding the zero
6298     in the memory address, but I don't have an AMD-K6 machine handy to check
6299     this theory.  */
6300
6301 if (TARGET_K6
6302 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6303 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6304 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6305 cost += 10;
6306
6307 return cost;
6308}
6309
6310/* If X is a machine specific address (i.e. a symbol or label being
6311 referenced as a displacement from the GOT implemented using an
6312 UNSPEC), then return the base term. Otherwise return X. */
6313
6314rtx
6315ix86_find_base_term (rtx x)
6316{
6317 rtx term;
6318
6319 if (TARGET_64BIT)
6320 {
6321 if (GET_CODE (x) != CONST)
6322 return x;
6323 term = XEXP (x, 0);
6324 if (GET_CODE (term) == PLUS
6325 && (GET_CODE (XEXP (term, 1)) == CONST_INT
6326 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6327 term = XEXP (term, 0);
6328 if (GET_CODE (term) != UNSPEC
6329 || XINT (term, 1) != UNSPEC_GOTPCREL)
6330 return x;
6331
6332 term = XVECEXP (term, 0, 0);
6333
6334 if (GET_CODE (term) != SYMBOL_REF
6335 && GET_CODE (term) != LABEL_REF)
6336 return x;
6337
6338 return term;
6339 }
6340
6341 term = ix86_delegitimize_address (x);
6342
6343 if (GET_CODE (term) != SYMBOL_REF
6344 && GET_CODE (term) != LABEL_REF)
6345 return x;
6346
6347 return term;
6348}
6349
6350/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6351   this is used to form addresses to local data when -fPIC is in
6352   use.  */
6353
6354static bool
6355darwin_local_data_pic (rtx disp)
6356{
6357 if (GET_CODE (disp) == MINUS)
6358 {
6359 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6360 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6361 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6362 {
6363 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6364 if (! strcmp (sym_name, "<pic base>"))
6365 return true;
6366 }
6367 }
6368
6369 return false;
6370}
6371
6372/* Determine if a given RTX is a valid constant. We already know this
6373 satisfies CONSTANT_P. */
6374
6375bool
6376legitimate_constant_p (rtx x)
6377{
6378 switch (GET_CODE (x))
6379 {
6380 case CONST:
6381 x = XEXP (x, 0);
6382
6383 if (GET_CODE (x) == PLUS)
6384 {
6385 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6386 return false;
6387 x = XEXP (x, 0);
6388 }
6389
6390 if (TARGET_MACHO && darwin_local_data_pic (x))
6391 return true;
6392
6393 /* Only some unspecs are valid as "constants". */
6394 if (GET_CODE (x) == UNSPEC)
6395 switch (XINT (x, 1))
6396 {
6397 case UNSPEC_GOTOFF:
6398 return TARGET_64BIT;
6399 case UNSPEC_TPOFF:
6400 case UNSPEC_NTPOFF:
6401 x = XVECEXP (x, 0, 0);
6402 return (GET_CODE (x) == SYMBOL_REF
6403 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6404 case UNSPEC_DTPOFF:
6405 x = XVECEXP (x, 0, 0);
6406 return (GET_CODE (x) == SYMBOL_REF
6407 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6408 default:
6409 return false;
6410 }
6411
6412 /* We must have drilled down to a symbol. */
6413 if (GET_CODE (x) == LABEL_REF)
6414 return true;
6415 if (GET_CODE (x) != SYMBOL_REF)
6416 return false;
6417 /* FALLTHRU */
6418
6419 case SYMBOL_REF:
6420 /* TLS symbols are never valid. */
6421 if (SYMBOL_REF_TLS_MODEL (x))
6422 return false;
6423 break;
6424
6425 case CONST_DOUBLE:
6426 if (GET_MODE (x) == TImode
6427 && x != CONST0_RTX (TImode)
6428 && !TARGET_64BIT)
6429 return false;
6430 break;
6431
6432 case CONST_VECTOR:
6433 if (x == CONST0_RTX (GET_MODE (x)))
6434 return true;
6435 return false;
6436
6437 default:
6438 break;
6439 }
6440
6441 /* Otherwise we handle everything else in the move patterns. */
6442 return true;
6443}
6444
6445/* Determine if it's legal to put X into the constant pool. This
6446 is not possible for the address of thread-local symbols, which
6447 is checked above. */
6448
6449static bool
6450ix86_cannot_force_const_mem (rtx x)
6451{
6452 /* We can always put integral constants and vectors in memory. */
6453 switch (GET_CODE (x))
6454 {
6455 case CONST_INT:
6456 case CONST_DOUBLE:
6457 case CONST_VECTOR:
6458 return false;
6459
6460 default:
6461 break;
6462 }
6463 return !legitimate_constant_p (x);
6464}
6465
6466/* Determine if a given RTX is a valid constant address. */
6467
6468bool
6469constant_address_p (rtx x)
6470{
6471 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6472}
6473
6474/* Nonzero if the constant value X is a legitimate general operand
6475 when generating PIC code. It is given that flag_pic is on and
6476 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6477
6478bool
6479legitimate_pic_operand_p (rtx x)
6480{
6481 rtx inner;
6482
6483 switch (GET_CODE (x))
6484 {
6485 case CONST:
6486 inner = XEXP (x, 0);
6487 if (GET_CODE (inner) == PLUS
6488 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6489 inner = XEXP (inner, 0);
6490
6491 /* Only some unspecs are valid as "constants". */
6492 if (GET_CODE (inner) == UNSPEC)
6493 switch (XINT (inner, 1))
6494 {
6495 case UNSPEC_GOTOFF:
6496 return TARGET_64BIT;
6497 case UNSPEC_TPOFF:
6498 x = XVECEXP (inner, 0, 0);
6499 return (GET_CODE (x) == SYMBOL_REF
6500 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6501 default:
6502 return false;
6503 }
6504 /* FALLTHRU */
6505
6506 case SYMBOL_REF:
6507 case LABEL_REF:
6508 return legitimate_pic_address_disp_p (x);
6509
6510 default:
6511 return true;
6512 }
6513}
6514
6515/* Determine if a given CONST RTX is a valid memory displacement
6516 in PIC mode. */
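/* On ia32, a typical valid displacement is roughly
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)),
   optionally summed with a small CONST_INT; it is later emitted as
   foo@GOTOFF.  The symbol name is illustrative only.  */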
6517
6518int
6519legitimate_pic_address_disp_p (rtx disp)
6520{
6521 bool saw_plus;
6522
6523 /* In 64bit mode we can allow direct addresses of symbols and labels
6524 when they are not dynamic symbols. */
6525 if (TARGET_64BIT)
6526 {
6527 rtx op0 = disp, op1;
6528
6529 switch (GET_CODE (disp))
6530 {
6531 case LABEL_REF:
6532 return true;
6533
6534 case CONST:
6535 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6536 break;
6537 op0 = XEXP (XEXP (disp, 0), 0);
6538 op1 = XEXP (XEXP (disp, 0), 1);
6539 if (GET_CODE (op1) != CONST_INT
6540 || INTVAL (op1) >= 16*1024*1024
6541 || INTVAL (op1) < -16*1024*1024)
6542 break;
6543 if (GET_CODE (op0) == LABEL_REF)
6544 return true;
6545 if (GET_CODE (op0) != SYMBOL_REF)
6546 break;
6547 /* FALLTHRU */
6548
6549 case SYMBOL_REF:
6550 /* TLS references should always be enclosed in UNSPEC. */
6551 if (SYMBOL_REF_TLS_MODEL (op0))
6552 return false;
6553 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6554 return true;
6555 break;
6556
6557 default:
6558 break;
6559 }
6560 }
6561 if (GET_CODE (disp) != CONST)
6562 return 0;
6563 disp = XEXP (disp, 0);
6564
6565 if (TARGET_64BIT)
6566 {
6567      /* It is unsafe to allow PLUS expressions; that would defeat the limit
6568	 on the allowed distance of GOT tables.  We should not need these anyway.  */
6569 if (GET_CODE (disp) != UNSPEC
6570 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6571 && XINT (disp, 1) != UNSPEC_GOTOFF))
6572 return 0;
6573
6574 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6575 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6576 return 0;
6577 return 1;
6578 }
6579
6580 saw_plus = false;
6581 if (GET_CODE (disp) == PLUS)
6582 {
6583 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6584 return 0;
6585 disp = XEXP (disp, 0);
6586 saw_plus = true;
6587 }
6588
6589 if (TARGET_MACHO && darwin_local_data_pic (disp))
6590 return 1;
6591
6592 if (GET_CODE (disp) != UNSPEC)
6593 return 0;
6594
6595 switch (XINT (disp, 1))
6596 {
6597 case UNSPEC_GOT:
6598 if (saw_plus)
6599 return false;
6600 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6601 case UNSPEC_GOTOFF:
6602	  /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6603	     While the ABI also specifies a 32bit relocation, we don't produce
6604	     it in the small PIC model at all.  */
6605 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6606 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6607 && !TARGET_64BIT)
6608 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6609 return false;
6610 case UNSPEC_GOTTPOFF:
6611 case UNSPEC_GOTNTPOFF:
6612 case UNSPEC_INDNTPOFF:
6613 if (saw_plus)
6614 return false;
6615 disp = XVECEXP (disp, 0, 0);
6616 return (GET_CODE (disp) == SYMBOL_REF
6617 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6618 case UNSPEC_NTPOFF:
6619 disp = XVECEXP (disp, 0, 0);
6620 return (GET_CODE (disp) == SYMBOL_REF
6621 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6622 case UNSPEC_DTPOFF:
6623 disp = XVECEXP (disp, 0, 0);
6624 return (GET_CODE (disp) == SYMBOL_REF
6625 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6626 }
6627
6628 return 0;
6629}
6630
6631/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6632 memory address for an instruction. The MODE argument is the machine mode
6633 for the MEM expression that wants to use this address.
6634
6635 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6636 convert common non-canonical forms to canonical form so that they will
6637 be recognized. */
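/* The canonical form is the usual x86 effective address
   base + index*scale + disp, e.g. roughly
   (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 8)),
   where the scale must be 1, 2, 4 or 8 and a scale other than 1 requires
   an index register (both checks appear below).  */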
6638
6639int
6640legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6641{
6642 struct ix86_address parts;
6643 rtx base, index, disp;
6644 HOST_WIDE_INT scale;
6645 const char *reason = NULL;
6646 rtx reason_rtx = NULL_RTX;
6647
6648 if (TARGET_DEBUG_ADDR)
6649 {
6650 fprintf (stderr,
6651 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6652 GET_MODE_NAME (mode), strict);
6653 debug_rtx (addr);
6654 }
6655
6656 if (ix86_decompose_address (addr, &parts) <= 0)
6657 {
6658 reason = "decomposition failed";
6659 goto report_error;
6660 }
6661
6662 base = parts.base;
6663 index = parts.index;
6664 disp = parts.disp;
6665 scale = parts.scale;
6666
6667 /* Validate base register.
6668
6669 Don't allow SUBREG's that span more than a word here. It can lead to spill
6670 failures when the base is one word out of a two word structure, which is
6671 represented internally as a DImode int. */
6672
6673 if (base)
6674 {
6675 rtx reg;
6676 reason_rtx = base;
6677
6678 if (REG_P (base))
6679 reg = base;
6680 else if (GET_CODE (base) == SUBREG
6681 && REG_P (SUBREG_REG (base))
6682 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6683 <= UNITS_PER_WORD)
6684 reg = SUBREG_REG (base);
6685 else
6686 {
6687 reason = "base is not a register";
6688 goto report_error;
6689 }
6690
6691 if (GET_MODE (base) != Pmode)
6692 {
6693 reason = "base is not in Pmode";
6694 goto report_error;
6695 }
6696
6697 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6698 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6699 {
6700 reason = "base is not valid";
6701 goto report_error;
6702 }
6703 }
6704
6705 /* Validate index register.
6706
6707 Don't allow SUBREG's that span more than a word here -- same as above. */
6708
6709 if (index)
6710 {
6711 rtx reg;
6712 reason_rtx = index;
6713
6714 if (REG_P (index))
6715 reg = index;
6716 else if (GET_CODE (index) == SUBREG
6717 && REG_P (SUBREG_REG (index))
6718 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6719 <= UNITS_PER_WORD)
6720 reg = SUBREG_REG (index);
6721 else
6722 {
6723 reason = "index is not a register";
6724 goto report_error;
6725 }
6726
6727 if (GET_MODE (index) != Pmode)
6728 {
6729 reason = "index is not in Pmode";
6730 goto report_error;
6731 }
6732
6733 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6734 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6735 {
6736 reason = "index is not valid";
6737 goto report_error;
6738 }
6739 }
6740
6741 /* Validate scale factor. */
6742 if (scale != 1)
6743 {
6744 reason_rtx = GEN_INT (scale);
6745 if (!index)
6746 {
6747 reason = "scale without index";
6748 goto report_error;
6749 }
6750
6751 if (scale != 2 && scale != 4 && scale != 8)
6752 {
6753 reason = "scale is not a valid multiplier";
6754 goto report_error;
6755 }
6756 }
6757
6758 /* Validate displacement. */
6759 if (disp)
6760 {
6761 reason_rtx = disp;
6762
6763 if (GET_CODE (disp) == CONST
6764 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6765 switch (XINT (XEXP (disp, 0), 1))
6766 {
6767	    /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit
6768	       when used.  While the ABI also specifies 32bit relocations, we
6769	       don't produce them at all and use IP relative addressing instead.  */
6770 case UNSPEC_GOT:
6771 case UNSPEC_GOTOFF:
6772 gcc_assert (flag_pic);
6773 if (!TARGET_64BIT)
6774 goto is_legitimate_pic;
6775 reason = "64bit address unspec";
6776 goto report_error;
6777
6778 case UNSPEC_GOTPCREL:
6779 gcc_assert (flag_pic);
6780 goto is_legitimate_pic;
6781
6782 case UNSPEC_GOTTPOFF:
6783 case UNSPEC_GOTNTPOFF:
6784 case UNSPEC_INDNTPOFF:
6785 case UNSPEC_NTPOFF:
6786 case UNSPEC_DTPOFF:
6787 break;
6788
6789 default:
6790 reason = "invalid address unspec";
6791 goto report_error;
6792 }
6793
6794 else if (SYMBOLIC_CONST (disp)
6795 && (flag_pic
6796 || (TARGET_MACHO
6797#if TARGET_MACHO
6798 && MACHOPIC_INDIRECT
6799 && !machopic_operand_p (disp)
6800#endif
6801 )))
6802 {
6803
6804 is_legitimate_pic:
6805 if (TARGET_64BIT && (index || base))
6806 {
6807 /* foo@dtpoff(%rX) is ok. */
6808 if (GET_CODE (disp) != CONST
6809 || GET_CODE (XEXP (disp, 0)) != PLUS
6810 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6811 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6812 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6813 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6814 {
6815 reason = "non-constant pic memory reference";
6816 goto report_error;
6817 }
6818 }
6819 else if (! legitimate_pic_address_disp_p (disp))
6820 {
6821 reason = "displacement is an invalid pic construct";
6822 goto report_error;
6823 }
6824
6825 /* This code used to verify that a symbolic pic displacement
6826 includes the pic_offset_table_rtx register.
6827
6828	     While this is a good idea, unfortunately these constructs may
6829	     be created by the "adds using lea" optimization for incorrect
6830	     code like:
6831
6832 int a;
6833 int foo(int i)
6834 {
6835 return *(&a+i);
6836 }
6837
6838	     This code is nonsensical, but results in addressing the
6839	     GOT table with pic_offset_table_rtx as the base.  We can't
6840	     easily refuse it, since it gets matched by the
6841	     "addsi3" pattern, which later gets split to lea when the
6842	     output register differs from the input.  While this
6843	     could be handled by a separate addsi pattern for this case
6844	     that never results in lea, disabling this test seems to be
6845	     the easier and correct fix for the crash.  */
6846 }
6847 else if (GET_CODE (disp) != LABEL_REF
6848 && GET_CODE (disp) != CONST_INT
6849 && (GET_CODE (disp) != CONST
6850 || !legitimate_constant_p (disp))
6851 && (GET_CODE (disp) != SYMBOL_REF
6852 || !legitimate_constant_p (disp)))
6853 {
6854 reason = "displacement is not constant";
6855 goto report_error;
6856 }
6857 else if (TARGET_64BIT
6858 && !x86_64_immediate_operand (disp, VOIDmode))
6859 {
6860 reason = "displacement is out of range";
6861 goto report_error;
6862 }
6863 }
6864
6865 /* Everything looks valid. */
6866 if (TARGET_DEBUG_ADDR)
6867 fprintf (stderr, "Success.\n");
6868 return TRUE;
6869
6870 report_error:
6871 if (TARGET_DEBUG_ADDR)
6872 {
6873 fprintf (stderr, "Error: %s\n", reason);
6874 debug_rtx (reason_rtx);
6875 }
6876 return FALSE;
6877}
6878
6879/* Return a unique alias set for the GOT. */
6880
6881static HOST_WIDE_INT
6882ix86_GOT_alias_set (void)
6883{
6884 static HOST_WIDE_INT set = -1;
6885 if (set == -1)
6886 set = new_alias_set ();
6887 return set;
6888}
6889
6890/* Return a legitimate reference for ORIG (an address) using the
6891 register REG. If REG is 0, a new pseudo is generated.
6892
6893 There are two types of references that must be handled:
6894
6895 1. Global data references must load the address from the GOT, via
6896 the PIC reg. An insn is emitted to do this load, and the reg is
6897 returned.
6898
6899 2. Static data references, constant pool addresses, and code labels
6900 compute the address as an offset from the GOT, whose base is in
6901 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6902 differentiate them from global data objects. The returned
6903 address is the PIC reg + an unspec constant.
6904
6905 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6906 reg also appears in the address. */
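/* Concretely, for ia32 a global symbol ends up as a load of the form
   (mem (plus pic_offset_table_rtx
	      (const (unspec [(symbol_ref "glob")] UNSPEC_GOT))))
   while a local symbol becomes the plain sum
   (plus pic_offset_table_rtx
	 (const (unspec [(symbol_ref "loc")] UNSPEC_GOTOFF)));
   the symbol names here are illustrative only.  */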
6907
6908static rtx
6909legitimize_pic_address (rtx orig, rtx reg)
6910{
6911 rtx addr = orig;
6912 rtx new = orig;
6913 rtx base;
6914
6915#if TARGET_MACHO
6916 if (TARGET_MACHO && !TARGET_64BIT)
6917 {
6918 if (reg == 0)
6919 reg = gen_reg_rtx (Pmode);
6920 /* Use the generic Mach-O PIC machinery. */
6921 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6922 }
6923#endif
6924
6925 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6926 new = addr;
6927 else if (TARGET_64BIT
6928 && ix86_cmodel != CM_SMALL_PIC
6929 && local_symbolic_operand (addr, Pmode))
6930 {
6931 rtx tmpreg;
6932 /* This symbol may be referenced via a displacement from the PIC
6933 base address (@GOTOFF). */
6934
6935 if (reload_in_progress)
6936 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6937 if (GET_CODE (addr) == CONST)
6938 addr = XEXP (addr, 0);
6939 if (GET_CODE (addr) == PLUS)
6940 {
6941 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6942 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6943 }
6944 else
6945 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6946 new = gen_rtx_CONST (Pmode, new);
6947 if (!reg)
6948 tmpreg = gen_reg_rtx (Pmode);
6949 else
6950 tmpreg = reg;
6951 emit_move_insn (tmpreg, new);
6952
6953 if (reg != 0)
6954 {
6955 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6956 tmpreg, 1, OPTAB_DIRECT);
6957 new = reg;
6958 }
6959 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6960 }
6961 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6962 {
6963 /* This symbol may be referenced via a displacement from the PIC
6964 base address (@GOTOFF). */
6965
6966 if (reload_in_progress)
6967 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6968 if (GET_CODE (addr) == CONST)
6969 addr = XEXP (addr, 0);
6970 if (GET_CODE (addr) == PLUS)
6971 {
6972 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6973 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6974 }
6975 else
6976 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6977 new = gen_rtx_CONST (Pmode, new);
6978 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6979
6980 if (reg != 0)
6981 {
6982 emit_move_insn (reg, new);
6983 new = reg;
6984 }
6985 }
6986 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6987 {
6988 if (TARGET_64BIT)
6989 {
6990 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6991 new = gen_rtx_CONST (Pmode, new);
6992 new = gen_const_mem (Pmode, new);
6993 set_mem_alias_set (new, ix86_GOT_alias_set ());
6994
6995 if (reg == 0)
6996 reg = gen_reg_rtx (Pmode);
6997	      /* Use gen_movsi directly, otherwise the address is loaded
6998	         into a register for CSE.  We don't want to CSE these addresses;
6999	         instead we CSE addresses loaded from the GOT table, so skip this.  */
7000 emit_insn (gen_movsi (reg, new));
7001 new = reg;
7002 }
7003 else
7004 {
7005 /* This symbol must be referenced via a load from the
7006 Global Offset Table (@GOT). */
7007
7008 if (reload_in_progress)
7009 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7010 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7011 new = gen_rtx_CONST (Pmode, new);
7012 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7013 new = gen_const_mem (Pmode, new);
7014 set_mem_alias_set (new, ix86_GOT_alias_set ());
7015
7016 if (reg == 0)
7017 reg = gen_reg_rtx (Pmode);
7018 emit_move_insn (reg, new);
7019 new = reg;
7020 }
7021 }
7022 else
7023 {
7024 if (GET_CODE (addr) == CONST_INT
7025 && !x86_64_immediate_operand (addr, VOIDmode))
7026 {
7027 if (reg)
7028 {
7029 emit_move_insn (reg, addr);
7030 new = reg;
7031 }
7032 else
7033 new = force_reg (Pmode, addr);
7034 }
7035 else if (GET_CODE (addr) == CONST)
7036 {
7037 addr = XEXP (addr, 0);
7038
7039 /* We must match stuff we generate before. Assume the only
7040 unspecs that can get here are ours. Not that we could do
7041 anything with them anyway.... */
7042 if (GET_CODE (addr) == UNSPEC
7043 || (GET_CODE (addr) == PLUS
7044 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7045 return orig;
7046 gcc_assert (GET_CODE (addr) == PLUS);
7047 }
7048 if (GET_CODE (addr) == PLUS)
7049 {
7050 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7051
7052 /* Check first to see if this is a constant offset from a @GOTOFF
7053 symbol reference. */
7054 if (local_symbolic_operand (op0, Pmode)
7055 && GET_CODE (op1) == CONST_INT)
7056 {
7057 if (!TARGET_64BIT)
7058 {
7059 if (reload_in_progress)
7060 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7061 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7062 UNSPEC_GOTOFF);
7063 new = gen_rtx_PLUS (Pmode, new, op1);
7064 new = gen_rtx_CONST (Pmode, new);
7065 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7066
7067 if (reg != 0)
7068 {
7069 emit_move_insn (reg, new);
7070 new = reg;
7071 }
7072 }
7073 else
7074 {
7075 if (INTVAL (op1) < -16*1024*1024
7076 || INTVAL (op1) >= 16*1024*1024)
7077 {
7078 if (!x86_64_immediate_operand (op1, Pmode))
7079 op1 = force_reg (Pmode, op1);
7080 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7081 }
7082 }
7083 }
7084 else
7085 {
7086 base = legitimize_pic_address (XEXP (addr, 0), reg);
7087 new = legitimize_pic_address (XEXP (addr, 1),
7088 base == reg ? NULL_RTX : reg);
7089
7090 if (GET_CODE (new) == CONST_INT)
7091 new = plus_constant (base, INTVAL (new));
7092 else
7093 {
7094 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7095 {
7096 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7097 new = XEXP (new, 1);
7098 }
7099 new = gen_rtx_PLUS (Pmode, base, new);
7100 }
7101 }
7102 }
7103 }
7104 return new;
7105}
7106
7107/* Load the thread pointer. If TO_REG is true, force it into a register. */
7108
7109static rtx
7110get_thread_pointer (int to_reg)
7111{
7112 rtx tp, reg, insn;
7113
7114 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7115 if (!to_reg)
7116 return tp;
7117
7118 reg = gen_reg_rtx (Pmode);
7119 insn = gen_rtx_SET (VOIDmode, reg, tp);
7120 insn = emit_insn (insn);
7121
7122 return reg;
7123}
7124
7125/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7126 false if we expect this to be used for a memory address and true if
7127 we expect to load the address into a register. */
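/* As a rough guide to the models handled below: global- and local-dynamic
   obtain the address at run time through __tls_get_addr or the GNU2 TLS
   descriptor scheme, initial-exec loads the thread-pointer offset from the
   GOT (e.g. foo@GOTTPOFF) and combines it with the thread pointer, and
   local-exec uses a link-time constant offset (foo@TPOFF or foo@NTPOFF);
   "foo" is an illustrative symbol name.  */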
7128
7129static rtx
7130legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7131{
7132 rtx dest, base, off, pic, tp;
7133 int type;
7134
7135 switch (model)
7136 {
7137 case TLS_MODEL_GLOBAL_DYNAMIC:
7138 dest = gen_reg_rtx (Pmode);
7139 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7140
7141 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7142 {
7143 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7144
7145 start_sequence ();
7146 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7147 insns = get_insns ();
7148 end_sequence ();
7149
7150 emit_libcall_block (insns, dest, rax, x);
7151 }
7152 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7153 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7154 else
7155 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7156
7157 if (TARGET_GNU2_TLS)
7158 {
7159 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7160
7161 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7162 }
7163 break;
7164
7165 case TLS_MODEL_LOCAL_DYNAMIC:
7166 base = gen_reg_rtx (Pmode);
7167 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7168
7169 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7170 {
7171 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7172
7173 start_sequence ();
7174 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7175 insns = get_insns ();
7176 end_sequence ();
7177
7178 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7179 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7180 emit_libcall_block (insns, base, rax, note);
7181 }
7182 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7183 emit_insn (gen_tls_local_dynamic_base_64 (base));
7184 else
7185 emit_insn (gen_tls_local_dynamic_base_32 (base));
7186
7187 if (TARGET_GNU2_TLS)
7188 {
7189 rtx x = ix86_tls_module_base ();
7190
7191 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7192 gen_rtx_MINUS (Pmode, x, tp));
7193 }
7194
7195 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7196 off = gen_rtx_CONST (Pmode, off);
7197
7198 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7199
7200 if (TARGET_GNU2_TLS)
7201 {
7202 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7203
7204 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7205 }
7206
7207 break;
7208
7209 case TLS_MODEL_INITIAL_EXEC:
7210 if (TARGET_64BIT)
7211 {
7212 pic = NULL;
7213 type = UNSPEC_GOTNTPOFF;
7214 }
7215 else if (flag_pic)
7216 {
7217 if (reload_in_progress)
7218 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7219 pic = pic_offset_table_rtx;
7220 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7221 }
7222 else if (!TARGET_ANY_GNU_TLS)
7223 {
7224 pic = gen_reg_rtx (Pmode);
7225 emit_insn (gen_set_got (pic));
7226 type = UNSPEC_GOTTPOFF;
7227 }
7228 else
7229 {
7230 pic = NULL;
7231 type = UNSPEC_INDNTPOFF;
7232 }
7233
7234 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7235 off = gen_rtx_CONST (Pmode, off);
7236 if (pic)
7237 off = gen_rtx_PLUS (Pmode, pic, off);
7238 off = gen_const_mem (Pmode, off);
7239 set_mem_alias_set (off, ix86_GOT_alias_set ());
7240
7241 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7242 {
7243 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7244 off = force_reg (Pmode, off);
7245 return gen_rtx_PLUS (Pmode, base, off);
7246 }
7247 else
7248 {
7249 base = get_thread_pointer (true);
7250 dest = gen_reg_rtx (Pmode);
7251 emit_insn (gen_subsi3 (dest, base, off));
7252 }
7253 break;
7254
7255 case TLS_MODEL_LOCAL_EXEC:
7256 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7257 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7258 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7259 off = gen_rtx_CONST (Pmode, off);
7260
7261 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7262 {
7263 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7264 return gen_rtx_PLUS (Pmode, base, off);
7265 }
7266 else
7267 {
7268 base = get_thread_pointer (true);
7269 dest = gen_reg_rtx (Pmode);
7270 emit_insn (gen_subsi3 (dest, base, off));
7271 }
7272 break;
7273
7274 default:
7275 gcc_unreachable ();
7276 }
7277
7278 return dest;
7279}
7280
7281/* Try machine-dependent ways of modifying an illegitimate address
7282 to be legitimate. If we find one, return the new, valid address.
7283 This macro is used in only one place: `memory_address' in explow.c.
7284
7285 OLDX is the address as it was before break_out_memory_refs was called.
7286 In some cases it is useful to look at this to decide what needs to be done.
7287
7288 MODE and WIN are passed so that this macro can use
7289 GO_IF_LEGITIMATE_ADDRESS.
7290
7291 It is always safe for this macro to do nothing. It exists to recognize
7292 opportunities to optimize the output.
7293
7294 For the 80386, we handle X+REG by loading X into a register R and
7295 using R+REG. R will go in a general reg and indexing will be used.
7296 However, if REG is a broken-out memory address or multiplication,
7297 nothing needs to be done because REG can certainly go in a general reg.
7298
7299 When -fpic is used, special handling is needed for symbolic references.
7300 See comments by legitimize_pic_address in i386.c for details. */
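/* For instance, the code below turns a shift used for indexing, roughly
   (plus (reg) (ashift (reg) (const_int 2))), into the canonical multiply
   form (plus (mult (reg) (const_int 4)) (reg)) so that it can match the
   scaled-index addressing mode.  */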
7301
7302rtx
7303legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7304{
7305 int changed = 0;
7306 unsigned log;
7307
7308 if (TARGET_DEBUG_ADDR)
7309 {
7310 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7311 GET_MODE_NAME (mode));
7312 debug_rtx (x);
7313 }
7314
7315 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7316 if (log)
7317 return legitimize_tls_address (x, log, false);
7318 if (GET_CODE (x) == CONST
7319 && GET_CODE (XEXP (x, 0)) == PLUS
7320 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7321 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7322 {
7323 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7324 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7325 }
7326
7327 if (flag_pic && SYMBOLIC_CONST (x))
7328 return legitimize_pic_address (x, 0);
7329
7330 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7331 if (GET_CODE (x) == ASHIFT
7332 && GET_CODE (XEXP (x, 1)) == CONST_INT
7333 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7334 {
7335 changed = 1;
7336 log = INTVAL (XEXP (x, 1));
7337 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7338 GEN_INT (1 << log));
7339 }
7340
7341 if (GET_CODE (x) == PLUS)
7342 {
7343 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7344
7345 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7346 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7347 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7348 {
7349 changed = 1;
7350 log = INTVAL (XEXP (XEXP (x, 0), 1));
7351 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7352 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7353 GEN_INT (1 << log));
7354 }
7355
7356 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7357 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7358 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7359 {
7360 changed = 1;
7361 log = INTVAL (XEXP (XEXP (x, 1), 1));
7362 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7363 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7364 GEN_INT (1 << log));
7365 }
7366
7367 /* Put multiply first if it isn't already. */
7368 if (GET_CODE (XEXP (x, 1)) == MULT)
7369 {
7370 rtx tmp = XEXP (x, 0);
7371 XEXP (x, 0) = XEXP (x, 1);
7372 XEXP (x, 1) = tmp;
7373 changed = 1;
7374 }
7375
7376 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7377 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7378 created by virtual register instantiation, register elimination, and
7379 similar optimizations. */
7380 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7381 {
7382 changed = 1;
7383 x = gen_rtx_PLUS (Pmode,
7384 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7385 XEXP (XEXP (x, 1), 0)),
7386 XEXP (XEXP (x, 1), 1));
7387 }
7388
7389 /* Canonicalize
7390 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7391 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7392 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7393 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7394 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7395 && CONSTANT_P (XEXP (x, 1)))
7396 {
7397 rtx constant;
7398 rtx other = NULL_RTX;
7399
7400 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7401 {
7402 constant = XEXP (x, 1);
7403 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7404 }
7405 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7406 {
7407 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7408 other = XEXP (x, 1);
7409 }
7410 else
7411 constant = 0;
7412
7413 if (constant)
7414 {
7415 changed = 1;
7416 x = gen_rtx_PLUS (Pmode,
7417 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7418 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7419 plus_constant (other, INTVAL (constant)));
7420 }
7421 }
7422
7423 if (changed && legitimate_address_p (mode, x, FALSE))
7424 return x;
7425
7426 if (GET_CODE (XEXP (x, 0)) == MULT)
7427 {
7428 changed = 1;
7429 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7430 }
7431
7432 if (GET_CODE (XEXP (x, 1)) == MULT)
7433 {
7434 changed = 1;
7435 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7436 }
7437
7438 if (changed
7439 && GET_CODE (XEXP (x, 1)) == REG
7440 && GET_CODE (XEXP (x, 0)) == REG)
7441 return x;
7442
7443 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7444 {
7445 changed = 1;
7446 x = legitimize_pic_address (x, 0);
7447 }
7448
7449 if (changed && legitimate_address_p (mode, x, FALSE))
7450 return x;
7451
7452 if (GET_CODE (XEXP (x, 0)) == REG)
7453 {
7454 rtx temp = gen_reg_rtx (Pmode);
7455 rtx val = force_operand (XEXP (x, 1), temp);
7456 if (val != temp)
7457 emit_move_insn (temp, val);
7458
7459 XEXP (x, 1) = temp;
7460 return x;
7461 }
7462
7463 else if (GET_CODE (XEXP (x, 1)) == REG)
7464 {
7465 rtx temp = gen_reg_rtx (Pmode);
7466 rtx val = force_operand (XEXP (x, 0), temp);
7467 if (val != temp)
7468 emit_move_insn (temp, val);
7469
7470 XEXP (x, 0) = temp;
7471 return x;
7472 }
7473 }
7474
7475 return x;
7476}
7477
7478/* Print an integer constant expression in assembler syntax. Addition
7479 and subtraction are the only arithmetic that may appear in these
7480 expressions. FILE is the stdio stream to write to, X is the rtx, and
7481 CODE is the operand print code from the output string. */
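/* For example, (const (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL)) is
   printed as foo@GOTPCREL(%rip) and UNSPEC_GOTOFF as foo@GOTOFF; the
   symbol name is illustrative only.  */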
7482
7483static void
7484output_pic_addr_const (FILE *file, rtx x, int code)
7485{
7486 char buf[256];
7487
7488 switch (GET_CODE (x))
7489 {
7490 case PC:
7491 gcc_assert (flag_pic);
7492 putc ('.', file);
7493 break;
7494
7495 case SYMBOL_REF:
7496 if (! TARGET_MACHO || TARGET_64BIT)
7497 output_addr_const (file, x);
7498 else
7499 {
7500 const char *name = XSTR (x, 0);
7501
7502 /* Mark the decl as referenced so that cgraph will output the function. */
7503 if (SYMBOL_REF_DECL (x))
7504 mark_decl_referenced (SYMBOL_REF_DECL (x));
7505
7506#if TARGET_MACHO
7507 if (MACHOPIC_INDIRECT
7508 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7509 name = machopic_indirection_name (x, /*stub_p=*/true);
7510#endif
7511 assemble_name (file, name);
7512 }
7513 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7514 fputs ("@PLT", file);
7515 break;
7516
7517 case LABEL_REF:
7518 x = XEXP (x, 0);
7519 /* FALLTHRU */
7520 case CODE_LABEL:
7521 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7522 assemble_name (asm_out_file, buf);
7523 break;
7524
7525 case CONST_INT:
7526 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7527 break;
7528
7529 case CONST:
7530 /* This used to output parentheses around the expression,
7531 but that does not work on the 386 (either ATT or BSD assembler). */
7532 output_pic_addr_const (file, XEXP (x, 0), code);
7533 break;
7534
7535 case CONST_DOUBLE:
7536 if (GET_MODE (x) == VOIDmode)
7537 {
7538 /* We can use %d if the number is <32 bits and positive. */
7539 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7540 fprintf (file, "0x%lx%08lx",
7541 (unsigned long) CONST_DOUBLE_HIGH (x),
7542 (unsigned long) CONST_DOUBLE_LOW (x));
7543 else
7544 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7545 }
7546 else
7547 /* We can't handle floating point constants;
7548 PRINT_OPERAND must handle them. */
7549 output_operand_lossage ("floating constant misused");
7550 break;
7551
7552 case PLUS:
7553 /* Some assemblers need integer constants to appear first. */
7554 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7555 {
7556 output_pic_addr_const (file, XEXP (x, 0), code);
7557 putc ('+', file);
7558 output_pic_addr_const (file, XEXP (x, 1), code);
7559 }
7560 else
7561 {
7562 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7563 output_pic_addr_const (file, XEXP (x, 1), code);
7564 putc ('+', file);
7565 output_pic_addr_const (file, XEXP (x, 0), code);
7566 }
7567 break;
7568
7569 case MINUS:
7570 if (!TARGET_MACHO)
7571 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7572 output_pic_addr_const (file, XEXP (x, 0), code);
7573 putc ('-', file);
7574 output_pic_addr_const (file, XEXP (x, 1), code);
7575 if (!TARGET_MACHO)
7576 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7577 break;
7578
7579 case UNSPEC:
7580 gcc_assert (XVECLEN (x, 0) == 1);
7581 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7582 switch (XINT (x, 1))
7583 {
7584 case UNSPEC_GOT:
7585 fputs ("@GOT", file);
7586 break;
7587 case UNSPEC_GOTOFF:
7588 fputs ("@GOTOFF", file);
7589 break;
7590 case UNSPEC_GOTPCREL:
7591 fputs ("@GOTPCREL(%rip)", file);
7592 break;
7593 case UNSPEC_GOTTPOFF:
7594 /* FIXME: This might be @TPOFF in Sun ld too. */
7595 fputs ("@GOTTPOFF", file);
7596 break;
7597 case UNSPEC_TPOFF:
7598 fputs ("@TPOFF", file);
7599 break;
7600 case UNSPEC_NTPOFF:
7601 if (TARGET_64BIT)
7602 fputs ("@TPOFF", file);
7603 else
7604 fputs ("@NTPOFF", file);
7605 break;
7606 case UNSPEC_DTPOFF:
7607 fputs ("@DTPOFF", file);
7608 break;
7609 case UNSPEC_GOTNTPOFF:
7610 if (TARGET_64BIT)
7611 fputs ("@GOTTPOFF(%rip)", file);
7612 else
7613 fputs ("@GOTNTPOFF", file);
7614 break;
7615 case UNSPEC_INDNTPOFF:
7616 fputs ("@INDNTPOFF", file);
7617 break;
7618 default:
7619 output_operand_lossage ("invalid UNSPEC as operand");
7620 break;
7621 }
7622 break;
7623
7624 default:
7625 output_operand_lossage ("invalid expression as operand");
7626 }
7627}
7628
7629/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7630 We need to emit DTP-relative relocations. */
7631
7632static void
7633i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7634{
7635 fputs (ASM_LONG, file);
7636 output_addr_const (file, x);
7637 fputs ("@DTPOFF", file);
7638 switch (size)
7639 {
7640 case 4:
7641 break;
7642 case 8:
7643 fputs (", 0", file);
7644 break;
7645 default:
7646 gcc_unreachable ();
7647 }
7648}
7649
7650/* In the name of slightly smaller debug output, and to cater to
7651 general assembler lossage, recognize PIC+GOTOFF and turn it back
7652 into a direct symbol reference.
7653
7654 On Darwin, this is necessary to avoid a crash, because Darwin
7655 has a different PIC label for each routine but the DWARF debugging
7656 information is not associated with any particular routine, so it's
7657 necessary to remove references to the PIC label from RTL stored by
7658 the DWARF output code. */
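/* E.g. an address of the form
   (plus pic_offset_table_rtx
	 (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))),
   when it does not appear inside a MEM, collapses back to just
   (symbol_ref "foo"); the symbol name is illustrative only.  */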
7659
7660static rtx
7661ix86_delegitimize_address (rtx orig_x)
7662{
7663 rtx x = orig_x;
7664 /* reg_addend is NULL or a multiple of some register. */
7665 rtx reg_addend = NULL_RTX;
7666 /* const_addend is NULL or a const_int. */
7667 rtx const_addend = NULL_RTX;
7668 /* This is the result, or NULL. */
7669 rtx result = NULL_RTX;
7670
7671 if (GET_CODE (x) == MEM)
7672 x = XEXP (x, 0);
7673
7674 if (TARGET_64BIT)
7675 {
7676 if (GET_CODE (x) != CONST
7677 || GET_CODE (XEXP (x, 0)) != UNSPEC
7678 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7679 || GET_CODE (orig_x) != MEM)
7680 return orig_x;
7681 return XVECEXP (XEXP (x, 0), 0, 0);
7682 }
7683
7684 if (GET_CODE (x) != PLUS
7685 || GET_CODE (XEXP (x, 1)) != CONST)
7686 return orig_x;
7687
7688 if (GET_CODE (XEXP (x, 0)) == REG
7689 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7690 /* %ebx + GOT/GOTOFF */
7691 ;
7692 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7693 {
7694 /* %ebx + %reg * scale + GOT/GOTOFF */
7695 reg_addend = XEXP (x, 0);
7696 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7697 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7698 reg_addend = XEXP (reg_addend, 1);
7699 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7700 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7701 reg_addend = XEXP (reg_addend, 0);
7702 else
7703 return orig_x;
7704 if (GET_CODE (reg_addend) != REG
7705 && GET_CODE (reg_addend) != MULT
7706 && GET_CODE (reg_addend) != ASHIFT)
7707 return orig_x;
7708 }
7709 else
7710 return orig_x;
7711
7712 x = XEXP (XEXP (x, 1), 0);
7713 if (GET_CODE (x) == PLUS
7714 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7715 {
7716 const_addend = XEXP (x, 1);
7717 x = XEXP (x, 0);
7718 }
7719
7720 if (GET_CODE (x) == UNSPEC
7721 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7722 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7723 result = XVECEXP (x, 0, 0);
7724
7725 if (TARGET_MACHO && darwin_local_data_pic (x)
7726 && GET_CODE (orig_x) != MEM)
7727 result = XEXP (x, 0);
7728
7729 if (! result)
7730 return orig_x;
7731
7732 if (const_addend)
7733 result = gen_rtx_PLUS (Pmode, result, const_addend);
7734 if (reg_addend)
7735 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7736 return result;
7737}
7738
7739static void
7740put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7741 int fp, FILE *file)
7742{
7743 const char *suffix;
7744
7745 if (mode == CCFPmode || mode == CCFPUmode)
7746 {
7747 enum rtx_code second_code, bypass_code;
7748 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7749 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7750 code = ix86_fp_compare_code_to_integer (code);
7751 mode = CCmode;
7752 }
7753 if (reverse)
7754 code = reverse_condition (code);
7755
7756 switch (code)
7757 {
7758 case EQ:
7759 suffix = "e";
7760 break;
7761 case NE:
7762 suffix = "ne";
7763 break;
7764 case GT:
7765 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7766 suffix = "g";
7767 break;
7768 case GTU:
7769 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7770 Those same assemblers have the same but opposite lossage on cmov. */
7771 gcc_assert (mode == CCmode);
7772 suffix = fp ? "nbe" : "a";
7773 break;
7774 case LT:
7775 switch (mode)
7776 {
7777 case CCNOmode:
7778 case CCGOCmode:
7779 suffix = "s";
7780 break;
7781
7782 case CCmode:
7783 case CCGCmode:
7784 suffix = "l";
7785 break;
7786
7787 default:
7788 gcc_unreachable ();
7789 }
7790 break;
7791 case LTU:
7792 gcc_assert (mode == CCmode);
7793 suffix = "b";
7794 break;
7795 case GE:
7796 switch (mode)
7797 {
7798 case CCNOmode:
7799 case CCGOCmode:
7800 suffix = "ns";
7801 break;
7802
7803 case CCmode:
7804 case CCGCmode:
7805 suffix = "ge";
7806 break;
7807
7808 default:
7809 gcc_unreachable ();
7810 }
7811 break;
7812 case GEU:
7813 /* ??? As above. */
7814 gcc_assert (mode == CCmode);
7815 suffix = fp ? "nb" : "ae";
7816 break;
7817 case LE:
7818 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7819 suffix = "le";
7820 break;
7821 case LEU:
7822 gcc_assert (mode == CCmode);
7823 suffix = "be";
7824 break;
7825 case UNORDERED:
7826 suffix = fp ? "u" : "p";
7827 break;
7828 case ORDERED:
7829 suffix = fp ? "nu" : "np";
7830 break;
7831 default:
7832 gcc_unreachable ();
7833 }
7834 fputs (suffix, file);
7835}
7836
7837/* Print the name of register X to FILE based on its machine mode and number.
7838 If CODE is 'w', pretend the mode is HImode.
7839 If CODE is 'b', pretend the mode is QImode.
7840 If CODE is 'k', pretend the mode is SImode.
7841 If CODE is 'q', pretend the mode is DImode.
7842 If CODE is 'h', pretend the reg is the 'high' byte register.
7843   If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.  */
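/* For instance, the hard register for %rax is printed as %eax when CODE
   is 'k' and as %al when CODE is 'b'; the AMD extended registers follow
   the r8..r15 naming instead, e.g. %r8d for CODE 'k'.  */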
7844
7845void
7846print_reg (rtx x, int code, FILE *file)
7847{
7848 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7849 && REGNO (x) != FRAME_POINTER_REGNUM
7850 && REGNO (x) != FLAGS_REG
7851 && REGNO (x) != FPSR_REG);
7852
7853 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7854 putc ('%', file);
7855
7856 if (code == 'w' || MMX_REG_P (x))
7857 code = 2;
7858 else if (code == 'b')
7859 code = 1;
7860 else if (code == 'k')
7861 code = 4;
7862 else if (code == 'q')
7863 code = 8;
7864 else if (code == 'y')
7865 code = 3;
7866 else if (code == 'h')
7867 code = 0;
7868 else
7869 code = GET_MODE_SIZE (GET_MODE (x));
7870
7871  /* Irritatingly, AMD extended registers use a different naming convention
7872     from the normal registers.  */
7873 if (REX_INT_REG_P (x))
7874 {
7875 gcc_assert (TARGET_64BIT);
7876 switch (code)
7877 {
7878 case 0:
7879 error ("extended registers have no high halves");
7880 break;
7881 case 1:
7882 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7883 break;
7884 case 2:
7885 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7886 break;
7887 case 4:
7888 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7889 break;
7890 case 8:
7891 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7892 break;
7893 default:
7894 error ("unsupported operand size for extended register");
7895 break;
7896 }
7897 return;
7898 }
7899 switch (code)
7900 {
7901 case 3:
7902 if (STACK_TOP_P (x))
7903 {
7904 fputs ("st(0)", file);
7905 break;
7906 }
7907 /* FALLTHRU */
7908 case 8:
7909 case 4:
7910 case 12:
7911 if (! ANY_FP_REG_P (x))
7912 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7913 /* FALLTHRU */
7914 case 16:
7915 case 2:
7916 normal:
7917 fputs (hi_reg_name[REGNO (x)], file);
7918 break;
7919 case 1:
7920 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7921 goto normal;
7922 fputs (qi_reg_name[REGNO (x)], file);
7923 break;
7924 case 0:
7925 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7926 goto normal;
7927 fputs (qi_high_reg_name[REGNO (x)], file);
7928 break;
7929 default:
7930 gcc_unreachable ();
7931 }
7932}
7933
7934/* Locate some local-dynamic symbol still in use by this function
7935 so that we can print its name in some tls_local_dynamic_base
7936 pattern. */
7937
7938static const char *
7939get_some_local_dynamic_name (void)
7940{
7941 rtx insn;
7942
7943 if (cfun->machine->some_ld_name)
7944 return cfun->machine->some_ld_name;
7945
7946 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7947 if (INSN_P (insn)
7948 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7949 return cfun->machine->some_ld_name;
7950
7951 gcc_unreachable ();
7952}
7953
7954static int
7955get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7956{
7957 rtx x = *px;
7958
7959 if (GET_CODE (x) == SYMBOL_REF
7960 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7961 {
7962 cfun->machine->some_ld_name = XSTR (x, 0);
7963 return 1;
7964 }
7965
7966 return 0;
7967}
7968
7969/* Meaning of CODE:
7970 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7971 C -- print opcode suffix for set/cmov insn.
7972 c -- like C, but print reversed condition
7973 F,f -- likewise, but for floating-point.
7974 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7975 otherwise nothing
7976 R -- print the prefix for register names.
7977 z -- print the opcode suffix for the size of the current operand.
7978 * -- print a star (in certain assembler syntax)
7979 A -- print an absolute memory reference.
7980 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7981	 s -- print a shift double count, followed by the assembler's argument
7982 delimiter.
7983 b -- print the QImode name of the register for the indicated operand.
7984 %b0 would print %al if operands[0] is reg 0.
7985 w -- likewise, print the HImode name of the register.
7986 k -- likewise, print the SImode name of the register.
7987 q -- likewise, print the DImode name of the register.
7988 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7989 y -- print "st(0)" instead of "st" as a register.
7990 D -- print condition for SSE cmp instruction.
7991 P -- if PIC, print an @PLT suffix.
7992 X -- don't print any sort of PIC '@' suffix for a symbol.
7993 & -- print some in-use local-dynamic symbol name.
7994 H -- print a memory address offset by 8; used for sse high-parts
7995 */
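/* For example, in an insn template "%k1" prints the SImode name of a
   register operand (%eax rather than %rax), while "%z0" appends the AT&T
   size suffix ('s', 'l', 'q', 't', ...) derived from the mode of
   operands[0], as handled below.  */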
7996
7997void
7998print_operand (FILE *file, rtx x, int code)
7999{
8000 if (code)
8001 {
8002 switch (code)
8003 {
8004 case '*':
8005 if (ASSEMBLER_DIALECT == ASM_ATT)
8006 putc ('*', file);
8007 return;
8008
8009 case '&':
8010 assemble_name (file, get_some_local_dynamic_name ());
8011 return;
8012
8013 case 'A':
8014 switch (ASSEMBLER_DIALECT)
8015 {
8016 case ASM_ATT:
8017 putc ('*', file);
8018 break;
8019
8020 case ASM_INTEL:
8021 /* Intel syntax. For absolute addresses, registers should not
8022 be surrounded by braces. */
8023 if (GET_CODE (x) != REG)
8024 {
8025 putc ('[', file);
8026 PRINT_OPERAND (file, x, 0);
8027 putc (']', file);
8028 return;
8029 }
8030 break;
8031
8032 default:
8033 gcc_unreachable ();
8034 }
8035
8036 PRINT_OPERAND (file, x, 0);
8037 return;
8038
8039
8040 case 'L':
8041 if (ASSEMBLER_DIALECT == ASM_ATT)
8042 putc ('l', file);
8043 return;
8044
8045 case 'W':
8046 if (ASSEMBLER_DIALECT == ASM_ATT)
8047 putc ('w', file);
8048 return;
8049
8050 case 'B':
8051 if (ASSEMBLER_DIALECT == ASM_ATT)
8052 putc ('b', file);
8053 return;
8054
8055 case 'Q':
8056 if (ASSEMBLER_DIALECT == ASM_ATT)
8057 putc ('l', file);
8058 return;
8059
8060 case 'S':
8061 if (ASSEMBLER_DIALECT == ASM_ATT)
8062 putc ('s', file);
8063 return;
8064
8065 case 'T':
8066 if (ASSEMBLER_DIALECT == ASM_ATT)
8067 putc ('t', file);
8068 return;
8069
8070 case 'z':
8071 /* 387 opcodes don't get size suffixes if the operands are
8072 registers. */
8073 if (STACK_REG_P (x))
8074 return;
8075
8076 /* Likewise if using Intel opcodes. */
8077 if (ASSEMBLER_DIALECT == ASM_INTEL)
8078 return;
8079
8080 /* This is the size of op from size of operand. */
8081 switch (GET_MODE_SIZE (GET_MODE (x)))
8082 {
8083 case 2:
8084#ifdef HAVE_GAS_FILDS_FISTS
8085 putc ('s', file);
8086#endif
8087 return;
8088
8089 case 4:
8090 if (GET_MODE (x) == SFmode)
8091 {
8092 putc ('s', file);
8093 return;
8094 }
8095 else
8096 putc ('l', file);
8097 return;
8098
8099 case 12:
8100 case 16:
8101 putc ('t', file);
8102 return;
8103
8104 case 8:
8105 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8106 {
8107#ifdef GAS_MNEMONICS
8108 putc ('q', file);
8109#else
8110 putc ('l', file);
8111 putc ('l', file);
8112#endif
8113 }
8114 else
8115 putc ('l', file);
8116 return;
8117
8118 default:
8119 gcc_unreachable ();
8120 }
8121
8122 case 'b':
8123 case 'w':
8124 case 'k':
8125 case 'q':
8126 case 'h':
8127 case 'y':
8128 case 'X':
8129 case 'P':
8130 break;
8131
8132 case 's':
8133 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
8134 {
8135 PRINT_OPERAND (file, x, 0);
8136 putc (',', file);
8137 }
8138 return;
8139
8140 case 'D':
8141	    /* A little bit of braindamage here.  The SSE compare instructions
8142	       use completely different names for the comparisons than the
8143	       fp conditional moves do.  */
8144 switch (GET_CODE (x))
8145 {
8146 case EQ:
8147 case UNEQ:
8148 fputs ("eq", file);
8149 break;
8150 case LT:
8151 case UNLT:
8152 fputs ("lt", file);
8153 break;
8154 case LE:
8155 case UNLE:
8156 fputs ("le", file);
8157 break;
8158 case UNORDERED:
8159 fputs ("unord", file);
8160 break;
8161 case NE:
8162 case LTGT:
8163 fputs ("neq", file);
8164 break;
8165 case UNGE:
8166 case GE:
8167 fputs ("nlt", file);
8168 break;
8169 case UNGT:
8170 case GT:
8171 fputs ("nle", file);
8172 break;
8173 case ORDERED:
8174 fputs ("ord", file);
8175 break;
8176 default:
8177 gcc_unreachable ();
8178 }
8179 return;
8180 case 'O':
8181#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8182 if (ASSEMBLER_DIALECT == ASM_ATT)
8183 {
8184 switch (GET_MODE (x))
8185 {
8186 case HImode: putc ('w', file); break;
8187 case SImode:
8188 case SFmode: putc ('l', file); break;
8189 case DImode:
8190 case DFmode: putc ('q', file); break;
8191 default: gcc_unreachable ();
8192 }
8193 putc ('.', file);
8194 }
8195#endif
8196 return;
8197 case 'C':
8198 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8199 return;
8200 case 'F':
8201#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8202 if (ASSEMBLER_DIALECT == ASM_ATT)
8203 putc ('.', file);
8204#endif
8205 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8206 return;
8207
8208 /* Like above, but reverse condition */
8209 case 'c':
8210 /* Check to see if argument to %c is really a constant
8211 and not a condition code which needs to be reversed. */
8212 if (!COMPARISON_P (x))
8213 {
8214 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8215 return;
8216 }
8217 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8218 return;
8219 case 'f':
8220#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8221 if (ASSEMBLER_DIALECT == ASM_ATT)
8222 putc ('.', file);
8223#endif
8224 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8225 return;
8226
8227 case 'H':
8228 /* It doesn't actually matter what mode we use here, as we're
8229 only going to use this for printing. */
8230 x = adjust_address_nv (x, DImode, 8);
8231 break;
8232
8233 case '+':
8234 {
8235 rtx x;
8236
8237 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8238 return;
8239
8240 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8241 if (x)
8242 {
8243 int pred_val = INTVAL (XEXP (x, 0));
8244
8245 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8246 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8247 {
8248 int taken = pred_val > REG_BR_PROB_BASE / 2;
8249 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8250
8251		    /* Emit hints only in the case the default branch prediction
8252		       heuristics would fail.  */
8253 if (taken != cputaken)
8254 {
8255 /* We use 3e (DS) prefix for taken branches and
8256 2e (CS) prefix for not taken branches. */
8257 if (taken)
8258 fputs ("ds ; ", file);
8259 else
8260 fputs ("cs ; ", file);
8261 }
8262 }
8263 }
8264 return;
8265 }
8266 default:
8267 output_operand_lossage ("invalid operand code '%c'", code);
8268 }
8269 }
8270
8271 if (GET_CODE (x) == REG)
8272 print_reg (x, code, file);
8273
8274 else if (GET_CODE (x) == MEM)
8275 {
8276 /* No `byte ptr' prefix for call instructions. */
8277 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8278 {
8279 const char * size;
8280 switch (GET_MODE_SIZE (GET_MODE (x)))
8281 {
8282 case 1: size = "BYTE"; break;
8283 case 2: size = "WORD"; break;
8284 case 4: size = "DWORD"; break;
8285 case 8: size = "QWORD"; break;
8286 case 12: size = "XWORD"; break;
8287 case 16: size = "XMMWORD"; break;
8288 default:
8289 gcc_unreachable ();
8290 }
8291
8292 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8293 if (code == 'b')
8294 size = "BYTE";
8295 else if (code == 'w')
8296 size = "WORD";
8297 else if (code == 'k')
8298 size = "DWORD";
8299
8300 fputs (size, file);
8301 fputs (" PTR ", file);
8302 }
8303
8304 x = XEXP (x, 0);
8305 /* Avoid (%rip) for call operands. */
8306 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8307 && GET_CODE (x) != CONST_INT)
8308 output_addr_const (file, x);
8309 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8310 output_operand_lossage ("invalid constraints for operand");
8311 else
8312 output_address (x);
8313 }
8314
8315 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8316 {
8317 REAL_VALUE_TYPE r;
8318 long l;
8319
8320 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8321 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8322
8323 if (ASSEMBLER_DIALECT == ASM_ATT)
8324 putc ('$', file);
8325 fprintf (file, "0x%08lx", l);
8326 }
8327
8328 /* These float cases don't actually occur as immediate operands. */
8329 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8330 {
8331 char dstr[30];
8332
8333 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8334 fprintf (file, "%s", dstr);
8335 }
8336
8337 else if (GET_CODE (x) == CONST_DOUBLE
8338 && GET_MODE (x) == XFmode)
8339 {
8340 char dstr[30];
8341
8342 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8343 fprintf (file, "%s", dstr);
8344 }
8345
8346 else
8347 {
8348 /* We have patterns that allow zero sets of memory, for instance.
8349 In 64-bit mode, we should probably support all 8-byte vectors,
8350 since we can in fact encode that into an immediate. */
8351 if (GET_CODE (x) == CONST_VECTOR)
8352 {
8353 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8354 x = const0_rtx;
8355 }
8356
8357 if (code != 'P')
8358 {
8359 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8360 {
8361 if (ASSEMBLER_DIALECT == ASM_ATT)
8362 putc ('$', file);
8363 }
8364 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8365 || GET_CODE (x) == LABEL_REF)
8366 {
8367 if (ASSEMBLER_DIALECT == ASM_ATT)
8368 putc ('$', file);
8369 else
8370 fputs ("OFFSET FLAT:", file);
8371 }
8372 }
8373 if (GET_CODE (x) == CONST_INT)
8374 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8375 else if (flag_pic)
8376 output_pic_addr_const (file, x, code);
8377 else
8378 output_addr_const (file, x);
8379 }
8380}
8381
8382/* Print a memory operand whose address is ADDR. */
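/* The same address is rendered as disp(base,index,scale) in AT&T syntax
   and with square brackets in Intel syntax, e.g. 8(%ebx,%eax,4) versus
   [ebx+8+eax*4].  */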
8383
8384void
8385print_operand_address (FILE *file, rtx addr)
8386{
8387 struct ix86_address parts;
8388 rtx base, index, disp;
8389 int scale;
8390 int ok = ix86_decompose_address (addr, &parts);
8391
8392 gcc_assert (ok);
8393
8394 base = parts.base;
8395 index = parts.index;
8396 disp = parts.disp;
8397 scale = parts.scale;
8398
8399 switch (parts.seg)
8400 {
8401 case SEG_DEFAULT:
8402 break;
8403 case SEG_FS:
8404 case SEG_GS:
8405 if (USER_LABEL_PREFIX[0] == 0)
8406 putc ('%', file);
8407 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8408 break;
8409 default:
8410 gcc_unreachable ();
8411 }
8412
8413 if (!base && !index)
8414 {
8415 /* Displacement only requires special attention. */
8416
8417 if (GET_CODE (disp) == CONST_INT)
8418 {
8419 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8420 {
8421 if (USER_LABEL_PREFIX[0] == 0)
8422 putc ('%', file);
8423 fputs ("ds:", file);
8424 }
8425 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8426 }
8427 else if (flag_pic)
8428 output_pic_addr_const (file, disp, 0);
8429 else
8430 output_addr_const (file, disp);
8431
8432 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8433 if (TARGET_64BIT)
8434 {
8435 if (GET_CODE (disp) == CONST
8436 && GET_CODE (XEXP (disp, 0)) == PLUS
8437 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8438 disp = XEXP (XEXP (disp, 0), 0);
8439 if (GET_CODE (disp) == LABEL_REF
8440 || (GET_CODE (disp) == SYMBOL_REF
8441 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8442 fputs ("(%rip)", file);
8443 }
8444 }
8445 else
8446 {
8447 if (ASSEMBLER_DIALECT == ASM_ATT)
8448 {
8449 if (disp)
8450 {
8451 if (flag_pic)
8452 output_pic_addr_const (file, disp, 0);
8453 else if (GET_CODE (disp) == LABEL_REF)
8454 output_asm_label (disp);
8455 else
8456 output_addr_const (file, disp);
8457 }
8458
8459 putc ('(', file);
8460 if (base)
8461 print_reg (base, 0, file);
8462 if (index)
8463 {
8464 putc (',', file);
8465 print_reg (index, 0, file);
8466 if (scale != 1)
8467 fprintf (file, ",%d", scale);
8468 }
8469 putc (')', file);
8470 }
8471 else
8472 {
8473 rtx offset = NULL_RTX;
8474
8475 if (disp)
8476 {
8477 /* Pull out the offset of a symbol; print any symbol itself. */
8478 if (GET_CODE (disp) == CONST
8479 && GET_CODE (XEXP (disp, 0)) == PLUS
8480 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8481 {
8482 offset = XEXP (XEXP (disp, 0), 1);
8483 disp = gen_rtx_CONST (VOIDmode,
8484 XEXP (XEXP (disp, 0), 0));
8485 }
8486
8487 if (flag_pic)
8488 output_pic_addr_const (file, disp, 0);
8489 else if (GET_CODE (disp) == LABEL_REF)
8490 output_asm_label (disp);
8491 else if (GET_CODE (disp) == CONST_INT)
8492 offset = disp;
8493 else
8494 output_addr_const (file, disp);
8495 }
8496
8497 putc ('[', file);
8498 if (base)
8499 {
8500 print_reg (base, 0, file);
8501 if (offset)
8502 {
8503 if (INTVAL (offset) >= 0)
8504 putc ('+', file);
8505 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8506 }
8507 }
8508 else if (offset)
8509 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8510 else
8511 putc ('0', file);
8512
8513 if (index)
8514 {
8515 putc ('+', file);
8516 print_reg (index, 0, file);
8517 if (scale != 1)
8518 fprintf (file, "*%d", scale);
8519 }
8520 putc (']', file);
8521 }
8522 }
8523}
8524
8525bool
8526output_addr_const_extra (FILE *file, rtx x)
8527{
8528 rtx op;
8529
8530 if (GET_CODE (x) != UNSPEC)
8531 return false;
8532
8533 op = XVECEXP (x, 0, 0);
8534 switch (XINT (x, 1))
8535 {
8536 case UNSPEC_GOTTPOFF:
8537 output_addr_const (file, op);
8538 /* FIXME: This might be @TPOFF in Sun ld. */
8539 fputs ("@GOTTPOFF", file);
8540 break;
8541 case UNSPEC_TPOFF:
8542 output_addr_const (file, op);
8543 fputs ("@TPOFF", file);
8544 break;
8545 case UNSPEC_NTPOFF:
8546 output_addr_const (file, op);
8547 if (TARGET_64BIT)
8548 fputs ("@TPOFF", file);
8549 else
8550 fputs ("@NTPOFF", file);
8551 break;
8552 case UNSPEC_DTPOFF:
8553 output_addr_const (file, op);
8554 fputs ("@DTPOFF", file);
8555 break;
8556 case UNSPEC_GOTNTPOFF:
8557 output_addr_const (file, op);
8558 if (TARGET_64BIT)
8559 fputs ("@GOTTPOFF(%rip)", file);
8560 else
8561 fputs ("@GOTNTPOFF", file);
8562 break;
8563 case UNSPEC_INDNTPOFF:
8564 output_addr_const (file, op);
8565 fputs ("@INDNTPOFF", file);
8566 break;
8567
8568 default:
8569 return false;
8570 }
8571
8572 return true;
8573}
8574
8575/* Split one or more DImode RTL references into pairs of SImode
8576 references. The RTL can be REG, offsettable MEM, integer constant, or
8577 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8578 split and "num" is its length. lo_half and hi_half are output arrays
8579 that parallel "operands". */
8580
8581void
8582split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8583{
8584 while (num--)
8585 {
8586 rtx op = operands[num];
8587
8588      /* simplify_subreg refuses to split volatile memory addresses,
8589	 but we still have to handle them.  */
8590 if (GET_CODE (op) == MEM)
8591 {
8592 lo_half[num] = adjust_address (op, SImode, 0);
8593 hi_half[num] = adjust_address (op, SImode, 4);
8594 }
8595 else
8596 {
8597 lo_half[num] = simplify_gen_subreg (SImode, op,
8598 GET_MODE (op) == VOIDmode
8599 ? DImode : GET_MODE (op), 0);
8600 hi_half[num] = simplify_gen_subreg (SImode, op,
8601 GET_MODE (op) == VOIDmode
8602 ? DImode : GET_MODE (op), 4);
8603 }
8604 }
8605}
8606/* Split one or more TImode RTL references into pairs of DImode
8607 references. The RTL can be REG, offsettable MEM, integer constant, or
8608   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
8609 split and "num" is its length. lo_half and hi_half are output arrays
8610 that parallel "operands". */
8611
8612void
8613split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8614{
8615 while (num--)
8616 {
8617 rtx op = operands[num];
8618
8619      /* simplify_subreg refuses to split volatile memory addresses, but we
8620	 still have to handle them.  */
8621 if (GET_CODE (op) == MEM)
8622 {
8623 lo_half[num] = adjust_address (op, DImode, 0);
8624 hi_half[num] = adjust_address (op, DImode, 8);
8625 }
8626 else
8627 {
8628 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8629 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8630 }
8631 }
8632}
8633
8634/* Output code to perform a 387 binary operation in INSN, one of PLUS,
8635 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8636 is the expression of the binary operation. The output may either be
8637 emitted here, or returned to the caller, like all output_* functions.
8638
8639 There is no guarantee that the operands are the same mode, as they
8640 might be within FLOAT or FLOAT_EXTEND expressions. */
8641
8642#ifndef SYSV386_COMPAT
8643/* Set to 1 for compatibility with brain-damaged assemblers. No-one
8644 wants to fix the assemblers because that causes incompatibility
8645 with gcc. No-one wants to fix gcc because that causes
8646 incompatibility with assemblers... You can use the option of
8647 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8648#define SYSV386_COMPAT 1
8649#endif
8650
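/* In the output templates below, text of the form {att|intel} is expanded by
   output_asm_insn according to ASSEMBLER_DIALECT: the part before the '|' is
   used for AT&T syntax and the part after it for Intel syntax.  */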
8651const char *
8652output_387_binary_op (rtx insn, rtx *operands)
8653{
8654 static char buf[30];
8655 const char *p;
8656 const char *ssep;
8657 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8658
8659#ifdef ENABLE_CHECKING
8660 /* Even if we do not want to check the inputs, this documents input
8661 constraints. Which helps in understanding the following code. */
8662 if (STACK_REG_P (operands[0])
8663 && ((REG_P (operands[1])
8664 && REGNO (operands[0]) == REGNO (operands[1])
8665 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8666 || (REG_P (operands[2])
8667 && REGNO (operands[0]) == REGNO (operands[2])
8668 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8669 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8670 ; /* ok */
8671 else
8672 gcc_assert (is_sse);
8673#endif
8674
8675 switch (GET_CODE (operands[3]))
8676 {
8677 case PLUS:
8678 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8679 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8680 p = "fiadd";
8681 else
8682 p = "fadd";
8683 ssep = "add";
8684 break;
8685
8686 case MINUS:
8687 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8688 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8689 p = "fisub";
8690 else
8691 p = "fsub";
8692 ssep = "sub";
8693 break;
8694
8695 case MULT:
8696 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8697 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8698 p = "fimul";
8699 else
8700 p = "fmul";
8701 ssep = "mul";
8702 break;
8703
8704 case DIV:
8705 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8706 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8707 p = "fidiv";
8708 else
8709 p = "fdiv";
8710 ssep = "div";
8711 break;
8712
8713 default:
8714 gcc_unreachable ();
8715 }
8716
8717 if (is_sse)
8718 {
8719 strcpy (buf, ssep);
8720 if (GET_MODE (operands[0]) == SFmode)
8721 strcat (buf, "ss\t{%2, %0|%0, %2}");
8722 else
8723 strcat (buf, "sd\t{%2, %0|%0, %2}");
8724 return buf;
8725 }
8726 strcpy (buf, p);
8727
8728 switch (GET_CODE (operands[3]))
8729 {
8730 case MULT:
8731 case PLUS:
8732 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8733 {
8734 rtx temp = operands[2];
8735 operands[2] = operands[1];
8736 operands[1] = temp;
8737 }
8738
8739	  /* We now know operands[0] == operands[1].  */
8740
8741 if (GET_CODE (operands[2]) == MEM)
8742 {
8743 p = "%z2\t%2";
8744 break;
8745 }
8746
8747 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8748 {
8749 if (STACK_TOP_P (operands[0]))
8750 /* How is it that we are storing to a dead operand[2]?
8751 Well, presumably operands[1] is dead too. We can't
8752 store the result to st(0) as st(0) gets popped on this
8753 instruction. Instead store to operands[2] (which I
8754 think has to be st(1)). st(1) will be popped later.
8755 gcc <= 2.8.1 didn't have this check and generated
8756 assembly code that the Unixware assembler rejected. */
8757 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8758 else
8759 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8760 break;
8761 }
8762
8763 if (STACK_TOP_P (operands[0]))
8764 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8765 else
8766 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8767 break;
8768
8769 case MINUS:
8770 case DIV:
8771 if (GET_CODE (operands[1]) == MEM)
8772 {
8773 p = "r%z1\t%1";
8774 break;
8775 }
8776
8777 if (GET_CODE (operands[2]) == MEM)
8778 {
8779 p = "%z2\t%2";
8780 break;
8781 }
8782
8783 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8784 {
8785#if SYSV386_COMPAT
8786 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8787 derived assemblers, confusingly reverse the direction of
8788 the operation for fsub{r} and fdiv{r} when the
8789 destination register is not st(0). The Intel assembler
8790 doesn't have this brain damage. Read !SYSV386_COMPAT to
8791 figure out what the hardware really does. */
8792 if (STACK_TOP_P (operands[0]))
8793 p = "{p\t%0, %2|rp\t%2, %0}";
8794 else
8795 p = "{rp\t%2, %0|p\t%0, %2}";
8796#else
8797 if (STACK_TOP_P (operands[0]))
8798 /* As above for fmul/fadd, we can't store to st(0). */
8799 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8800 else
8801 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8802#endif
8803 break;
8804 }
8805
8806 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8807 {
8808#if SYSV386_COMPAT
8809 if (STACK_TOP_P (operands[0]))
8810 p = "{rp\t%0, %1|p\t%1, %0}";
8811 else
8812 p = "{p\t%1, %0|rp\t%0, %1}";
8813#else
8814 if (STACK_TOP_P (operands[0]))
8815 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8816 else
8817 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8818#endif
8819 break;
8820 }
8821
8822 if (STACK_TOP_P (operands[0]))
8823 {
8824 if (STACK_TOP_P (operands[1]))
8825 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8826 else
8827 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8828 break;
8829 }
8830 else if (STACK_TOP_P (operands[1]))
8831 {
8832#if SYSV386_COMPAT
8833 p = "{\t%1, %0|r\t%0, %1}";
8834#else
8835 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8836#endif
8837 }
8838 else
8839 {
8840#if SYSV386_COMPAT
8841 p = "{r\t%2, %0|\t%0, %2}";
8842#else
8843 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8844#endif
8845 }
8846 break;
8847
8848 default:
8849 gcc_unreachable ();
8850 }
8851
8852 strcat (buf, p);
8853 return buf;
8854}
8855
8856/* Return needed mode for entity in optimize_mode_switching pass. */
8857
8858int
8859ix86_mode_needed (int entity, rtx insn)
8860{
8861 enum attr_i387_cw mode;
8862
8863   /* The mode UNINITIALIZED is used to store the control word after a
8864      function call or ASM pattern.  The mode ANY specifies that the function
8865      has no requirements on the control word and makes no changes in the
8866      bits we are interested in.  */
8867
8868 if (CALL_P (insn)
8869 || (NONJUMP_INSN_P (insn)
8870 && (asm_noperands (PATTERN (insn)) >= 0
8871 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8872 return I387_CW_UNINITIALIZED;
8873
8874 if (recog_memoized (insn) < 0)
8875 return I387_CW_ANY;
8876
8877 mode = get_attr_i387_cw (insn);
8878
8879 switch (entity)
8880 {
8881 case I387_TRUNC:
8882 if (mode == I387_CW_TRUNC)
8883 return mode;
8884 break;
8885
8886 case I387_FLOOR:
8887 if (mode == I387_CW_FLOOR)
8888 return mode;
8889 break;
8890
8891 case I387_CEIL:
8892 if (mode == I387_CW_CEIL)
8893 return mode;
8894 break;
8895
8896 case I387_MASK_PM:
8897 if (mode == I387_CW_MASK_PM)
8898 return mode;
8899 break;
8900
8901 default:
8902 gcc_unreachable ();
8903 }
8904
8905 return I387_CW_ANY;
8906}
8907
8908/* Output code to initialize control word copies used by trunc?f?i and
8909    rounding patterns.  CURRENT_MODE is set to the current control word,
8910    while NEW_MODE is set to the new control word.  */
8911
8912void
8913emit_i387_cw_initialization (int mode)
8914{
8915 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8916 rtx new_mode;
8917
8918 int slot;
8919
8920 rtx reg = gen_reg_rtx (HImode);
8921
8922 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8923 emit_move_insn (reg, stored_mode);
8924
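  /* Bits 10 and 11 of the x87 control word form the rounding-control (RC)
     field: 00 = to nearest, 01 = down, 10 = up, 11 = toward zero; bit 5
     (0x0020) masks the precision exception.  This is where the 0x0c00,
     0x0400, 0x0800 and 0x0020 constants used below come from.  */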
8925 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8926 {
8927 switch (mode)
8928 {
8929 case I387_CW_TRUNC:
8930 /* round toward zero (truncate) */
8931 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8932 slot = SLOT_CW_TRUNC;
8933 break;
8934
8935 case I387_CW_FLOOR:
8936 /* round down toward -oo */
8937 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8938 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8939 slot = SLOT_CW_FLOOR;
8940 break;
8941
8942 case I387_CW_CEIL:
8943 /* round up toward +oo */
8944 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8945 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8946 slot = SLOT_CW_CEIL;
8947 break;
8948
8949 case I387_CW_MASK_PM:
8950 /* mask precision exception for nearbyint() */
8951 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8952 slot = SLOT_CW_MASK_PM;
8953 break;
8954
8955 default:
8956 gcc_unreachable ();
8957 }
8958 }
8959 else
8960 {
8961 switch (mode)
8962 {
8963 case I387_CW_TRUNC:
8964 /* round toward zero (truncate) */
8965 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8966 slot = SLOT_CW_TRUNC;
8967 break;
8968
8969 case I387_CW_FLOOR:
8970 /* round down toward -oo */
8971 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8972 slot = SLOT_CW_FLOOR;
8973 break;
8974
8975 case I387_CW_CEIL:
8976 /* round up toward +oo */
8977 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8978 slot = SLOT_CW_CEIL;
8979 break;
8980
8981 case I387_CW_MASK_PM:
8982 /* mask precision exception for nearbyint() */
8983 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8984 slot = SLOT_CW_MASK_PM;
8985 break;
8986
8987 default:
8988 gcc_unreachable ();
8989 }
8990 }
8991
8992 gcc_assert (slot < MAX_386_STACK_LOCALS);
8993
8994 new_mode = assign_386_stack_local (HImode, slot);
8995 emit_move_insn (new_mode, reg);
8996}
8997
8998/* Output code for INSN to convert a float to a signed int. OPERANDS
8999 are the insn operands. The output may be [HSD]Imode and the input
9000 operand may be [SDX]Fmode. */
9001
9002const char *
9003output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9004{
9005 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9006 int dimode_p = GET_MODE (operands[0]) == DImode;
9007 int round_mode = get_attr_i387_cw (insn);
9008
9009 /* Jump through a hoop or two for DImode, since the hardware has no
9010 non-popping instruction. We used to do this a different way, but
9011 that was somewhat fragile and broke with post-reload splitters. */
9012 if ((dimode_p || fisttp) && !stack_top_dies)
9013 output_asm_insn ("fld\t%y1", operands);
9014
9015 gcc_assert (STACK_TOP_P (operands[1]));
9016 gcc_assert (GET_CODE (operands[0]) == MEM);
9017
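  /* In the non-fisttp path below, "fldcw %3" switches to the control word
     with the desired rounding mode and "fldcw %2" restores the previous
     control word afterwards.  */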
9018 if (fisttp)
9019 output_asm_insn ("fisttp%z0\t%0", operands);
9020 else
9021 {
9022 if (round_mode != I387_CW_ANY)
9023 output_asm_insn ("fldcw\t%3", operands);
9024 if (stack_top_dies || dimode_p)
9025 output_asm_insn ("fistp%z0\t%0", operands);
9026 else
9027 output_asm_insn ("fist%z0\t%0", operands);
9028 if (round_mode != I387_CW_ANY)
9029 output_asm_insn ("fldcw\t%2", operands);
9030 }
9031
9032 return "";
9033}
9034
9035/* Output code for x87 ffreep insn. The OPNO argument, which may only
9036 have the values zero or one, indicates the ffreep insn's operand
9037 from the OPERANDS array. */
9038
9039static const char *
9040output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9041{
9042 if (TARGET_USE_FFREEP)
9043#if HAVE_AS_IX86_FFREEP
9044 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9045#else
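  /* ffreep %st(i) encodes as the two bytes DF C0+i; when the assembler does
     not know the mnemonic, emit those raw bytes as a little-endian .word.  */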
9046 switch (REGNO (operands[opno]))
9047 {
9048 case FIRST_STACK_REG + 0: return ".word\t0xc0df";
9049 case FIRST_STACK_REG + 1: return ".word\t0xc1df";
9050 case FIRST_STACK_REG + 2: return ".word\t0xc2df";
9051 case FIRST_STACK_REG + 3: return ".word\t0xc3df";
9052 case FIRST_STACK_REG + 4: return ".word\t0xc4df";
9053 case FIRST_STACK_REG + 5: return ".word\t0xc5df";
9054 case FIRST_STACK_REG + 6: return ".word\t0xc6df";
9055 case FIRST_STACK_REG + 7: return ".word\t0xc7df";
9056 }
9057#endif
9058
9059 return opno ? "fstp\t%y1" : "fstp\t%y0";
9060}
9061
9062
9063/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9064 should be used. UNORDERED_P is true when fucom should be used. */
9065
9066const char *
9067output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9068{
9069 int stack_top_dies;
9070 rtx cmp_op0, cmp_op1;
9071 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9072
9073 if (eflags_p)
9074 {
9075 cmp_op0 = operands[0];
9076 cmp_op1 = operands[1];
9077 }
9078 else
9079 {
9080 cmp_op0 = operands[1];
9081 cmp_op1 = operands[2];
9082 }
9083
9084 if (is_sse)
9085 {
9086 if (GET_MODE (operands[0]) == SFmode)
9087 if (unordered_p)
9088 return "ucomiss\t{%1, %0|%0, %1}";
9089 else
9090 return "comiss\t{%1, %0|%0, %1}";
9091 else
9092 if (unordered_p)
9093 return "ucomisd\t{%1, %0|%0, %1}";
9094 else
9095 return "comisd\t{%1, %0|%0, %1}";
9096 }
9097
9098 gcc_assert (STACK_TOP_P (cmp_op0));
9099
9100 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9101
9102 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9103 {
9104 if (stack_top_dies)
9105 {
9106 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9107 return output_387_ffreep (operands, 1);
9108 }
9109 else
9110 return "ftst\n\tfnstsw\t%0";
9111 }
9112
9113 if (STACK_REG_P (cmp_op1)
9114 && stack_top_dies
9115 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9116 && REGNO (cmp_op1) != FIRST_STACK_REG)
9117 {
9118       /* If the top of the 387 stack dies, and the other operand
9119	 is also a stack register that dies, then this must be an
9120	 `fcompp' float compare.  */
9121
9122 if (eflags_p)
9123 {
9124 /* There is no double popping fcomi variant. Fortunately,
9125 eflags is immune from the fstp's cc clobbering. */
9126 if (unordered_p)
9127 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9128 else
9129 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9130 return output_387_ffreep (operands, 0);
9131 }
9132 else
9133 {
9134 if (unordered_p)
9135 return "fucompp\n\tfnstsw\t%0";
9136 else
9137 return "fcompp\n\tfnstsw\t%0";
9138 }
9139 }
9140 else
9141 {
9142 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9143
9144 static const char * const alt[16] =
9145 {
9146 "fcom%z2\t%y2\n\tfnstsw\t%0",
9147 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9148 "fucom%z2\t%y2\n\tfnstsw\t%0",
9149 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9150
9151 "ficom%z2\t%y2\n\tfnstsw\t%0",
9152 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9153 NULL,
9154 NULL,
9155
9156 "fcomi\t{%y1, %0|%0, %y1}",
9157 "fcomip\t{%y1, %0|%0, %y1}",
9158 "fucomi\t{%y1, %0|%0, %y1}",
9159 "fucomip\t{%y1, %0|%0, %y1}",
9160
9161 NULL,
9162 NULL,
9163 NULL,
9164 NULL
9165 };
9166
9167 int mask;
9168 const char *ret;
9169
9170 mask = eflags_p << 3;
9171 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9172 mask |= unordered_p << 1;
9173 mask |= stack_top_dies;
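      /* For example, eflags_p = 1, integer operand = 0, unordered_p = 1 and
	 stack_top_dies = 1 give mask 11, which selects "fucomip" above.  */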
9174
9175 gcc_assert (mask < 16);
9176 ret = alt[mask];
9177 gcc_assert (ret);
9178
9179 return ret;
9180 }
9181}
9182
9183void
9184ix86_output_addr_vec_elt (FILE *file, int value)
9185{
9186 const char *directive = ASM_LONG;
9187
9188#ifdef ASM_QUAD
9189 if (TARGET_64BIT)
9190 directive = ASM_QUAD;
9191#else
9192 gcc_assert (!TARGET_64BIT);
9193#endif
9194
9195 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9196}
9197
9198void
9199ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9200{
9201 if (TARGET_64BIT)
9202 fprintf (file, "%s%s%d-%s%d\n",
9203 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9204 else if (HAVE_AS_GOTOFF_IN_DATA)
9205 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9206#if TARGET_MACHO
9207 else if (TARGET_MACHO)
9208 {
9209 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9210 machopic_output_function_base_name (file);
9211 fprintf(file, "\n");
9212 }
9213#endif
9214 else
9215 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9216 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9217}
9218
9219/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9220 for the target. */
9221
9222void
9223ix86_expand_clear (rtx dest)
9224{
9225 rtx tmp;
9226
9227 /* We play register width games, which are only valid after reload. */
9228 gcc_assert (reload_completed);
9229
9230 /* Avoid HImode and its attendant prefix byte. */
9231 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9232 dest = gen_rtx_REG (SImode, REGNO (dest));
9233
9234 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9235
9236 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9237 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9238 {
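      /* Hard register 17 is FLAGS_REG on this port, so the CLOBBER marks the
	 flags as clobbered, just as the movsi_xor and movdi_xor_rex64
	 patterns do.  */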
9239 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9240 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9241 }
9242
9243 emit_insn (tmp);
9244}
9245
9246/* X is an unchanging MEM. If it is a constant pool reference, return
9247 the constant pool rtx, else NULL. */
9248
9249rtx
9250maybe_get_pool_constant (rtx x)
9251{
9252 x = ix86_delegitimize_address (XEXP (x, 0));
9253
9254 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9255 return get_pool_constant (x);
9256
9257 return NULL_RTX;
9258}
9259
9260void
9261ix86_expand_move (enum machine_mode mode, rtx operands[])
9262{
9263 int strict = (reload_in_progress || reload_completed);
9264 rtx op0, op1;
9265 enum tls_model model;
9266
9267 op0 = operands[0];
9268 op1 = operands[1];
9269
9270 if (GET_CODE (op1) == SYMBOL_REF)
9271 {
9272 model = SYMBOL_REF_TLS_MODEL (op1);
9273 if (model)
9274 {
9275 op1 = legitimize_tls_address (op1, model, true);
9276 op1 = force_operand (op1, op0);
9277 if (op1 == op0)
9278 return;
9279 }
9280 }
9281 else if (GET_CODE (op1) == CONST
9282 && GET_CODE (XEXP (op1, 0)) == PLUS
9283 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9284 {
9285 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9286 if (model)
9287 {
9288 rtx addend = XEXP (XEXP (op1, 0), 1);
9289 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9290 op1 = force_operand (op1, NULL);
9291 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9292 op0, 1, OPTAB_DIRECT);
9293 if (op1 == op0)
9294 return;
9295 }
9296 }
9297
9298 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9299 {
9300 if (TARGET_MACHO && !TARGET_64BIT)
9301 {
9302#if TARGET_MACHO
9303 if (MACHOPIC_PURE)
9304 {
9305 rtx temp = ((reload_in_progress
9306 || ((op0 && GET_CODE (op0) == REG)
9307 && mode == Pmode))
9308 ? op0 : gen_reg_rtx (Pmode));
9309 op1 = machopic_indirect_data_reference (op1, temp);
9310 op1 = machopic_legitimize_pic_address (op1, mode,
9311 temp == op1 ? 0 : temp);
9312 }
9313 else if (MACHOPIC_INDIRECT)
9314 op1 = machopic_indirect_data_reference (op1, 0);
9315 if (op0 == op1)
9316 return;
9317#endif
9318 }
9319 else
9320 {
9321 if (GET_CODE (op0) == MEM)
9322 op1 = force_reg (Pmode, op1);
9323 else
9324 op1 = legitimize_address (op1, op1, Pmode);
9325 }
9326 }
9327 else
9328 {
9329 if (GET_CODE (op0) == MEM
9330 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9331 || !push_operand (op0, mode))
9332 && GET_CODE (op1) == MEM)
9333 op1 = force_reg (mode, op1);
9334
9335 if (push_operand (op0, mode)
9336 && ! general_no_elim_operand (op1, mode))
9337 op1 = copy_to_mode_reg (mode, op1);
9338
9339 /* Force large constants in 64bit compilation into register
9340 to get them CSEed. */
9341 if (TARGET_64BIT && mode == DImode
9342 && immediate_operand (op1, mode)
9343 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9344 && !register_operand (op0, mode)
9345 && optimize && !reload_completed && !reload_in_progress)
9346 op1 = copy_to_mode_reg (mode, op1);
9347
9348 if (FLOAT_MODE_P (mode))
9349 {
9350 /* If we are loading a floating point constant to a register,
9351 force the value to memory now, since we'll get better code
9352 out the back end. */
9353
9354 if (strict)
9355 ;
9356 else if (GET_CODE (op1) == CONST_DOUBLE)
9357 {
9358 op1 = validize_mem (force_const_mem (mode, op1));
9359 if (!register_operand (op0, mode))
9360 {
9361 rtx temp = gen_reg_rtx (mode);
9362 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9363 emit_move_insn (op0, temp);
9364 return;
9365 }
9366 }
9367 }
9368 }
9369
9370 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9371}
9372
9373void
9374ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9375{
9376 rtx op0 = operands[0], op1 = operands[1];
9377
9378 /* Force constants other than zero into memory. We do not know how
9379 the instructions used to build constants modify the upper 64 bits
9380      of the register; once we have that information we may be able
9381 to handle some of them more efficiently. */
9382 if ((reload_in_progress | reload_completed) == 0
9383 && register_operand (op0, mode)
9384 && CONSTANT_P (op1)
9385 && standard_sse_constant_p (op1) <= 0)
9386 op1 = validize_mem (force_const_mem (mode, op1));
9387
9388 /* Make operand1 a register if it isn't already. */
9389 if (!no_new_pseudos
9390 && !register_operand (op0, mode)
9391 && !register_operand (op1, mode))
9392 {
9393 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9394 return;
9395 }
9396
9397 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9398}
9399
9400/* Implement the movmisalign patterns for SSE. Non-SSE modes go
9401 straight to ix86_expand_vector_move. */
9402
9403void
9404ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9405{
9406 rtx op0, op1, m;
9407
9408 op0 = operands[0];
9409 op1 = operands[1];
9410
9411 if (MEM_P (op1))
9412 {
9413 /* If we're optimizing for size, movups is the smallest. */
9414 if (optimize_size)
9415 {
9416 op0 = gen_lowpart (V4SFmode, op0);
9417 op1 = gen_lowpart (V4SFmode, op1);
9418 emit_insn (gen_sse_movups (op0, op1));
9419 return;
9420 }
9421
9422 /* ??? If we have typed data, then it would appear that using
9423 movdqu is the only way to get unaligned data loaded with
9424 integer type. */
9425 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9426 {
9427 op0 = gen_lowpart (V16QImode, op0);
9428 op1 = gen_lowpart (V16QImode, op1);
9429 emit_insn (gen_sse2_movdqu (op0, op1));
9430 return;
9431 }
9432
9433 if (TARGET_SSE2 && mode == V2DFmode)
9434 {
9435 rtx zero;
9436
9437 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9438 {
9439 op0 = gen_lowpart (V2DFmode, op0);
9440 op1 = gen_lowpart (V2DFmode, op1);
9441 emit_insn (gen_sse2_movupd (op0, op1));
9442 return;
9443 }
9444
9445 /* When SSE registers are split into halves, we can avoid
9446 writing to the top half twice. */
9447 if (TARGET_SSE_SPLIT_REGS)
9448 {
9449 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9450 zero = op0;
9451 }
9452 else
9453 {
9454 /* ??? Not sure about the best option for the Intel chips.
9455 The following would seem to satisfy; the register is
9456 entirely cleared, breaking the dependency chain. We
9457 then store to the upper half, with a dependency depth
9458 of one. A rumor has it that Intel recommends two movsd
9459 followed by an unpacklpd, but this is unconfirmed. And
9460 given that the dependency depth of the unpacklpd would
9461 still be one, I'm not sure why this would be better. */
9462 zero = CONST0_RTX (V2DFmode);
9463 }
9464
9465 m = adjust_address (op1, DFmode, 0);
9466 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9467 m = adjust_address (op1, DFmode, 8);
9468 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9469 }
9470 else
9471 {
9472 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9473 {
9474 op0 = gen_lowpart (V4SFmode, op0);
9475 op1 = gen_lowpart (V4SFmode, op1);
9476 emit_insn (gen_sse_movups (op0, op1));
9477 return;
9478 }
9479
9280 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9281 emit_move_insn (op0, CONST0_RTX (mode));
9282 else
9283 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9284
9285 if (mode != V4SFmode)
9286 op0 = gen_lowpart (V4SFmode, op0);
9287 m = adjust_address (op1, V2SFmode, 0);
9288 emit_insn (gen_sse_loadlps (op0, op0, m));
9289 m = adjust_address (op1, V2SFmode, 8);
9290 emit_insn (gen_sse_loadhps (op0, op0, m));
9291 }
9292 }
9293 else if (MEM_P (op0))
9294 {
9295 /* If we're optimizing for size, movups is the smallest. */
9296 if (optimize_size)
9297 {
9298 op0 = gen_lowpart (V4SFmode, op0);
9299 op1 = gen_lowpart (V4SFmode, op1);
9300 emit_insn (gen_sse_movups (op0, op1));
9301 return;
9302 }
9303
9304 /* ??? Similar to above, only less clear because of quote
9305 typeless stores unquote. */
9306 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9307 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9308 {
9309 op0 = gen_lowpart (V16QImode, op0);
9310 op1 = gen_lowpart (V16QImode, op1);
9311 emit_insn (gen_sse2_movdqu (op0, op1));
9312 return;
9313 }
9314
9315 if (TARGET_SSE2 && mode == V2DFmode)
9316 {
9317 m = adjust_address (op0, DFmode, 0);
9318 emit_insn (gen_sse2_storelpd (m, op1));
9319 m = adjust_address (op0, DFmode, 8);
9320 emit_insn (gen_sse2_storehpd (m, op1));
9321 }
9322 else
9323 {
9324 if (mode != V4SFmode)
9325 op1 = gen_lowpart (V4SFmode, op1);
9326 m = adjust_address (op0, V2SFmode, 0);
9327 emit_insn (gen_sse_storelps (m, op1));
9328 m = adjust_address (op0, V2SFmode, 8);
9329 emit_insn (gen_sse_storehps (m, op1));
9330 }
9331 }
9332 else
9333 gcc_unreachable ();
9334}
9335
9336/* Expand a push in MODE. This is some mode for which we do not support
9337 proper push instructions, at least from the registers that we expect
9338 the value to live in. */
9339
9340void
9341ix86_expand_push (enum machine_mode mode, rtx x)
9342{
9343 rtx tmp;
9344
9345 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9346 GEN_INT (-GET_MODE_SIZE (mode)),
9347 stack_pointer_rtx, 1, OPTAB_DIRECT);
9348 if (tmp != stack_pointer_rtx)
9349 emit_move_insn (stack_pointer_rtx, tmp);
9350
9351 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9352 emit_move_insn (tmp, x);
9353}
9354
9355/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9356 destination to use for the operation. If different from the true
9357 destination in operands[0], a copy operation will be required. */
9358
9359rtx
9360ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9361 rtx operands[])
9362{
9363 int matching_memory;
9364 rtx src1, src2, dst;
9365
9366 dst = operands[0];
9367 src1 = operands[1];
9368 src2 = operands[2];
9369
9370 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9371 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9372 && (rtx_equal_p (dst, src2)
9373 || immediate_operand (src1, mode)))
9374 {
9375 rtx temp = src1;
9376 src1 = src2;
9377 src2 = temp;
9378 }
9379
9380 /* If the destination is memory, and we do not have matching source
9381 operands, do things in registers. */
9382 matching_memory = 0;
9383 if (GET_CODE (dst) == MEM)
9384 {
9385 if (rtx_equal_p (dst, src1))
9386 matching_memory = 1;
9387 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9388 && rtx_equal_p (dst, src2))
9389 matching_memory = 2;
9390 else
9391 dst = gen_reg_rtx (mode);
9392 }
9393
9394 /* Both source operands cannot be in memory. */
9395 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9396 {
9397 if (matching_memory != 2)
9398 src2 = force_reg (mode, src2);
9399 else
9400 src1 = force_reg (mode, src1);
9401 }
9402
9403 /* If the operation is not commutable, source 1 cannot be a constant
9404 or non-matching memory. */
9405 if ((CONSTANT_P (src1)
9406 || (!matching_memory && GET_CODE (src1) == MEM))
9407 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9408 src1 = force_reg (mode, src1);
9409
9410 src1 = operands[1] = src1;
9411 src2 = operands[2] = src2;
9412 return dst;
9413}
9414
9415/* Similarly, but assume that the destination has already been
9416 set up properly. */
9417
9418void
9419ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9420 enum machine_mode mode, rtx operands[])
9421{
9422 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9423 gcc_assert (dst == operands[0]);
9424}
9425
9426/* Attempt to expand a binary operator. Make the expansion closer to the
9427    actual machine, than just general_operand, which will allow 3 separate
9428 memory references (one output, two input) in a single insn. */
9429
9430void
9431ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9432 rtx operands[])
9433{
9434 rtx src1, src2, dst, op, clob;
9435
9436 dst = ix86_fixup_binary_operands (code, mode, operands);
9437 src1 = operands[1];
9438 src2 = operands[2];
9439
9440 /* Emit the instruction. */
9441
9442 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9443 if (reload_in_progress)
9444 {
9445 /* Reload doesn't know about the flags register, and doesn't know that
9446 it doesn't want to clobber it. We can only do this with PLUS. */
9447 gcc_assert (code == PLUS);
9448 emit_insn (op);
9449 }
9450 else
9451 {
9452 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9453 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9454 }
9455
9456 /* Fix up the destination if needed. */
9457 if (dst != operands[0])
9458 emit_move_insn (operands[0], dst);
9459}
9460
9461/* Return TRUE or FALSE depending on whether the binary operator meets the
9462 appropriate constraints. */
9463
9464int
9465ix86_binary_operator_ok (enum rtx_code code,
9466 enum machine_mode mode ATTRIBUTE_UNUSED,
9467 rtx operands[3])
9468{
9469 /* Both source operands cannot be in memory. */
9470 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9471 return 0;
9472 /* If the operation is not commutable, source 1 cannot be a constant. */
9473 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9474 return 0;
9475 /* If the destination is memory, we must have a matching source operand. */
9476 if (GET_CODE (operands[0]) == MEM
9477 && ! (rtx_equal_p (operands[0], operands[1])
9478 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9479 && rtx_equal_p (operands[0], operands[2]))))
9480 return 0;
9481 /* If the operation is not commutable and the source 1 is memory, we must
9482 have a matching destination. */
9483 if (GET_CODE (operands[1]) == MEM
9484 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9485 && ! rtx_equal_p (operands[0], operands[1]))
9486 return 0;
9487 return 1;
9488}
9489
9490/* Attempt to expand a unary operator. Make the expansion closer to the
9491    actual machine, than just general_operand, which will allow 2 separate
9492 memory references (one output, one input) in a single insn. */
9493
9494void
9495ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9496 rtx operands[])
9497{
9498 int matching_memory;
9499 rtx src, dst, op, clob;
9500
9501 dst = operands[0];
9502 src = operands[1];
9503
9504 /* If the destination is memory, and we do not have matching source
9505 operands, do things in registers. */
9506 matching_memory = 0;
9507 if (MEM_P (dst))
9508 {
9509 if (rtx_equal_p (dst, src))
9510 matching_memory = 1;
9511 else
9512 dst = gen_reg_rtx (mode);
9513 }
9514
9515 /* When source operand is memory, destination must match. */
9516 if (MEM_P (src) && !matching_memory)
9517 src = force_reg (mode, src);
9518
9519 /* Emit the instruction. */
9520
9521 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9522 if (reload_in_progress || code == NOT)
9523 {
9524 /* Reload doesn't know about the flags register, and doesn't know that
9525 it doesn't want to clobber it. */
9526 gcc_assert (code == NOT);
9527 emit_insn (op);
9528 }
9529 else
9530 {
9531 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9532 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9533 }
9534
9535 /* Fix up the destination if needed. */
9536 if (dst != operands[0])
9537 emit_move_insn (operands[0], dst);
9538}
9539
9540/* Return TRUE or FALSE depending on whether the unary operator meets the
9541 appropriate constraints. */
9542
9543int
9544ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9545 enum machine_mode mode ATTRIBUTE_UNUSED,
9546 rtx operands[2] ATTRIBUTE_UNUSED)
9547{
9548 /* If one of operands is memory, source and destination must match. */
9549 if ((GET_CODE (operands[0]) == MEM
9550 || GET_CODE (operands[1]) == MEM)
9551 && ! rtx_equal_p (operands[0], operands[1]))
9552 return FALSE;
9553 return TRUE;
9554}
9555
9556/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9557 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9558 true, then replicate the mask for all elements of the vector register.
9559 If INVERT is true, then create a mask excluding the sign bit. */
9560
9561rtx
9562ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9563{
9564 enum machine_mode vec_mode;
9565 HOST_WIDE_INT hi, lo;
9566 int shift = 63;
9567 rtvec v;
9568 rtx mask;
9569
9570 /* Find the sign bit, sign extended to 2*HWI. */
9571 if (mode == SFmode)
9572 lo = 0x80000000, hi = lo < 0;
9573 else if (HOST_BITS_PER_WIDE_INT >= 64)
9574 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9575 else
9576 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
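  /* At this point LO/HI hold only the sign bit: 0x80000000 for SFmode, or
     bit 63 otherwise (split across LO and HI when HOST_WIDE_INT is only
     32 bits wide).  */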
9577
9578 if (invert)
9579 lo = ~lo, hi = ~hi;
9580
9581 /* Force this value into the low part of a fp vector constant. */
9582 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9583 mask = gen_lowpart (mode, mask);
9584
9585 if (mode == SFmode)
9586 {
9587 if (vect)
9588 v = gen_rtvec (4, mask, mask, mask, mask);
9589 else
9590 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9591 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9592 vec_mode = V4SFmode;
9593 }
9594 else
9595 {
9596 if (vect)
9597 v = gen_rtvec (2, mask, mask);
9598 else
9599 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9600 vec_mode = V2DFmode;
9601 }
9602
9603 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9604}
9605
9606/* Generate code for floating point ABS or NEG. */
9607
9608void
9609ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9610 rtx operands[])
9611{
9612 rtx mask, set, use, clob, dst, src;
9613 bool matching_memory;
9614 bool use_sse = false;
9615 bool vector_mode = VECTOR_MODE_P (mode);
9616 enum machine_mode elt_mode = mode;
9617
9618 if (vector_mode)
9619 {
9620 elt_mode = GET_MODE_INNER (mode);
9621 use_sse = true;
9622 }
9623 else if (TARGET_SSE_MATH)
9624 use_sse = SSE_FLOAT_MODE_P (mode);
9625
9626 /* NEG and ABS performed with SSE use bitwise mask operations.
9627 Create the appropriate mask now. */
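  /* NEG is implemented by XORing with a mask that has only the sign bit set;
     ABS ANDs with the inverted mask, which clears the sign bit.  */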
9628 if (use_sse)
9629 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9630 else
9631 mask = NULL_RTX;
9632
9633 dst = operands[0];
9634 src = operands[1];
9635
9636 /* If the destination is memory, and we don't have matching source
9637 operands or we're using the x87, do things in registers. */
9638 matching_memory = false;
9639 if (MEM_P (dst))
9640 {
9641 if (use_sse && rtx_equal_p (dst, src))
9642 matching_memory = true;
9643 else
9644 dst = gen_reg_rtx (mode);
9645 }
9646 if (MEM_P (src) && !matching_memory)
9647 src = force_reg (mode, src);
9648
9649 if (vector_mode)
9650 {
9651 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9652 set = gen_rtx_SET (VOIDmode, dst, set);
9653 emit_insn (set);
9654 }
9655 else
9656 {
9657 set = gen_rtx_fmt_e (code, mode, src);
9658 set = gen_rtx_SET (VOIDmode, dst, set);
9659 if (mask)
9660 {
9661 use = gen_rtx_USE (VOIDmode, mask);
9662 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9663 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9664 gen_rtvec (3, set, use, clob)));
9665 }
9666 else
9667 emit_insn (set);
9668 }
9669
9670 if (dst != operands[0])
9671 emit_move_insn (operands[0], dst);
9672}
9673
9674/* Expand a copysign operation. Special case operand 0 being a constant. */
9675
9676void
9677ix86_expand_copysign (rtx operands[])
9678{
9679 enum machine_mode mode, vmode;
9680 rtx dest, op0, op1, mask, nmask;
9681
9682 dest = operands[0];
9683 op0 = operands[1];
9684 op1 = operands[2];
9685
9686 mode = GET_MODE (dest);
9687 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9688
9689 if (GET_CODE (op0) == CONST_DOUBLE)
9690 {
9691 rtvec v;
9692
9693 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9694 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9695
9696 if (op0 == CONST0_RTX (mode))
9697 op0 = CONST0_RTX (vmode);
9698 else
9699 {
9700 if (mode == SFmode)
9701 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9702 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9703 else
9704 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9705 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9706 }
9707
9708 mask = ix86_build_signbit_mask (mode, 0, 0);
9709
9710 if (mode == SFmode)
9711 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9712 else
9713 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9714 }
9715 else
9716 {
9717 nmask = ix86_build_signbit_mask (mode, 0, 1);
9718 mask = ix86_build_signbit_mask (mode, 0, 0);
9719
9720 if (mode == SFmode)
9721 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9722 else
9723 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9724 }
9725}
9726
9727/* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9728 be a constant, and so has already been expanded into a vector constant. */
9729
9730void
9731ix86_split_copysign_const (rtx operands[])
9732{
9733 enum machine_mode mode, vmode;
9734 rtx dest, op0, op1, mask, x;
9735
9736 dest = operands[0];
9737 op0 = operands[1];
9738 op1 = operands[2];
9739 mask = operands[3];
9740
9741 mode = GET_MODE (dest);
9742 vmode = GET_MODE (mask);
9743
9744 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9745 x = gen_rtx_AND (vmode, dest, mask);
9746 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9747
9748 if (op0 != CONST0_RTX (vmode))
9749 {
9750 x = gen_rtx_IOR (vmode, dest, op0);
9751 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9752 }
9753}
9754
9755/* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9756 so we have to do two masks. */
9757
9758void
9759ix86_split_copysign_var (rtx operands[])
9760{
9761 enum machine_mode mode, vmode;
9762 rtx dest, scratch, op0, op1, mask, nmask, x;
9763
9764 dest = operands[0];
9765 scratch = operands[1];
9766 op0 = operands[2];
9767 op1 = operands[3];
9768 nmask = operands[4];
9769 mask = operands[5];
9770
9771 mode = GET_MODE (dest);
9772 vmode = GET_MODE (mask);
9773
9774 if (rtx_equal_p (op0, op1))
9775 {
9776 /* Shouldn't happen often (it's useless, obviously), but when it does
9777 we'd generate incorrect code if we continue below. */
9778 emit_move_insn (dest, op0);
9779 return;
9780 }
9781
9782 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9783 {
9784 gcc_assert (REGNO (op1) == REGNO (scratch));
9785
9786 x = gen_rtx_AND (vmode, scratch, mask);
9787 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9788
9789 dest = mask;
9790 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9791 x = gen_rtx_NOT (vmode, dest);
9792 x = gen_rtx_AND (vmode, x, op0);
9793 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9794 }
9795 else
9796 {
9797 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9798 {
9799 x = gen_rtx_AND (vmode, scratch, mask);
9800 }
9801 else /* alternative 2,4 */
9802 {
9803 gcc_assert (REGNO (mask) == REGNO (scratch));
9804 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9805 x = gen_rtx_AND (vmode, scratch, op1);
9806 }
9807 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9808
9809 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9810 {
9811 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9812 x = gen_rtx_AND (vmode, dest, nmask);
9813 }
9814 else /* alternative 3,4 */
9815 {
9816 gcc_assert (REGNO (nmask) == REGNO (dest));
9817 dest = nmask;
9818 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9819 x = gen_rtx_AND (vmode, dest, op0);
9820 }
9821 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9822 }
9823
9824 x = gen_rtx_IOR (vmode, dest, scratch);
9825 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9826}
9827
9828/* Return TRUE or FALSE depending on whether the first SET in INSN
9829 has source and destination with matching CC modes, and that the
9830 CC mode is at least as constrained as REQ_MODE. */
9831
9832int
9833ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9834{
9835 rtx set;
9836 enum machine_mode set_mode;
9837
9838 set = PATTERN (insn);
9839 if (GET_CODE (set) == PARALLEL)
9840 set = XVECEXP (set, 0, 0);
9841 gcc_assert (GET_CODE (set) == SET);
9842 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9843
9844 set_mode = GET_MODE (SET_DEST (set));
9845 switch (set_mode)
9846 {
9847 case CCNOmode:
9848 if (req_mode != CCNOmode
9849 && (req_mode != CCmode
9850 || XEXP (SET_SRC (set), 1) != const0_rtx))
9851 return 0;
9852 break;
9853 case CCmode:
9854 if (req_mode == CCGCmode)
9855 return 0;
9856 /* FALLTHRU */
9857 case CCGCmode:
9858 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9859 return 0;
9860 /* FALLTHRU */
9861 case CCGOCmode:
9862 if (req_mode == CCZmode)
9863 return 0;
9864 /* FALLTHRU */
9865 case CCZmode:
9866 break;
9867
9868 default:
9869 gcc_unreachable ();
9870 }
9871
9872 return (GET_MODE (SET_SRC (set)) == set_mode);
9873}
9874
9875/* Generate insn patterns to do an integer compare of OPERANDS. */
9876
9877static rtx
9878ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9879{
9880 enum machine_mode cmpmode;
9881 rtx tmp, flags;
9882
9883 cmpmode = SELECT_CC_MODE (code, op0, op1);
9884 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9885
9886 /* This is very simple, but making the interface the same as in the
9887 FP case makes the rest of the code easier. */
9888 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9889 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9890
9891 /* Return the test that should be put into the flags user, i.e.
9892 the bcc, scc, or cmov instruction. */
9893 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9894}
9895
9896/* Figure out whether to use ordered or unordered fp comparisons.
9897 Return the appropriate mode to use. */
9898
9899enum machine_mode
9900ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9901{
9902 /* ??? In order to make all comparisons reversible, we do all comparisons
9903 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9904    between trapping and nontrapping forms of comparisons, we can make inequality
9905 comparisons trapping again, since it results in better code when using
9906 FCOM based compares. */
9907 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9908}
9909
9910enum machine_mode
9911ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9912{
9913 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9914 return ix86_fp_compare_mode (code);
9915 switch (code)
9916 {
9917 /* Only zero flag is needed. */
9918 case EQ: /* ZF=0 */
9919 case NE: /* ZF!=0 */
9920 return CCZmode;
9921 /* Codes needing carry flag. */
9922 case GEU: /* CF=0 */
9923 case GTU: /* CF=0 & ZF=0 */
9924 case LTU: /* CF=1 */
9925 case LEU: /* CF=1 | ZF=1 */
9926 return CCmode;
9927 /* Codes possibly doable only with sign flag when
9928 comparing against zero. */
9929 case GE: /* SF=OF or SF=0 */
9930 case LT: /* SF<>OF or SF=1 */
9931 if (op1 == const0_rtx)
9932 return CCGOCmode;
9933 else
9934 /* For other cases Carry flag is not required. */
9935 return CCGCmode;
9936 /* Codes doable only with sign flag when comparing
9937 against zero, but we miss jump instruction for it
9938 so we need to use relational tests against overflow
9939 that thus needs to be zero. */
9940 case GT: /* ZF=0 & SF=OF */
9941 case LE: /* ZF=1 | SF<>OF */
9942 if (op1 == const0_rtx)
9943 return CCNOmode;
9944 else
9945 return CCGCmode;
9946       /* The strcmp pattern does a (use flags), and combine may ask us for the
9947	 proper mode.  */
9948 case USE:
9949 return CCmode;
9950 default:
9951 gcc_unreachable ();
9952 }
9953}
9954
9955/* Return the fixed registers used for condition codes. */
9956
9957static bool
9958ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9959{
9960 *p1 = FLAGS_REG;
9961 *p2 = FPSR_REG;
9962 return true;
9963}
9964
9965/* If two condition code modes are compatible, return a condition code
9966 mode which is compatible with both. Otherwise, return
9967 VOIDmode. */
9968
9969static enum machine_mode
9970ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9971{
9972 if (m1 == m2)
9973 return m1;
9974
9975 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9976 return VOIDmode;
9977
9978 if ((m1 == CCGCmode && m2 == CCGOCmode)
9979 || (m1 == CCGOCmode && m2 == CCGCmode))
9980 return CCGCmode;
9981
9982 switch (m1)
9983 {
9984 default:
9985 gcc_unreachable ();
9986
9987 case CCmode:
9988 case CCGCmode:
9989 case CCGOCmode:
9990 case CCNOmode:
9991 case CCZmode:
9992 switch (m2)
9993 {
9994 default:
9995 return VOIDmode;
9996
9997 case CCmode:
9998 case CCGCmode:
9999 case CCGOCmode:
10000 case CCNOmode:
10001 case CCZmode:
10002 return CCmode;
10003 }
10004
10005 case CCFPmode:
10006 case CCFPUmode:
10007 /* These are only compatible with themselves, which we already
10008 checked above. */
10009 return VOIDmode;
10010 }
10011}
10012
10013/* Return true if we should use an FCOMI instruction for this fp comparison. */
10014
10015int
10016ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10017{
10018 enum rtx_code swapped_code = swap_condition (code);
10019 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10020 || (ix86_fp_comparison_cost (swapped_code)
10021 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10022}
10023
10024/* Swap, force into registers, or otherwise massage the two operands
10025 to a fp comparison. The operands are updated in place; the new
10026 comparison code is returned. */
10027
10028static enum rtx_code
10029ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10030{
10031 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10032 rtx op0 = *pop0, op1 = *pop1;
10033 enum machine_mode op_mode = GET_MODE (op0);
10034 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10035
10036 /* All of the unordered compare instructions only work on registers.
10037 The same is true of the fcomi compare instructions. The XFmode
10038 compare instructions require registers except when comparing
10039 against zero or when converting operand 1 from fixed point to
10040 floating point. */
10041
10042 if (!is_sse
10043 && (fpcmp_mode == CCFPUmode
10044 || (op_mode == XFmode
10045 && ! (standard_80387_constant_p (op0) == 1
10046 || standard_80387_constant_p (op1) == 1)
10047 && GET_CODE (op1) != FLOAT)
10048 || ix86_use_fcomi_compare (code)))
10049 {
10050 op0 = force_reg (op_mode, op0);
10051 op1 = force_reg (op_mode, op1);
10052 }
10053 else
10054 {
10055 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10056 things around if they appear profitable, otherwise force op0
10057 into a register. */
10058
10059 if (standard_80387_constant_p (op0) == 0
10060 || (GET_CODE (op0) == MEM
10061 && ! (standard_80387_constant_p (op1) == 0
10062 || GET_CODE (op1) == MEM)))
10063 {
10064 rtx tmp;
10065 tmp = op0, op0 = op1, op1 = tmp;
10066 code = swap_condition (code);
10067 }
10068
10069 if (GET_CODE (op0) != REG)
10070 op0 = force_reg (op_mode, op0);
10071
10072 if (CONSTANT_P (op1))
10073 {
10074 int tmp = standard_80387_constant_p (op1);
10075 if (tmp == 0)
10076 op1 = validize_mem (force_const_mem (op_mode, op1));
10077 else if (tmp == 1)
10078 {
10079 if (TARGET_CMOVE)
10080 op1 = force_reg (op_mode, op1);
10081 }
10082 else
10083 op1 = force_reg (op_mode, op1);
10084 }
10085 }
10086
10087 /* Try to rearrange the comparison to make it cheaper. */
10088 if (ix86_fp_comparison_cost (code)
10089 > ix86_fp_comparison_cost (swap_condition (code))
10090 && (GET_CODE (op1) == REG || !no_new_pseudos))
10091 {
10092 rtx tmp;
10093 tmp = op0, op0 = op1, op1 = tmp;
10094 code = swap_condition (code);
10095 if (GET_CODE (op0) != REG)
10096 op0 = force_reg (op_mode, op0);
10097 }
10098
10099 *pop0 = op0;
10100 *pop1 = op1;
10101 return code;
10102}
10103
10104/* Convert comparison codes we use to represent FP comparison to integer
10105 code that will result in proper branch. Return UNKNOWN if no such code
10106 is available. */
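/* After an FP compare the flags are set as by an unsigned integer compare
   (see the table in ix86_fp_comparison_codes), hence GT maps to GTU,
   GE to GEU, and so on.  */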
10107
10108enum rtx_code
10109ix86_fp_compare_code_to_integer (enum rtx_code code)
10110{
10111 switch (code)
10112 {
10113 case GT:
10114 return GTU;
10115 case GE:
10116 return GEU;
10117 case ORDERED:
10118 case UNORDERED:
10119 return code;
10120 break;
10121 case UNEQ:
10122 return EQ;
10123 break;
10124 case UNLT:
10125 return LTU;
10126 break;
10127 case UNLE:
10128 return LEU;
10129 break;
10130 case LTGT:
10131 return NE;
10132 break;
10133 default:
10134 return UNKNOWN;
10135 }
10136}
10137
10138/* Split comparison code CODE into comparisons we can do using branch
10139   instructions.  BYPASS_CODE is the comparison code for the branch that will
10140   branch around FIRST_CODE and SECOND_CODE.  If one of the branches is
10141   not required, its value is set to UNKNOWN.
10142   We never require more than two branches.  */
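/* Example: under IEEE math, EQ is split into FIRST_CODE = UNEQ guarded by
   BYPASS_CODE = UNORDERED, so the unordered case branches around the
   equality test.  */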
10143
10144void
10145ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10146 enum rtx_code *first_code,
10147 enum rtx_code *second_code)
10148{
10149 *first_code = code;
10150 *bypass_code = UNKNOWN;
10151 *second_code = UNKNOWN;
10152
10153 /* The fcomi comparison sets flags as follows:
10154
10155 cmp ZF PF CF
10156 > 0 0 0
10157 < 0 0 1
10158 = 1 0 0
10159 un 1 1 1 */
10160
10161 switch (code)
10162 {
10163 case GT: /* GTU - CF=0 & ZF=0 */
10164 case GE: /* GEU - CF=0 */
10165 case ORDERED: /* PF=0 */
10166 case UNORDERED: /* PF=1 */
10167 case UNEQ: /* EQ - ZF=1 */
10168 case UNLT: /* LTU - CF=1 */
10169 case UNLE: /* LEU - CF=1 | ZF=1 */
10170 case LTGT: /* EQ - ZF=0 */
10171 break;
10172 case LT: /* LTU - CF=1 - fails on unordered */
10173 *first_code = UNLT;
10174 *bypass_code = UNORDERED;
10175 break;
10176 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10177 *first_code = UNLE;
10178 *bypass_code = UNORDERED;
10179 break;
10180 case EQ: /* EQ - ZF=1 - fails on unordered */
10181 *first_code = UNEQ;
10182 *bypass_code = UNORDERED;
10183 break;
10184 case NE: /* NE - ZF=0 - fails on unordered */
10185 *first_code = LTGT;
10186 *second_code = UNORDERED;
10187 break;
10188 case UNGE: /* GEU - CF=0 - fails on unordered */
10189 *first_code = GE;
10190 *second_code = UNORDERED;
10191 break;
10192 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10193 *first_code = GT;
10194 *second_code = UNORDERED;
10195 break;
10196 default:
10197 gcc_unreachable ();
10198 }
10199 if (!TARGET_IEEE_FP)
10200 {
10201 *second_code = UNKNOWN;
10202 *bypass_code = UNKNOWN;
10203 }
10204}
10205
10206/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10207   All of the following functions use the number of instructions as the cost metric.
10208   In the future this should be tweaked to compute bytes for optimize_size and
10209   to take into account the performance of various instructions on various CPUs.  */
10210static int
10211ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10212{
10213 if (!TARGET_IEEE_FP)
10214 return 4;
10215 /* The cost of code output by ix86_expand_fp_compare. */
10216 switch (code)
10217 {
10218 case UNLE:
10219 case UNLT:
10220 case LTGT:
10221 case GT:
10222 case GE:
10223 case UNORDERED:
10224 case ORDERED:
10225 case UNEQ:
10226 return 4;
10227 break;
10228 case LT:
10229 case NE:
10230 case EQ:
10231 case UNGE:
10232 return 5;
10233 break;
10234 case LE:
10235 case UNGT:
10236 return 6;
10237 break;
10238 default:
10239 gcc_unreachable ();
10240 }
10241}
10242
10243/* Return cost of comparison done using fcomi operation.
10244 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10245static int
10246ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10247{
10248 enum rtx_code bypass_code, first_code, second_code;
10249  /* Return an arbitrarily high cost when the instruction is not supported -
10250     this prevents gcc from using it.  */
10251 if (!TARGET_CMOVE)
10252 return 1024;
10253 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10254 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10255}
10256
10257/* Return cost of comparison done using sahf operation.
10258 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10259static int
10260ix86_fp_comparison_sahf_cost (enum rtx_code code)
10261{
10262 enum rtx_code bypass_code, first_code, second_code;
10263  /* Return an arbitrarily high cost when the instruction is not preferred -
10264     this keeps gcc from using it.  */
10265 if (!TARGET_USE_SAHF && !optimize_size)
10266 return 1024;
10267 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10268 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10269}
10270
10271/* Compute cost of the comparison done using any method.
10272 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10273static int
10274ix86_fp_comparison_cost (enum rtx_code code)
10275{
10276 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10277 int min;
10278
10279 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10280 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10281
10282 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10283 if (min > sahf_cost)
10284 min = sahf_cost;
10285 if (min > fcomi_cost)
10286 min = fcomi_cost;
10287 return min;
10288}
10289
10290/* Generate insn patterns to do a floating point compare of OPERANDS. */
10291
10292static rtx
10293ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10294 rtx *second_test, rtx *bypass_test)
10295{
10296 enum machine_mode fpcmp_mode, intcmp_mode;
10297 rtx tmp, tmp2;
10298 int cost = ix86_fp_comparison_cost (code);
10299 enum rtx_code bypass_code, first_code, second_code;
10300
10301 fpcmp_mode = ix86_fp_compare_mode (code);
10302 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10303
10304 if (second_test)
10305 *second_test = NULL_RTX;
10306 if (bypass_test)
10307 *bypass_test = NULL_RTX;
10308
10309 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10310
10311 /* Do fcomi/sahf based test when profitable. */
10312 if ((bypass_code == UNKNOWN || bypass_test)
10313 && (second_code == UNKNOWN || second_test)
10314 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10315 {
10316 if (TARGET_CMOVE)
10317 {
10318 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10319 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10320 tmp);
10321 emit_insn (tmp);
10322 }
10323 else
10324 {
10325 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10326 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10327 if (!scratch)
10328 scratch = gen_reg_rtx (HImode);
10329 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10330 emit_insn (gen_x86_sahf_1 (scratch));
10331 }
10332
10333 /* The FP codes work out to act like unsigned. */
10334 intcmp_mode = fpcmp_mode;
10335 code = first_code;
10336 if (bypass_code != UNKNOWN)
10337 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10338 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10339 const0_rtx);
10340 if (second_code != UNKNOWN)
10341 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10342 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10343 const0_rtx);
10344 }
10345 else
10346 {
10347 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10348 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10349 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10350 if (!scratch)
10351 scratch = gen_reg_rtx (HImode);
10352 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10353
10354 /* In the unordered case, we have to check C2 for NaN's, which
10355 doesn't happen to work out to anything nice combination-wise.
10356 So do some bit twiddling on the value we've got in AH to come
10357 up with an appropriate set of condition codes. */
10358
10359 intcmp_mode = CCNOmode;
10360 switch (code)
10361 {
10362 case GT:
10363 case UNGT:
10364 if (code == GT || !TARGET_IEEE_FP)
10365 {
10366 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10367 code = EQ;
10368 }
10369 else
10370 {
10371 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10372 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10373 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10374 intcmp_mode = CCmode;
10375 code = GEU;
10376 }
10377 break;
10378 case LT:
10379 case UNLT:
10380 if (code == LT && TARGET_IEEE_FP)
10381 {
10382 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10383 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10384 intcmp_mode = CCmode;
10385 code = EQ;
10386 }
10387 else
10388 {
10389 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10390 code = NE;
10391 }
10392 break;
10393 case GE:
10394 case UNGE:
10395 if (code == GE || !TARGET_IEEE_FP)
10396 {
10397 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10398 code = EQ;
10399 }
10400 else
10401 {
10402 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10403 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10404 GEN_INT (0x01)));
10405 code = NE;
10406 }
10407 break;
10408 case LE:
10409 case UNLE:
10410 if (code == LE && TARGET_IEEE_FP)
10411 {
10412 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10413 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10414 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10415 intcmp_mode = CCmode;
10416 code = LTU;
10417 }
10418 else
10419 {
10420 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10421 code = NE;
10422 }
10423 break;
10424 case EQ:
10425 case UNEQ:
10426 if (code == EQ && TARGET_IEEE_FP)
10427 {
10428 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10429 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10430 intcmp_mode = CCmode;
10431 code = EQ;
10432 }
10433 else
10434 {
10435 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10436 code = NE;
10437 break;
10438 }
10439 break;
10440 case NE:
10441 case LTGT:
10442 if (code == NE && TARGET_IEEE_FP)
10443 {
10444 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10445 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10446 GEN_INT (0x40)));
10447 code = NE;
10448 }
10449 else
10450 {
10451 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10452 code = EQ;
10453 }
10454 break;
10455
10456 case UNORDERED:
10457 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10458 code = NE;
10459 break;
10460 case ORDERED:
10461 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10462 code = EQ;
10463 break;
10464
10465 default:
10466 gcc_unreachable ();
10467 }
10468 }
10469
10470 /* Return the test that should be put into the flags user, i.e.
10471 the bcc, scc, or cmov instruction. */
10472 return gen_rtx_fmt_ee (code, VOIDmode,
10473 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10474 const0_rtx);
10475}
10476
10477rtx
10478ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10479{
10480 rtx op0, op1, ret;
10481 op0 = ix86_compare_op0;
10482 op1 = ix86_compare_op1;
10483
10484 if (second_test)
10485 *second_test = NULL_RTX;
10486 if (bypass_test)
10487 *bypass_test = NULL_RTX;
10488
10489 if (ix86_compare_emitted)
10490 {
10491 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10492 ix86_compare_emitted = NULL_RTX;
10493 }
10494 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10495 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10496 second_test, bypass_test);
10497 else
10498 ret = ix86_expand_int_compare (code, op0, op1);
10499
10500 return ret;
10501}
10502
10503/* Return true if the CODE will result in nontrivial jump sequence. */
10504bool
10505ix86_fp_jump_nontrivial_p (enum rtx_code code)
10506{
10507 enum rtx_code bypass_code, first_code, second_code;
10508 if (!TARGET_CMOVE)
10509 return true;
10510 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10511 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10512}
10513
10514void
10515ix86_expand_branch (enum rtx_code code, rtx label)
10516{
10517 rtx tmp;
10518
10519 /* If we have emitted a compare insn, go straight to simple.
10520 ix86_expand_compare won't emit anything if ix86_compare_emitted
10521     is non-NULL.  */
10522 if (ix86_compare_emitted)
10523 goto simple;
10524
10525 switch (GET_MODE (ix86_compare_op0))
10526 {
10527 case QImode:
10528 case HImode:
10529 case SImode:
10530 simple:
10531 tmp = ix86_expand_compare (code, NULL, NULL);
10532 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10533 gen_rtx_LABEL_REF (VOIDmode, label),
10534 pc_rtx);
10535 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10536 return;
10537
10538 case SFmode:
10539 case DFmode:
10540 case XFmode:
10541 {
10542 rtvec vec;
10543 int use_fcomi;
10544 enum rtx_code bypass_code, first_code, second_code;
10545
10546 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10547 &ix86_compare_op1);
10548
10549 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10550
10551 /* Check whether we will use the natural sequence with one jump. If
10552	     so, we can expand the jump early.  Otherwise delay expansion by
10553	     creating a compound insn so as not to confuse the optimizers.  */
10554 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10555 && TARGET_CMOVE)
10556 {
10557 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10558 gen_rtx_LABEL_REF (VOIDmode, label),
10559 pc_rtx, NULL_RTX, NULL_RTX);
10560 }
10561 else
10562 {
10563 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10564 ix86_compare_op0, ix86_compare_op1);
10565 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10566 gen_rtx_LABEL_REF (VOIDmode, label),
10567 pc_rtx);
10568 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10569
10570 use_fcomi = ix86_use_fcomi_compare (code);
10571 vec = rtvec_alloc (3 + !use_fcomi);
10572 RTVEC_ELT (vec, 0) = tmp;
10573 RTVEC_ELT (vec, 1)
10574 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10575 RTVEC_ELT (vec, 2)
10576 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10577 if (! use_fcomi)
10578 RTVEC_ELT (vec, 3)
10579 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10580
10581 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10582 }
10583 return;
10584 }
10585
10586 case DImode:
10587 if (TARGET_64BIT)
10588 goto simple;
10589 case TImode:
10590	/* Expand a double-word (DImode/TImode) branch into multiple compare+branch.  */
10591 {
10592 rtx lo[2], hi[2], label2;
10593 enum rtx_code code1, code2, code3;
10594 enum machine_mode submode;
10595
10596 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10597 {
10598 tmp = ix86_compare_op0;
10599 ix86_compare_op0 = ix86_compare_op1;
10600 ix86_compare_op1 = tmp;
10601 code = swap_condition (code);
10602 }
10603 if (GET_MODE (ix86_compare_op0) == DImode)
10604 {
10605 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10606 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10607 submode = SImode;
10608 }
10609 else
10610 {
10611 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10612 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10613 submode = DImode;
10614 }
10615
10616 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10617 avoid two branches. This costs one extra insn, so disable when
10618 optimizing for size. */
10619
10620 if ((code == EQ || code == NE)
10621 && (!optimize_size
10622 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10623 {
10624 rtx xor0, xor1;
10625
10626 xor1 = hi[0];
10627 if (hi[1] != const0_rtx)
10628 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10629 NULL_RTX, 0, OPTAB_WIDEN);
10630
10631 xor0 = lo[0];
10632 if (lo[1] != const0_rtx)
10633 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10634 NULL_RTX, 0, OPTAB_WIDEN);
10635
10636 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10637 NULL_RTX, 0, OPTAB_WIDEN);
10638
10639 ix86_compare_op0 = tmp;
10640 ix86_compare_op1 = const0_rtx;
10641 ix86_expand_branch (code, label);
10642 return;
10643 }
10644
10645	  /* Otherwise, if we are doing a less-than or greater-or-equal-than
10646	     comparison, op1 is a constant and the low word is zero, then we can
10647	     just examine the high word.  */
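	  /* For example, with lo(op1) == 0, "a < op1" (signed or unsigned) holds
	     exactly when hi(a) < hi(op1), since equal high words would force an
	     unsigned low-word compare against zero, which can never be true.  */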
10648
10649 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10650 switch (code)
10651 {
10652 case LT: case LTU: case GE: case GEU:
10653 ix86_compare_op0 = hi[0];
10654 ix86_compare_op1 = hi[1];
10655 ix86_expand_branch (code, label);
10656 return;
10657 default:
10658 break;
10659 }
10660
10661 /* Otherwise, we need two or three jumps. */
10662
10663 label2 = gen_label_rtx ();
10664
10665 code1 = code;
10666 code2 = swap_condition (code);
10667 code3 = unsigned_condition (code);
10668
10669 switch (code)
10670 {
10671 case LT: case GT: case LTU: case GTU:
10672 break;
10673
10674 case LE: code1 = LT; code2 = GT; break;
10675 case GE: code1 = GT; code2 = LT; break;
10676 case LEU: code1 = LTU; code2 = GTU; break;
10677 case GEU: code1 = GTU; code2 = LTU; break;
10678
10679 case EQ: code1 = UNKNOWN; code2 = NE; break;
10680 case NE: code2 = UNKNOWN; break;
10681
10682 default:
10683 gcc_unreachable ();
10684 }
10685
10686 /*
10687 * a < b =>
10688 * if (hi(a) < hi(b)) goto true;
10689 * if (hi(a) > hi(b)) goto false;
10690 * if (lo(a) < lo(b)) goto true;
10691 * false:
10692 */
10693
10694 ix86_compare_op0 = hi[0];
10695 ix86_compare_op1 = hi[1];
10696
10697 if (code1 != UNKNOWN)
10698 ix86_expand_branch (code1, label);
10699 if (code2 != UNKNOWN)
10700 ix86_expand_branch (code2, label2);
10701
10702 ix86_compare_op0 = lo[0];
10703 ix86_compare_op1 = lo[1];
10704 ix86_expand_branch (code3, label);
10705
10706 if (code2 != UNKNOWN)
10707 emit_label (label2);
10708 return;
10709 }
10710
10711 default:
10712 gcc_unreachable ();
10713 }
10714}
10715
10716/* Split branch based on floating point condition. */
10717void
10718ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10719 rtx target1, rtx target2, rtx tmp, rtx pushed)
10720{
10721 rtx second, bypass;
10722 rtx label = NULL_RTX;
10723 rtx condition;
10724 int bypass_probability = -1, second_probability = -1, probability = -1;
10725 rtx i;
10726
10727 if (target2 != pc_rtx)
10728 {
10729 rtx tmp = target2;
10730 code = reverse_condition_maybe_unordered (code);
10731 target2 = target1;
10732 target1 = tmp;
10733 }
10734
10735 condition = ix86_expand_fp_compare (code, op1, op2,
10736 tmp, &second, &bypass);
10737
10738 /* Remove pushed operand from stack. */
10739 if (pushed)
10740 ix86_free_from_memory (GET_MODE (pushed));
10741
10742 if (split_branch_probability >= 0)
10743 {
10744 /* Distribute the probabilities across the jumps.
10745	 Assume that BYPASS and SECOND always test
10746	 for UNORDERED.  */
10747 probability = split_branch_probability;
10748
10749      /* A value of 1 is low enough that the probability does not need
10750	 to be updated.  Later we may run some experiments and see
10751	 whether unordered values are more frequent in practice.  */
10752 if (bypass)
10753 bypass_probability = 1;
10754 if (second)
10755 second_probability = 1;
10756 }
10757 if (bypass != NULL_RTX)
10758 {
10759 label = gen_label_rtx ();
10760 i = emit_jump_insn (gen_rtx_SET
10761 (VOIDmode, pc_rtx,
10762 gen_rtx_IF_THEN_ELSE (VOIDmode,
10763 bypass,
10764 gen_rtx_LABEL_REF (VOIDmode,
10765 label),
10766 pc_rtx)));
10767 if (bypass_probability >= 0)
10768 REG_NOTES (i)
10769 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10770 GEN_INT (bypass_probability),
10771 REG_NOTES (i));
10772 }
10773 i = emit_jump_insn (gen_rtx_SET
10774 (VOIDmode, pc_rtx,
10775 gen_rtx_IF_THEN_ELSE (VOIDmode,
10776 condition, target1, target2)));
10777 if (probability >= 0)
10778 REG_NOTES (i)
10779 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10780 GEN_INT (probability),
10781 REG_NOTES (i));
10782 if (second != NULL_RTX)
10783 {
10784 i = emit_jump_insn (gen_rtx_SET
10785 (VOIDmode, pc_rtx,
10786 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10787 target2)));
10788 if (second_probability >= 0)
10789 REG_NOTES (i)
10790 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10791 GEN_INT (second_probability),
10792 REG_NOTES (i));
10793 }
10794 if (label != NULL_RTX)
10795 emit_label (label);
10796}
10797
10798int
10799ix86_expand_setcc (enum rtx_code code, rtx dest)
10800{
10801 rtx ret, tmp, tmpreg, equiv;
10802 rtx second_test, bypass_test;
10803
10804 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10805 return 0; /* FAIL */
10806
10807 gcc_assert (GET_MODE (dest) == QImode);
10808
10809 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10810 PUT_MODE (ret, QImode);
10811
10812 tmp = dest;
10813 tmpreg = dest;
10814
10815 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10816 if (bypass_test || second_test)
10817 {
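      /* Combine the two setcc results: the primary result is valid only when
	 the bypass condition does not hold, so the reversed bypass setcc is
	 ANDed in below; a second test is simply ORed in.  */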
10818 rtx test = second_test;
10819 int bypass = 0;
10820 rtx tmp2 = gen_reg_rtx (QImode);
10821 if (bypass_test)
10822 {
10823 gcc_assert (!second_test);
10824 test = bypass_test;
10825 bypass = 1;
10826 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10827 }
10828 PUT_MODE (test, QImode);
10829 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10830
10831 if (bypass)
10832 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10833 else
10834 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10835 }
10836
10837 /* Attach a REG_EQUAL note describing the comparison result. */
10838 if (ix86_compare_op0 && ix86_compare_op1)
10839 {
10840 equiv = simplify_gen_relational (code, QImode,
10841 GET_MODE (ix86_compare_op0),
10842 ix86_compare_op0, ix86_compare_op1);
10843 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10844 }
10845
10846 return 1; /* DONE */
10847}
10848
10849/* Expand a comparison setting or clearing the carry flag.  Return true when
10850   successful and set *POP to the resulting comparison rtx.  */
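/* The idea is to rewrite the comparison as LTU or GEU, whose result lives
   entirely in the carry flag and can then be consumed directly by sbb/adc
   style patterns.  */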
10851static bool
10852ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10853{
10854 enum machine_mode mode =
10855 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10856
10857  /* Do not handle double-word (DImode/TImode) compares, which go through a
10858     special path.  FP compares are handled below, but only when they reduce to a plain carry-flag test.  */
10859 if (mode == (TARGET_64BIT ? TImode : DImode))
10860 return false;
10861 if (FLOAT_MODE_P (mode))
10862 {
10863 rtx second_test = NULL, bypass_test = NULL;
10864 rtx compare_op, compare_seq;
10865
10866 /* Shortcut: following common codes never translate into carry flag compares. */
10867 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10868 || code == ORDERED || code == UNORDERED)
10869 return false;
10870
10871      /* These comparisons require the zero flag; swap the operands so they don't.  */
10872 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10873 && !TARGET_IEEE_FP)
10874 {
10875 rtx tmp = op0;
10876 op0 = op1;
10877 op1 = tmp;
10878 code = swap_condition (code);
10879 }
10880
10881      /* Try to expand the comparison and verify that we end up with a carry-flag
10882	 based comparison.  This fails to be true only when we decide to expand the
10883	 comparison using arithmetic, which is not a common scenario.  */
10884 start_sequence ();
10885 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10886 &second_test, &bypass_test);
10887 compare_seq = get_insns ();
10888 end_sequence ();
10889
10890 if (second_test || bypass_test)
10891 return false;
10892 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10893 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10894 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10895 else
10896 code = GET_CODE (compare_op);
10897 if (code != LTU && code != GEU)
10898 return false;
10899 emit_insn (compare_seq);
10900 *pop = compare_op;
10901 return true;
10902 }
10903 if (!INTEGRAL_MODE_P (mode))
10904 return false;
10905 switch (code)
10906 {
10907 case LTU:
10908 case GEU:
10909 break;
10910
10911 /* Convert a==0 into (unsigned)a<1. */
10912 case EQ:
10913 case NE:
10914 if (op1 != const0_rtx)
10915 return false;
10916 op1 = const1_rtx;
10917 code = (code == EQ ? LTU : GEU);
10918 break;
10919
10920    /* Convert a>b into b<a or a>=b+1.  */
10921 case GTU:
10922 case LEU:
10923 if (GET_CODE (op1) == CONST_INT)
10924 {
10925 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10926	      /* Bail out on overflow.  We could still swap the operands, but that
10927		 would force loading the constant into a register.  */
10928 if (op1 == const0_rtx
10929 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10930 return false;
10931 code = (code == GTU ? GEU : LTU);
10932 }
10933 else
10934 {
10935 rtx tmp = op1;
10936 op1 = op0;
10937 op0 = tmp;
10938 code = (code == GTU ? LTU : GEU);
10939 }
10940 break;
10941
10942 /* Convert a>=0 into (unsigned)a<0x80000000. */
10943 case LT:
10944 case GE:
10945 if (mode == DImode || op1 != const0_rtx)
10946 return false;
10947 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10948 code = (code == LT ? GEU : LTU);
10949 break;
10950 case LE:
10951 case GT:
10952 if (mode == DImode || op1 != constm1_rtx)
10953 return false;
10954 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10955 code = (code == LE ? GEU : LTU);
10956 break;
10957
10958 default:
10959 return false;
10960 }
10961  /* Swapping operands may cause a constant to appear as the first operand.  */
10962 if (!nonimmediate_operand (op0, VOIDmode))
10963 {
10964 if (no_new_pseudos)
10965 return false;
10966 op0 = force_reg (mode, op0);
10967 }
10968 ix86_compare_op0 = op0;
10969 ix86_compare_op1 = op1;
10970 *pop = ix86_expand_compare (code, NULL, NULL);
10971 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10972 return true;
10973}
10974
10975int
10976ix86_expand_int_movcc (rtx operands[])
10977{
10978 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10979 rtx compare_seq, compare_op;
10980 rtx second_test, bypass_test;
10981 enum machine_mode mode = GET_MODE (operands[0]);
10982  bool sign_bit_compare_p = false;
10983
10984 start_sequence ();
10985 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10986 compare_seq = get_insns ();
10987 end_sequence ();
10988
10989 compare_code = GET_CODE (compare_op);
10990
10991 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10992 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10993 sign_bit_compare_p = true;
10994
10995 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10996 HImode insns, we'd be swallowed in word prefix ops. */
10997
10998 if ((mode != HImode || TARGET_FAST_PREFIX)
10999 && (mode != (TARGET_64BIT ? TImode : DImode))
11000 && GET_CODE (operands[2]) == CONST_INT
11001 && GET_CODE (operands[3]) == CONST_INT)
11002 {
11003 rtx out = operands[0];
11004 HOST_WIDE_INT ct = INTVAL (operands[2]);
11005 HOST_WIDE_INT cf = INTVAL (operands[3]);
11006 HOST_WIDE_INT diff;
11007
11008 diff = ct - cf;
11009      /* Sign bit compares are better done using shifts than by using
11010	 sbb.  */
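      /* For x < 0 (or x <= -1) the desired 0/-1 value is just the sign bit
	 replicated, which emit_store_flag below can typically produce with a
	 single arithmetic right shift.  */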
11011 if (sign_bit_compare_p
11012 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11013 ix86_compare_op1, &compare_op))
11014 {
11015 /* Detect overlap between destination and compare sources. */
11016 rtx tmp = out;
11017
11018 if (!sign_bit_compare_p)
11019 {
11020 bool fpcmp = false;
11021
11022 compare_code = GET_CODE (compare_op);
11023
11024 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11025 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11026 {
11027 fpcmp = true;
11028 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11029 }
11030
11031	      /* To simplify the rest of the code, restrict to the GEU case.  */
11032 if (compare_code == LTU)
11033 {
11034 HOST_WIDE_INT tmp = ct;
11035 ct = cf;
11036 cf = tmp;
11037 compare_code = reverse_condition (compare_code);
11038 code = reverse_condition (code);
11039 }
11040 else
11041 {
11042 if (fpcmp)
11043 PUT_CODE (compare_op,
11044 reverse_condition_maybe_unordered
11045 (GET_CODE (compare_op)));
11046 else
11047 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11048 }
11049 diff = ct - cf;
11050
11051 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11052 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11053 tmp = gen_reg_rtx (mode);
11054
11055 if (mode == DImode)
11056 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11057 else
11058 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11059 }
11060 else
11061 {
11062 if (code == GT || code == GE)
11063 code = reverse_condition (code);
11064 else
11065 {
11066 HOST_WIDE_INT tmp = ct;
11067 ct = cf;
11068 cf = tmp;
11069 diff = ct - cf;
11070 }
11071 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11072 ix86_compare_op1, VOIDmode, 0, -1);
11073 }
11074
11075 if (diff == 1)
11076 {
11077 /*
11078 * cmpl op0,op1
11079 * sbbl dest,dest
11080 * [addl dest, ct]
11081 *
11082 * Size 5 - 8.
11083 */
11084 if (ct)
11085 tmp = expand_simple_binop (mode, PLUS,
11086 tmp, GEN_INT (ct),
11087 copy_rtx (tmp), 1, OPTAB_DIRECT);
11088 }
11089 else if (cf == -1)
11090 {
11091 /*
11092 * cmpl op0,op1
11093 * sbbl dest,dest
11094 * orl $ct, dest
11095 *
11096 * Size 8.
11097 */
11098 tmp = expand_simple_binop (mode, IOR,
11099 tmp, GEN_INT (ct),
11100 copy_rtx (tmp), 1, OPTAB_DIRECT);
11101 }
11102 else if (diff == -1 && ct)
11103 {
11104 /*
11105 * cmpl op0,op1
11106 * sbbl dest,dest
11107 * notl dest
11108 * [addl dest, cf]
11109 *
11110 * Size 8 - 11.
11111 */
11112 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11113 if (cf)
11114 tmp = expand_simple_binop (mode, PLUS,
11115 copy_rtx (tmp), GEN_INT (cf),
11116 copy_rtx (tmp), 1, OPTAB_DIRECT);
11117 }
11118 else
11119 {
11120 /*
11121 * cmpl op0,op1
11122 * sbbl dest,dest
11123 * [notl dest]
11124 * andl cf - ct, dest
11125 * [addl dest, ct]
11126 *
11127 * Size 8 - 11.
11128 */
11129
11130 if (cf == 0)
11131 {
11132 cf = ct;
11133 ct = 0;
11134 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11135 }
11136
11137 tmp = expand_simple_binop (mode, AND,
11138 copy_rtx (tmp),
11139 gen_int_mode (cf - ct, mode),
11140 copy_rtx (tmp), 1, OPTAB_DIRECT);
11141 if (ct)
11142 tmp = expand_simple_binop (mode, PLUS,
11143 copy_rtx (tmp), GEN_INT (ct),
11144 copy_rtx (tmp), 1, OPTAB_DIRECT);
11145 }
11146
11147 if (!rtx_equal_p (tmp, out))
11148 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11149
11150 return 1; /* DONE */
11151 }
11152
11153 if (diff < 0)
11154 {
11155 HOST_WIDE_INT tmp;
11156 tmp = ct, ct = cf, cf = tmp;
11157 diff = -diff;
11158 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11159 {
11160	      /* We may be reversing an unordered compare to a normal compare, which
11161		 is not valid in general (we may convert a non-trapping condition
11162		 to a trapping one); however, on i386 we currently emit all
11163		 comparisons unordered.  */
11164 compare_code = reverse_condition_maybe_unordered (compare_code);
11165 code = reverse_condition_maybe_unordered (code);
11166 }
11167 else
11168 {
11169 compare_code = reverse_condition (compare_code);
11170 code = reverse_condition (code);
11171 }
11172 }
11173
11174 compare_code = UNKNOWN;
11175 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11176 && GET_CODE (ix86_compare_op1) == CONST_INT)
11177 {
11178 if (ix86_compare_op1 == const0_rtx
11179 && (code == LT || code == GE))
11180 compare_code = code;
11181 else if (ix86_compare_op1 == constm1_rtx)
11182 {
11183 if (code == LE)
11184 compare_code = LT;
11185 else if (code == GT)
11186 compare_code = GE;
11187 }
11188 }
11189
11190 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11191 if (compare_code != UNKNOWN
11192 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11193 && (cf == -1 || ct == -1))
11194 {
11195	  /* If the lea code below could be used, only optimize
11196	     if it results in a 2-insn sequence.  */
11197
11198 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11199 || diff == 3 || diff == 5 || diff == 9)
11200 || (compare_code == LT && ct == -1)
11201 || (compare_code == GE && cf == -1))
11202 {
11203 /*
11204 * notl op1 (if necessary)
11205 * sarl $31, op1
11206 * orl cf, op1
11207 */
11208 if (ct != -1)
11209 {
11210 cf = ct;
11211 ct = -1;
11212 code = reverse_condition (code);
11213 }
11214
11215 out = emit_store_flag (out, code, ix86_compare_op0,
11216 ix86_compare_op1, VOIDmode, 0, -1);
11217
11218 out = expand_simple_binop (mode, IOR,
11219 out, GEN_INT (cf),
11220 out, 1, OPTAB_DIRECT);
11221 if (out != operands[0])
11222 emit_move_insn (operands[0], out);
11223
11224 return 1; /* DONE */
11225 }
11226 }
11227
11228
11229 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11230 || diff == 3 || diff == 5 || diff == 9)
11231 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11232 && (mode != DImode
11233 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11234 {
11235 /*
11236 * xorl dest,dest
11237 * cmpl op1,op2
11238 * setcc dest
11239 * lea cf(dest*(ct-cf)),dest
11240 *
11241 * Size 14.
11242 *
11243 * This also catches the degenerate setcc-only case.
11244 */
11245
11246 rtx tmp;
11247 int nops;
11248
11249 out = emit_store_flag (out, code, ix86_compare_op0,
11250 ix86_compare_op1, VOIDmode, 0, 1);
11251
11252 nops = 0;
11253 /* On x86_64 the lea instruction operates on Pmode, so we need
11254	     to do the arithmetic in the proper mode to match.  */
11255 if (diff == 1)
11256 tmp = copy_rtx (out);
11257 else
11258 {
11259 rtx out1;
11260 out1 = copy_rtx (out);
11261 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11262 nops++;
11263 if (diff & 1)
11264 {
11265 tmp = gen_rtx_PLUS (mode, tmp, out1);
11266 nops++;
11267 }
11268 }
11269 if (cf != 0)
11270 {
11271 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11272 nops++;
11273 }
11274 if (!rtx_equal_p (tmp, out))
11275 {
11276 if (nops == 1)
11277 out = force_operand (tmp, copy_rtx (out));
11278 else
11279 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11280 }
11281 if (!rtx_equal_p (out, operands[0]))
11282 emit_move_insn (operands[0], copy_rtx (out));
11283
11284 return 1; /* DONE */
11285 }
11286
11287 /*
11288 * General case: Jumpful:
11289 * xorl dest,dest cmpl op1, op2
11290 * cmpl op1, op2 movl ct, dest
11291 * setcc dest jcc 1f
11292 * decl dest movl cf, dest
11293 * andl (cf-ct),dest 1:
11294 * addl ct,dest
11295 *
11296 * Size 20. Size 14.
11297 *
11298 * This is reasonably steep, but branch mispredict costs are
11299 * high on modern cpus, so consider failing only if optimizing
11300 * for space.
11301 */
11302
11303 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11304 && BRANCH_COST >= 2)
11305 {
11306 if (cf == 0)
11307 {
11308 cf = ct;
11309 ct = 0;
11310 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11311	    /* We may be reversing an unordered compare to a normal compare,
11312	       which is not valid in general (we may convert a non-trapping
11313	       condition to a trapping one); however, on i386 we currently
11314	       emit all comparisons unordered.  */
11315 code = reverse_condition_maybe_unordered (code);
11316 else
11317 {
11318 code = reverse_condition (code);
11319 if (compare_code != UNKNOWN)
11320 compare_code = reverse_condition (compare_code);
11321 }
11322 }
11323
11324 if (compare_code != UNKNOWN)
11325 {
11326 /* notl op1 (if needed)
11327 sarl $31, op1
11328 andl (cf-ct), op1
11329 addl ct, op1
11330
11331 For x < 0 (resp. x <= -1) there will be no notl,
11332 so if possible swap the constants to get rid of the
11333 complement.
11334 True/false will be -1/0 while code below (store flag
11335 followed by decrement) is 0/-1, so the constants need
11336 to be exchanged once more. */
11337
11338 if (compare_code == GE || !cf)
11339 {
11340 code = reverse_condition (code);
11341 compare_code = LT;
11342 }
11343 else
11344 {
11345 HOST_WIDE_INT tmp = cf;
11346 cf = ct;
11347 ct = tmp;
11348 }
11349
11350 out = emit_store_flag (out, code, ix86_compare_op0,
11351 ix86_compare_op1, VOIDmode, 0, -1);
11352 }
11353 else
11354 {
11355 out = emit_store_flag (out, code, ix86_compare_op0,
11356 ix86_compare_op1, VOIDmode, 0, 1);
11357
11358 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11359 copy_rtx (out), 1, OPTAB_DIRECT);
11360 }
11361
11362 out = expand_simple_binop (mode, AND, copy_rtx (out),
11363 gen_int_mode (cf - ct, mode),
11364 copy_rtx (out), 1, OPTAB_DIRECT);
11365 if (ct)
11366 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11367 copy_rtx (out), 1, OPTAB_DIRECT);
11368 if (!rtx_equal_p (out, operands[0]))
11369 emit_move_insn (operands[0], copy_rtx (out));
11370
11371 return 1; /* DONE */
11372 }
11373 }
11374
11375 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11376 {
11377 /* Try a few things more with specific constants and a variable. */
11378
11379 optab op;
11380 rtx var, orig_out, out, tmp;
11381
11382 if (BRANCH_COST <= 2)
11383 return 0; /* FAIL */
11384
11385      /* If one of the two operands is an interesting constant (0 or -1), load
11386	 a 0/-1 value using the code above and mask the variable in with a logical operation.  */
11387
11388 if (GET_CODE (operands[2]) == CONST_INT)
11389 {
11390 var = operands[3];
11391 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11392 operands[3] = constm1_rtx, op = and_optab;
11393 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11394 operands[3] = const0_rtx, op = ior_optab;
11395 else
11396 return 0; /* FAIL */
11397 }
11398 else if (GET_CODE (operands[3]) == CONST_INT)
11399 {
11400 var = operands[2];
11401 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11402 operands[2] = constm1_rtx, op = and_optab;
11403 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
11404 operands[2] = const0_rtx, op = ior_optab;
11405 else
11406 return 0; /* FAIL */
11407 }
11408 else
11409 return 0; /* FAIL */
11410
11411 orig_out = operands[0];
11412 tmp = gen_reg_rtx (mode);
11413 operands[0] = tmp;
11414
11415 /* Recurse to get the constant loaded. */
11416 if (ix86_expand_int_movcc (operands) == 0)
11417 return 0; /* FAIL */
11418
11419 /* Mask in the interesting variable. */
11420 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11421 OPTAB_WIDEN);
11422 if (!rtx_equal_p (out, orig_out))
11423 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11424
11425 return 1; /* DONE */
11426 }
11427
11428 /*
11429 * For comparison with above,
11430 *
11431 * movl cf,dest
11432 * movl ct,tmp
11433 * cmpl op1,op2
11434 * cmovcc tmp,dest
11435 *
11436 * Size 15.
11437 */
11438
11439 if (! nonimmediate_operand (operands[2], mode))
11440 operands[2] = force_reg (mode, operands[2]);
11441 if (! nonimmediate_operand (operands[3], mode))
11442 operands[3] = force_reg (mode, operands[3]);
11443
11444 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11445 {
11446 rtx tmp = gen_reg_rtx (mode);
11447 emit_move_insn (tmp, operands[3]);
11448 operands[3] = tmp;
11449 }
11450 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11451 {
11452 rtx tmp = gen_reg_rtx (mode);
11453 emit_move_insn (tmp, operands[2]);
11454 operands[2] = tmp;
11455 }
11456
11457 if (! register_operand (operands[2], VOIDmode)
11458 && (mode == QImode
11459 || ! register_operand (operands[3], VOIDmode)))
11460 operands[2] = force_reg (mode, operands[2]);
11461
11462 if (mode == QImode
11463 && ! register_operand (operands[3], VOIDmode))
11464 operands[3] = force_reg (mode, operands[3]);
11465
11466 emit_insn (compare_seq);
11467 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11468 gen_rtx_IF_THEN_ELSE (mode,
11469 compare_op, operands[2],
11470 operands[3])));
11471 if (bypass_test)
11472 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11473 gen_rtx_IF_THEN_ELSE (mode,
11474 bypass_test,
11475 copy_rtx (operands[3]),
11476 copy_rtx (operands[0]))));
11477 if (second_test)
11478 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11479 gen_rtx_IF_THEN_ELSE (mode,
11480 second_test,
11481 copy_rtx (operands[2]),
11482 copy_rtx (operands[0]))));
11483
11484 return 1; /* DONE */
11485}
11486
11487/* Swap, force into registers, or otherwise massage the two operands
11488 to an sse comparison with a mask result. Thus we differ a bit from
11489 ix86_prepare_fp_compare_args which expects to produce a flags result.
11490
11491 The DEST operand exists to help determine whether to commute commutative
11492 operators. The POP0/POP1 operands are updated in place. The new
11493 comparison code is returned, or UNKNOWN if not implementable. */
11494
11495static enum rtx_code
11496ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11497 rtx *pop0, rtx *pop1)
11498{
11499 rtx tmp;
11500
11501 switch (code)
11502 {
11503 case LTGT:
11504 case UNEQ:
11505 /* We have no LTGT as an operator. We could implement it with
11506 NE & ORDERED, but this requires an extra temporary. It's
11507 not clear that it's worth it. */
11508 return UNKNOWN;
11509
11510 case LT:
11511 case LE:
11512 case UNGT:
11513 case UNGE:
11514 /* These are supported directly. */
11515 break;
11516
11517 case EQ:
11518 case NE:
11519 case UNORDERED:
11520 case ORDERED:
11521 /* For commutative operators, try to canonicalize the destination
11522 operand to be first in the comparison - this helps reload to
11523 avoid extra moves. */
11524 if (!dest || !rtx_equal_p (dest, *pop1))
11525 break;
11526 /* FALLTHRU */
11527
11528 case GE:
11529 case GT:
11530 case UNLE:
11531 case UNLT:
11532 /* These are not supported directly. Swap the comparison operands
11533 to transform into something that is supported. */
11534 tmp = *pop0;
11535 *pop0 = *pop1;
11536 *pop1 = tmp;
11537 code = swap_condition (code);
11538 break;
11539
11540 default:
11541 gcc_unreachable ();
11542 }
11543
11544 return code;
11545}
11546
11547/* Detect conditional moves that exactly match min/max operational
11548 semantics. Note that this is IEEE safe, as long as we don't
11549 interchange the operands.
11550
11551 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11552 and TRUE if the operation is successful and instructions are emitted. */
11553
11554static bool
11555ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11556 rtx cmp_op1, rtx if_true, rtx if_false)
11557{
11558 enum machine_mode mode;
11559 bool is_min;
11560 rtx tmp;
11561
11562 if (code == LT)
11563 ;
11564 else if (code == UNGE)
11565 {
11566 tmp = if_true;
11567 if_true = if_false;
11568 if_false = tmp;
11569 }
11570 else
11571 return false;
11572
11573 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11574 is_min = true;
11575 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11576 is_min = false;
11577 else
11578 return false;
11579
11580 mode = GET_MODE (dest);
11581
11582 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11583 but MODE may be a vector mode and thus not appropriate. */
11584 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11585 {
11586 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11587 rtvec v;
11588
11589 if_true = force_reg (mode, if_true);
11590 v = gen_rtvec (2, if_true, if_false);
11591 tmp = gen_rtx_UNSPEC (mode, v, u);
11592 }
11593 else
11594 {
11595 code = is_min ? SMIN : SMAX;
11596 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11597 }
11598
11599 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11600 return true;
11601}
11602
11603/* Expand an sse vector comparison. Return the register with the result. */
11604
11605static rtx
11606ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11607 rtx op_true, rtx op_false)
11608{
11609 enum machine_mode mode = GET_MODE (dest);
11610 rtx x;
11611
11612 cmp_op0 = force_reg (mode, cmp_op0);
11613 if (!nonimmediate_operand (cmp_op1, mode))
11614 cmp_op1 = force_reg (mode, cmp_op1);
11615
11616 if (optimize
11617 || reg_overlap_mentioned_p (dest, op_true)
11618 || reg_overlap_mentioned_p (dest, op_false))
11619 dest = gen_reg_rtx (mode);
11620
11621 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11622 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11623
11624 return dest;
11625}
11626
11627/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11628 operations. This is used for both scalar and vector conditional moves. */
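/* The comparison result CMP is an all-ones / all-zeros mask, so the general
   case computes dest = (cmp & op_true) | (~cmp & op_false); when one arm is
   zero, a single AND with CMP or its complement suffices.  */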
11629
11630static void
11631ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11632{
11633 enum machine_mode mode = GET_MODE (dest);
11634 rtx t2, t3, x;
11635
11636 if (op_false == CONST0_RTX (mode))
11637 {
11638 op_true = force_reg (mode, op_true);
11639 x = gen_rtx_AND (mode, cmp, op_true);
11640 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11641 }
11642 else if (op_true == CONST0_RTX (mode))
11643 {
11644 op_false = force_reg (mode, op_false);
11645 x = gen_rtx_NOT (mode, cmp);
11646 x = gen_rtx_AND (mode, x, op_false);
11647 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11648 }
11649 else
11650 {
11651 op_true = force_reg (mode, op_true);
11652 op_false = force_reg (mode, op_false);
11653
11654 t2 = gen_reg_rtx (mode);
11655 if (optimize)
11656 t3 = gen_reg_rtx (mode);
11657 else
11658 t3 = dest;
11659
11660 x = gen_rtx_AND (mode, op_true, cmp);
11661 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11662
11663 x = gen_rtx_NOT (mode, cmp);
11664 x = gen_rtx_AND (mode, x, op_false);
11665 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11666
11667 x = gen_rtx_IOR (mode, t3, t2);
11668 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11669 }
11670}
11671
11672/* Expand a floating-point conditional move. Return true if successful. */
11673
11674int
11675ix86_expand_fp_movcc (rtx operands[])
11676{
11677 enum machine_mode mode = GET_MODE (operands[0]);
11678 enum rtx_code code = GET_CODE (operands[1]);
11679 rtx tmp, compare_op, second_test, bypass_test;
11680
11681 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11682 {
11683 enum machine_mode cmode;
11684
11685      /* Since we have no cmove for SSE registers, don't force bad register
11686	 allocation just to gain access to one.  Deny movcc when the
11687	 comparison mode doesn't match the move mode.  */
11688 cmode = GET_MODE (ix86_compare_op0);
11689 if (cmode == VOIDmode)
11690 cmode = GET_MODE (ix86_compare_op1);
11691 if (cmode != mode)
11692 return 0;
11693
11694 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11695 &ix86_compare_op0,
11696 &ix86_compare_op1);
11697 if (code == UNKNOWN)
11698 return 0;
11699
11700 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11701 ix86_compare_op1, operands[2],
11702 operands[3]))
11703 return 1;
11704
11705 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11706 ix86_compare_op1, operands[2], operands[3]);
11707 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11708 return 1;
11709 }
11710
11711 /* The floating point conditional move instructions don't directly
11712 support conditions resulting from a signed integer comparison. */
11713
11714 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11715
11719 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11720 {
11721 gcc_assert (!second_test && !bypass_test);
11722 tmp = gen_reg_rtx (QImode);
11723 ix86_expand_setcc (code, tmp);
11724 code = NE;
11725 ix86_compare_op0 = tmp;
11726 ix86_compare_op1 = const0_rtx;
11727 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11728 }
11729 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11730 {
11731 tmp = gen_reg_rtx (mode);
11732 emit_move_insn (tmp, operands[3]);
11733 operands[3] = tmp;
11734 }
11735 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11736 {
11737 tmp = gen_reg_rtx (mode);
11738 emit_move_insn (tmp, operands[2]);
11739 operands[2] = tmp;
11740 }
11741
11742 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11743 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11744 operands[2], operands[3])));
11745 if (bypass_test)
11746 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11747 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11748 operands[3], operands[0])));
11749 if (second_test)
11750 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11751 gen_rtx_IF_THEN_ELSE (mode, second_test,
11752 operands[2], operands[0])));
11753
11754 return 1;
11755}
11756
11757/* Expand a floating-point vector conditional move; a vcond operation
11758 rather than a movcc operation. */
11759
11760bool
11761ix86_expand_fp_vcond (rtx operands[])
11762{
11763 enum rtx_code code = GET_CODE (operands[3]);
11764 rtx cmp;
11765
11766 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11767 &operands[4], &operands[5]);
11768 if (code == UNKNOWN)
11769 return false;
11770
11771 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11772 operands[5], operands[1], operands[2]))
11773 return true;
11774
11775 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11776 operands[1], operands[2]);
11777 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11778 return true;
11779}
11780
11781/* Expand a signed integral vector conditional move. */
11782
11783bool
11784ix86_expand_int_vcond (rtx operands[])
11785{
11786 enum machine_mode mode = GET_MODE (operands[0]);
11787 enum rtx_code code = GET_CODE (operands[3]);
11788 bool negate = false;
11789 rtx x, cop0, cop1;
11790
11791 cop0 = operands[4];
11792 cop1 = operands[5];
11793
11794 /* Canonicalize the comparison to EQ, GT, GTU. */
11795 switch (code)
11796 {
11797 case EQ:
11798 case GT:
11799 case GTU:
11800 break;
11801
11802 case NE:
11803 case LE:
11804 case LEU:
11805 code = reverse_condition (code);
11806 negate = true;
11807 break;
11808
11809 case GE:
11810 case GEU:
11811 code = reverse_condition (code);
11812 negate = true;
11813 /* FALLTHRU */
11814
11815 case LT:
11816 case LTU:
11817 code = swap_condition (code);
11818 x = cop0, cop0 = cop1, cop1 = x;
11819 break;
11820
11821 default:
11822 gcc_unreachable ();
11823 }
11824
11825 /* Unsigned parallel compare is not supported by the hardware. Play some
11826 tricks to turn this into a signed comparison against 0. */
11827 if (code == GTU)
11828 {
11829 cop0 = force_reg (mode, cop0);
11830
11831 switch (mode)
11832 {
11833 case V4SImode:
11834 {
11835 rtx t1, t2, mask;
11836
11837 /* Perform a parallel modulo subtraction. */
11838 t1 = gen_reg_rtx (mode);
11839 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11840
11841 /* Extract the original sign bit of op0. */
11842 mask = GEN_INT (-0x80000000);
11843 mask = gen_rtx_CONST_VECTOR (mode,
11844 gen_rtvec (4, mask, mask, mask, mask));
11845 mask = force_reg (mode, mask);
11846 t2 = gen_reg_rtx (mode);
11847 emit_insn (gen_andv4si3 (t2, cop0, mask));
11848
11849 /* XOR it back into the result of the subtraction. This results
11850 in the sign bit set iff we saw unsigned underflow. */
11851 x = gen_reg_rtx (mode);
11852 emit_insn (gen_xorv4si3 (x, t1, t2));
11853
11854 code = GT;
11855 }
11856 break;
11857
11858 case V16QImode:
11859 case V8HImode:
11860 /* Perform a parallel unsigned saturating subtraction. */
11861 x = gen_reg_rtx (mode);
11862 emit_insn (gen_rtx_SET (VOIDmode, x,
11863 gen_rtx_US_MINUS (mode, cop0, cop1)));
11864
11865 code = EQ;
11866 negate = !negate;
11867 break;
11868
11869 default:
11870 gcc_unreachable ();
11871 }
11872
11873 cop0 = x;
11874 cop1 = CONST0_RTX (mode);
11875 }
11876
11877 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11878 operands[1+negate], operands[2-negate]);
11879
11880 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11881 operands[2-negate]);
11882 return true;
11883}
11884
11885/* Expand conditional increment or decrement using adc/sbb instructions.
11886 The default case using setcc followed by the conditional move can be
11887 done by generic code. */
11888int
11889ix86_expand_int_addcc (rtx operands[])
11890{
11891 enum rtx_code code = GET_CODE (operands[1]);
11892 rtx compare_op;
11893 rtx val = const0_rtx;
11894 bool fpcmp = false;
11895 enum machine_mode mode = GET_MODE (operands[0]);
11896
11897 if (operands[3] != const1_rtx
11898 && operands[3] != constm1_rtx)
11899 return 0;
11900 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11901 ix86_compare_op1, &compare_op))
11902 return 0;
11903 code = GET_CODE (compare_op);
11904
11905 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11906 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11907 {
11908 fpcmp = true;
11909 code = ix86_fp_compare_code_to_integer (code);
11910 }
11911
11912 if (code != LTU)
11913 {
11914 val = constm1_rtx;
11915 if (fpcmp)
11916 PUT_CODE (compare_op,
11917 reverse_condition_maybe_unordered
11918 (GET_CODE (compare_op)));
11919 else
11920 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11921 }
11922 PUT_MODE (compare_op, mode);
11923
11924 /* Construct either adc or sbb insn. */
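  /* With a 0 or -1 immediate, sbb computes op2 - imm - CF and adc computes
     op2 + imm + CF, i.e. op2 - CF, op2 + 1 - CF, op2 + CF or op2 - 1 + CF,
     covering all four combinations of condition polarity and +/-1.  */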
11925 if ((code == LTU) == (operands[3] == constm1_rtx))
11926 {
11927 switch (GET_MODE (operands[0]))
11928 {
11929 case QImode:
11930 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11931 break;
11932 case HImode:
11933 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11934 break;
11935 case SImode:
11936 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11937 break;
11938 case DImode:
11939 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11940 break;
11941 default:
11942 gcc_unreachable ();
11943 }
11944 }
11945 else
11946 {
11947 switch (GET_MODE (operands[0]))
11948 {
11949 case QImode:
11950 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11951 break;
11952 case HImode:
11953 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11954 break;
11955 case SImode:
11956 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11957 break;
11958 case DImode:
11959 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11960 break;
11961 default:
11962 gcc_unreachable ();
11963 }
11964 }
11965 return 1; /* DONE */
11966}
11967
11968
11969/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11970   works for floating point parameters and non-offsettable memories.
11971   For pushes, it returns just stack offsets; the values will be saved
11972   in the right order.  At most three parts are generated.  */
11973
11974static int
11975ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11976{
11977 int size;
11978
11979 if (!TARGET_64BIT)
11980 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11981 else
11982 size = (GET_MODE_SIZE (mode) + 4) / 8;
11983
11984 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11985 gcc_assert (size >= 2 && size <= 3);
11986
11987 /* Optimize constant pool reference to immediates. This is used by fp
11988     moves, which force all constants to memory to allow combining.  */
11989 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11990 {
11991 rtx tmp = maybe_get_pool_constant (operand);
11992 if (tmp)
11993 operand = tmp;
11994 }
11995
11996 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11997 {
11998      /* The only non-offsettable memories we handle are pushes.  */
11999 int ok = push_operand (operand, VOIDmode);
12000
12001 gcc_assert (ok);
12002
12003 operand = copy_rtx (operand);
12004 PUT_MODE (operand, Pmode);
12005 parts[0] = parts[1] = parts[2] = operand;
12006 return size;
12007 }
12008
12009 if (GET_CODE (operand) == CONST_VECTOR)
12010 {
12011 enum machine_mode imode = int_mode_for_mode (mode);
12012 /* Caution: if we looked through a constant pool memory above,
12013 the operand may actually have a different mode now. That's
12014 ok, since we want to pun this all the way back to an integer. */
12015 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12016 gcc_assert (operand != NULL);
12017 mode = imode;
12018 }
12019
12020 if (!TARGET_64BIT)
12021 {
12022 if (mode == DImode)
12023 split_di (&operand, 1, &parts[0], &parts[1]);
12024 else
12025 {
12026 if (REG_P (operand))
12027 {
12028 gcc_assert (reload_completed);
12029 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12030 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12031 if (size == 3)
12032 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12033 }
12034 else if (offsettable_memref_p (operand))
12035 {
12036 operand = adjust_address (operand, SImode, 0);
12037 parts[0] = operand;
12038 parts[1] = adjust_address (operand, SImode, 4);
12039 if (size == 3)
12040 parts[2] = adjust_address (operand, SImode, 8);
12041 }
12042 else if (GET_CODE (operand) == CONST_DOUBLE)
12043 {
12044 REAL_VALUE_TYPE r;
12045 long l[4];
12046
12047 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12048 switch (mode)
12049 {
12050 case XFmode:
12051 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12052 parts[2] = gen_int_mode (l[2], SImode);
12053 break;
12054 case DFmode:
12055 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12056 break;
12057 default:
12058 gcc_unreachable ();
12059 }
12060 parts[1] = gen_int_mode (l[1], SImode);
12061 parts[0] = gen_int_mode (l[0], SImode);
12062 }
12063 else
12064 gcc_unreachable ();
12065 }
12066 }
12067 else
12068 {
12069 if (mode == TImode)
12070 split_ti (&operand, 1, &parts[0], &parts[1]);
12071 if (mode == XFmode || mode == TFmode)
12072 {
12073 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12074 if (REG_P (operand))
12075 {
12076 gcc_assert (reload_completed);
12077 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12078 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12079 }
12080 else if (offsettable_memref_p (operand))
12081 {
12082 operand = adjust_address (operand, DImode, 0);
12083 parts[0] = operand;
12084 parts[1] = adjust_address (operand, upper_mode, 8);
12085 }
12086 else if (GET_CODE (operand) == CONST_DOUBLE)
12087 {
12088 REAL_VALUE_TYPE r;
12089 long l[4];
12090
12091 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12092 real_to_target (l, &r, mode);
12093
12094 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12095 if (HOST_BITS_PER_WIDE_INT >= 64)
12096 parts[0]
12097 = gen_int_mode
12098 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12099 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12100 DImode);
12101 else
12102 parts[0] = immed_double_const (l[0], l[1], DImode);
12103
12104 if (upper_mode == SImode)
12105 parts[1] = gen_int_mode (l[2], SImode);
12106 else if (HOST_BITS_PER_WIDE_INT >= 64)
12107 parts[1]
12108 = gen_int_mode
12109 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12110 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12111 DImode);
12112 else
12113 parts[1] = immed_double_const (l[2], l[3], DImode);
12114 }
12115 else
12116 gcc_unreachable ();
12117 }
12118 }
12119
12120 return size;
12121}
12122
12123/* Emit insns to perform a move or push of DI, DF, and XF values.
12124 Return false when normal moves are needed; true when all required
12125 insns have been emitted. Operands 2-4 contain the input values
12126   in the correct order; operands 5-7 contain the output values.  */
12127
12128void
12129ix86_split_long_move (rtx operands[])
12130{
12131 rtx part[2][3];
12132 int nparts;
12133 int push = 0;
12134 int collisions = 0;
12135 enum machine_mode mode = GET_MODE (operands[0]);
12136
12137  /* The DFmode expanders may ask us to move a double.
12138     For a 64-bit target this is a single move.  By hiding that fact
12139     here we simplify the i386.md splitters.  */
12140 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12141 {
12142 /* Optimize constant pool reference to immediates. This is used by
12143	 fp moves, which force all constants to memory to allow combining.  */
12144
12145 if (GET_CODE (operands[1]) == MEM
12146 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12147 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12148 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12149 if (push_operand (operands[0], VOIDmode))
12150 {
12151 operands[0] = copy_rtx (operands[0]);
12152 PUT_MODE (operands[0], Pmode);
12153 }
12154 else
12155 operands[0] = gen_lowpart (DImode, operands[0]);
12156 operands[1] = gen_lowpart (DImode, operands[1]);
12157 emit_move_insn (operands[0], operands[1]);
12158 return;
12159 }
12160
12161 /* The only non-offsettable memory we handle is push. */
12162 if (push_operand (operands[0], VOIDmode))
12163 push = 1;
12164 else
12165 gcc_assert (GET_CODE (operands[0]) != MEM
12166 || offsettable_memref_p (operands[0]));
12167
12168 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12169 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12170
12171  /* When emitting a push, take care of source operands on the stack.  */
12172 if (push && GET_CODE (operands[1]) == MEM
12173 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12174 {
12175 if (nparts == 3)
12176 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12177 XEXP (part[1][2], 0));
12178 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12179 XEXP (part[1][1], 0));
12180 }
12181
12182  /* We need to do the copy in the right order in case an address register
12183 of the source overlaps the destination. */
12184 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12185 {
12186 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12187 collisions++;
12188 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12189 collisions++;
12190 if (nparts == 3
12191 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12192 collisions++;
12193
12194 /* Collision in the middle part can be handled by reordering. */
12195 if (collisions == 1 && nparts == 3
12196 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12197 {
12198 rtx tmp;
12199 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12200 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12201 }
12202
12203	    /* If there are more collisions, we can't handle them by reordering.
12204 Do an lea to the last part and use only one colliding move. */
12205 else if (collisions > 1)
12206 {
12207 rtx base;
12208
12209 collisions = 1;
12210
12211 base = part[0][nparts - 1];
12212
12213 /* Handle the case when the last part isn't valid for lea.
12214 Happens in 64-bit mode storing the 12-byte XFmode. */
12215 if (GET_MODE (base) != Pmode)
12216 base = gen_rtx_REG (Pmode, REGNO (base));
12217
12218 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12219 part[1][0] = replace_equiv_address (part[1][0], base);
12220 part[1][1] = replace_equiv_address (part[1][1],
12221 plus_constant (base, UNITS_PER_WORD));
12222 if (nparts == 3)
12223 part[1][2] = replace_equiv_address (part[1][2],
12224 plus_constant (base, 8));
12225 }
12226 }
12227
12228 if (push)
12229 {
12230 if (!TARGET_64BIT)
12231 {
12232 if (nparts == 3)
12233 {
12234 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12235 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12236 emit_move_insn (part[0][2], part[1][2]);
12237 }
12238 }
12239 else
12240 {
12241 	  /* In 64-bit mode we don't have a 32-bit push available.  In case this is
12242 	     a register, it is OK - we will just use the larger counterpart.  We also
12243 	     retype the memory - this comes from an attempt to avoid a REX prefix on
12244 	     the move of the second half of a TFmode value.  */
12245 if (GET_MODE (part[1][1]) == SImode)
12246 {
12247 switch (GET_CODE (part[1][1]))
12248 {
12249 case MEM:
12250 part[1][1] = adjust_address (part[1][1], DImode, 0);
12251 break;
12252
12253 case REG:
12254 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12255 break;
12256
12257 default:
12258 gcc_unreachable ();
12259 }
12260
12261 if (GET_MODE (part[1][0]) == SImode)
12262 part[1][0] = part[1][1];
12263 }
12264 }
12265 emit_move_insn (part[0][1], part[1][1]);
12266 emit_move_insn (part[0][0], part[1][0]);
12267 return;
12268 }
12269
12270   /* Choose the correct order so as not to overwrite the source before it is copied.  */
12271 if ((REG_P (part[0][0])
12272 && REG_P (part[1][1])
12273 && (REGNO (part[0][0]) == REGNO (part[1][1])
12274 || (nparts == 3
12275 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12276 || (collisions > 0
12277 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12278 {
12279 if (nparts == 3)
12280 {
12281 operands[2] = part[0][2];
12282 operands[3] = part[0][1];
12283 operands[4] = part[0][0];
12284 operands[5] = part[1][2];
12285 operands[6] = part[1][1];
12286 operands[7] = part[1][0];
12287 }
12288 else
12289 {
12290 operands[2] = part[0][1];
12291 operands[3] = part[0][0];
12292 operands[5] = part[1][1];
12293 operands[6] = part[1][0];
12294 }
12295 }
12296 else
12297 {
12298 if (nparts == 3)
12299 {
12300 operands[2] = part[0][0];
12301 operands[3] = part[0][1];
12302 operands[4] = part[0][2];
12303 operands[5] = part[1][0];
12304 operands[6] = part[1][1];
12305 operands[7] = part[1][2];
12306 }
12307 else
12308 {
12309 operands[2] = part[0][0];
12310 operands[3] = part[0][1];
12311 operands[5] = part[1][0];
12312 operands[6] = part[1][1];
12313 }
12314 }
12315
12316 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12317 if (optimize_size)
12318 {
12319 if (GET_CODE (operands[5]) == CONST_INT
12320 && operands[5] != const0_rtx
12321 && REG_P (operands[2]))
12322 {
12323 if (GET_CODE (operands[6]) == CONST_INT
12324 && INTVAL (operands[6]) == INTVAL (operands[5]))
12325 operands[6] = operands[2];
12326
12327 if (nparts == 3
12328 && GET_CODE (operands[7]) == CONST_INT
12329 && INTVAL (operands[7]) == INTVAL (operands[5]))
12330 operands[7] = operands[2];
12331 }
12332
12333 if (nparts == 3
12334 && GET_CODE (operands[6]) == CONST_INT
12335 && operands[6] != const0_rtx
12336 && REG_P (operands[3])
12337 && GET_CODE (operands[7]) == CONST_INT
12338 && INTVAL (operands[7]) == INTVAL (operands[6]))
12339 operands[7] = operands[3];
12340 }
12341
12342 emit_move_insn (operands[2], operands[5]);
12343 emit_move_insn (operands[3], operands[6]);
12344 if (nparts == 3)
12345 emit_move_insn (operands[4], operands[7]);
12346
12347 return;
12348}
12349
12350/* Helper function of ix86_split_ashl used to generate an SImode/DImode
12351 left shift by a constant, either using a single shift or
12352 a sequence of add instructions. */
12353
12354static void
12355ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12356{
12357 if (count == 1)
12358 {
12359 emit_insn ((mode == DImode
12360 ? gen_addsi3
12361 : gen_adddi3) (operand, operand, operand));
12362 }
12363 else if (!optimize_size
12364 && count * ix86_cost->add <= ix86_cost->shift_const)
12365 {
12366 int i;
12367 for (i=0; i<count; i++)
12368 {
12369 emit_insn ((mode == DImode
12370 ? gen_addsi3
12371 : gen_adddi3) (operand, operand, operand));
12372 }
12373 }
12374 else
12375 emit_insn ((mode == DImode
12376 ? gen_ashlsi3
12377 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12378}
12379
12380void
12381ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12382{
12383 rtx low[2], high[2];
12384 int count;
12385 const int single_width = mode == DImode ? 32 : 64;
12386
12387 if (GET_CODE (operands[2]) == CONST_INT)
12388 {
12389 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12390 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12391
12392 if (count >= single_width)
12393 {
12394 emit_move_insn (high[0], low[1]);
12395 emit_move_insn (low[0], const0_rtx);
12396
12397 if (count > single_width)
12398 ix86_expand_ashl_const (high[0], count - single_width, mode);
12399 }
12400 else
12401 {
12402 if (!rtx_equal_p (operands[0], operands[1]))
12403 emit_move_insn (operands[0], operands[1]);
12404 emit_insn ((mode == DImode
12405 ? gen_x86_shld_1
12406 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12407 ix86_expand_ashl_const (low[0], count, mode);
12408 }
12409 return;
12410 }
12411
12412 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12413
12414 if (operands[1] == const1_rtx)
12415 {
12416       /* Assuming we've chosen QImode-capable registers, then 1 << N
12417 	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
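      /* Sketch of the idea (illustrative only; the actual register choice is
	 up to the allocator).  For the 32-bit DImode case this amounts to:

	     xorl  %eax, %eax          # low  = 0
	     xorl  %edx, %edx          # high = 0
	     testb $32, %cl
	     sete  %al                 # low  = ((count & 32) == 0)
	     setne %dl                 # high = ((count & 32) != 0)
	     sall  %cl, %eax           # both halves then shift by count & 31,
	     sall  %cl, %edx           # leaving 1 << count across edx:eax.  */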
12418 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12419 {
12420 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12421
12422 ix86_expand_clear (low[0]);
12423 ix86_expand_clear (high[0]);
12424 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12425
12426 d = gen_lowpart (QImode, low[0]);
12427 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12428 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12429 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12430
12431 d = gen_lowpart (QImode, high[0]);
12432 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12433 s = gen_rtx_NE (QImode, flags, const0_rtx);
12434 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12435 }
12436
12437 /* Otherwise, we can get the same results by manually performing
12438 a bit extract operation on bit 5/6, and then performing the two
12439 shifts. The two methods of getting 0/1 into low/high are exactly
12440 the same size. Avoiding the shift in the bit extract case helps
12441 pentium4 a bit; no one else seems to care much either way. */
12442 else
12443 {
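	  /* Illustrative: for the DImode case the code below computes
	     high = (count >> 5) & 1 and low = high ^ 1, and the common
	     shifts that follow then move the single set bit into place,
	     matching the setcc variant above.  */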
12444 rtx x;
12445
12446 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12447 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12448 else
12449 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12450 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12451
12452 emit_insn ((mode == DImode
12453 ? gen_lshrsi3
12454 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12455 emit_insn ((mode == DImode
12456 ? gen_andsi3
12457 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12458 emit_move_insn (low[0], high[0]);
12459 emit_insn ((mode == DImode
12460 ? gen_xorsi3
12461 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12462 }
12463
12464 emit_insn ((mode == DImode
12465 ? gen_ashlsi3
12466 : gen_ashldi3) (low[0], low[0], operands[2]));
12467 emit_insn ((mode == DImode
12468 ? gen_ashlsi3
12469 : gen_ashldi3) (high[0], high[0], operands[2]));
12470 return;
12471 }
12472
12473 if (operands[1] == constm1_rtx)
12474 {
12475 /* For -1 << N, we can avoid the shld instruction, because we
12476 know that we're shifting 0...31/63 ones into a -1. */
12477 emit_move_insn (low[0], constm1_rtx);
12478 if (optimize_size)
12479 emit_move_insn (high[0], low[0]);
12480 else
12481 emit_move_insn (high[0], constm1_rtx);
12482 }
12483 else
12484 {
12485 if (!rtx_equal_p (operands[0], operands[1]))
12486 emit_move_insn (operands[0], operands[1]);
12487
12488 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12489 emit_insn ((mode == DImode
12490 ? gen_x86_shld_1
12491 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12492 }
12493
12494 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12495
12496 if (TARGET_CMOVE && scratch)
12497 {
12498 ix86_expand_clear (scratch);
12499 emit_insn ((mode == DImode
12500 ? gen_x86_shift_adj_1
12501 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12502 }
12503 else
12504 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12505}
12506
12507void
12508ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12509{
12510 rtx low[2], high[2];
12511 int count;
12512 const int single_width = mode == DImode ? 32 : 64;
12513
12514 if (GET_CODE (operands[2]) == CONST_INT)
12515 {
12516 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12517 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12518
12519 if (count == single_width * 2 - 1)
12520 {
12521 emit_move_insn (high[0], high[1]);
12522 emit_insn ((mode == DImode
12523 ? gen_ashrsi3
12524 : gen_ashrdi3) (high[0], high[0],
12525 GEN_INT (single_width - 1)));
12526 emit_move_insn (low[0], high[0]);
12527
12528 }
12529 else if (count >= single_width)
12530 {
12531 emit_move_insn (low[0], high[1]);
12532 emit_move_insn (high[0], low[0]);
12533 emit_insn ((mode == DImode
12534 ? gen_ashrsi3
12535 : gen_ashrdi3) (high[0], high[0],
12536 GEN_INT (single_width - 1)));
12537 if (count > single_width)
12538 emit_insn ((mode == DImode
12539 ? gen_ashrsi3
12540 : gen_ashrdi3) (low[0], low[0],
12541 GEN_INT (count - single_width)));
12542 }
12543 else
12544 {
12545 if (!rtx_equal_p (operands[0], operands[1]))
12546 emit_move_insn (operands[0], operands[1]);
12547 emit_insn ((mode == DImode
12548 ? gen_x86_shrd_1
12549 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12550 emit_insn ((mode == DImode
12551 ? gen_ashrsi3
12552 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12553 }
12554 }
12555 else
12556 {
12557 if (!rtx_equal_p (operands[0], operands[1]))
12558 emit_move_insn (operands[0], operands[1]);
12559
12560 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12561
12562 emit_insn ((mode == DImode
12563 ? gen_x86_shrd_1
12564 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12565 emit_insn ((mode == DImode
12566 ? gen_ashrsi3
12567 : gen_ashrdi3) (high[0], high[0], operands[2]));
12568
12569 if (TARGET_CMOVE && scratch)
12570 {
12571 emit_move_insn (scratch, high[0]);
12572 emit_insn ((mode == DImode
12573 ? gen_ashrsi3
12574 : gen_ashrdi3) (scratch, scratch,
12575 GEN_INT (single_width - 1)));
12576 emit_insn ((mode == DImode
12577 ? gen_x86_shift_adj_1
12578 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12579 scratch));
12580 }
12581 else
12582 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12583 }
12584}
12585
12586void
12587ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12588{
12589 rtx low[2], high[2];
12590 int count;
12591 const int single_width = mode == DImode ? 32 : 64;
12592
12593 if (GET_CODE (operands[2]) == CONST_INT)
12594 {
12595 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12596 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12597
12598 if (count >= single_width)
12599 {
12600 emit_move_insn (low[0], high[1]);
12601 ix86_expand_clear (high[0]);
12602
12603 if (count > single_width)
12604 emit_insn ((mode == DImode
12605 ? gen_lshrsi3
12606 : gen_lshrdi3) (low[0], low[0],
12607 GEN_INT (count - single_width)));
12608 }
12609 else
12610 {
12611 if (!rtx_equal_p (operands[0], operands[1]))
12612 emit_move_insn (operands[0], operands[1]);
12613 emit_insn ((mode == DImode
12614 ? gen_x86_shrd_1
12615 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12616 emit_insn ((mode == DImode
12617 ? gen_lshrsi3
12618 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12619 }
12620 }
12621 else
12622 {
12623 if (!rtx_equal_p (operands[0], operands[1]))
12624 emit_move_insn (operands[0], operands[1]);
12625
12626 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12627
12628 emit_insn ((mode == DImode
12629 ? gen_x86_shrd_1
12630 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12631 emit_insn ((mode == DImode
12632 ? gen_lshrsi3
12633 : gen_lshrdi3) (high[0], high[0], operands[2]));
12634
12635 /* Heh. By reversing the arguments, we can reuse this pattern. */
12636 if (TARGET_CMOVE && scratch)
12637 {
12638 ix86_expand_clear (scratch);
12639 emit_insn ((mode == DImode
12640 ? gen_x86_shift_adj_1
12641 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12642 scratch));
12643 }
12644 else
12645 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12646 }
12647}
12648
12649 /* Helper function for the string operations below.  Test whether VARIABLE
12650    is aligned to VALUE bytes.  If so, jump to the label.  */
12651static rtx
12652ix86_expand_aligntest (rtx variable, int value)
12653{
12654 rtx label = gen_label_rtx ();
12655 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12656 if (GET_MODE (variable) == DImode)
12657 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12658 else
12659 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12660 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12661 1, label);
12662 return label;
12663}
12664
12665 /* Adjust COUNTREG down by VALUE.  */
12666static void
12667ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12668{
12669 if (GET_MODE (countreg) == DImode)
12670 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12671 else
12672 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12673}
12674
12675/* Zero extend possibly SImode EXP to Pmode register. */
12676rtx
12677ix86_zero_extend_to_Pmode (rtx exp)
12678{
12679 rtx r;
12680 if (GET_MODE (exp) == VOIDmode)
12681 return force_reg (Pmode, exp);
12682 if (GET_MODE (exp) == Pmode)
12683 return copy_to_mode_reg (Pmode, exp);
12684 r = gen_reg_rtx (Pmode);
12685 emit_insn (gen_zero_extendsidi2 (r, exp));
12686 return r;
12687}
12688
12689/* Expand string move (memcpy) operation. Use i386 string operations when
12690 profitable. expand_clrmem contains similar code. */
12691int
12692ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12693{
12694 rtx srcreg, destreg, countreg, srcexp, destexp;
12695 enum machine_mode counter_mode;
12696 HOST_WIDE_INT align = 0;
12697 unsigned HOST_WIDE_INT count = 0;
12698
12699 if (GET_CODE (align_exp) == CONST_INT)
12700 align = INTVAL (align_exp);
12701
12702 /* Can't use any of this if the user has appropriated esi or edi. */
12703 if (global_regs[4] || global_regs[5])
12704 return 0;
12705
12706 /* This simple hack avoids all inlining code and simplifies code below. */
12707 if (!TARGET_ALIGN_STRINGOPS)
12708 align = 64;
12709
12710 if (GET_CODE (count_exp) == CONST_INT)
12711 {
12712 count = INTVAL (count_exp);
12713 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12714 return 0;
12715 }
12716
12717   /* Figure out the proper mode for the counter.  For 32-bit targets it is
12718      always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
12719      Set count to the number of bytes copied when known at compile time.  */
12720 if (!TARGET_64BIT
12721 || GET_MODE (count_exp) == SImode
12722 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12723 counter_mode = SImode;
12724 else
12725 counter_mode = DImode;
12726
12727 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12728
12729 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12730 if (destreg != XEXP (dst, 0))
12731 dst = replace_equiv_address_nv (dst, destreg);
12732 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12733 if (srcreg != XEXP (src, 0))
12734 src = replace_equiv_address_nv (src, srcreg);
12735
12736   /* When optimizing for size, emit a simple rep ; movsb instruction for
12737      counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
12738      sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12739      The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12740      count / 4 + (count & 3) bytes; the other sequence is either 4 or 7 bytes,
12741      but we don't know whether the upper 24 (resp. 56) bits of %ecx will be
12742      known to be zero or not.  The rep; movsb sequence causes higher
12743      register pressure though, so take that into account.  */
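  /* A worked example (illustrative): with count == 7 the inline sequence
     movsl; movsw; movsb is 7/4 + (7 & 3) = 4 bytes, no larger than either
     form of the rep; movsb sequence, so the inline copy is used; with
     count == 23 it would be 5 + 3 = 8 bytes and rep; movsb wins.  */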
12744
12745 if ((!optimize || optimize_size)
12746 && (count == 0
12747 || ((count & 0x03)
12748 && (!optimize_size
12749 || count > 5 * 4
12750 || (count & 3) + count / 4 > 6))))
12751 {
12752 emit_insn (gen_cld ());
12753 countreg = ix86_zero_extend_to_Pmode (count_exp);
12754 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12755 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12756 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12757 destexp, srcexp));
12758 }
12759
12760 /* For constant aligned (or small unaligned) copies use rep movsl
12761 followed by code copying the rest. For PentiumPro ensure 8 byte
12762 alignment to allow rep movsl acceleration. */
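  /* E.g. (illustrative): for count == 14 with 4-byte words this emits a
     copy of the first 12 bytes (either three movsl insns or one rep movsl
     with %ecx = 3) followed by a single movsw for the remaining 2 bytes.  */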
12763
12764 else if (count != 0
12765 && (align >= 8
12766 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12767 || optimize_size || count < (unsigned int) 64))
12768 {
12769 unsigned HOST_WIDE_INT offset = 0;
12770 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12771 rtx srcmem, dstmem;
12772
12773 emit_insn (gen_cld ());
12774 if (count & ~(size - 1))
12775 {
12776 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12777 {
12778 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12779
12780 while (offset < (count & ~(size - 1)))
12781 {
12782 srcmem = adjust_automodify_address_nv (src, movs_mode,
12783 srcreg, offset);
12784 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12785 destreg, offset);
12786 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12787 offset += size;
12788 }
12789 }
12790 else
12791 {
12792 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12793 & (TARGET_64BIT ? -1 : 0x3fffffff));
12794 countreg = copy_to_mode_reg (counter_mode, countreg);
12795 countreg = ix86_zero_extend_to_Pmode (countreg);
12796
12797 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12798 GEN_INT (size == 4 ? 2 : 3));
12799 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12800 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12801
12802 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12803 countreg, destexp, srcexp));
12804 offset = count & ~(size - 1);
12805 }
12806 }
12807 if (size == 8 && (count & 0x04))
12808 {
12809 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12810 offset);
12811 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12812 offset);
12813 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12814 offset += 4;
12815 }
12816 if (count & 0x02)
12817 {
12818 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12819 offset);
12820 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12821 offset);
12822 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12823 offset += 2;
12824 }
12825 if (count & 0x01)
12826 {
12827 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12828 offset);
12829 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12830 offset);
12831 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12832 }
12833 }
12834 /* The generic code based on the glibc implementation:
12835 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12836 allowing accelerated copying there)
12837 - copy the data using rep movsl
12838 - copy the rest. */
12839 else
12840 {
12841 rtx countreg2;
12842 rtx label = NULL;
12843 rtx srcmem, dstmem;
12844 int desired_alignment = (TARGET_PENTIUMPRO
12845 && (count == 0 || count >= (unsigned int) 260)
12846 ? 8 : UNITS_PER_WORD);
12847 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12848 dst = change_address (dst, BLKmode, destreg);
12849 src = change_address (src, BLKmode, srcreg);
12850
12851       /* In case we don't know anything about the alignment, default to
12852          the library version, since it is usually equally fast and results in
12853          shorter code.
12854 
12855 	 Also emit a call when we know that the count is large and the call
12856 	 overhead will not be important.  */
12857 if (!TARGET_INLINE_ALL_STRINGOPS
12858 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12859 return 0;
12860
12861 if (TARGET_SINGLE_STRINGOP)
12862 emit_insn (gen_cld ());
12863
12864 countreg2 = gen_reg_rtx (Pmode);
12865 countreg = copy_to_mode_reg (counter_mode, count_exp);
12866
12867       /* We don't use loops to align the destination or to copy parts smaller
12868          than 4 bytes, because gcc is able to optimize such code better (in
12869          case the destination or the count really is aligned, gcc is often
12870          able to predict the branches) and also it is friendlier to the
12871          hardware branch prediction.
12872 
12873          Using loops is beneficial for the generic case, because we can
12874          handle small counts using the loops.  Many CPUs (such as Athlon)
12875          have large REP prefix setup costs.
12876 
12877 	 This is quite costly.  Maybe we can revisit this decision later or
12878 	 add some customizability to this code.  */
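      /* E.g. (illustrative): for an arbitrarily aligned destination the
	 straight-line prologue below copies at most 1, then 2, then (when
	 8-byte alignment is wanted) 4 bytes, adjusting countreg after each
	 step, until the destination reaches the desired alignment.  */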
12879
12880 if (count == 0 && align < desired_alignment)
12881 {
12882 label = gen_label_rtx ();
12883 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12884 LEU, 0, counter_mode, 1, label);
12885 }
12886 if (align <= 1)
12887 {
12888 rtx label = ix86_expand_aligntest (destreg, 1);
12889 srcmem = change_address (src, QImode, srcreg);
12890 dstmem = change_address (dst, QImode, destreg);
12891 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12892 ix86_adjust_counter (countreg, 1);
12893 emit_label (label);
12894 LABEL_NUSES (label) = 1;
12895 }
12896 if (align <= 2)
12897 {
12898 rtx label = ix86_expand_aligntest (destreg, 2);
12899 srcmem = change_address (src, HImode, srcreg);
12900 dstmem = change_address (dst, HImode, destreg);
12901 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12902 ix86_adjust_counter (countreg, 2);
12903 emit_label (label);
12904 LABEL_NUSES (label) = 1;
12905 }
12906 if (align <= 4 && desired_alignment > 4)
12907 {
12908 rtx label = ix86_expand_aligntest (destreg, 4);
12909 srcmem = change_address (src, SImode, srcreg);
12910 dstmem = change_address (dst, SImode, destreg);
12911 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12912 ix86_adjust_counter (countreg, 4);
12913 emit_label (label);
12914 LABEL_NUSES (label) = 1;
12915 }
12916
12917 if (label && desired_alignment > 4 && !TARGET_64BIT)
12918 {
12919 emit_label (label);
12920 LABEL_NUSES (label) = 1;
12921 label = NULL_RTX;
12922 }
12923 if (!TARGET_SINGLE_STRINGOP)
12924 emit_insn (gen_cld ());
12925 if (TARGET_64BIT)
12926 {
12927 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12928 GEN_INT (3)));
12929 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12930 }
12931 else
12932 {
12933 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12934 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12935 }
12936 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12937 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12938 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12939 countreg2, destexp, srcexp));
12940
12941 if (label)
12942 {
12943 emit_label (label);
12944 LABEL_NUSES (label) = 1;
12945 }
12946 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12947 {
12948 srcmem = change_address (src, SImode, srcreg);
12949 dstmem = change_address (dst, SImode, destreg);
12950 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12951 }
12952 if ((align <= 4 || count == 0) && TARGET_64BIT)
12953 {
12954 rtx label = ix86_expand_aligntest (countreg, 4);
12955 srcmem = change_address (src, SImode, srcreg);
12956 dstmem = change_address (dst, SImode, destreg);
12957 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12958 emit_label (label);
12959 LABEL_NUSES (label) = 1;
12960 }
12961 if (align > 2 && count != 0 && (count & 2))
12962 {
12963 srcmem = change_address (src, HImode, srcreg);
12964 dstmem = change_address (dst, HImode, destreg);
12965 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12966 }
12967 if (align <= 2 || count == 0)
12968 {
12969 rtx label = ix86_expand_aligntest (countreg, 2);
12970 srcmem = change_address (src, HImode, srcreg);
12971 dstmem = change_address (dst, HImode, destreg);
12972 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12973 emit_label (label);
12974 LABEL_NUSES (label) = 1;
12975 }
12976 if (align > 1 && count != 0 && (count & 1))
12977 {
12978 srcmem = change_address (src, QImode, srcreg);
12979 dstmem = change_address (dst, QImode, destreg);
12980 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12981 }
12982 if (align <= 1 || count == 0)
12983 {
12984 rtx label = ix86_expand_aligntest (countreg, 1);
12985 srcmem = change_address (src, QImode, srcreg);
12986 dstmem = change_address (dst, QImode, destreg);
12987 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12988 emit_label (label);
12989 LABEL_NUSES (label) = 1;
12990 }
12991 }
12992
12993 return 1;
12994}
12995
12996/* Expand string clear operation (bzero). Use i386 string operations when
12997 profitable. expand_movmem contains similar code. */
12998int
12999ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
13000{
13001 rtx destreg, zeroreg, countreg, destexp;
13002 enum machine_mode counter_mode;
13003 HOST_WIDE_INT align = 0;
13004 unsigned HOST_WIDE_INT count = 0;
13005
13006 if (GET_CODE (align_exp) == CONST_INT)
13007 align = INTVAL (align_exp);
13008
13009 /* Can't use any of this if the user has appropriated esi. */
13010 if (global_regs[4])
13011 return 0;
13012
13013 /* This simple hack avoids all inlining code and simplifies code below. */
13014 if (!TARGET_ALIGN_STRINGOPS)
13015 align = 32;
13016
13017 if (GET_CODE (count_exp) == CONST_INT)
13018 {
13019 count = INTVAL (count_exp);
13020 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
13021 return 0;
13022 }
13023   /* Figure out the proper mode for the counter.  For 32-bit targets it is
13024      always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
13025      Set count to the number of bytes copied when known at compile time.  */
13026 if (!TARGET_64BIT
13027 || GET_MODE (count_exp) == SImode
13028 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
13029 counter_mode = SImode;
13030 else
13031 counter_mode = DImode;
13032
13033 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13034 if (destreg != XEXP (dst, 0))
13035 dst = replace_equiv_address_nv (dst, destreg);
13036
13037
13038   /* When optimizing for size, emit a simple rep ; stosb instruction for
13039      counts not divisible by 4.  The movl $N, %ecx; rep; stosb
13040      sequence is 7 bytes long, so if optimizing for size and the count is
13041      small enough that some stosl, stosw and stosb instructions without
13042      rep are shorter, fall back into the next if.  */
13043
13044 if ((!optimize || optimize_size)
13045 && (count == 0
13046 || ((count & 0x03)
13047 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
13048 {
13049 emit_insn (gen_cld ());
13050
13051 countreg = ix86_zero_extend_to_Pmode (count_exp);
13052 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
13053 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
13054 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
13055 }
13056 else if (count != 0
13057 && (align >= 8
13058 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
13059 || optimize_size || count < (unsigned int) 64))
13060 {
13061 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
13062 unsigned HOST_WIDE_INT offset = 0;
13063
13064 emit_insn (gen_cld ());
13065
13066 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
13067 if (count & ~(size - 1))
13068 {
13069 unsigned HOST_WIDE_INT repcount;
13070 unsigned int max_nonrep;
13071
13072 repcount = count >> (size == 4 ? 2 : 3);
13073 if (!TARGET_64BIT)
13074 repcount &= 0x3fffffff;
13075
13076 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
13077 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
13078 bytes. In both cases the latter seems to be faster for small
13079 values of N. */
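	  /* E.g. (illustrative): with size == 4, seven inline stosl insns are
	     7 bytes, the same as the rep form, hence a max_nonrep of 7; with
	     size == 8 each stosq carries a REX prefix (2 bytes), so only up
	     to four inline stores (8 bytes) match the 8-byte rep form.  */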
13080 max_nonrep = size == 4 ? 7 : 4;
13081 if (!optimize_size)
13082 switch (ix86_tune)
13083 {
13084 case PROCESSOR_PENTIUM4:
13085 case PROCESSOR_NOCONA:
13086 max_nonrep = 3;
13087 break;
13088 default:
13089 break;
13090 }
13091
13092 if (repcount <= max_nonrep)
13093 while (repcount-- > 0)
13094 {
13095 rtx mem = adjust_automodify_address_nv (dst,
13096 GET_MODE (zeroreg),
13097 destreg, offset);
13098 emit_insn (gen_strset (destreg, mem, zeroreg));
13099 offset += size;
13100 }
13101 else
13102 {
13103 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
13104 countreg = ix86_zero_extend_to_Pmode (countreg);
13105 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13106 GEN_INT (size == 4 ? 2 : 3));
13107 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13108 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
13109 destexp));
13110 offset = count & ~(size - 1);
13111 }
13112 }
13113 if (size == 8 && (count & 0x04))
13114 {
13115 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
13116 offset);
13117 emit_insn (gen_strset (destreg, mem,
13118 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13119 offset += 4;
13120 }
13121 if (count & 0x02)
13122 {
13123 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
13124 offset);
13125 emit_insn (gen_strset (destreg, mem,
13126 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13127 offset += 2;
13128 }
13129 if (count & 0x01)
13130 {
13131 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
13132 offset);
13133 emit_insn (gen_strset (destreg, mem,
13134 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13135 }
13136 }
13137 else
13138 {
13139 rtx countreg2;
13140 rtx label = NULL;
13141 /* Compute desired alignment of the string operation. */
13142 int desired_alignment = (TARGET_PENTIUMPRO
13143 && (count == 0 || count >= (unsigned int) 260)
13144 ? 8 : UNITS_PER_WORD);
13145
13146       /* In case we don't know anything about the alignment, default to
13147          the library version, since it is usually equally fast and results in
13148          shorter code.
13149 
13150 	 Also emit a call when we know that the count is large and the call
13151 	 overhead will not be important.  */
13152 if (!TARGET_INLINE_ALL_STRINGOPS
13153 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13154 return 0;
13155
13156 if (TARGET_SINGLE_STRINGOP)
13157 emit_insn (gen_cld ());
13158
13159 countreg2 = gen_reg_rtx (Pmode);
13160 countreg = copy_to_mode_reg (counter_mode, count_exp);
13161 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13162 /* Get rid of MEM_OFFSET, it won't be accurate. */
13163 dst = change_address (dst, BLKmode, destreg);
13164
13165 if (count == 0 && align < desired_alignment)
13166 {
13167 label = gen_label_rtx ();
13168 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13169 LEU, 0, counter_mode, 1, label);
13170 }
13171 if (align <= 1)
13172 {
13173 rtx label = ix86_expand_aligntest (destreg, 1);
13174 emit_insn (gen_strset (destreg, dst,
13175 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13176 ix86_adjust_counter (countreg, 1);
13177 emit_label (label);
13178 LABEL_NUSES (label) = 1;
13179 }
13180 if (align <= 2)
13181 {
13182 rtx label = ix86_expand_aligntest (destreg, 2);
13183 emit_insn (gen_strset (destreg, dst,
13184 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13185 ix86_adjust_counter (countreg, 2);
13186 emit_label (label);
13187 LABEL_NUSES (label) = 1;
13188 }
13189 if (align <= 4 && desired_alignment > 4)
13190 {
13191 rtx label = ix86_expand_aligntest (destreg, 4);
13192 emit_insn (gen_strset (destreg, dst,
13193 (TARGET_64BIT
13194 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13195 : zeroreg)));
13196 ix86_adjust_counter (countreg, 4);
13197 emit_label (label);
13198 LABEL_NUSES (label) = 1;
13199 }
13200
13201 if (label && desired_alignment > 4 && !TARGET_64BIT)
13202 {
13203 emit_label (label);
13204 LABEL_NUSES (label) = 1;
13205 label = NULL_RTX;
13206 }
13207
13208 if (!TARGET_SINGLE_STRINGOP)
13209 emit_insn (gen_cld ());
13210 if (TARGET_64BIT)
13211 {
13212 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13213 GEN_INT (3)));
13214 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13215 }
13216 else
13217 {
13218 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13219 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13220 }
13221 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13222 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13223
13224 if (label)
13225 {
13226 emit_label (label);
13227 LABEL_NUSES (label) = 1;
13228 }
13229
13230 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13231 emit_insn (gen_strset (destreg, dst,
13232 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13233 if (TARGET_64BIT && (align <= 4 || count == 0))
13234 {
13235 rtx label = ix86_expand_aligntest (countreg, 4);
13236 emit_insn (gen_strset (destreg, dst,
13237 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13238 emit_label (label);
13239 LABEL_NUSES (label) = 1;
13240 }
13241 if (align > 2 && count != 0 && (count & 2))
13242 emit_insn (gen_strset (destreg, dst,
13243 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13244 if (align <= 2 || count == 0)
13245 {
13246 rtx label = ix86_expand_aligntest (countreg, 2);
13247 emit_insn (gen_strset (destreg, dst,
13248 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13249 emit_label (label);
13250 LABEL_NUSES (label) = 1;
13251 }
13252 if (align > 1 && count != 0 && (count & 1))
13253 emit_insn (gen_strset (destreg, dst,
13254 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13255 if (align <= 1 || count == 0)
13256 {
13257 rtx label = ix86_expand_aligntest (countreg, 1);
13258 emit_insn (gen_strset (destreg, dst,
13259 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13260 emit_label (label);
13261 LABEL_NUSES (label) = 1;
13262 }
13263 }
13264 return 1;
13265}
13266
13267/* Expand strlen. */
13268int
13269ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13270{
13271 rtx addr, scratch1, scratch2, scratch3, scratch4;
13272
13273   /* The generic case of the strlen expander is long.  Avoid
13274      expanding it unless TARGET_INLINE_ALL_STRINGOPS.  */
13275
13276 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13277 && !TARGET_INLINE_ALL_STRINGOPS
13278 && !optimize_size
13279 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13280 return 0;
13281
13282 addr = force_reg (Pmode, XEXP (src, 0));
13283 scratch1 = gen_reg_rtx (Pmode);
13284
13285 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13286 && !optimize_size)
13287 {
13288       /* Well it seems that some optimizer does not combine a call like
13289          foo(strlen(bar), strlen(bar));
13290          when the move and the subtraction are done here.  It does calculate
13291          the length just once when these instructions are done inside of
13292          output_strlen_unroll().  But I think since &bar[strlen(bar)] is
13293          often used and I use one fewer register for the lifetime of
13294          output_strlen_unroll() this is better.  */
13295
13296 emit_move_insn (out, addr);
13297
13298 ix86_expand_strlensi_unroll_1 (out, src, align);
13299
13300 /* strlensi_unroll_1 returns the address of the zero at the end of
13301 the string, like memchr(), so compute the length by subtracting
13302 the start address. */
13303 if (TARGET_64BIT)
13304 emit_insn (gen_subdi3 (out, out, addr));
13305 else
13306 emit_insn (gen_subsi3 (out, out, addr));
13307 }
13308 else
13309 {
13310 rtx unspec;
13311 scratch2 = gen_reg_rtx (Pmode);
13312 scratch3 = gen_reg_rtx (Pmode);
13313 scratch4 = force_reg (Pmode, constm1_rtx);
13314
13315 emit_move_insn (scratch3, addr);
13316 eoschar = force_reg (QImode, eoschar);
13317
13318 emit_insn (gen_cld ());
13319 src = replace_equiv_address_nv (src, scratch3);
13320
13321 /* If .md starts supporting :P, this can be done in .md. */
13322 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13323 scratch4), UNSPEC_SCAS);
13324 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
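      /* Illustrative: the count register starts at -1 and repnz; scasb
	 decrements it once per byte scanned, including the terminator, so it
	 ends at -(len + 2); the one's complement below gives len + 1 and
	 adding -1 yields the string length.  */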
13325 if (TARGET_64BIT)
13326 {
13327 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13328 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13329 }
13330 else
13331 {
13332 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13333 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13334 }
13335 }
13336 return 1;
13337}
13338
13339/* Expand the appropriate insns for doing strlen if not just doing
13340 repnz; scasb
13341
13342 out = result, initialized with the start address
13343 align_rtx = alignment of the address.
13344    scratch = scratch register, initialized with the start address when
13345 not aligned, otherwise undefined
13346
13347 This is just the body. It needs the initializations mentioned above and
13348 some address computing at the end. These things are done in i386.md. */
13349
13350static void
13351ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13352{
13353 int align;
13354 rtx tmp;
13355 rtx align_2_label = NULL_RTX;
13356 rtx align_3_label = NULL_RTX;
13357 rtx align_4_label = gen_label_rtx ();
13358 rtx end_0_label = gen_label_rtx ();
13359 rtx mem;
13360 rtx tmpreg = gen_reg_rtx (SImode);
13361 rtx scratch = gen_reg_rtx (SImode);
13362 rtx cmp;
13363
13364 align = 0;
13365 if (GET_CODE (align_rtx) == CONST_INT)
13366 align = INTVAL (align_rtx);
13367
13368 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13369
13370 /* Is there a known alignment and is it less than 4? */
13371 if (align < 4)
13372 {
13373 rtx scratch1 = gen_reg_rtx (Pmode);
13374 emit_move_insn (scratch1, out);
13375 /* Is there a known alignment and is it not 2? */
13376 if (align != 2)
13377 {
13378 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13379 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13380
13381 	  /* Leave just the two lower bits.  */
13382 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13383 NULL_RTX, 0, OPTAB_WIDEN);
13384
13385 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13386 Pmode, 1, align_4_label);
13387 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13388 Pmode, 1, align_2_label);
13389 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13390 Pmode, 1, align_3_label);
13391 }
13392 else
13393 {
13394 	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
13395 	     check whether it is aligned to a 4-byte boundary.  */
13396
13397 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13398 NULL_RTX, 0, OPTAB_WIDEN);
13399
13400 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13401 Pmode, 1, align_4_label);
13402 }
13403
13404 mem = change_address (src, QImode, out);
13405
13406 /* Now compare the bytes. */
13407
13408   /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
13409 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13410 QImode, 1, end_0_label);
13411
13412 /* Increment the address. */
13413 if (TARGET_64BIT)
13414 emit_insn (gen_adddi3 (out, out, const1_rtx));
13415 else
13416 emit_insn (gen_addsi3 (out, out, const1_rtx));
13417
13418 /* Not needed with an alignment of 2 */
13419 if (align != 2)
13420 {
13421 emit_label (align_2_label);
13422
13423 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13424 end_0_label);
13425
13426 if (TARGET_64BIT)
13427 emit_insn (gen_adddi3 (out, out, const1_rtx));
13428 else
13429 emit_insn (gen_addsi3 (out, out, const1_rtx));
13430
13431 emit_label (align_3_label);
13432 }
13433
13434 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13435 end_0_label);
13436
13437 if (TARGET_64BIT)
13438 emit_insn (gen_adddi3 (out, out, const1_rtx));
13439 else
13440 emit_insn (gen_addsi3 (out, out, const1_rtx));
13441 }
13442
13443   /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
13444      align this loop; that only makes the program bigger and does not
13445      speed it up.  */
13446 emit_label (align_4_label);
13447
13448 mem = change_address (src, SImode, out);
13449 emit_move_insn (scratch, mem);
13450 if (TARGET_64BIT)
13451 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13452 else
13453 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13454
13455   /* This formula yields a nonzero result iff one of the bytes is zero.
13456      This saves three branches inside the loop and many cycles.  */
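  /* Worked example (illustrative): the value tested below is
     (word - 0x01010101) & ~word & 0x80808080.  For word == 0x00414141
     ("AAA\0" in memory) this is 0xff404040 & 0xffbebebe & 0x80808080
     == 0x80000000, i.e. nonzero, while for word == 0x41414141 ("AAAA")
     it is 0x40404040 & 0xbebebebe & 0x80808080 == 0.  */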
13457
13458 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13459 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13460 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13461 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13462 gen_int_mode (0x80808080, SImode)));
13463 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13464 align_4_label);
13465
13466 if (TARGET_CMOVE)
13467 {
13468 rtx reg = gen_reg_rtx (SImode);
13469 rtx reg2 = gen_reg_rtx (Pmode);
13470 emit_move_insn (reg, tmpreg);
13471 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13472
13473 /* If zero is not in the first two bytes, move two bytes forward. */
13474 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13475 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13476 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13477 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13478 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13479 reg,
13480 tmpreg)));
13481 /* Emit lea manually to avoid clobbering of flags. */
13482 emit_insn (gen_rtx_SET (SImode, reg2,
13483 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13484
13485 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13486 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13487 emit_insn (gen_rtx_SET (VOIDmode, out,
13488 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13489 reg2,
13490 out)));
13491
13492 }
13493 else
13494 {
13495 rtx end_2_label = gen_label_rtx ();
13496 /* Is zero in the first two bytes? */
13497
13498 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13499 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13500 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13501 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13502 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13503 pc_rtx);
13504 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13505 JUMP_LABEL (tmp) = end_2_label;
13506
13507 /* Not in the first two. Move two bytes forward. */
13508 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13509 if (TARGET_64BIT)
13510 emit_insn (gen_adddi3 (out, out, const2_rtx));
13511 else
13512 emit_insn (gen_addsi3 (out, out, const2_rtx));
13513
13514 emit_label (end_2_label);
13515
13516 }
13517
13518 /* Avoid branch in fixing the byte. */
13519 tmpreg = gen_lowpart (QImode, tmpreg);
13520 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13521 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13522 if (TARGET_64BIT)
13523 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13524 else
13525 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13526
13527 emit_label (end_0_label);
13528}
13529
13530void
13531ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13532 rtx callarg2 ATTRIBUTE_UNUSED,
13533 rtx pop, int sibcall)
13534{
13535 rtx use = NULL, call;
13536
13537 if (pop == const0_rtx)
13538 pop = NULL;
13539 gcc_assert (!TARGET_64BIT || !pop);
13540
13541 if (TARGET_MACHO && !TARGET_64BIT)
13542 {
13543#if TARGET_MACHO
13544 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13545 fnaddr = machopic_indirect_call_target (fnaddr);
13546#endif
13547 }
13548 else
13549 {
13550 /* Static functions and indirect calls don't need the pic register. */
13551 if (! TARGET_64BIT && flag_pic
13552 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13553 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13554 use_reg (&use, pic_offset_table_rtx);
13555 }
13556
13557 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13558 {
13559 rtx al = gen_rtx_REG (QImode, 0);
13560 emit_move_insn (al, callarg2);
13561 use_reg (&use, al);
13562 }
13563
13564 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13565 {
13566 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13567 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13568 }
13569 if (sibcall && TARGET_64BIT
13570 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13571 {
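      /* %r11 is presumably used here because it is a call-clobbered register
	 that is not used to pass arguments in the x86-64 ABI, so loading the
	 target address into it cannot disturb the outgoing argument
	 registers of the sibling call.  */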
13572 rtx addr;
13573 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13574 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13575 emit_move_insn (fnaddr, addr);
13576 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13577 }
13578
13579 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13580 if (retval)
13581 call = gen_rtx_SET (VOIDmode, retval, call);
13582 if (pop)
13583 {
13584 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13585 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13586 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13587 }
13588
13589 call = emit_call_insn (call);
13590 if (use)
13591 CALL_INSN_FUNCTION_USAGE (call) = use;
13592}
13593
13594
13595/* Clear stack slot assignments remembered from previous functions.
13596 This is called from INIT_EXPANDERS once before RTL is emitted for each
13597 function. */
13598
13599static struct machine_function *
13600ix86_init_machine_status (void)
13601{
13602 struct machine_function *f;
13603
13604 f = ggc_alloc_cleared (sizeof (struct machine_function));
13605 f->use_fast_prologue_epilogue_nregs = -1;
13606 f->tls_descriptor_call_expanded_p = 0;
13607
13608 return f;
13609}
13610
13611/* Return a MEM corresponding to a stack slot with mode MODE.
13612 Allocate a new slot if necessary.
13613
13614 The RTL for a function can have several slots available: N is
13615 which slot to use. */
13616
13617rtx
13618assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13619{
13620 struct stack_local_entry *s;
13621
13622 gcc_assert (n < MAX_386_STACK_LOCALS);
13623
13624 /* Virtual slot is valid only before vregs are instantiated. */
13625 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
13626
13627 for (s = ix86_stack_locals; s; s = s->next)
13628 if (s->mode == mode && s->n == n)
13629 return s->rtl;
13630
13631 s = (struct stack_local_entry *)
13632 ggc_alloc (sizeof (struct stack_local_entry));
13633 s->n = n;
13634 s->mode = mode;
13635 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13636
13637 s->next = ix86_stack_locals;
13638 ix86_stack_locals = s;
13639 return s->rtl;
13640}
13641
13642/* Construct the SYMBOL_REF for the tls_get_addr function. */
13643
13644static GTY(()) rtx ix86_tls_symbol;
13645rtx
13646ix86_tls_get_addr (void)
13647{
13648
13649 if (!ix86_tls_symbol)
13650 {
13651 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13652 (TARGET_ANY_GNU_TLS
13653 && !TARGET_64BIT)
13654 ? "___tls_get_addr"
13655 : "__tls_get_addr");
13656 }
13657
13658 return ix86_tls_symbol;
13659}
13660
13661/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13662
13663static GTY(()) rtx ix86_tls_module_base_symbol;
13664rtx
13665ix86_tls_module_base (void)
13666{
13667
13668 if (!ix86_tls_module_base_symbol)
13669 {
13670 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13671 "_TLS_MODULE_BASE_");
13672 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13673 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13674 }
13675
13676 return ix86_tls_module_base_symbol;
13677}
13678
13679/* Calculate the length of the memory address in the instruction
13680 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13681
13682int
13683memory_address_length (rtx addr)
13684{
13685 struct ix86_address parts;
13686 rtx base, index, disp;
13687 int len;
13688 int ok;
13689
13690 if (GET_CODE (addr) == PRE_DEC
13691 || GET_CODE (addr) == POST_INC
13692 || GET_CODE (addr) == PRE_MODIFY
13693 || GET_CODE (addr) == POST_MODIFY)
13694 return 0;
13695
13696 ok = ix86_decompose_address (addr, &parts);
13697 gcc_assert (ok);
13698
13699 if (parts.base && GET_CODE (parts.base) == SUBREG)
13700 parts.base = SUBREG_REG (parts.base);
13701 if (parts.index && GET_CODE (parts.index) == SUBREG)
13702 parts.index = SUBREG_REG (parts.index);
13703
13704 base = parts.base;
13705 index = parts.index;
13706 disp = parts.disp;
13707 len = 0;
13708
13709 /* Rule of thumb:
13710 - esp as the base always wants an index,
13711 - ebp as the base always wants a displacement. */
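  /* E.g. (illustrative): a plain (%ebp) base cannot be encoded with mod == 00
     and so costs an extra disp8 byte, while an (%esp) base or any index
     register requires the extra SIB byte; the len computed below counts
     exactly these bytes beyond the one-byte modrm.  */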
13712
13713 /* Register Indirect. */
13714 if (base && !index && !disp)
13715 {
13716 /* esp (for its index) and ebp (for its displacement) need
13717 the two-byte modrm form. */
13718 if (addr == stack_pointer_rtx
13719 || addr == arg_pointer_rtx
13720 || addr == frame_pointer_rtx
13721 || addr == hard_frame_pointer_rtx)
13722 len = 1;
13723 }
13724
13725 /* Direct Addressing. */
13726 else if (disp && !base && !index)
13727 len = 4;
13728
13729 else
13730 {
13731 /* Find the length of the displacement constant. */
13732 if (disp)
13733 {
13734 if (base && satisfies_constraint_K (disp))
13735 len = 1;
13736 else
13737 len = 4;
13738 }
13739 /* ebp always wants a displacement. */
13740 else if (base == hard_frame_pointer_rtx)
13741 len = 1;
13742
13743 /* An index requires the two-byte modrm form.... */
13744 if (index
13745 /* ...like esp, which always wants an index. */
13746 || base == stack_pointer_rtx
13747 || base == arg_pointer_rtx
13748 || base == frame_pointer_rtx)
13749 len += 1;
13750 }
13751
13752 return len;
13753}
13754
13755 /* Compute the default value for the "length_immediate" attribute.  When SHORTFORM
13756    is set, expect that the insn has an 8-bit immediate alternative.  */
13757int
13758ix86_attr_length_immediate_default (rtx insn, int shortform)
13759{
13760 int len = 0;
13761 int i;
13762 extract_insn_cached (insn);
13763 for (i = recog_data.n_operands - 1; i >= 0; --i)
13764 if (CONSTANT_P (recog_data.operand[i]))
13765 {
13766 gcc_assert (!len);
13767 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13768 len = 1;
13769 else
13770 {
13771 switch (get_attr_mode (insn))
13772 {
13773 case MODE_QI:
13774 len+=1;
13775 break;
13776 case MODE_HI:
13777 len+=2;
13778 break;
13779 case MODE_SI:
13780 len+=4;
13781 break;
13782 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13783 case MODE_DI:
13784 len+=4;
13785 break;
13786 default:
13787 fatal_insn ("unknown insn mode", insn);
13788 }
13789 }
13790 }
13791 return len;
13792}
13793/* Compute default value for "length_address" attribute. */
13794int
13795ix86_attr_length_address_default (rtx insn)
13796{
13797 int i;
13798
13799 if (get_attr_type (insn) == TYPE_LEA)
13800 {
13801 rtx set = PATTERN (insn);
13802
13803 if (GET_CODE (set) == PARALLEL)
13804 set = XVECEXP (set, 0, 0);
13805
13806 gcc_assert (GET_CODE (set) == SET);
13807
13808 return memory_address_length (SET_SRC (set));
13809 }
13810
13811 extract_insn_cached (insn);
13812 for (i = recog_data.n_operands - 1; i >= 0; --i)
13813 if (GET_CODE (recog_data.operand[i]) == MEM)
13814 {
13815 return memory_address_length (XEXP (recog_data.operand[i], 0));
13816 break;
13818 return 0;
13819}
13820
13821/* Return the maximum number of instructions a cpu can issue. */
13822
13823static int
13824ix86_issue_rate (void)
13825{
13826 switch (ix86_tune)
13827 {
13828 case PROCESSOR_PENTIUM:
13829 case PROCESSOR_K6:
13830 return 2;
13831
13832 case PROCESSOR_PENTIUMPRO:
13833 case PROCESSOR_PENTIUM4:
13834 case PROCESSOR_ATHLON:
13835 case PROCESSOR_K8:
9480 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9481 emit_move_insn (op0, CONST0_RTX (mode));
9482 else
9483 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9484
9485 if (mode != V4SFmode)
9486 op0 = gen_lowpart (V4SFmode, op0);
9487 m = adjust_address (op1, V2SFmode, 0);
9488 emit_insn (gen_sse_loadlps (op0, op0, m));
9489 m = adjust_address (op1, V2SFmode, 8);
9490 emit_insn (gen_sse_loadhps (op0, op0, m));
9491 }
9492 }
9493 else if (MEM_P (op0))
9494 {
9495 /* If we're optimizing for size, movups is the smallest. */
9496 if (optimize_size)
9497 {
9498 op0 = gen_lowpart (V4SFmode, op0);
9499 op1 = gen_lowpart (V4SFmode, op1);
9500 emit_insn (gen_sse_movups (op0, op1));
9501 return;
9502 }
9503
9504 /* ??? Similar to above, only less clear because of quote
9505 typeless stores unquote. */
9506 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9507 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9508 {
9509 op0 = gen_lowpart (V16QImode, op0);
9510 op1 = gen_lowpart (V16QImode, op1);
9511 emit_insn (gen_sse2_movdqu (op0, op1));
9512 return;
9513 }
9514
9515 if (TARGET_SSE2 && mode == V2DFmode)
9516 {
9517 m = adjust_address (op0, DFmode, 0);
9518 emit_insn (gen_sse2_storelpd (m, op1));
9519 m = adjust_address (op0, DFmode, 8);
9520 emit_insn (gen_sse2_storehpd (m, op1));
9521 }
9522 else
9523 {
9524 if (mode != V4SFmode)
9525 op1 = gen_lowpart (V4SFmode, op1);
9526 m = adjust_address (op0, V2SFmode, 0);
9527 emit_insn (gen_sse_storelps (m, op1));
9528 m = adjust_address (op0, V2SFmode, 8);
9529 emit_insn (gen_sse_storehps (m, op1));
9530 }
9531 }
9532 else
9533 gcc_unreachable ();
9534}
9535
9536/* Expand a push in MODE. This is some mode for which we do not support
9537 proper push instructions, at least from the registers that we expect
9538 the value to live in. */
9539
9540void
9541ix86_expand_push (enum machine_mode mode, rtx x)
9542{
9543 rtx tmp;
9544
9545 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9546 GEN_INT (-GET_MODE_SIZE (mode)),
9547 stack_pointer_rtx, 1, OPTAB_DIRECT);
9548 if (tmp != stack_pointer_rtx)
9549 emit_move_insn (stack_pointer_rtx, tmp);
9550
9551 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9552 emit_move_insn (tmp, x);
9553}
9554
9555/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9556 destination to use for the operation. If different from the true
9557 destination in operands[0], a copy operation will be required. */
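/* E.g. (illustrative): for "mem = mem + reg" the destination already matches
   the first source, so it can be used directly; for "mem1 = mem2 + mem3" one
   source is forced into a register and a fresh register is returned as the
   destination, which the caller then copies back into operands[0].  */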
9558
9559rtx
9560ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9561 rtx operands[])
9562{
9563 int matching_memory;
9564 rtx src1, src2, dst;
9565
9566 dst = operands[0];
9567 src1 = operands[1];
9568 src2 = operands[2];
9569
9570 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9571 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9572 && (rtx_equal_p (dst, src2)
9573 || immediate_operand (src1, mode)))
9574 {
9575 rtx temp = src1;
9576 src1 = src2;
9577 src2 = temp;
9578 }
9579
9580 /* If the destination is memory, and we do not have matching source
9581 operands, do things in registers. */
9582 matching_memory = 0;
9583 if (GET_CODE (dst) == MEM)
9584 {
9585 if (rtx_equal_p (dst, src1))
9586 matching_memory = 1;
9587 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9588 && rtx_equal_p (dst, src2))
9589 matching_memory = 2;
9590 else
9591 dst = gen_reg_rtx (mode);
9592 }
9593
9594 /* Both source operands cannot be in memory. */
9595 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9596 {
9597 if (matching_memory != 2)
9598 src2 = force_reg (mode, src2);
9599 else
9600 src1 = force_reg (mode, src1);
9601 }
9602
9603 /* If the operation is not commutable, source 1 cannot be a constant
9604 or non-matching memory. */
9605 if ((CONSTANT_P (src1)
9606 || (!matching_memory && GET_CODE (src1) == MEM))
9607 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9608 src1 = force_reg (mode, src1);
9609
9610 src1 = operands[1] = src1;
9611 src2 = operands[2] = src2;
9612 return dst;
9613}
9614
9615/* Similarly, but assume that the destination has already been
9616 set up properly. */
9617
9618void
9619ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9620 enum machine_mode mode, rtx operands[])
9621{
9622 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9623 gcc_assert (dst == operands[0]);
9624}
9625
9626 /* Attempt to expand a binary operator.  Make the expansion closer to the
9627    actual machine than just general_operand, which would allow 3 separate
9628    memory references (one output, two input) in a single insn.  */
9629
9630void
9631ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9632 rtx operands[])
9633{
9634 rtx src1, src2, dst, op, clob;
9635
9636 dst = ix86_fixup_binary_operands (code, mode, operands);
9637 src1 = operands[1];
9638 src2 = operands[2];
9639
9640 /* Emit the instruction. */
9641
9642 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9643 if (reload_in_progress)
9644 {
9645 /* Reload doesn't know about the flags register, and doesn't know that
9646 it doesn't want to clobber it. We can only do this with PLUS. */
9647 gcc_assert (code == PLUS);
9648 emit_insn (op);
9649 }
9650 else
9651 {
9652 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9653 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9654 }
9655
9656 /* Fix up the destination if needed. */
9657 if (dst != operands[0])
9658 emit_move_insn (operands[0], dst);
9659}
9660
9661/* Return TRUE or FALSE depending on whether the binary operator meets the
9662 appropriate constraints. */
9663
9664int
9665ix86_binary_operator_ok (enum rtx_code code,
9666 enum machine_mode mode ATTRIBUTE_UNUSED,
9667 rtx operands[3])
9668{
9669 /* Both source operands cannot be in memory. */
9670 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9671 return 0;
9672  /* If the operation is not commutative, source 1 cannot be a constant.  */
9673 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9674 return 0;
9675 /* If the destination is memory, we must have a matching source operand. */
9676 if (GET_CODE (operands[0]) == MEM
9677 && ! (rtx_equal_p (operands[0], operands[1])
9678 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9679 && rtx_equal_p (operands[0], operands[2]))))
9680 return 0;
9681  /* If the operation is not commutative and source 1 is memory, we must
9682     have a matching destination.  */
9683 if (GET_CODE (operands[1]) == MEM
9684 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9685 && ! rtx_equal_p (operands[0], operands[1]))
9686 return 0;
9687 return 1;
9688}
9689
9690/* Attempt to expand a unary operator.  Make the expansion closer to the
9691   actual machine than just general_operand, which would allow 2 separate
9692   memory references (one output, one input) in a single insn.  */
9693
9694void
9695ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9696 rtx operands[])
9697{
9698 int matching_memory;
9699 rtx src, dst, op, clob;
9700
9701 dst = operands[0];
9702 src = operands[1];
9703
9704 /* If the destination is memory, and we do not have matching source
9705 operands, do things in registers. */
9706 matching_memory = 0;
9707 if (MEM_P (dst))
9708 {
9709 if (rtx_equal_p (dst, src))
9710 matching_memory = 1;
9711 else
9712 dst = gen_reg_rtx (mode);
9713 }
9714
9715 /* When source operand is memory, destination must match. */
9716 if (MEM_P (src) && !matching_memory)
9717 src = force_reg (mode, src);
9718
9719 /* Emit the instruction. */
9720
9721 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9722 if (reload_in_progress || code == NOT)
9723 {
9724 /* Reload doesn't know about the flags register, and doesn't know that
9725 it doesn't want to clobber it. */
9726 gcc_assert (code == NOT);
9727 emit_insn (op);
9728 }
9729 else
9730 {
9731 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9732 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9733 }
9734
9735 /* Fix up the destination if needed. */
9736 if (dst != operands[0])
9737 emit_move_insn (operands[0], dst);
9738}
9739
9740/* Return TRUE or FALSE depending on whether the unary operator meets the
9741 appropriate constraints. */
9742
9743int
9744ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9745 enum machine_mode mode ATTRIBUTE_UNUSED,
9746 rtx operands[2] ATTRIBUTE_UNUSED)
9747{
9748 /* If one of operands is memory, source and destination must match. */
9749 if ((GET_CODE (operands[0]) == MEM
9750 || GET_CODE (operands[1]) == MEM)
9751 && ! rtx_equal_p (operands[0], operands[1]))
9752 return FALSE;
9753 return TRUE;
9754}
9755
9756/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9757 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9758 true, then replicate the mask for all elements of the vector register.
9759 If INVERT is true, then create a mask excluding the sign bit. */
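/* E.g. for SFmode with VECT and !INVERT this produces a V4SFmode constant
   whose four elements all have the bit pattern 0x80000000 (just the sign
   bit), suitable for flipping the sign with XOR; with INVERT the elements
   are 0x7fffffff, suitable for clearing the sign with AND.  */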
9760
9761rtx
9762ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9763{
9764 enum machine_mode vec_mode;
9765 HOST_WIDE_INT hi, lo;
9766 int shift = 63;
9767 rtvec v;
9768 rtx mask;
9769
9770 /* Find the sign bit, sign extended to 2*HWI. */
9771 if (mode == SFmode)
9772 lo = 0x80000000, hi = lo < 0;
9773 else if (HOST_BITS_PER_WIDE_INT >= 64)
9774 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9775 else
9776 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9777
9778 if (invert)
9779 lo = ~lo, hi = ~hi;
9780
9781 /* Force this value into the low part of a fp vector constant. */
9782 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9783 mask = gen_lowpart (mode, mask);
9784
9785 if (mode == SFmode)
9786 {
9787 if (vect)
9788 v = gen_rtvec (4, mask, mask, mask, mask);
9789 else
9790 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9791 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9792 vec_mode = V4SFmode;
9793 }
9794 else
9795 {
9796 if (vect)
9797 v = gen_rtvec (2, mask, mask);
9798 else
9799 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9800 vec_mode = V2DFmode;
9801 }
9802
9803 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9804}
9805
9806/* Generate code for floating point ABS or NEG. */
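/* With SSE this is done with bitwise masks built by ix86_build_signbit_mask:
   roughly, NEG becomes an XOR with the sign-bit mask and ABS an AND with the
   inverted mask.  Without SSE the plain x87 neg/abs pattern is emitted.  */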
9807
9808void
9809ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9810 rtx operands[])
9811{
9812 rtx mask, set, use, clob, dst, src;
9813 bool matching_memory;
9814 bool use_sse = false;
9815 bool vector_mode = VECTOR_MODE_P (mode);
9816 enum machine_mode elt_mode = mode;
9817
9818 if (vector_mode)
9819 {
9820 elt_mode = GET_MODE_INNER (mode);
9821 use_sse = true;
9822 }
9823 else if (TARGET_SSE_MATH)
9824 use_sse = SSE_FLOAT_MODE_P (mode);
9825
9826 /* NEG and ABS performed with SSE use bitwise mask operations.
9827 Create the appropriate mask now. */
9828 if (use_sse)
9829 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9830 else
9831 mask = NULL_RTX;
9832
9833 dst = operands[0];
9834 src = operands[1];
9835
9836 /* If the destination is memory, and we don't have matching source
9837 operands or we're using the x87, do things in registers. */
9838 matching_memory = false;
9839 if (MEM_P (dst))
9840 {
9841 if (use_sse && rtx_equal_p (dst, src))
9842 matching_memory = true;
9843 else
9844 dst = gen_reg_rtx (mode);
9845 }
9846 if (MEM_P (src) && !matching_memory)
9847 src = force_reg (mode, src);
9848
9849 if (vector_mode)
9850 {
9851 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9852 set = gen_rtx_SET (VOIDmode, dst, set);
9853 emit_insn (set);
9854 }
9855 else
9856 {
9857 set = gen_rtx_fmt_e (code, mode, src);
9858 set = gen_rtx_SET (VOIDmode, dst, set);
9859 if (mask)
9860 {
9861 use = gen_rtx_USE (VOIDmode, mask);
9862 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9863 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9864 gen_rtvec (3, set, use, clob)));
9865 }
9866 else
9867 emit_insn (set);
9868 }
9869
9870 if (dst != operands[0])
9871 emit_move_insn (operands[0], dst);
9872}
9873
9874/* Expand a copysign operation. Special case operand 0 being a constant. */
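/* The expansion computes, in effect, (op0 & ~signmask) | (op1 & signmask),
   i.e. the magnitude of op0 combined with the sign of op1; see the two
   split routines below.  */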
9875
9876void
9877ix86_expand_copysign (rtx operands[])
9878{
9879 enum machine_mode mode, vmode;
9880 rtx dest, op0, op1, mask, nmask;
9881
9882 dest = operands[0];
9883 op0 = operands[1];
9884 op1 = operands[2];
9885
9886 mode = GET_MODE (dest);
9887 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9888
9889 if (GET_CODE (op0) == CONST_DOUBLE)
9890 {
9891 rtvec v;
9892
9893 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9894 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9895
9896 if (op0 == CONST0_RTX (mode))
9897 op0 = CONST0_RTX (vmode);
9898 else
9899 {
9900 if (mode == SFmode)
9901 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9902 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9903 else
9904 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9905 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9906 }
9907
9908 mask = ix86_build_signbit_mask (mode, 0, 0);
9909
9910 if (mode == SFmode)
9911 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9912 else
9913 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9914 }
9915 else
9916 {
9917 nmask = ix86_build_signbit_mask (mode, 0, 1);
9918 mask = ix86_build_signbit_mask (mode, 0, 0);
9919
9920 if (mode == SFmode)
9921 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9922 else
9923 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9924 }
9925}
9926
9927/* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9928 be a constant, and so has already been expanded into a vector constant. */
9929
9930void
9931ix86_split_copysign_const (rtx operands[])
9932{
9933 enum machine_mode mode, vmode;
9934 rtx dest, op0, op1, mask, x;
9935
9936 dest = operands[0];
9937 op0 = operands[1];
9938 op1 = operands[2];
9939 mask = operands[3];
9940
9941 mode = GET_MODE (dest);
9942 vmode = GET_MODE (mask);
9943
9944 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9945 x = gen_rtx_AND (vmode, dest, mask);
9946 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9947
9948 if (op0 != CONST0_RTX (vmode))
9949 {
9950 x = gen_rtx_IOR (vmode, dest, op0);
9951 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9952 }
9953}
9954
9955/* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9956 so we have to do two masks. */
9957
9958void
9959ix86_split_copysign_var (rtx operands[])
9960{
9961 enum machine_mode mode, vmode;
9962 rtx dest, scratch, op0, op1, mask, nmask, x;
9963
9964 dest = operands[0];
9965 scratch = operands[1];
9966 op0 = operands[2];
9967 op1 = operands[3];
9968 nmask = operands[4];
9969 mask = operands[5];
9970
9971 mode = GET_MODE (dest);
9972 vmode = GET_MODE (mask);
9973
9974 if (rtx_equal_p (op0, op1))
9975 {
9976 /* Shouldn't happen often (it's useless, obviously), but when it does
9977 we'd generate incorrect code if we continue below. */
9978 emit_move_insn (dest, op0);
9979 return;
9980 }
9981
9982 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9983 {
9984 gcc_assert (REGNO (op1) == REGNO (scratch));
9985
9986 x = gen_rtx_AND (vmode, scratch, mask);
9987 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9988
9989 dest = mask;
9990 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9991 x = gen_rtx_NOT (vmode, dest);
9992 x = gen_rtx_AND (vmode, x, op0);
9993 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9994 }
9995 else
9996 {
9997 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9998 {
9999 x = gen_rtx_AND (vmode, scratch, mask);
10000 }
10001 else /* alternative 2,4 */
10002 {
10003 gcc_assert (REGNO (mask) == REGNO (scratch));
10004 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10005 x = gen_rtx_AND (vmode, scratch, op1);
10006 }
10007 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10008
10009 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10010 {
10011 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10012 x = gen_rtx_AND (vmode, dest, nmask);
10013 }
10014 else /* alternative 3,4 */
10015 {
10016 gcc_assert (REGNO (nmask) == REGNO (dest));
10017 dest = nmask;
10018 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10019 x = gen_rtx_AND (vmode, dest, op0);
10020 }
10021 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10022 }
10023
10024 x = gen_rtx_IOR (vmode, dest, scratch);
10025 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10026}
10027
10028/* Return TRUE or FALSE depending on whether the first SET in INSN
10029 has source and destination with matching CC modes, and that the
10030 CC mode is at least as constrained as REQ_MODE. */
10031
10032int
10033ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10034{
10035 rtx set;
10036 enum machine_mode set_mode;
10037
10038 set = PATTERN (insn);
10039 if (GET_CODE (set) == PARALLEL)
10040 set = XVECEXP (set, 0, 0);
10041 gcc_assert (GET_CODE (set) == SET);
10042 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10043
10044 set_mode = GET_MODE (SET_DEST (set));
10045 switch (set_mode)
10046 {
10047 case CCNOmode:
10048 if (req_mode != CCNOmode
10049 && (req_mode != CCmode
10050 || XEXP (SET_SRC (set), 1) != const0_rtx))
10051 return 0;
10052 break;
10053 case CCmode:
10054 if (req_mode == CCGCmode)
10055 return 0;
10056 /* FALLTHRU */
10057 case CCGCmode:
10058 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10059 return 0;
10060 /* FALLTHRU */
10061 case CCGOCmode:
10062 if (req_mode == CCZmode)
10063 return 0;
10064 /* FALLTHRU */
10065 case CCZmode:
10066 break;
10067
10068 default:
10069 gcc_unreachable ();
10070 }
10071
10072 return (GET_MODE (SET_SRC (set)) == set_mode);
10073}
10074
10075/* Generate insn patterns to do an integer compare of OPERANDS. */
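/* That is, emit (set (reg flags) (compare op0 op1)) in the selected CC mode
   and return (code (reg flags) (const_int 0)) for the flags user.  */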
10076
10077static rtx
10078ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10079{
10080 enum machine_mode cmpmode;
10081 rtx tmp, flags;
10082
10083 cmpmode = SELECT_CC_MODE (code, op0, op1);
10084 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10085
10086 /* This is very simple, but making the interface the same as in the
10087 FP case makes the rest of the code easier. */
10088 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10089 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10090
10091 /* Return the test that should be put into the flags user, i.e.
10092 the bcc, scc, or cmov instruction. */
10093 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10094}
10095
10096/* Figure out whether to use ordered or unordered fp comparisons.
10097 Return the appropriate mode to use. */
10098
10099enum machine_mode
10100ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10101{
10102  /* ??? In order to make all comparisons reversible, we do all comparisons
10103     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
10104     between all forms of trapping and nontrapping comparisons, we can make
10105     inequality comparisons trapping again, since that results in better code
10106     when using FCOM based compares.  */
10107 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10108}
10109
10110enum machine_mode
10111ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10112{
10113 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10114 return ix86_fp_compare_mode (code);
10115 switch (code)
10116 {
10117 /* Only zero flag is needed. */
10118 case EQ: /* ZF=0 */
10119 case NE: /* ZF!=0 */
10120 return CCZmode;
10121 /* Codes needing carry flag. */
10122 case GEU: /* CF=0 */
10123 case GTU: /* CF=0 & ZF=0 */
10124 case LTU: /* CF=1 */
10125 case LEU: /* CF=1 | ZF=1 */
10126 return CCmode;
10127 /* Codes possibly doable only with sign flag when
10128 comparing against zero. */
10129 case GE: /* SF=OF or SF=0 */
10130 case LT: /* SF<>OF or SF=1 */
10131 if (op1 == const0_rtx)
10132 return CCGOCmode;
10133 else
10134 /* For other cases Carry flag is not required. */
10135 return CCGCmode;
10136    /* Codes doable only with the sign flag when comparing
10137       against zero, but we lack a jump instruction for them,
10138       so we need to use relational tests against overflow,
10139       which thus needs to be zero.  */
10140 case GT: /* ZF=0 & SF=OF */
10141 case LE: /* ZF=1 | SF<>OF */
10142 if (op1 == const0_rtx)
10143 return CCNOmode;
10144 else
10145 return CCGCmode;
10146    /* The strcmp pattern does (use flags), and combine may ask us for a
10147       proper mode.  */
10148 case USE:
10149 return CCmode;
10150 default:
10151 gcc_unreachable ();
10152 }
10153}
10154
10155/* Return the fixed registers used for condition codes. */
10156
10157static bool
10158ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10159{
10160 *p1 = FLAGS_REG;
10161 *p2 = FPSR_REG;
10162 return true;
10163}
10164
10165/* If two condition code modes are compatible, return a condition code
10166 mode which is compatible with both. Otherwise, return
10167 VOIDmode. */
10168
10169static enum machine_mode
10170ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10171{
10172 if (m1 == m2)
10173 return m1;
10174
10175 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10176 return VOIDmode;
10177
10178 if ((m1 == CCGCmode && m2 == CCGOCmode)
10179 || (m1 == CCGOCmode && m2 == CCGCmode))
10180 return CCGCmode;
10181
10182 switch (m1)
10183 {
10184 default:
10185 gcc_unreachable ();
10186
10187 case CCmode:
10188 case CCGCmode:
10189 case CCGOCmode:
10190 case CCNOmode:
10191 case CCZmode:
10192 switch (m2)
10193 {
10194 default:
10195 return VOIDmode;
10196
10197 case CCmode:
10198 case CCGCmode:
10199 case CCGOCmode:
10200 case CCNOmode:
10201 case CCZmode:
10202 return CCmode;
10203 }
10204
10205 case CCFPmode:
10206 case CCFPUmode:
10207 /* These are only compatible with themselves, which we already
10208 checked above. */
10209 return VOIDmode;
10210 }
10211}
10212
10213/* Return true if we should use an FCOMI instruction for this fp comparison. */
10214
10215int
10216ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10217{
10218 enum rtx_code swapped_code = swap_condition (code);
10219 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10220 || (ix86_fp_comparison_cost (swapped_code)
10221 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10222}
10223
10224/* Swap, force into registers, or otherwise massage the two operands
10225 to a fp comparison. The operands are updated in place; the new
10226 comparison code is returned. */
10227
10228static enum rtx_code
10229ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10230{
10231 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10232 rtx op0 = *pop0, op1 = *pop1;
10233 enum machine_mode op_mode = GET_MODE (op0);
10234 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10235
10236 /* All of the unordered compare instructions only work on registers.
10237 The same is true of the fcomi compare instructions. The XFmode
10238 compare instructions require registers except when comparing
10239 against zero or when converting operand 1 from fixed point to
10240 floating point. */
10241
10242 if (!is_sse
10243 && (fpcmp_mode == CCFPUmode
10244 || (op_mode == XFmode
10245 && ! (standard_80387_constant_p (op0) == 1
10246 || standard_80387_constant_p (op1) == 1)
10247 && GET_CODE (op1) != FLOAT)
10248 || ix86_use_fcomi_compare (code)))
10249 {
10250 op0 = force_reg (op_mode, op0);
10251 op1 = force_reg (op_mode, op1);
10252 }
10253 else
10254 {
10255 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10256 things around if they appear profitable, otherwise force op0
10257 into a register. */
10258
10259 if (standard_80387_constant_p (op0) == 0
10260 || (GET_CODE (op0) == MEM
10261 && ! (standard_80387_constant_p (op1) == 0
10262 || GET_CODE (op1) == MEM)))
10263 {
10264 rtx tmp;
10265 tmp = op0, op0 = op1, op1 = tmp;
10266 code = swap_condition (code);
10267 }
10268
10269 if (GET_CODE (op0) != REG)
10270 op0 = force_reg (op_mode, op0);
10271
10272 if (CONSTANT_P (op1))
10273 {
10274 int tmp = standard_80387_constant_p (op1);
10275 if (tmp == 0)
10276 op1 = validize_mem (force_const_mem (op_mode, op1));
10277 else if (tmp == 1)
10278 {
10279 if (TARGET_CMOVE)
10280 op1 = force_reg (op_mode, op1);
10281 }
10282 else
10283 op1 = force_reg (op_mode, op1);
10284 }
10285 }
10286
10287 /* Try to rearrange the comparison to make it cheaper. */
10288 if (ix86_fp_comparison_cost (code)
10289 > ix86_fp_comparison_cost (swap_condition (code))
10290 && (GET_CODE (op1) == REG || !no_new_pseudos))
10291 {
10292 rtx tmp;
10293 tmp = op0, op0 = op1, op1 = tmp;
10294 code = swap_condition (code);
10295 if (GET_CODE (op0) != REG)
10296 op0 = force_reg (op_mode, op0);
10297 }
10298
10299 *pop0 = op0;
10300 *pop1 = op1;
10301 return code;
10302}
10303
10304/* Convert the comparison codes we use to represent FP comparisons into the
10305   integer codes that will result in a proper branch.  Return UNKNOWN if no
10306   such code is available.  */
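/* After an fcomi or fnstsw/sahf sequence the flags are set as if by an
   unsigned integer compare (see the fcomi table below), which is why GT
   maps to GTU, UNLT to LTU, and so on.  */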
10307
10308enum rtx_code
10309ix86_fp_compare_code_to_integer (enum rtx_code code)
10310{
10311 switch (code)
10312 {
10313 case GT:
10314 return GTU;
10315 case GE:
10316 return GEU;
10317 case ORDERED:
10318 case UNORDERED:
10319 return code;
10320 break;
10321 case UNEQ:
10322 return EQ;
10323 break;
10324 case UNLT:
10325 return LTU;
10326 break;
10327 case UNLE:
10328 return LEU;
10329 break;
10330 case LTGT:
10331 return NE;
10332 break;
10333 default:
10334 return UNKNOWN;
10335 }
10336}
10337
10338/* Split comparison code CODE into comparisons we can do using branch
10339   instructions.  BYPASS_CODE is the comparison code for the branch that
10340   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
10341   is not required, its code is set to UNKNOWN.
10342   We never require more than two branches.  */
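/* For example, under TARGET_IEEE_FP the ordered LT test becomes
   first_code = UNLT with bypass_code = UNORDERED (branch around the LT
   branch when the operands are unordered), while NE becomes
   first_code = LTGT plus second_code = UNORDERED.  Without TARGET_IEEE_FP
   the extra codes are dropped again.  */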
10343
10344void
10345ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10346 enum rtx_code *first_code,
10347 enum rtx_code *second_code)
10348{
10349 *first_code = code;
10350 *bypass_code = UNKNOWN;
10351 *second_code = UNKNOWN;
10352
10353 /* The fcomi comparison sets flags as follows:
10354
10355 cmp ZF PF CF
10356 > 0 0 0
10357 < 0 0 1
10358 = 1 0 0
10359 un 1 1 1 */
10360
10361 switch (code)
10362 {
10363 case GT: /* GTU - CF=0 & ZF=0 */
10364 case GE: /* GEU - CF=0 */
10365 case ORDERED: /* PF=0 */
10366 case UNORDERED: /* PF=1 */
10367 case UNEQ: /* EQ - ZF=1 */
10368 case UNLT: /* LTU - CF=1 */
10369 case UNLE: /* LEU - CF=1 | ZF=1 */
10370 case LTGT: /* EQ - ZF=0 */
10371 break;
10372 case LT: /* LTU - CF=1 - fails on unordered */
10373 *first_code = UNLT;
10374 *bypass_code = UNORDERED;
10375 break;
10376 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10377 *first_code = UNLE;
10378 *bypass_code = UNORDERED;
10379 break;
10380 case EQ: /* EQ - ZF=1 - fails on unordered */
10381 *first_code = UNEQ;
10382 *bypass_code = UNORDERED;
10383 break;
10384 case NE: /* NE - ZF=0 - fails on unordered */
10385 *first_code = LTGT;
10386 *second_code = UNORDERED;
10387 break;
10388 case UNGE: /* GEU - CF=0 - fails on unordered */
10389 *first_code = GE;
10390 *second_code = UNORDERED;
10391 break;
10392 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10393 *first_code = GT;
10394 *second_code = UNORDERED;
10395 break;
10396 default:
10397 gcc_unreachable ();
10398 }
10399 if (!TARGET_IEEE_FP)
10400 {
10401 *second_code = UNKNOWN;
10402 *bypass_code = UNKNOWN;
10403 }
10404}
10405
10406/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10407   All the following functions use the number of instructions as a cost metric.
10408   In the future this should be tweaked to compute bytes for optimize_size and
10409   take into account the performance of various instructions on various CPUs.  */
10410static int
10411ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10412{
10413 if (!TARGET_IEEE_FP)
10414 return 4;
10415 /* The cost of code output by ix86_expand_fp_compare. */
10416 switch (code)
10417 {
10418 case UNLE:
10419 case UNLT:
10420 case LTGT:
10421 case GT:
10422 case GE:
10423 case UNORDERED:
10424 case ORDERED:
10425 case UNEQ:
10426 return 4;
10427 break;
10428 case LT:
10429 case NE:
10430 case EQ:
10431 case UNGE:
10432 return 5;
10433 break;
10434 case LE:
10435 case UNGT:
10436 return 6;
10437 break;
10438 default:
10439 gcc_unreachable ();
10440 }
10441}
10442
10443/* Return cost of comparison done using fcomi operation.
10444 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10445static int
10446ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10447{
10448 enum rtx_code bypass_code, first_code, second_code;
10449  /* Return an arbitrarily high cost when the instruction is not supported -
10450     this prevents gcc from using it.  */
10451 if (!TARGET_CMOVE)
10452 return 1024;
10453 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10454 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10455}
10456
10457/* Return cost of comparison done using sahf operation.
10458 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10459static int
10460ix86_fp_comparison_sahf_cost (enum rtx_code code)
10461{
10462 enum rtx_code bypass_code, first_code, second_code;
10463  /* Return an arbitrarily high cost when the instruction is not preferred -
10464     this keeps gcc from using it.  */
10465 if (!TARGET_USE_SAHF && !optimize_size)
10466 return 1024;
10467 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10468 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10469}
10470
10471/* Compute cost of the comparison done using any method.
10472 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10473static int
10474ix86_fp_comparison_cost (enum rtx_code code)
10475{
10476 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10477 int min;
10478
10479 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10480 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10481
10482 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10483 if (min > sahf_cost)
10484 min = sahf_cost;
10485 if (min > fcomi_cost)
10486 min = fcomi_cost;
10487 return min;
10488}
10489
10490/* Generate insn patterns to do a floating point compare of OPERANDS. */
10491
10492static rtx
10493ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10494 rtx *second_test, rtx *bypass_test)
10495{
10496 enum machine_mode fpcmp_mode, intcmp_mode;
10497 rtx tmp, tmp2;
10498 int cost = ix86_fp_comparison_cost (code);
10499 enum rtx_code bypass_code, first_code, second_code;
10500
10501 fpcmp_mode = ix86_fp_compare_mode (code);
10502 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10503
10504 if (second_test)
10505 *second_test = NULL_RTX;
10506 if (bypass_test)
10507 *bypass_test = NULL_RTX;
10508
10509 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10510
10511 /* Do fcomi/sahf based test when profitable. */
10512 if ((bypass_code == UNKNOWN || bypass_test)
10513 && (second_code == UNKNOWN || second_test)
10514 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10515 {
10516 if (TARGET_CMOVE)
10517 {
10518 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10519 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10520 tmp);
10521 emit_insn (tmp);
10522 }
10523 else
10524 {
10525 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10526 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10527 if (!scratch)
10528 scratch = gen_reg_rtx (HImode);
10529 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10530 emit_insn (gen_x86_sahf_1 (scratch));
10531 }
10532
10533 /* The FP codes work out to act like unsigned. */
10534 intcmp_mode = fpcmp_mode;
10535 code = first_code;
10536 if (bypass_code != UNKNOWN)
10537 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10538 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10539 const0_rtx);
10540 if (second_code != UNKNOWN)
10541 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10542 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10543 const0_rtx);
10544 }
10545 else
10546 {
10547 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10548 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10549 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10550 if (!scratch)
10551 scratch = gen_reg_rtx (HImode);
10552 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10553
10554 /* In the unordered case, we have to check C2 for NaN's, which
10555 doesn't happen to work out to anything nice combination-wise.
10556 So do some bit twiddling on the value we've got in AH to come
10557 up with an appropriate set of condition codes. */
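      /* As stored into AH by fnstsw, the relevant status bits are C0 = 0x01,
	 C2 = 0x04 and C3 = 0x40; after an fcom-style compare they hold the
	 CF, PF and ZF columns of the fcomi table above, so e.g. the mask
	 0x45 tests C0|C2|C3.  */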
10558
10559 intcmp_mode = CCNOmode;
10560 switch (code)
10561 {
10562 case GT:
10563 case UNGT:
10564 if (code == GT || !TARGET_IEEE_FP)
10565 {
10566 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10567 code = EQ;
10568 }
10569 else
10570 {
10571 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10572 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10573 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10574 intcmp_mode = CCmode;
10575 code = GEU;
10576 }
10577 break;
10578 case LT:
10579 case UNLT:
10580 if (code == LT && TARGET_IEEE_FP)
10581 {
10582 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10583 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10584 intcmp_mode = CCmode;
10585 code = EQ;
10586 }
10587 else
10588 {
10589 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10590 code = NE;
10591 }
10592 break;
10593 case GE:
10594 case UNGE:
10595 if (code == GE || !TARGET_IEEE_FP)
10596 {
10597 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10598 code = EQ;
10599 }
10600 else
10601 {
10602 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10603 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10604 GEN_INT (0x01)));
10605 code = NE;
10606 }
10607 break;
10608 case LE:
10609 case UNLE:
10610 if (code == LE && TARGET_IEEE_FP)
10611 {
10612 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10613 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10614 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10615 intcmp_mode = CCmode;
10616 code = LTU;
10617 }
10618 else
10619 {
10620 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10621 code = NE;
10622 }
10623 break;
10624 case EQ:
10625 case UNEQ:
10626 if (code == EQ && TARGET_IEEE_FP)
10627 {
10628 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10629 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10630 intcmp_mode = CCmode;
10631 code = EQ;
10632 }
10633 else
10634 {
10635 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10636 code = NE;
10637 break;
10638 }
10639 break;
10640 case NE:
10641 case LTGT:
10642 if (code == NE && TARGET_IEEE_FP)
10643 {
10644 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10645 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10646 GEN_INT (0x40)));
10647 code = NE;
10648 }
10649 else
10650 {
10651 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10652 code = EQ;
10653 }
10654 break;
10655
10656 case UNORDERED:
10657 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10658 code = NE;
10659 break;
10660 case ORDERED:
10661 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10662 code = EQ;
10663 break;
10664
10665 default:
10666 gcc_unreachable ();
10667 }
10668 }
10669
10670 /* Return the test that should be put into the flags user, i.e.
10671 the bcc, scc, or cmov instruction. */
10672 return gen_rtx_fmt_ee (code, VOIDmode,
10673 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10674 const0_rtx);
10675}
10676
10677rtx
10678ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10679{
10680 rtx op0, op1, ret;
10681 op0 = ix86_compare_op0;
10682 op1 = ix86_compare_op1;
10683
10684 if (second_test)
10685 *second_test = NULL_RTX;
10686 if (bypass_test)
10687 *bypass_test = NULL_RTX;
10688
10689 if (ix86_compare_emitted)
10690 {
10691 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10692 ix86_compare_emitted = NULL_RTX;
10693 }
10694 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10695 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10696 second_test, bypass_test);
10697 else
10698 ret = ix86_expand_int_compare (code, op0, op1);
10699
10700 return ret;
10701}
10702
10703/* Return true if the CODE will result in nontrivial jump sequence. */
10704bool
10705ix86_fp_jump_nontrivial_p (enum rtx_code code)
10706{
10707 enum rtx_code bypass_code, first_code, second_code;
10708 if (!TARGET_CMOVE)
10709 return true;
10710 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10711 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10712}
10713
10714void
10715ix86_expand_branch (enum rtx_code code, rtx label)
10716{
10717 rtx tmp;
10718
10719 /* If we have emitted a compare insn, go straight to simple.
10720 ix86_expand_compare won't emit anything if ix86_compare_emitted
10721     is non-NULL.  */
10722 if (ix86_compare_emitted)
10723 goto simple;
10724
10725 switch (GET_MODE (ix86_compare_op0))
10726 {
10727 case QImode:
10728 case HImode:
10729 case SImode:
10730 simple:
10731 tmp = ix86_expand_compare (code, NULL, NULL);
10732 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10733 gen_rtx_LABEL_REF (VOIDmode, label),
10734 pc_rtx);
10735 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10736 return;
10737
10738 case SFmode:
10739 case DFmode:
10740 case XFmode:
10741 {
10742 rtvec vec;
10743 int use_fcomi;
10744 enum rtx_code bypass_code, first_code, second_code;
10745
10746 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10747 &ix86_compare_op1);
10748
10749 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10750
10751	/* Check whether we will use the natural sequence with one jump.  If
10752	   so, we can expand the jump early.  Otherwise delay expansion by
10753	   creating a compound insn so as not to confuse the optimizers.  */
10754 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10755 && TARGET_CMOVE)
10756 {
10757 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10758 gen_rtx_LABEL_REF (VOIDmode, label),
10759 pc_rtx, NULL_RTX, NULL_RTX);
10760 }
10761 else
10762 {
10763 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10764 ix86_compare_op0, ix86_compare_op1);
10765 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10766 gen_rtx_LABEL_REF (VOIDmode, label),
10767 pc_rtx);
10768 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10769
10770 use_fcomi = ix86_use_fcomi_compare (code);
10771 vec = rtvec_alloc (3 + !use_fcomi);
10772 RTVEC_ELT (vec, 0) = tmp;
10773 RTVEC_ELT (vec, 1)
10774 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10775 RTVEC_ELT (vec, 2)
10776 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10777 if (! use_fcomi)
10778 RTVEC_ELT (vec, 3)
10779 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10780
10781 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10782 }
10783 return;
10784 }
10785
10786 case DImode:
10787 if (TARGET_64BIT)
10788 goto simple;
10789 case TImode:
10790 /* Expand DImode branch into multiple compare+branch. */
10791 {
10792 rtx lo[2], hi[2], label2;
10793 enum rtx_code code1, code2, code3;
10794 enum machine_mode submode;
10795
10796 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10797 {
10798 tmp = ix86_compare_op0;
10799 ix86_compare_op0 = ix86_compare_op1;
10800 ix86_compare_op1 = tmp;
10801 code = swap_condition (code);
10802 }
10803 if (GET_MODE (ix86_compare_op0) == DImode)
10804 {
10805 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10806 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10807 submode = SImode;
10808 }
10809 else
10810 {
10811 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10812 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10813 submode = DImode;
10814 }
10815
10816 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10817 avoid two branches. This costs one extra insn, so disable when
10818 optimizing for size. */
10819
10820 if ((code == EQ || code == NE)
10821 && (!optimize_size
10822 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10823 {
10824 rtx xor0, xor1;
10825
10826 xor1 = hi[0];
10827 if (hi[1] != const0_rtx)
10828 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10829 NULL_RTX, 0, OPTAB_WIDEN);
10830
10831 xor0 = lo[0];
10832 if (lo[1] != const0_rtx)
10833 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10834 NULL_RTX, 0, OPTAB_WIDEN);
10835
10836 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10837 NULL_RTX, 0, OPTAB_WIDEN);
10838
10839 ix86_compare_op0 = tmp;
10840 ix86_compare_op1 = const0_rtx;
10841 ix86_expand_branch (code, label);
10842 return;
10843 }
10844
10845	/* Otherwise, if we are doing a less-than or greater-than-or-equal
10846	   comparison, op1 is a constant and the low word is zero, then we can
10847	   just examine the high word.  */
10848
10849 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10850 switch (code)
10851 {
10852 case LT: case LTU: case GE: case GEU:
10853 ix86_compare_op0 = hi[0];
10854 ix86_compare_op1 = hi[1];
10855 ix86_expand_branch (code, label);
10856 return;
10857 default:
10858 break;
10859 }
10860
10861 /* Otherwise, we need two or three jumps. */
10862
10863 label2 = gen_label_rtx ();
10864
10865 code1 = code;
10866 code2 = swap_condition (code);
10867 code3 = unsigned_condition (code);
10868
10869 switch (code)
10870 {
10871 case LT: case GT: case LTU: case GTU:
10872 break;
10873
10874 case LE: code1 = LT; code2 = GT; break;
10875 case GE: code1 = GT; code2 = LT; break;
10876 case LEU: code1 = LTU; code2 = GTU; break;
10877 case GEU: code1 = GTU; code2 = LTU; break;
10878
10879 case EQ: code1 = UNKNOWN; code2 = NE; break;
10880 case NE: code2 = UNKNOWN; break;
10881
10882 default:
10883 gcc_unreachable ();
10884 }
10885
10886 /*
10887 * a < b =>
10888 * if (hi(a) < hi(b)) goto true;
10889 * if (hi(a) > hi(b)) goto false;
10890 * if (lo(a) < lo(b)) goto true;
10891 * false:
10892 */
10893
10894 ix86_compare_op0 = hi[0];
10895 ix86_compare_op1 = hi[1];
10896
10897 if (code1 != UNKNOWN)
10898 ix86_expand_branch (code1, label);
10899 if (code2 != UNKNOWN)
10900 ix86_expand_branch (code2, label2);
10901
10902 ix86_compare_op0 = lo[0];
10903 ix86_compare_op1 = lo[1];
10904 ix86_expand_branch (code3, label);
10905
10906 if (code2 != UNKNOWN)
10907 emit_label (label2);
10908 return;
10909 }
10910
10911 default:
10912 gcc_unreachable ();
10913 }
10914}
10915
10916/* Split branch based on floating point condition. */
10917void
10918ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10919 rtx target1, rtx target2, rtx tmp, rtx pushed)
10920{
10921 rtx second, bypass;
10922 rtx label = NULL_RTX;
10923 rtx condition;
10924 int bypass_probability = -1, second_probability = -1, probability = -1;
10925 rtx i;
10926
10927 if (target2 != pc_rtx)
10928 {
10929 rtx tmp = target2;
10930 code = reverse_condition_maybe_unordered (code);
10931 target2 = target1;
10932 target1 = tmp;
10933 }
10934
10935 condition = ix86_expand_fp_compare (code, op1, op2,
10936 tmp, &second, &bypass);
10937
10938 /* Remove pushed operand from stack. */
10939 if (pushed)
10940 ix86_free_from_memory (GET_MODE (pushed));
10941
10942 if (split_branch_probability >= 0)
10943 {
10944      /* Distribute the probabilities across the jumps.
10945	 Assume that BYPASS and SECOND are always tests
10946	 for UNORDERED.  */
10947 probability = split_branch_probability;
10948
10949      /* A value of 1 is low enough that the probability does not need
10950	 to be updated.  Later we may run some experiments and see
10951	 if unordered values are more frequent in practice.  */
10952 if (bypass)
10953 bypass_probability = 1;
10954 if (second)
10955 second_probability = 1;
10956 }
10957 if (bypass != NULL_RTX)
10958 {
10959 label = gen_label_rtx ();
10960 i = emit_jump_insn (gen_rtx_SET
10961 (VOIDmode, pc_rtx,
10962 gen_rtx_IF_THEN_ELSE (VOIDmode,
10963 bypass,
10964 gen_rtx_LABEL_REF (VOIDmode,
10965 label),
10966 pc_rtx)));
10967 if (bypass_probability >= 0)
10968 REG_NOTES (i)
10969 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10970 GEN_INT (bypass_probability),
10971 REG_NOTES (i));
10972 }
10973 i = emit_jump_insn (gen_rtx_SET
10974 (VOIDmode, pc_rtx,
10975 gen_rtx_IF_THEN_ELSE (VOIDmode,
10976 condition, target1, target2)));
10977 if (probability >= 0)
10978 REG_NOTES (i)
10979 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10980 GEN_INT (probability),
10981 REG_NOTES (i));
10982 if (second != NULL_RTX)
10983 {
10984 i = emit_jump_insn (gen_rtx_SET
10985 (VOIDmode, pc_rtx,
10986 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10987 target2)));
10988 if (second_probability >= 0)
10989 REG_NOTES (i)
10990 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10991 GEN_INT (second_probability),
10992 REG_NOTES (i));
10993 }
10994 if (label != NULL_RTX)
10995 emit_label (label);
10996}
10997
10998int
10999ix86_expand_setcc (enum rtx_code code, rtx dest)
11000{
11001 rtx ret, tmp, tmpreg, equiv;
11002 rtx second_test, bypass_test;
11003
11004 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11005 return 0; /* FAIL */
11006
11007 gcc_assert (GET_MODE (dest) == QImode);
11008
11009 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11010 PUT_MODE (ret, QImode);
11011
11012 tmp = dest;
11013 tmpreg = dest;
11014
11015 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11016 if (bypass_test || second_test)
11017 {
11018 rtx test = second_test;
11019 int bypass = 0;
11020 rtx tmp2 = gen_reg_rtx (QImode);
11021 if (bypass_test)
11022 {
11023 gcc_assert (!second_test);
11024 test = bypass_test;
11025 bypass = 1;
11026 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11027 }
11028 PUT_MODE (test, QImode);
11029 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11030
11031 if (bypass)
11032 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11033 else
11034 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11035 }
11036
11037 /* Attach a REG_EQUAL note describing the comparison result. */
11038 if (ix86_compare_op0 && ix86_compare_op1)
11039 {
11040 equiv = simplify_gen_relational (code, QImode,
11041 GET_MODE (ix86_compare_op0),
11042 ix86_compare_op0, ix86_compare_op1);
11043 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11044 }
11045
11046 return 1; /* DONE */
11047}
11048
11049/* Expand comparison setting or clearing carry flag. Return true when
11050 successful and set pop for the operation. */
11051static bool
11052ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11053{
11054 enum machine_mode mode =
11055 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11056
11057  /* Do not handle double-word (DImode/TImode) compares, which go through a
11058     special path.  Also we can't deal with FP compares yet.  This is possible
11059     to add.  */
11059 if (mode == (TARGET_64BIT ? TImode : DImode))
11060 return false;
11061 if (FLOAT_MODE_P (mode))
11062 {
11063 rtx second_test = NULL, bypass_test = NULL;
11064 rtx compare_op, compare_seq;
11065
11066 /* Shortcut: following common codes never translate into carry flag compares. */
11067 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11068 || code == ORDERED || code == UNORDERED)
11069 return false;
11070
11071 /* These comparisons require zero flag; swap operands so they won't. */
11072 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11073 && !TARGET_IEEE_FP)
11074 {
11075 rtx tmp = op0;
11076 op0 = op1;
11077 op1 = tmp;
11078 code = swap_condition (code);
11079 }
11080
11081      /* Try to expand the comparison and verify that we end up with a carry
11082	 flag based comparison.  This fails to be true only when we decide to
11083	 expand the comparison using arithmetic, which is not a common scenario.  */
11084 start_sequence ();
11085 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11086 &second_test, &bypass_test);
11087 compare_seq = get_insns ();
11088 end_sequence ();
11089
11090 if (second_test || bypass_test)
11091 return false;
11092 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11093 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11094 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11095 else
11096 code = GET_CODE (compare_op);
11097 if (code != LTU && code != GEU)
11098 return false;
11099 emit_insn (compare_seq);
11100 *pop = compare_op;
11101 return true;
11102 }
11103 if (!INTEGRAL_MODE_P (mode))
11104 return false;
11105 switch (code)
11106 {
11107 case LTU:
11108 case GEU:
11109 break;
11110
11111 /* Convert a==0 into (unsigned)a<1. */
11112 case EQ:
11113 case NE:
11114 if (op1 != const0_rtx)
11115 return false;
11116 op1 = const1_rtx;
11117 code = (code == EQ ? LTU : GEU);
11118 break;
11119
11120      /* Convert a>b into b<a or a>=b+1.  */
11121 case GTU:
11122 case LEU:
11123 if (GET_CODE (op1) == CONST_INT)
11124 {
11125 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11126	      /* Bail out on overflow.  We could still swap the operands, but that
11127		 would force the constant to be loaded into a register.  */
11128 if (op1 == const0_rtx
11129 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11130 return false;
11131 code = (code == GTU ? GEU : LTU);
11132 }
11133 else
11134 {
11135 rtx tmp = op1;
11136 op1 = op0;
11137 op0 = tmp;
11138 code = (code == GTU ? LTU : GEU);
11139 }
11140 break;
11141
11142 /* Convert a>=0 into (unsigned)a<0x80000000. */
11143 case LT:
11144 case GE:
11145 if (mode == DImode || op1 != const0_rtx)
11146 return false;
11147 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11148 code = (code == LT ? GEU : LTU);
11149 break;
11150 case LE:
11151 case GT:
11152 if (mode == DImode || op1 != constm1_rtx)
11153 return false;
11154 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11155 code = (code == LE ? GEU : LTU);
11156 break;
11157
11158 default:
11159 return false;
11160 }
11161  /* Swapping operands may cause a constant to appear as the first operand.  */
11162 if (!nonimmediate_operand (op0, VOIDmode))
11163 {
11164 if (no_new_pseudos)
11165 return false;
11166 op0 = force_reg (mode, op0);
11167 }
11168 ix86_compare_op0 = op0;
11169 ix86_compare_op1 = op1;
11170 *pop = ix86_expand_compare (code, NULL, NULL);
11171 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11172 return true;
11173}
11174
11175int
11176ix86_expand_int_movcc (rtx operands[])
11177{
11178 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11179 rtx compare_seq, compare_op;
11180 rtx second_test, bypass_test;
11181 enum machine_mode mode = GET_MODE (operands[0]);
11182  bool sign_bit_compare_p = false;
11183
11184 start_sequence ();
11185 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11186 compare_seq = get_insns ();
11187 end_sequence ();
11188
11189 compare_code = GET_CODE (compare_op);
11190
11191 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11192 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11193 sign_bit_compare_p = true;
11194
11195 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11196 HImode insns, we'd be swallowed in word prefix ops. */
11197
11198 if ((mode != HImode || TARGET_FAST_PREFIX)
11199 && (mode != (TARGET_64BIT ? TImode : DImode))
11200 && GET_CODE (operands[2]) == CONST_INT
11201 && GET_CODE (operands[3]) == CONST_INT)
11202 {
11203 rtx out = operands[0];
11204 HOST_WIDE_INT ct = INTVAL (operands[2]);
11205 HOST_WIDE_INT cf = INTVAL (operands[3]);
11206 HOST_WIDE_INT diff;
11207
11208 diff = ct - cf;
11209      /* Sign bit compares are better done using shifts than by using
11210	 sbb.  */
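      /* For instance, x < 0 ? -1 : 0 is just an arithmetic shift right by
	 (GET_MODE_BITSIZE (mode) - 1) bits; the emit_store_flag calls below
	 with a target value of -1 are expected to produce such code.  */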
11211 if (sign_bit_compare_p
11212 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11213 ix86_compare_op1, &compare_op))
11214 {
11215 /* Detect overlap between destination and compare sources. */
11216 rtx tmp = out;
11217
11218 if (!sign_bit_compare_p)
11219 {
11220 bool fpcmp = false;
11221
11222 compare_code = GET_CODE (compare_op);
11223
11224 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11225 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11226 {
11227 fpcmp = true;
11228 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11229 }
11230
11231 /* To simplify rest of code, restrict to the GEU case. */
11232 if (compare_code == LTU)
11233 {
11234 HOST_WIDE_INT tmp = ct;
11235 ct = cf;
11236 cf = tmp;
11237 compare_code = reverse_condition (compare_code);
11238 code = reverse_condition (code);
11239 }
11240 else
11241 {
11242 if (fpcmp)
11243 PUT_CODE (compare_op,
11244 reverse_condition_maybe_unordered
11245 (GET_CODE (compare_op)));
11246 else
11247 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11248 }
11249 diff = ct - cf;
11250
11251 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11252 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11253 tmp = gen_reg_rtx (mode);
11254
11255 if (mode == DImode)
11256 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11257 else
11258 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11259 }
11260 else
11261 {
11262 if (code == GT || code == GE)
11263 code = reverse_condition (code);
11264 else
11265 {
11266 HOST_WIDE_INT tmp = ct;
11267 ct = cf;
11268 cf = tmp;
11269 diff = ct - cf;
11270 }
11271 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11272 ix86_compare_op1, VOIDmode, 0, -1);
11273 }
11274
11275 if (diff == 1)
11276 {
11277 /*
11278 * cmpl op0,op1
11279 * sbbl dest,dest
11280 * [addl dest, ct]
11281 *
11282 * Size 5 - 8.
11283 */
11284 if (ct)
11285 tmp = expand_simple_binop (mode, PLUS,
11286 tmp, GEN_INT (ct),
11287 copy_rtx (tmp), 1, OPTAB_DIRECT);
11288 }
11289 else if (cf == -1)
11290 {
11291 /*
11292 * cmpl op0,op1
11293 * sbbl dest,dest
11294 * orl $ct, dest
11295 *
11296 * Size 8.
11297 */
11298 tmp = expand_simple_binop (mode, IOR,
11299 tmp, GEN_INT (ct),
11300 copy_rtx (tmp), 1, OPTAB_DIRECT);
11301 }
11302 else if (diff == -1 && ct)
11303 {
11304 /*
11305 * cmpl op0,op1
11306 * sbbl dest,dest
11307 * notl dest
11308 * [addl dest, cf]
11309 *
11310 * Size 8 - 11.
11311 */
11312 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11313 if (cf)
11314 tmp = expand_simple_binop (mode, PLUS,
11315 copy_rtx (tmp), GEN_INT (cf),
11316 copy_rtx (tmp), 1, OPTAB_DIRECT);
11317 }
11318 else
11319 {
11320 /*
11321 * cmpl op0,op1
11322 * sbbl dest,dest
11323 * [notl dest]
11324 * andl cf - ct, dest
11325 * [addl dest, ct]
11326 *
11327 * Size 8 - 11.
11328 */
11329
11330 if (cf == 0)
11331 {
11332 cf = ct;
11333 ct = 0;
11334 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11335 }
11336
11337 tmp = expand_simple_binop (mode, AND,
11338 copy_rtx (tmp),
11339 gen_int_mode (cf - ct, mode),
11340 copy_rtx (tmp), 1, OPTAB_DIRECT);
11341 if (ct)
11342 tmp = expand_simple_binop (mode, PLUS,
11343 copy_rtx (tmp), GEN_INT (ct),
11344 copy_rtx (tmp), 1, OPTAB_DIRECT);
11345 }
11346
11347 if (!rtx_equal_p (tmp, out))
11348 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11349
11350 return 1; /* DONE */
11351 }
11352
11353 if (diff < 0)
11354 {
11355 HOST_WIDE_INT tmp;
11356 tmp = ct, ct = cf, cf = tmp;
11357 diff = -diff;
11358 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11359 {
11360	      /* We may be reversing an unordered compare to a normal compare, which
11361		 is not valid in general (we may convert a non-trapping condition
11362		 into a trapping one); however, on i386 we currently emit all
11363		 comparisons unordered.  */
11364 compare_code = reverse_condition_maybe_unordered (compare_code);
11365 code = reverse_condition_maybe_unordered (code);
11366 }
11367 else
11368 {
11369 compare_code = reverse_condition (compare_code);
11370 code = reverse_condition (code);
11371 }
11372 }
11373
11374 compare_code = UNKNOWN;
11375 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11376 && GET_CODE (ix86_compare_op1) == CONST_INT)
11377 {
11378 if (ix86_compare_op1 == const0_rtx
11379 && (code == LT || code == GE))
11380 compare_code = code;
11381 else if (ix86_compare_op1 == constm1_rtx)
11382 {
11383 if (code == LE)
11384 compare_code = LT;
11385 else if (code == GT)
11386 compare_code = GE;
11387 }
11388 }
11389
11390 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11391 if (compare_code != UNKNOWN
11392 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11393 && (cf == -1 || ct == -1))
11394 {
11395 /* If lea code below could be used, only optimize
11396 if it results in a 2 insn sequence. */
11397
11398 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11399 || diff == 3 || diff == 5 || diff == 9)
11400 || (compare_code == LT && ct == -1)
11401 || (compare_code == GE && cf == -1))
11402 {
11403 /*
11404 * notl op1 (if necessary)
11405 * sarl $31, op1
11406 * orl cf, op1
11407 */
11408 if (ct != -1)
11409 {
11410 cf = ct;
11411 ct = -1;
11412 code = reverse_condition (code);
11413 }
11414
11415 out = emit_store_flag (out, code, ix86_compare_op0,
11416 ix86_compare_op1, VOIDmode, 0, -1);
11417
11418 out = expand_simple_binop (mode, IOR,
11419 out, GEN_INT (cf),
11420 out, 1, OPTAB_DIRECT);
11421 if (out != operands[0])
11422 emit_move_insn (operands[0], out);
11423
11424 return 1; /* DONE */
11425 }
11426 }
11427
11428
11429 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11430 || diff == 3 || diff == 5 || diff == 9)
11431 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11432 && (mode != DImode
11433 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11434 {
11435 /*
11436 * xorl dest,dest
11437 * cmpl op1,op2
11438 * setcc dest
11439 * lea cf(dest*(ct-cf)),dest
11440 *
11441 * Size 14.
11442 *
11443 * This also catches the degenerate setcc-only case.
11444 */
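	  /* For instance (illustrative constants only): ct = 5, cf = 2 gives
	     diff = 3, so after the setcc the 0/1 value is scaled as
	     dest = 2 + dest * 3, i.e. "lea 2(dest,dest,2), dest" selects
	     between 2 and 5.  */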
11445
11446 rtx tmp;
11447 int nops;
11448
11449 out = emit_store_flag (out, code, ix86_compare_op0,
11450 ix86_compare_op1, VOIDmode, 0, 1);
11451
11452 nops = 0;
11453      /* On x86_64 the lea instruction operates on Pmode, so we need
11454	 to get the arithmetic done in the proper mode to match.  */
11455 if (diff == 1)
11456 tmp = copy_rtx (out);
11457 else
11458 {
11459 rtx out1;
11460 out1 = copy_rtx (out);
11461 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11462 nops++;
11463 if (diff & 1)
11464 {
11465 tmp = gen_rtx_PLUS (mode, tmp, out1);
11466 nops++;
11467 }
11468 }
11469 if (cf != 0)
11470 {
11471 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11472 nops++;
11473 }
11474 if (!rtx_equal_p (tmp, out))
11475 {
11476 if (nops == 1)
11477 out = force_operand (tmp, copy_rtx (out));
11478 else
11479 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11480 }
11481 if (!rtx_equal_p (out, operands[0]))
11482 emit_move_insn (operands[0], copy_rtx (out));
11483
11484 return 1; /* DONE */
11485 }
11486
11487 /*
11488 * General case: Jumpful:
11489 * xorl dest,dest cmpl op1, op2
11490 * cmpl op1, op2 movl ct, dest
11491 * setcc dest jcc 1f
11492 * decl dest movl cf, dest
11493 * andl (cf-ct),dest 1:
11494 * addl ct,dest
11495 *
11496 * Size 20. Size 14.
11497 *
11498 * This is reasonably steep, but branch mispredict costs are
11499 * high on modern cpus, so consider failing only if optimizing
11500 * for space.
11501 */
11502
11503 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11504 && BRANCH_COST >= 2)
11505 {
11506 if (cf == 0)
11507 {
11508 cf = ct;
11509 ct = 0;
11510 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11511	    /* We may be reversing an unordered compare to a normal compare,
11512	       which is not valid in general (we may convert a non-trapping
11513	       condition into a trapping one); however, on i386 we currently
11514	       emit all comparisons unordered.  */
11515 code = reverse_condition_maybe_unordered (code);
11516 else
11517 {
11518 code = reverse_condition (code);
11519 if (compare_code != UNKNOWN)
11520 compare_code = reverse_condition (compare_code);
11521 }
11522 }
11523
11524 if (compare_code != UNKNOWN)
11525 {
11526 /* notl op1 (if needed)
11527 sarl $31, op1
11528 andl (cf-ct), op1
11529 addl ct, op1
11530
11531 For x < 0 (resp. x <= -1) there will be no notl,
11532 so if possible swap the constants to get rid of the
11533 complement.
11534 True/false will be -1/0 while code below (store flag
11535 followed by decrement) is 0/-1, so the constants need
11536 to be exchanged once more. */
11537
11538 if (compare_code == GE || !cf)
11539 {
11540 code = reverse_condition (code);
11541 compare_code = LT;
11542 }
11543 else
11544 {
11545 HOST_WIDE_INT tmp = cf;
11546 cf = ct;
11547 ct = tmp;
11548 }
11549
11550 out = emit_store_flag (out, code, ix86_compare_op0,
11551 ix86_compare_op1, VOIDmode, 0, -1);
11552 }
11553 else
11554 {
11555 out = emit_store_flag (out, code, ix86_compare_op0,
11556 ix86_compare_op1, VOIDmode, 0, 1);
11557
11558 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11559 copy_rtx (out), 1, OPTAB_DIRECT);
11560 }
11561
11562 out = expand_simple_binop (mode, AND, copy_rtx (out),
11563 gen_int_mode (cf - ct, mode),
11564 copy_rtx (out), 1, OPTAB_DIRECT);
11565 if (ct)
11566 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11567 copy_rtx (out), 1, OPTAB_DIRECT);
11568 if (!rtx_equal_p (out, operands[0]))
11569 emit_move_insn (operands[0], copy_rtx (out));
11570
11571 return 1; /* DONE */
11572 }
11573 }
11574
11575 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11576 {
11577 /* Try a few things more with specific constants and a variable. */
11578
11579 optab op;
11580 rtx var, orig_out, out, tmp;
11581
11582 if (BRANCH_COST <= 2)
11583 return 0; /* FAIL */
11584
11585      /* If one of the two operands is an interesting constant, load a 0/-1
11586	 constant via the recursive call below and mask the variable in with a
11587	 logical operation.  */
11587
11588 if (GET_CODE (operands[2]) == CONST_INT)
11589 {
11590 var = operands[3];
11591 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11592 operands[3] = constm1_rtx, op = and_optab;
11593 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11594 operands[3] = const0_rtx, op = ior_optab;
11595 else
11596 return 0; /* FAIL */
11597 }
11598 else if (GET_CODE (operands[3]) == CONST_INT)
11599 {
11600 var = operands[2];
11601 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11602 operands[2] = constm1_rtx, op = and_optab;
11603	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11604 operands[2] = const0_rtx, op = ior_optab;
11605 else
11606 return 0; /* FAIL */
11607 }
11608 else
11609 return 0; /* FAIL */
11610
11611 orig_out = operands[0];
11612 tmp = gen_reg_rtx (mode);
11613 operands[0] = tmp;
11614
11615 /* Recurse to get the constant loaded. */
11616 if (ix86_expand_int_movcc (operands) == 0)
11617 return 0; /* FAIL */
11618
11619 /* Mask in the interesting variable. */
11620 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11621 OPTAB_WIDEN);
11622 if (!rtx_equal_p (out, orig_out))
11623 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11624
11625 return 1; /* DONE */
11626 }
11627
11628 /*
11629 * For comparison with above,
11630 *
11631 * movl cf,dest
11632 * movl ct,tmp
11633 * cmpl op1,op2
11634 * cmovcc tmp,dest
11635 *
11636 * Size 15.
11637 */
11638
11639 if (! nonimmediate_operand (operands[2], mode))
11640 operands[2] = force_reg (mode, operands[2]);
11641 if (! nonimmediate_operand (operands[3], mode))
11642 operands[3] = force_reg (mode, operands[3]);
11643
11644 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11645 {
11646 rtx tmp = gen_reg_rtx (mode);
11647 emit_move_insn (tmp, operands[3]);
11648 operands[3] = tmp;
11649 }
11650 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11651 {
11652 rtx tmp = gen_reg_rtx (mode);
11653 emit_move_insn (tmp, operands[2]);
11654 operands[2] = tmp;
11655 }
11656
11657 if (! register_operand (operands[2], VOIDmode)
11658 && (mode == QImode
11659 || ! register_operand (operands[3], VOIDmode)))
11660 operands[2] = force_reg (mode, operands[2]);
11661
11662 if (mode == QImode
11663 && ! register_operand (operands[3], VOIDmode))
11664 operands[3] = force_reg (mode, operands[3]);
11665
11666 emit_insn (compare_seq);
11667 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11668 gen_rtx_IF_THEN_ELSE (mode,
11669 compare_op, operands[2],
11670 operands[3])));
11671 if (bypass_test)
11672 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11673 gen_rtx_IF_THEN_ELSE (mode,
11674 bypass_test,
11675 copy_rtx (operands[3]),
11676 copy_rtx (operands[0]))));
11677 if (second_test)
11678 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11679 gen_rtx_IF_THEN_ELSE (mode,
11680 second_test,
11681 copy_rtx (operands[2]),
11682 copy_rtx (operands[0]))));
11683
11684 return 1; /* DONE */
11685}
11686
11687/* Swap, force into registers, or otherwise massage the two operands
11688 to an sse comparison with a mask result. Thus we differ a bit from
11689 ix86_prepare_fp_compare_args which expects to produce a flags result.
11690
11691 The DEST operand exists to help determine whether to commute commutative
11692 operators. The POP0/POP1 operands are updated in place. The new
11693 comparison code is returned, or UNKNOWN if not implementable. */
11694
11695static enum rtx_code
11696ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11697 rtx *pop0, rtx *pop1)
11698{
11699 rtx tmp;
11700
11701 switch (code)
11702 {
11703 case LTGT:
11704 case UNEQ:
11705 /* We have no LTGT as an operator. We could implement it with
11706 NE & ORDERED, but this requires an extra temporary. It's
11707 not clear that it's worth it. */
11708 return UNKNOWN;
11709
11710 case LT:
11711 case LE:
11712 case UNGT:
11713 case UNGE:
11714 /* These are supported directly. */
11715 break;
11716
11717 case EQ:
11718 case NE:
11719 case UNORDERED:
11720 case ORDERED:
11721 /* For commutative operators, try to canonicalize the destination
11722 operand to be first in the comparison - this helps reload to
11723 avoid extra moves. */
11724 if (!dest || !rtx_equal_p (dest, *pop1))
11725 break;
11726 /* FALLTHRU */
11727
11728 case GE:
11729 case GT:
11730 case UNLE:
11731 case UNLT:
11732 /* These are not supported directly. Swap the comparison operands
11733 to transform into something that is supported. */
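      /* For example, (GT x y) becomes (LT y x), one of the comparisons
	 handled directly above.  */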
11734 tmp = *pop0;
11735 *pop0 = *pop1;
11736 *pop1 = tmp;
11737 code = swap_condition (code);
11738 break;
11739
11740 default:
11741 gcc_unreachable ();
11742 }
11743
11744 return code;
11745}
11746
11747/* Detect conditional moves that exactly match min/max operational
11748 semantics. Note that this is IEEE safe, as long as we don't
11749 interchange the operands.
11750
11751 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11752 and TRUE if the operation is successful and instructions are emitted. */
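/* For instance, a scalar (LT a b) ? a : b is emitted as a single SMIN (and
   (LT a b) ? b : a as SMAX) when both flag_finite_math_only and
   flag_unsafe_math_optimizations are set; otherwise the UNSPEC_IEEE_MIN /
   UNSPEC_IEEE_MAX form below is used so that the operand order, and with it
   the IEEE NaN and signed-zero behaviour, is preserved.  */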
11753
11754static bool
11755ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11756 rtx cmp_op1, rtx if_true, rtx if_false)
11757{
11758 enum machine_mode mode;
11759 bool is_min;
11760 rtx tmp;
11761
11762 if (code == LT)
11763 ;
11764 else if (code == UNGE)
11765 {
11766 tmp = if_true;
11767 if_true = if_false;
11768 if_false = tmp;
11769 }
11770 else
11771 return false;
11772
11773 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11774 is_min = true;
11775 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11776 is_min = false;
11777 else
11778 return false;
11779
11780 mode = GET_MODE (dest);
11781
11782 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11783 but MODE may be a vector mode and thus not appropriate. */
11784 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11785 {
11786 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11787 rtvec v;
11788
11789 if_true = force_reg (mode, if_true);
11790 v = gen_rtvec (2, if_true, if_false);
11791 tmp = gen_rtx_UNSPEC (mode, v, u);
11792 }
11793 else
11794 {
11795 code = is_min ? SMIN : SMAX;
11796 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11797 }
11798
11799 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11800 return true;
11801}
11802
11803/* Expand an sse vector comparison. Return the register with the result. */
11804
11805static rtx
11806ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11807 rtx op_true, rtx op_false)
11808{
11809 enum machine_mode mode = GET_MODE (dest);
11810 rtx x;
11811
11812 cmp_op0 = force_reg (mode, cmp_op0);
11813 if (!nonimmediate_operand (cmp_op1, mode))
11814 cmp_op1 = force_reg (mode, cmp_op1);
11815
11816 if (optimize
11817 || reg_overlap_mentioned_p (dest, op_true)
11818 || reg_overlap_mentioned_p (dest, op_false))
11819 dest = gen_reg_rtx (mode);
11820
11821 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11822 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11823
11824 return dest;
11825}
11826
11827/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11828 operations. This is used for both scalar and vector conditional moves. */
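/* In the general case the expansion computes, in C terms,
   dest = (cmp & op_true) | (~cmp & op_false), where CMP is the all-ones /
   all-zeros mask produced by the comparison; the two special cases below
   simply drop the arm that is known to be zero.  */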
11829
11830static void
11831ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11832{
11833 enum machine_mode mode = GET_MODE (dest);
11834 rtx t2, t3, x;
11835
11836 if (op_false == CONST0_RTX (mode))
11837 {
11838 op_true = force_reg (mode, op_true);
11839 x = gen_rtx_AND (mode, cmp, op_true);
11840 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11841 }
11842 else if (op_true == CONST0_RTX (mode))
11843 {
11844 op_false = force_reg (mode, op_false);
11845 x = gen_rtx_NOT (mode, cmp);
11846 x = gen_rtx_AND (mode, x, op_false);
11847 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11848 }
11849 else
11850 {
11851 op_true = force_reg (mode, op_true);
11852 op_false = force_reg (mode, op_false);
11853
11854 t2 = gen_reg_rtx (mode);
11855 if (optimize)
11856 t3 = gen_reg_rtx (mode);
11857 else
11858 t3 = dest;
11859
11860 x = gen_rtx_AND (mode, op_true, cmp);
11861 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11862
11863 x = gen_rtx_NOT (mode, cmp);
11864 x = gen_rtx_AND (mode, x, op_false);
11865 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11866
11867 x = gen_rtx_IOR (mode, t3, t2);
11868 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11869 }
11870}
11871
11872/* Expand a floating-point conditional move. Return true if successful. */
11873
11874int
11875ix86_expand_fp_movcc (rtx operands[])
11876{
11877 enum machine_mode mode = GET_MODE (operands[0]);
11878 enum rtx_code code = GET_CODE (operands[1]);
11879 rtx tmp, compare_op, second_test, bypass_test;
11880
11881 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11882 {
11883 enum machine_mode cmode;
11884
11885 /* Since we've no cmove for sse registers, don't force bad register
11886 allocation just to gain access to it. Deny movcc when the
11887 comparison mode doesn't match the move mode. */
11888 cmode = GET_MODE (ix86_compare_op0);
11889 if (cmode == VOIDmode)
11890 cmode = GET_MODE (ix86_compare_op1);
11891 if (cmode != mode)
11892 return 0;
11893
11894 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11895 &ix86_compare_op0,
11896 &ix86_compare_op1);
11897 if (code == UNKNOWN)
11898 return 0;
11899
11900 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11901 ix86_compare_op1, operands[2],
11902 operands[3]))
11903 return 1;
11904
11905 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11906 ix86_compare_op1, operands[2], operands[3]);
11907 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11908 return 1;
11909 }
11910
11911 /* The floating point conditional move instructions don't directly
11912 support conditions resulting from a signed integer comparison. */
11913
11914 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11915
11919 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11920 {
11921 gcc_assert (!second_test && !bypass_test);
11922 tmp = gen_reg_rtx (QImode);
11923 ix86_expand_setcc (code, tmp);
11924 code = NE;
11925 ix86_compare_op0 = tmp;
11926 ix86_compare_op1 = const0_rtx;
11927 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11928 }
11929 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11930 {
11931 tmp = gen_reg_rtx (mode);
11932 emit_move_insn (tmp, operands[3]);
11933 operands[3] = tmp;
11934 }
11935 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11936 {
11937 tmp = gen_reg_rtx (mode);
11938 emit_move_insn (tmp, operands[2]);
11939 operands[2] = tmp;
11940 }
11941
11942 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11943 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11944 operands[2], operands[3])));
11945 if (bypass_test)
11946 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11947 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11948 operands[3], operands[0])));
11949 if (second_test)
11950 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11951 gen_rtx_IF_THEN_ELSE (mode, second_test,
11952 operands[2], operands[0])));
11953
11954 return 1;
11955}
11956
11957/* Expand a floating-point vector conditional move; a vcond operation
11958 rather than a movcc operation. */
11959
11960bool
11961ix86_expand_fp_vcond (rtx operands[])
11962{
11963 enum rtx_code code = GET_CODE (operands[3]);
11964 rtx cmp;
11965
11966 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11967 &operands[4], &operands[5]);
11968 if (code == UNKNOWN)
11969 return false;
11970
11971 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11972 operands[5], operands[1], operands[2]))
11973 return true;
11974
11975 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11976 operands[1], operands[2]);
11977 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11978 return true;
11979}
11980
11981/* Expand a signed integral vector conditional move. */
11982
11983bool
11984ix86_expand_int_vcond (rtx operands[])
11985{
11986 enum machine_mode mode = GET_MODE (operands[0]);
11987 enum rtx_code code = GET_CODE (operands[3]);
11988 bool negate = false;
11989 rtx x, cop0, cop1;
11990
11991 cop0 = operands[4];
11992 cop1 = operands[5];
11993
11994 /* Canonicalize the comparison to EQ, GT, GTU. */
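  /* For example, (LT x y) is handled below as (GT y x) by swapping the
     comparison operands, while (LE x y) is handled as the negation of
     (GT x y): the code is reversed and NEGATE makes the expansion pick
     the opposite vcond arm.  */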
11995 switch (code)
11996 {
11997 case EQ:
11998 case GT:
11999 case GTU:
12000 break;
12001
12002 case NE:
12003 case LE:
12004 case LEU:
12005 code = reverse_condition (code);
12006 negate = true;
12007 break;
12008
12009 case GE:
12010 case GEU:
12011 code = reverse_condition (code);
12012 negate = true;
12013 /* FALLTHRU */
12014
12015 case LT:
12016 case LTU:
12017 code = swap_condition (code);
12018 x = cop0, cop0 = cop1, cop1 = x;
12019 break;
12020
12021 default:
12022 gcc_unreachable ();
12023 }
12024
12025 /* Unsigned parallel compare is not supported by the hardware. Play some
12026 tricks to turn this into a signed comparison against 0. */
12027 if (code == GTU)
12028 {
12029 cop0 = force_reg (mode, cop0);
12030
12031 switch (mode)
12032 {
12033 case V4SImode:
12034 {
12035 rtx t1, t2, mask;
12036
12037 /* Perform a parallel modulo subtraction. */
12038 t1 = gen_reg_rtx (mode);
12039 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12040
12041 /* Extract the original sign bit of op0. */
12042 mask = GEN_INT (-0x80000000);
12043 mask = gen_rtx_CONST_VECTOR (mode,
12044 gen_rtvec (4, mask, mask, mask, mask));
12045 mask = force_reg (mode, mask);
12046 t2 = gen_reg_rtx (mode);
12047 emit_insn (gen_andv4si3 (t2, cop0, mask));
12048
12049 /* XOR it back into the result of the subtraction. This results
12050 in the sign bit set iff we saw unsigned underflow. */
12051 x = gen_reg_rtx (mode);
12052 emit_insn (gen_xorv4si3 (x, t1, t2));
12053
12054 code = GT;
12055 }
12056 break;
12057
12058 case V16QImode:
12059 case V8HImode:
12060 /* Perform a parallel unsigned saturating subtraction. */
12061 x = gen_reg_rtx (mode);
12062 emit_insn (gen_rtx_SET (VOIDmode, x,
12063 gen_rtx_US_MINUS (mode, cop0, cop1)));
12064
12065 code = EQ;
12066 negate = !negate;
12067 break;
12068
12069 default:
12070 gcc_unreachable ();
12071 }
12072
12073 cop0 = x;
12074 cop1 = CONST0_RTX (mode);
12075 }
12076
12077 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12078 operands[1+negate], operands[2-negate]);
12079
12080 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12081 operands[2-negate]);
12082 return true;
12083}
12084
12085 /* Expand conditional increment or decrement using adc/sbb instructions.
12086 The default case using setcc followed by the conditional move can be
12087 done by generic code. */
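/* For example, the C statement  r = x + (a < b)  with unsigned a and b can
   be expanded here as a compare that leaves the result in the carry flag
   followed by an add-with-carry of 0 into r, with no setcc or cmov.  */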
12088int
12089ix86_expand_int_addcc (rtx operands[])
12090{
12091 enum rtx_code code = GET_CODE (operands[1]);
12092 rtx compare_op;
12093 rtx val = const0_rtx;
12094 bool fpcmp = false;
12095 enum machine_mode mode = GET_MODE (operands[0]);
12096
12097 if (operands[3] != const1_rtx
12098 && operands[3] != constm1_rtx)
12099 return 0;
12100 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12101 ix86_compare_op1, &compare_op))
12102 return 0;
12103 code = GET_CODE (compare_op);
12104
12105 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12106 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12107 {
12108 fpcmp = true;
12109 code = ix86_fp_compare_code_to_integer (code);
12110 }
12111
12112 if (code != LTU)
12113 {
12114 val = constm1_rtx;
12115 if (fpcmp)
12116 PUT_CODE (compare_op,
12117 reverse_condition_maybe_unordered
12118 (GET_CODE (compare_op)));
12119 else
12120 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12121 }
12122 PUT_MODE (compare_op, mode);
12123
12124 /* Construct either adc or sbb insn. */
12125 if ((code == LTU) == (operands[3] == constm1_rtx))
12126 {
12127 switch (GET_MODE (operands[0]))
12128 {
12129 case QImode:
12130 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12131 break;
12132 case HImode:
12133 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12134 break;
12135 case SImode:
12136 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12137 break;
12138 case DImode:
12139 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12140 break;
12141 default:
12142 gcc_unreachable ();
12143 }
12144 }
12145 else
12146 {
12147 switch (GET_MODE (operands[0]))
12148 {
12149 case QImode:
12150 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12151 break;
12152 case HImode:
12153 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12154 break;
12155 case SImode:
12156 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12157 break;
12158 case DImode:
12159 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12160 break;
12161 default:
12162 gcc_unreachable ();
12163 }
12164 }
12165 return 1; /* DONE */
12166}
12167
12168
12169 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
12170    works for floating point parameters and non-offsettable memories.
12171    For pushes, it returns just stack offsets; the values will be saved
12172    in the right order.  At most three parts are generated.  */
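/* For example, on a 32-bit target a DImode or DFmode value is returned as
   two SImode parts, least significant part first, and an XFmode value as
   three.  */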
12173
12174static int
12175ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12176{
12177 int size;
12178
12179 if (!TARGET_64BIT)
12180     size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12181 else
12182 size = (GET_MODE_SIZE (mode) + 4) / 8;
12183
12184 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
12185 gcc_assert (size >= 2 && size <= 3);
12186
12187 /* Optimize constant pool reference to immediates. This is used by fp
12188 moves, that force all constants to memory to allow combining. */
12189 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
12190 {
12191 rtx tmp = maybe_get_pool_constant (operand);
12192 if (tmp)
12193 operand = tmp;
12194 }
12195
12196 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
12197 {
12198       /* The only non-offsettable memories we handle are pushes.  */
12199 int ok = push_operand (operand, VOIDmode);
12200
12201 gcc_assert (ok);
12202
12203 operand = copy_rtx (operand);
12204 PUT_MODE (operand, Pmode);
12205 parts[0] = parts[1] = parts[2] = operand;
12206 return size;
12207 }
12208
12209 if (GET_CODE (operand) == CONST_VECTOR)
12210 {
12211 enum machine_mode imode = int_mode_for_mode (mode);
12212 /* Caution: if we looked through a constant pool memory above,
12213 the operand may actually have a different mode now. That's
12214 ok, since we want to pun this all the way back to an integer. */
12215 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12216 gcc_assert (operand != NULL);
12217 mode = imode;
12218 }
12219
12220 if (!TARGET_64BIT)
12221 {
12222 if (mode == DImode)
12223 split_di (&operand, 1, &parts[0], &parts[1]);
12224 else
12225 {
12226 if (REG_P (operand))
12227 {
12228 gcc_assert (reload_completed);
12229 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12230 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12231 if (size == 3)
12232 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12233 }
12234 else if (offsettable_memref_p (operand))
12235 {
12236 operand = adjust_address (operand, SImode, 0);
12237 parts[0] = operand;
12238 parts[1] = adjust_address (operand, SImode, 4);
12239 if (size == 3)
12240 parts[2] = adjust_address (operand, SImode, 8);
12241 }
12242 else if (GET_CODE (operand) == CONST_DOUBLE)
12243 {
12244 REAL_VALUE_TYPE r;
12245 long l[4];
12246
12247 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12248 switch (mode)
12249 {
12250 case XFmode:
12251 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12252 parts[2] = gen_int_mode (l[2], SImode);
12253 break;
12254 case DFmode:
12255 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12256 break;
12257 default:
12258 gcc_unreachable ();
12259 }
12260 parts[1] = gen_int_mode (l[1], SImode);
12261 parts[0] = gen_int_mode (l[0], SImode);
12262 }
12263 else
12264 gcc_unreachable ();
12265 }
12266 }
12267 else
12268 {
12269 if (mode == TImode)
12270 split_ti (&operand, 1, &parts[0], &parts[1]);
12271 if (mode == XFmode || mode == TFmode)
12272 {
12273	  enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
12274 if (REG_P (operand))
12275 {
12276 gcc_assert (reload_completed);
12277 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12278 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12279 }
12280 else if (offsettable_memref_p (operand))
12281 {
12282 operand = adjust_address (operand, DImode, 0);
12283 parts[0] = operand;
12284 parts[1] = adjust_address (operand, upper_mode, 8);
12285 }
12286 else if (GET_CODE (operand) == CONST_DOUBLE)
12287 {
12288 REAL_VALUE_TYPE r;
12289 long l[4];
12290
12291 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12292 real_to_target (l, &r, mode);
12293
12294 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12295 if (HOST_BITS_PER_WIDE_INT >= 64)
12296 parts[0]
12297 = gen_int_mode
12298 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12299 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12300 DImode);
12301 else
12302 parts[0] = immed_double_const (l[0], l[1], DImode);
12303
12304 if (upper_mode == SImode)
12305 parts[1] = gen_int_mode (l[2], SImode);
12306 else if (HOST_BITS_PER_WIDE_INT >= 64)
12307 parts[1]
12308 = gen_int_mode
12309 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12310 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12311 DImode);
12312 else
12313 parts[1] = immed_double_const (l[2], l[3], DImode);
12314 }
12315 else
12316 gcc_unreachable ();
12317 }
12318 }
12319
12320 return size;
12321}
12322
12323 /* Emit insns to perform a move or push of DI, DF, and XF values.
12324    All required insns are emitted here; the caller does not need to emit
12325    any further moves.  Operands 2-4 are filled with the destination parts
12326    in the correct order; operands 5-7 with the source parts.  */
12327
12328void
12329ix86_split_long_move (rtx operands[])
12330{
12331 rtx part[2][3];
12332 int nparts;
12333 int push = 0;
12334 int collisions = 0;
12335 enum machine_mode mode = GET_MODE (operands[0]);
12336
12337   /* The DFmode expanders may ask us to move a double.
12338      For a 64-bit target this is a single move.  By hiding the fact
12339      here we simplify the i386.md splitters.  */
12340 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12341 {
12342 /* Optimize constant pool reference to immediates. This is used by
12343 fp moves, that force all constants to memory to allow combining. */
12344
12345 if (GET_CODE (operands[1]) == MEM
12346 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12347 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12348 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12349 if (push_operand (operands[0], VOIDmode))
12350 {
12351 operands[0] = copy_rtx (operands[0]);
12352 PUT_MODE (operands[0], Pmode);
12353 }
12354 else
12355 operands[0] = gen_lowpart (DImode, operands[0]);
12356 operands[1] = gen_lowpart (DImode, operands[1]);
12357 emit_move_insn (operands[0], operands[1]);
12358 return;
12359 }
12360
12361   /* The only non-offsettable memory we handle is a push.  */
12362 if (push_operand (operands[0], VOIDmode))
12363 push = 1;
12364 else
12365 gcc_assert (GET_CODE (operands[0]) != MEM
12366 || offsettable_memref_p (operands[0]));
12367
12368 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12369 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12370
12371 /* When emitting push, take care for source operands on the stack. */
12372 if (push && GET_CODE (operands[1]) == MEM
12373 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12374 {
12375 if (nparts == 3)
12376 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12377 XEXP (part[1][2], 0));
12378 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12379 XEXP (part[1][1], 0));
12380 }
12381
12382   /* We need to do the copy in the right order in case an address register
12383      of the source overlaps the destination.  */
12384 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12385 {
12386 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12387 collisions++;
12388 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12389 collisions++;
12390 if (nparts == 3
12391 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12392 collisions++;
12393
12394 /* Collision in the middle part can be handled by reordering. */
12395 if (collisions == 1 && nparts == 3
12396 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12397 {
12398 rtx tmp;
12399 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12400 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12401 }
12402
12403 /* If there are more collisions, we can't handle it by reordering.
12404 Do an lea to the last part and use only one colliding move. */
12405 else if (collisions > 1)
12406 {
12407 rtx base;
12408
12409 collisions = 1;
12410
12411 base = part[0][nparts - 1];
12412
12413 /* Handle the case when the last part isn't valid for lea.
12414 Happens in 64-bit mode storing the 12-byte XFmode. */
12415 if (GET_MODE (base) != Pmode)
12416 base = gen_rtx_REG (Pmode, REGNO (base));
12417
12418 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12419 part[1][0] = replace_equiv_address (part[1][0], base);
12420 part[1][1] = replace_equiv_address (part[1][1],
12421 plus_constant (base, UNITS_PER_WORD));
12422 if (nparts == 3)
12423 part[1][2] = replace_equiv_address (part[1][2],
12424 plus_constant (base, 8));
12425 }
12426 }
12427
12428 if (push)
12429 {
12430 if (!TARGET_64BIT)
12431 {
12432 if (nparts == 3)
12433 {
12434 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12435 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12436 emit_move_insn (part[0][2], part[1][2]);
12437 }
12438 }
12439 else
12440 {
12441	  /* In 64-bit mode we don't have a 32-bit push available.  If the operand
12442	     is a register, that is OK - we just use the larger counterpart.  We
12443	     also retype the memory operand - this comes from an attempt to avoid
12444	     a REX prefix on moving the second half of a TFmode value.  */
12445 if (GET_MODE (part[1][1]) == SImode)
12446 {
12447 switch (GET_CODE (part[1][1]))
12448 {
12449 case MEM:
12450 part[1][1] = adjust_address (part[1][1], DImode, 0);
12451 break;
12452
12453 case REG:
12454 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12455 break;
12456
12457 default:
12458 gcc_unreachable ();
12459 }
12460
12461 if (GET_MODE (part[1][0]) == SImode)
12462 part[1][0] = part[1][1];
12463 }
12464 }
12465 emit_move_insn (part[0][1], part[1][1]);
12466 emit_move_insn (part[0][0], part[1][0]);
12467 return;
12468 }
12469
12470 /* Choose correct order to not overwrite the source before it is copied. */
12471 if ((REG_P (part[0][0])
12472 && REG_P (part[1][1])
12473 && (REGNO (part[0][0]) == REGNO (part[1][1])
12474 || (nparts == 3
12475 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12476 || (collisions > 0
12477 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12478 {
12479 if (nparts == 3)
12480 {
12481 operands[2] = part[0][2];
12482 operands[3] = part[0][1];
12483 operands[4] = part[0][0];
12484 operands[5] = part[1][2];
12485 operands[6] = part[1][1];
12486 operands[7] = part[1][0];
12487 }
12488 else
12489 {
12490 operands[2] = part[0][1];
12491 operands[3] = part[0][0];
12492 operands[5] = part[1][1];
12493 operands[6] = part[1][0];
12494 }
12495 }
12496 else
12497 {
12498 if (nparts == 3)
12499 {
12500 operands[2] = part[0][0];
12501 operands[3] = part[0][1];
12502 operands[4] = part[0][2];
12503 operands[5] = part[1][0];
12504 operands[6] = part[1][1];
12505 operands[7] = part[1][2];
12506 }
12507 else
12508 {
12509 operands[2] = part[0][0];
12510 operands[3] = part[0][1];
12511 operands[5] = part[1][0];
12512 operands[6] = part[1][1];
12513 }
12514 }
12515
12516 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12517 if (optimize_size)
12518 {
12519 if (GET_CODE (operands[5]) == CONST_INT
12520 && operands[5] != const0_rtx
12521 && REG_P (operands[2]))
12522 {
12523 if (GET_CODE (operands[6]) == CONST_INT
12524 && INTVAL (operands[6]) == INTVAL (operands[5]))
12525 operands[6] = operands[2];
12526
12527 if (nparts == 3
12528 && GET_CODE (operands[7]) == CONST_INT
12529 && INTVAL (operands[7]) == INTVAL (operands[5]))
12530 operands[7] = operands[2];
12531 }
12532
12533 if (nparts == 3
12534 && GET_CODE (operands[6]) == CONST_INT
12535 && operands[6] != const0_rtx
12536 && REG_P (operands[3])
12537 && GET_CODE (operands[7]) == CONST_INT
12538 && INTVAL (operands[7]) == INTVAL (operands[6]))
12539 operands[7] = operands[3];
12540 }
12541
12542 emit_move_insn (operands[2], operands[5]);
12543 emit_move_insn (operands[3], operands[6]);
12544 if (nparts == 3)
12545 emit_move_insn (operands[4], operands[7]);
12546
12547 return;
12548}
12549
12550/* Helper function of ix86_split_ashl used to generate an SImode/DImode
12551 left shift by a constant, either using a single shift or
12552 a sequence of add instructions. */
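/* For instance, a left shift by 2 is emitted as two self-adds (each add
   doubles the operand) whenever 2 * ix86_cost->add does not exceed
   ix86_cost->shift_const and we are not optimizing for size.  */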
12553
12554static void
12555ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12556{
12557 if (count == 1)
12558 {
12559 emit_insn ((mode == DImode
12560 ? gen_addsi3
12561 : gen_adddi3) (operand, operand, operand));
12562 }
12563 else if (!optimize_size
12564 && count * ix86_cost->add <= ix86_cost->shift_const)
12565 {
12566 int i;
12567       for (i = 0; i < count; i++)
12568 {
12569 emit_insn ((mode == DImode
12570 ? gen_addsi3
12571 : gen_adddi3) (operand, operand, operand));
12572 }
12573 }
12574 else
12575 emit_insn ((mode == DImode
12576 ? gen_ashlsi3
12577 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12578}
12579
12580void
12581ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12582{
12583 rtx low[2], high[2];
12584 int count;
12585 const int single_width = mode == DImode ? 32 : 64;
12586
12587 if (GET_CODE (operands[2]) == CONST_INT)
12588 {
12589 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12590 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12591
12592 if (count >= single_width)
12593 {
12594 emit_move_insn (high[0], low[1]);
12595 emit_move_insn (low[0], const0_rtx);
12596
12597 if (count > single_width)
12598 ix86_expand_ashl_const (high[0], count - single_width, mode);
12599 }
12600 else
12601 {
12602 if (!rtx_equal_p (operands[0], operands[1]))
12603 emit_move_insn (operands[0], operands[1]);
12604 emit_insn ((mode == DImode
12605 ? gen_x86_shld_1
12606 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12607 ix86_expand_ashl_const (low[0], count, mode);
12608 }
12609 return;
12610 }
12611
12612 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12613
12614 if (operands[1] == const1_rtx)
12615 {
12616	  /* Assuming we've chosen QImode-capable registers, 1 << N
12617	     can be done with two 32/64-bit shifts, no branches, no cmoves.  */
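	  /* For example, with DImode and a runtime count of 40, bit 5 of the
	     count is set, so the sequence puts 1 in the high half and 0 in
	     the low half; the final shifts by 40 (masked to 8 by the
	     hardware for 32-bit shifts) then leave only bit 40 set.  */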
12618 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12619 {
12620 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12621
12622 ix86_expand_clear (low[0]);
12623 ix86_expand_clear (high[0]);
12624 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12625
12626 d = gen_lowpart (QImode, low[0]);
12627 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12628 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12629 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12630
12631 d = gen_lowpart (QImode, high[0]);
12632 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12633 s = gen_rtx_NE (QImode, flags, const0_rtx);
12634 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12635 }
12636
12637 /* Otherwise, we can get the same results by manually performing
12638 a bit extract operation on bit 5/6, and then performing the two
12639 shifts. The two methods of getting 0/1 into low/high are exactly
12640 the same size. Avoiding the shift in the bit extract case helps
12641 pentium4 a bit; no one else seems to care much either way. */
12642 else
12643 {
12644 rtx x;
12645
12646 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12647 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12648 else
12649 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12650 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12651
12652 emit_insn ((mode == DImode
12653 ? gen_lshrsi3
12654 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12655 emit_insn ((mode == DImode
12656 ? gen_andsi3
12657 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12658 emit_move_insn (low[0], high[0]);
12659 emit_insn ((mode == DImode
12660 ? gen_xorsi3
12661 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12662 }
12663
12664 emit_insn ((mode == DImode
12665 ? gen_ashlsi3
12666 : gen_ashldi3) (low[0], low[0], operands[2]));
12667 emit_insn ((mode == DImode
12668 ? gen_ashlsi3
12669 : gen_ashldi3) (high[0], high[0], operands[2]));
12670 return;
12671 }
12672
12673 if (operands[1] == constm1_rtx)
12674 {
12675 /* For -1 << N, we can avoid the shld instruction, because we
12676 know that we're shifting 0...31/63 ones into a -1. */
12677 emit_move_insn (low[0], constm1_rtx);
12678 if (optimize_size)
12679 emit_move_insn (high[0], low[0]);
12680 else
12681 emit_move_insn (high[0], constm1_rtx);
12682 }
12683 else
12684 {
12685 if (!rtx_equal_p (operands[0], operands[1]))
12686 emit_move_insn (operands[0], operands[1]);
12687
12688 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12689 emit_insn ((mode == DImode
12690 ? gen_x86_shld_1
12691 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12692 }
12693
12694 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12695
12696 if (TARGET_CMOVE && scratch)
12697 {
12698 ix86_expand_clear (scratch);
12699 emit_insn ((mode == DImode
12700 ? gen_x86_shift_adj_1
12701 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12702 }
12703 else
12704 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12705}
12706
12707void
12708ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12709{
12710 rtx low[2], high[2];
12711 int count;
12712 const int single_width = mode == DImode ? 32 : 64;
12713
12714 if (GET_CODE (operands[2]) == CONST_INT)
12715 {
12716 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12717 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12718
12719 if (count == single_width * 2 - 1)
12720 {
12721 emit_move_insn (high[0], high[1]);
12722 emit_insn ((mode == DImode
12723 ? gen_ashrsi3
12724 : gen_ashrdi3) (high[0], high[0],
12725 GEN_INT (single_width - 1)));
12726 emit_move_insn (low[0], high[0]);
12727
12728 }
12729 else if (count >= single_width)
12730 {
12731 emit_move_insn (low[0], high[1]);
12732 emit_move_insn (high[0], low[0]);
12733 emit_insn ((mode == DImode
12734 ? gen_ashrsi3
12735 : gen_ashrdi3) (high[0], high[0],
12736 GEN_INT (single_width - 1)));
12737 if (count > single_width)
12738 emit_insn ((mode == DImode
12739 ? gen_ashrsi3
12740 : gen_ashrdi3) (low[0], low[0],
12741 GEN_INT (count - single_width)));
12742 }
12743 else
12744 {
12745 if (!rtx_equal_p (operands[0], operands[1]))
12746 emit_move_insn (operands[0], operands[1]);
12747 emit_insn ((mode == DImode
12748 ? gen_x86_shrd_1
12749 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12750 emit_insn ((mode == DImode
12751 ? gen_ashrsi3
12752 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12753 }
12754 }
12755 else
12756 {
12757 if (!rtx_equal_p (operands[0], operands[1]))
12758 emit_move_insn (operands[0], operands[1]);
12759
12760 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12761
12762 emit_insn ((mode == DImode
12763 ? gen_x86_shrd_1
12764 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12765 emit_insn ((mode == DImode
12766 ? gen_ashrsi3
12767 : gen_ashrdi3) (high[0], high[0], operands[2]));
12768
12769 if (TARGET_CMOVE && scratch)
12770 {
12771 emit_move_insn (scratch, high[0]);
12772 emit_insn ((mode == DImode
12773 ? gen_ashrsi3
12774 : gen_ashrdi3) (scratch, scratch,
12775 GEN_INT (single_width - 1)));
12776 emit_insn ((mode == DImode
12777 ? gen_x86_shift_adj_1
12778 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12779 scratch));
12780 }
12781 else
12782 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12783 }
12784}
12785
12786void
12787ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12788{
12789 rtx low[2], high[2];
12790 int count;
12791 const int single_width = mode == DImode ? 32 : 64;
12792
12793 if (GET_CODE (operands[2]) == CONST_INT)
12794 {
12795 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12796 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12797
12798 if (count >= single_width)
12799 {
12800 emit_move_insn (low[0], high[1]);
12801 ix86_expand_clear (high[0]);
12802
12803 if (count > single_width)
12804 emit_insn ((mode == DImode
12805 ? gen_lshrsi3
12806 : gen_lshrdi3) (low[0], low[0],
12807 GEN_INT (count - single_width)));
12808 }
12809 else
12810 {
12811 if (!rtx_equal_p (operands[0], operands[1]))
12812 emit_move_insn (operands[0], operands[1]);
12813 emit_insn ((mode == DImode
12814 ? gen_x86_shrd_1
12815 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12816 emit_insn ((mode == DImode
12817 ? gen_lshrsi3
12818 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12819 }
12820 }
12821 else
12822 {
12823 if (!rtx_equal_p (operands[0], operands[1]))
12824 emit_move_insn (operands[0], operands[1]);
12825
12826 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12827
12828 emit_insn ((mode == DImode
12829 ? gen_x86_shrd_1
12830 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12831 emit_insn ((mode == DImode
12832 ? gen_lshrsi3
12833 : gen_lshrdi3) (high[0], high[0], operands[2]));
12834
12835 /* Heh. By reversing the arguments, we can reuse this pattern. */
12836 if (TARGET_CMOVE && scratch)
12837 {
12838 ix86_expand_clear (scratch);
12839 emit_insn ((mode == DImode
12840 ? gen_x86_shift_adj_1
12841 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12842 scratch));
12843 }
12844 else
12845 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12846 }
12847}
12848
12849 /* Helper function for the string operations below.  Test whether VARIABLE
12850    is aligned with respect to VALUE and jump to the returned label if so.  */
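/* A typical use in the expanders below: label = ix86_expand_aligntest
   (destreg, 1); emit the single byte copy or store needed for an odd
   address; emit_label (label).  */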
12851static rtx
12852ix86_expand_aligntest (rtx variable, int value)
12853{
12854 rtx label = gen_label_rtx ();
12855 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12856 if (GET_MODE (variable) == DImode)
12857 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12858 else
12859 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12860 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12861 1, label);
12862 return label;
12863}
12864
12865/* Adjust COUNTER by the VALUE. */
12866static void
12867ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12868{
12869 if (GET_MODE (countreg) == DImode)
12870 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12871 else
12872 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12873}
12874
12875/* Zero extend possibly SImode EXP to Pmode register. */
12876rtx
12877ix86_zero_extend_to_Pmode (rtx exp)
12878{
12879 rtx r;
12880 if (GET_MODE (exp) == VOIDmode)
12881 return force_reg (Pmode, exp);
12882 if (GET_MODE (exp) == Pmode)
12883 return copy_to_mode_reg (Pmode, exp);
12884 r = gen_reg_rtx (Pmode);
12885 emit_insn (gen_zero_extendsidi2 (r, exp));
12886 return r;
12887}
12888
12889/* Expand string move (memcpy) operation. Use i386 string operations when
12890 profitable. expand_clrmem contains similar code. */
12891int
12892ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12893{
12894 rtx srcreg, destreg, countreg, srcexp, destexp;
12895 enum machine_mode counter_mode;
12896 HOST_WIDE_INT align = 0;
12897 unsigned HOST_WIDE_INT count = 0;
12898
12899 if (GET_CODE (align_exp) == CONST_INT)
12900 align = INTVAL (align_exp);
12901
12902 /* Can't use any of this if the user has appropriated esi or edi. */
12903 if (global_regs[4] || global_regs[5])
12904 return 0;
12905
12906 /* This simple hack avoids all inlining code and simplifies code below. */
12907 if (!TARGET_ALIGN_STRINGOPS)
12908 align = 64;
12909
12910 if (GET_CODE (count_exp) == CONST_INT)
12911 {
12912 count = INTVAL (count_exp);
12913 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12914 return 0;
12915 }
12916
12917 /* Figure out proper mode for counter. For 32bits it is always SImode,
12918 for 64bits use SImode when possible, otherwise DImode.
12919 Set count to number of bytes copied when known at compile time. */
12920 if (!TARGET_64BIT
12921 || GET_MODE (count_exp) == SImode
12922 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12923 counter_mode = SImode;
12924 else
12925 counter_mode = DImode;
12926
12927 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12928
12929 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12930 if (destreg != XEXP (dst, 0))
12931 dst = replace_equiv_address_nv (dst, destreg);
12932 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12933 if (srcreg != XEXP (src, 0))
12934 src = replace_equiv_address_nv (src, srcreg);
12935
12936   /* When optimizing for size, emit a simple rep ; movsb instruction for
12937      counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
12938      sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12939      The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12940      count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12941      but we don't know whether the upper 24 (resp. 56) bits of %ecx will be
12942      known to be zero or not.  The rep; movsb sequence causes higher
12943      register pressure though, so take that into account.  */
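  /* A worked example under -Os (counts chosen only for illustration): for
     count = 11 the unrolled cost is 11 / 4 + (11 & 3) = 5, which does not
     exceed 6, so the code falls through to the movsl-based copy below; for
     count = 30 the count exceeds 5 * 4 and rep movsb is used instead.  */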
12944
12945 if ((!optimize || optimize_size)
12946 && (count == 0
12947 || ((count & 0x03)
12948 && (!optimize_size
12949 || count > 5 * 4
12950 || (count & 3) + count / 4 > 6))))
12951 {
12952 emit_insn (gen_cld ());
12953 countreg = ix86_zero_extend_to_Pmode (count_exp);
12954 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12955 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12956 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12957 destexp, srcexp));
12958 }
12959
12960 /* For constant aligned (or small unaligned) copies use rep movsl
12961 followed by code copying the rest. For PentiumPro ensure 8 byte
12962 alignment to allow rep movsl acceleration. */
12963
12964 else if (count != 0
12965 && (align >= 8
12966 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12967 || optimize_size || count < (unsigned int) 64))
12968 {
12969 unsigned HOST_WIDE_INT offset = 0;
12970 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12971 rtx srcmem, dstmem;
12972
12973 emit_insn (gen_cld ());
12974 if (count & ~(size - 1))
12975 {
12976 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12977 {
12978 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12979
12980 while (offset < (count & ~(size - 1)))
12981 {
12982 srcmem = adjust_automodify_address_nv (src, movs_mode,
12983 srcreg, offset);
12984 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12985 destreg, offset);
12986 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12987 offset += size;
12988 }
12989 }
12990 else
12991 {
12992 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12993 & (TARGET_64BIT ? -1 : 0x3fffffff));
12994 countreg = copy_to_mode_reg (counter_mode, countreg);
12995 countreg = ix86_zero_extend_to_Pmode (countreg);
12996
12997 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12998 GEN_INT (size == 4 ? 2 : 3));
12999 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
13000 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13001
13002 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
13003 countreg, destexp, srcexp));
13004 offset = count & ~(size - 1);
13005 }
13006 }
13007 if (size == 8 && (count & 0x04))
13008 {
13009 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
13010 offset);
13011 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
13012 offset);
13013 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13014 offset += 4;
13015 }
13016 if (count & 0x02)
13017 {
13018 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
13019 offset);
13020 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
13021 offset);
13022 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13023 offset += 2;
13024 }
13025 if (count & 0x01)
13026 {
13027 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
13028 offset);
13029 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
13030 offset);
13031 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13032 }
13033 }
13034 /* The generic code based on the glibc implementation:
13035 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
13036 allowing accelerated copying there)
13037 - copy the data using rep movsl
13038 - copy the rest. */
13039 else
13040 {
13041 rtx countreg2;
13042 rtx label = NULL;
13043 rtx srcmem, dstmem;
13044 int desired_alignment = (TARGET_PENTIUMPRO
13045 && (count == 0 || count >= (unsigned int) 260)
13046 ? 8 : UNITS_PER_WORD);
13047 /* Get rid of MEM_OFFSETs, they won't be accurate. */
13048 dst = change_address (dst, BLKmode, destreg);
13049 src = change_address (src, BLKmode, srcreg);
13050
13051      /* In case we don't know anything about the alignment, default to the
13052	 library version, since it is usually equally fast and results in
13053	 shorter code.
13054
13055	 Also emit a call when we know that the count is large and the call
13056	 overhead will not be important.  */
13057 if (!TARGET_INLINE_ALL_STRINGOPS
13058 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13059 return 0;
13060
13061 if (TARGET_SINGLE_STRINGOP)
13062 emit_insn (gen_cld ());
13063
13064 countreg2 = gen_reg_rtx (Pmode);
13065 countreg = copy_to_mode_reg (counter_mode, count_exp);
13066
13067      /* We don't use loops to align the destination or to copy parts smaller
13068	 than 4 bytes, because gcc is able to optimize such code better (when
13069	 the destination or the count really is aligned, gcc is often able to
13070	 predict the branches) and also it is friendlier to the hardware
13071	 branch prediction.
13072
13073	 Using loops is beneficial for the generic case, because we can
13074	 handle small counts using the loops.  Many CPUs (such as the Athlon)
13075	 have large REP prefix setup costs.
13076
13077	 This is quite costly.  Maybe we can revisit this decision later or
13078	 add some customizability to this code.  */
13079
13080 if (count == 0 && align < desired_alignment)
13081 {
13082 label = gen_label_rtx ();
13083 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13084 LEU, 0, counter_mode, 1, label);
13085 }
13086 if (align <= 1)
13087 {
13088 rtx label = ix86_expand_aligntest (destreg, 1);
13089 srcmem = change_address (src, QImode, srcreg);
13090 dstmem = change_address (dst, QImode, destreg);
13091 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13092 ix86_adjust_counter (countreg, 1);
13093 emit_label (label);
13094 LABEL_NUSES (label) = 1;
13095 }
13096 if (align <= 2)
13097 {
13098 rtx label = ix86_expand_aligntest (destreg, 2);
13099 srcmem = change_address (src, HImode, srcreg);
13100 dstmem = change_address (dst, HImode, destreg);
13101 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13102 ix86_adjust_counter (countreg, 2);
13103 emit_label (label);
13104 LABEL_NUSES (label) = 1;
13105 }
13106 if (align <= 4 && desired_alignment > 4)
13107 {
13108 rtx label = ix86_expand_aligntest (destreg, 4);
13109 srcmem = change_address (src, SImode, srcreg);
13110 dstmem = change_address (dst, SImode, destreg);
13111 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13112 ix86_adjust_counter (countreg, 4);
13113 emit_label (label);
13114 LABEL_NUSES (label) = 1;
13115 }
13116
13117 if (label && desired_alignment > 4 && !TARGET_64BIT)
13118 {
13119 emit_label (label);
13120 LABEL_NUSES (label) = 1;
13121 label = NULL_RTX;
13122 }
13123 if (!TARGET_SINGLE_STRINGOP)
13124 emit_insn (gen_cld ());
13125 if (TARGET_64BIT)
13126 {
13127 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13128 GEN_INT (3)));
13129 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13130 }
13131 else
13132 {
13133 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13134 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13135 }
13136 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
13137 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13138 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
13139 countreg2, destexp, srcexp));
13140
13141 if (label)
13142 {
13143 emit_label (label);
13144 LABEL_NUSES (label) = 1;
13145 }
13146 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13147 {
13148 srcmem = change_address (src, SImode, srcreg);
13149 dstmem = change_address (dst, SImode, destreg);
13150 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13151 }
13152 if ((align <= 4 || count == 0) && TARGET_64BIT)
13153 {
13154 rtx label = ix86_expand_aligntest (countreg, 4);
13155 srcmem = change_address (src, SImode, srcreg);
13156 dstmem = change_address (dst, SImode, destreg);
13157 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13158 emit_label (label);
13159 LABEL_NUSES (label) = 1;
13160 }
13161 if (align > 2 && count != 0 && (count & 2))
13162 {
13163 srcmem = change_address (src, HImode, srcreg);
13164 dstmem = change_address (dst, HImode, destreg);
13165 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13166 }
13167 if (align <= 2 || count == 0)
13168 {
13169 rtx label = ix86_expand_aligntest (countreg, 2);
13170 srcmem = change_address (src, HImode, srcreg);
13171 dstmem = change_address (dst, HImode, destreg);
13172 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13173 emit_label (label);
13174 LABEL_NUSES (label) = 1;
13175 }
13176 if (align > 1 && count != 0 && (count & 1))
13177 {
13178 srcmem = change_address (src, QImode, srcreg);
13179 dstmem = change_address (dst, QImode, destreg);
13180 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13181 }
13182 if (align <= 1 || count == 0)
13183 {
13184 rtx label = ix86_expand_aligntest (countreg, 1);
13185 srcmem = change_address (src, QImode, srcreg);
13186 dstmem = change_address (dst, QImode, destreg);
13187 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13188 emit_label (label);
13189 LABEL_NUSES (label) = 1;
13190 }
13191 }
13192
13193 return 1;
13194}
13195
13196/* Expand string clear operation (bzero). Use i386 string operations when
13197 profitable. expand_movmem contains similar code. */
13198int
13199ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
13200{
13201 rtx destreg, zeroreg, countreg, destexp;
13202 enum machine_mode counter_mode;
13203 HOST_WIDE_INT align = 0;
13204 unsigned HOST_WIDE_INT count = 0;
13205
13206 if (GET_CODE (align_exp) == CONST_INT)
13207 align = INTVAL (align_exp);
13208
13209   /* Can't use any of this if the user has appropriated edi.  */
13210   if (global_regs[5])
13211 return 0;
13212
13213 /* This simple hack avoids all inlining code and simplifies code below. */
13214 if (!TARGET_ALIGN_STRINGOPS)
13215 align = 32;
13216
13217 if (GET_CODE (count_exp) == CONST_INT)
13218 {
13219 count = INTVAL (count_exp);
13220 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
13221 return 0;
13222 }
13223 /* Figure out proper mode for counter. For 32bits it is always SImode,
13224 for 64bits use SImode when possible, otherwise DImode.
13225 Set count to number of bytes copied when known at compile time. */
13226 if (!TARGET_64BIT
13227 || GET_MODE (count_exp) == SImode
13228 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
13229 counter_mode = SImode;
13230 else
13231 counter_mode = DImode;
13232
13233 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13234 if (destreg != XEXP (dst, 0))
13235 dst = replace_equiv_address_nv (dst, destreg);
13236
13237
13238 /* When optimizing for size emit simple rep ; movsb instruction for
13239 counts not divisible by 4. The movl $N, %ecx; rep; stosb
13240 sequence is 7 bytes long, so if optimizing for size and count is
13241 small enough that some stosl, stosw and stosb instructions without
13242 rep are shorter, fall back into the next if. */
13243
13244 if ((!optimize || optimize_size)
13245 && (count == 0
13246 || ((count & 0x03)
13247 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
13248 {
13249 emit_insn (gen_cld ());
13250
13251 countreg = ix86_zero_extend_to_Pmode (count_exp);
13252 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
13253 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
13254 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
13255 }
13256 else if (count != 0
13257 && (align >= 8
13258 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
13259 || optimize_size || count < (unsigned int) 64))
13260 {
13261 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
13262 unsigned HOST_WIDE_INT offset = 0;
13263
13264 emit_insn (gen_cld ());
13265
13266 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
13267 if (count & ~(size - 1))
13268 {
13269 unsigned HOST_WIDE_INT repcount;
13270 unsigned int max_nonrep;
13271
13272 repcount = count >> (size == 4 ? 2 : 3);
13273 if (!TARGET_64BIT)
13274 repcount &= 0x3fffffff;
13275
13276 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
13277 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
13278 bytes. In both cases the latter seems to be faster for small
13279 values of N. */
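	  /* E.g. clearing 24 bytes with size == 4 gives repcount = 6; when
	     that is within max_nonrep, six individual stores are emitted
	     below instead of the single rep stos form.  */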
13280 max_nonrep = size == 4 ? 7 : 4;
13281 if (!optimize_size)
13282 switch (ix86_tune)
13283 {
13284 case PROCESSOR_PENTIUM4:
13285 case PROCESSOR_NOCONA:
13286 max_nonrep = 3;
13287 break;
13288 default:
13289 break;
13290 }
13291
13292 if (repcount <= max_nonrep)
13293 while (repcount-- > 0)
13294 {
13295 rtx mem = adjust_automodify_address_nv (dst,
13296 GET_MODE (zeroreg),
13297 destreg, offset);
13298 emit_insn (gen_strset (destreg, mem, zeroreg));
13299 offset += size;
13300 }
13301 else
13302 {
13303 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
13304 countreg = ix86_zero_extend_to_Pmode (countreg);
13305 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13306 GEN_INT (size == 4 ? 2 : 3));
13307 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13308 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
13309 destexp));
13310 offset = count & ~(size - 1);
13311 }
13312 }
13313 if (size == 8 && (count & 0x04))
13314 {
13315 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
13316 offset);
13317 emit_insn (gen_strset (destreg, mem,
13318 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13319 offset += 4;
13320 }
13321 if (count & 0x02)
13322 {
13323 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
13324 offset);
13325 emit_insn (gen_strset (destreg, mem,
13326 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13327 offset += 2;
13328 }
13329 if (count & 0x01)
13330 {
13331 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
13332 offset);
13333 emit_insn (gen_strset (destreg, mem,
13334 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13335 }
13336 }
13337 else
13338 {
13339 rtx countreg2;
13340 rtx label = NULL;
13341 /* Compute desired alignment of the string operation. */
13342 int desired_alignment = (TARGET_PENTIUMPRO
13343 && (count == 0 || count >= (unsigned int) 260)
13344 ? 8 : UNITS_PER_WORD);
13345
13346      /* In case we don't know anything about the alignment, default to the
13347	 library version, since it is usually equally fast and results in
13348	 shorter code.
13349
13350	 Also emit a call when we know that the count is large and the call
13351	 overhead will not be important.  */
13352 if (!TARGET_INLINE_ALL_STRINGOPS
13353 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13354 return 0;
13355
13356 if (TARGET_SINGLE_STRINGOP)
13357 emit_insn (gen_cld ());
13358
13359 countreg2 = gen_reg_rtx (Pmode);
13360 countreg = copy_to_mode_reg (counter_mode, count_exp);
13361 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13362 /* Get rid of MEM_OFFSET, it won't be accurate. */
13363 dst = change_address (dst, BLKmode, destreg);
13364
13365 if (count == 0 && align < desired_alignment)
13366 {
13367 label = gen_label_rtx ();
13368 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13369 LEU, 0, counter_mode, 1, label);
13370 }
13371 if (align <= 1)
13372 {
13373 rtx label = ix86_expand_aligntest (destreg, 1);
13374 emit_insn (gen_strset (destreg, dst,
13375 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13376 ix86_adjust_counter (countreg, 1);
13377 emit_label (label);
13378 LABEL_NUSES (label) = 1;
13379 }
13380 if (align <= 2)
13381 {
13382 rtx label = ix86_expand_aligntest (destreg, 2);
13383 emit_insn (gen_strset (destreg, dst,
13384 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13385 ix86_adjust_counter (countreg, 2);
13386 emit_label (label);
13387 LABEL_NUSES (label) = 1;
13388 }
13389 if (align <= 4 && desired_alignment > 4)
13390 {
13391 rtx label = ix86_expand_aligntest (destreg, 4);
13392 emit_insn (gen_strset (destreg, dst,
13393 (TARGET_64BIT
13394 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13395 : zeroreg)));
13396 ix86_adjust_counter (countreg, 4);
13397 emit_label (label);
13398 LABEL_NUSES (label) = 1;
13399 }
13400
13401 if (label && desired_alignment > 4 && !TARGET_64BIT)
13402 {
13403 emit_label (label);
13404 LABEL_NUSES (label) = 1;
13405 label = NULL_RTX;
13406 }
13407
13408 if (!TARGET_SINGLE_STRINGOP)
13409 emit_insn (gen_cld ());
13410 if (TARGET_64BIT)
13411 {
13412 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13413 GEN_INT (3)));
13414 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13415 }
13416 else
13417 {
13418 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13419 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13420 }
13421 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13422 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13423
13424 if (label)
13425 {
13426 emit_label (label);
13427 LABEL_NUSES (label) = 1;
13428 }
13429
13430 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13431 emit_insn (gen_strset (destreg, dst,
13432 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13433 if (TARGET_64BIT && (align <= 4 || count == 0))
13434 {
13435 rtx label = ix86_expand_aligntest (countreg, 4);
13436 emit_insn (gen_strset (destreg, dst,
13437 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13438 emit_label (label);
13439 LABEL_NUSES (label) = 1;
13440 }
13441 if (align > 2 && count != 0 && (count & 2))
13442 emit_insn (gen_strset (destreg, dst,
13443 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13444 if (align <= 2 || count == 0)
13445 {
13446 rtx label = ix86_expand_aligntest (countreg, 2);
13447 emit_insn (gen_strset (destreg, dst,
13448 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13449 emit_label (label);
13450 LABEL_NUSES (label) = 1;
13451 }
13452 if (align > 1 && count != 0 && (count & 1))
13453 emit_insn (gen_strset (destreg, dst,
13454 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13455 if (align <= 1 || count == 0)
13456 {
13457 rtx label = ix86_expand_aligntest (countreg, 1);
13458 emit_insn (gen_strset (destreg, dst,
13459 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13460 emit_label (label);
13461 LABEL_NUSES (label) = 1;
13462 }
13463 }
13464 return 1;
13465}
13466
13467/* Expand strlen. */
13468int
13469ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13470{
13471 rtx addr, scratch1, scratch2, scratch3, scratch4;
13472
13473  /* The generic case of the strlen expander is long.  Avoid expanding it
13474     unless TARGET_INLINE_ALL_STRINGOPS.  */
13475
13476 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13477 && !TARGET_INLINE_ALL_STRINGOPS
13478 && !optimize_size
13479 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13480 return 0;
13481
13482 addr = force_reg (Pmode, XEXP (src, 0));
13483 scratch1 = gen_reg_rtx (Pmode);
13484
13485 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13486 && !optimize_size)
13487 {
13488      /* Well, it seems that some optimizers do not combine a call like
13489         foo(strlen(bar), strlen(bar));
13490         when the move and the subtraction are done here.  The length is
13491         calculated just once when these instructions are done inside
13492         output_strlen_unroll().  But since &bar[strlen(bar)] is often used,
13493         and this uses one fewer register for the lifetime of
13494         output_strlen_unroll(), it is better this way.  */
13495
13496 emit_move_insn (out, addr);
13497
13498 ix86_expand_strlensi_unroll_1 (out, src, align);
13499
13500 /* strlensi_unroll_1 returns the address of the zero at the end of
13501 the string, like memchr(), so compute the length by subtracting
13502 the start address. */
13503 if (TARGET_64BIT)
13504 emit_insn (gen_subdi3 (out, out, addr));
13505 else
13506 emit_insn (gen_subsi3 (out, out, addr));
13507 }
13508 else
13509 {
13510 rtx unspec;
13511 scratch2 = gen_reg_rtx (Pmode);
13512 scratch3 = gen_reg_rtx (Pmode);
13513 scratch4 = force_reg (Pmode, constm1_rtx);
13514
13515 emit_move_insn (scratch3, addr);
13516 eoschar = force_reg (QImode, eoschar);
13517
13518 emit_insn (gen_cld ());
13519 src = replace_equiv_address_nv (src, scratch3);
13520
13521 /* If .md starts supporting :P, this can be done in .md. */
13522 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13523 scratch4), UNSPEC_SCAS);
13524 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
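      /* The strlenqi_1 pattern scans with repnz scasb.  The count register
	 starts at -1 (SCRATCH4) and is decremented once for every byte
	 examined, including the terminating zero, so it ends up holding
	 -(length + 2); the length is therefore recovered below as
	 ~count - 1.  */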
13525 if (TARGET_64BIT)
13526 {
13527 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13528 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13529 }
13530 else
13531 {
13532 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13533 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13534 }
13535 }
13536 return 1;
13537}
13538
13539/* Expand the appropriate insns for doing strlen if not just doing
13540 repnz; scasb
13541
13542 out = result, initialized with the start address
13543 align_rtx = alignment of the address.
13544   scratch = scratch register, initialized with the start address when
13545 not aligned, otherwise undefined
13546
13547 This is just the body. It needs the initializations mentioned above and
13548 some address computing at the end. These things are done in i386.md. */
13549
13550static void
13551ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13552{
13553 int align;
13554 rtx tmp;
13555 rtx align_2_label = NULL_RTX;
13556 rtx align_3_label = NULL_RTX;
13557 rtx align_4_label = gen_label_rtx ();
13558 rtx end_0_label = gen_label_rtx ();
13559 rtx mem;
13560 rtx tmpreg = gen_reg_rtx (SImode);
13561 rtx scratch = gen_reg_rtx (SImode);
13562 rtx cmp;
13563
13564 align = 0;
13565 if (GET_CODE (align_rtx) == CONST_INT)
13566 align = INTVAL (align_rtx);
13567
13568 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13569
13570 /* Is there a known alignment and is it less than 4? */
13571 if (align < 4)
13572 {
13573 rtx scratch1 = gen_reg_rtx (Pmode);
13574 emit_move_insn (scratch1, out);
13575 /* Is there a known alignment and is it not 2? */
13576 if (align != 2)
13577 {
13578 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13579 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13580
13581 /* Leave just the 3 lower bits. */
13582 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13583 NULL_RTX, 0, OPTAB_WIDEN);
13584
13585 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13586 Pmode, 1, align_4_label);
13587 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13588 Pmode, 1, align_2_label);
13589 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13590 Pmode, 1, align_3_label);
13591 }
13592 else
13593 {
13594	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
13595	     check whether it is aligned to a 4-byte boundary.  */
13596
13597 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13598 NULL_RTX, 0, OPTAB_WIDEN);
13599
13600 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13601 Pmode, 1, align_4_label);
13602 }
13603
13604 mem = change_address (src, QImode, out);
13605
13606 /* Now compare the bytes. */
13607
13608      /* Compare the first 1..3 unaligned bytes on a byte-by-byte basis.  */
13609 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13610 QImode, 1, end_0_label);
13611
13612 /* Increment the address. */
13613 if (TARGET_64BIT)
13614 emit_insn (gen_adddi3 (out, out, const1_rtx));
13615 else
13616 emit_insn (gen_addsi3 (out, out, const1_rtx));
13617
13618 /* Not needed with an alignment of 2 */
13619 if (align != 2)
13620 {
13621 emit_label (align_2_label);
13622
13623 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13624 end_0_label);
13625
13626 if (TARGET_64BIT)
13627 emit_insn (gen_adddi3 (out, out, const1_rtx));
13628 else
13629 emit_insn (gen_addsi3 (out, out, const1_rtx));
13630
13631 emit_label (align_3_label);
13632 }
13633
13634 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13635 end_0_label);
13636
13637 if (TARGET_64BIT)
13638 emit_insn (gen_adddi3 (out, out, const1_rtx));
13639 else
13640 emit_insn (gen_addsi3 (out, out, const1_rtx));
13641 }
13642
13643   /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
13644      align this loop: it only makes the program larger and does not make
13645      it any faster.  */
13646 emit_label (align_4_label);
13647
13648 mem = change_address (src, SImode, out);
13649 emit_move_insn (scratch, mem);
13650 if (TARGET_64BIT)
13651 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13652 else
13653 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13654
13655 /* This formula yields a nonzero result iff one of the bytes is zero.
13656     This saves three branches inside the loop and many cycles.  */
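  /* Concretely: subtracting 1 from a zero byte borrows into its high bit,
     while ~SCRATCH masks that bit off for bytes that already had the high
     bit set, so the result is nonzero exactly when the word contains a zero
     byte, and the least significant 0x80 marker always corresponds to the
     first zero byte.  E.g. 0x4100ff37 yields 0x00800000.  */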
13657
13658 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13659 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13660 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13661 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13662 gen_int_mode (0x80808080, SImode)));
13663 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13664 align_4_label);
13665
13666 if (TARGET_CMOVE)
13667 {
13668 rtx reg = gen_reg_rtx (SImode);
13669 rtx reg2 = gen_reg_rtx (Pmode);
13670 emit_move_insn (reg, tmpreg);
13671 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13672
13673 /* If zero is not in the first two bytes, move two bytes forward. */
13674 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13675 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13676 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13677 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13678 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13679 reg,
13680 tmpreg)));
13681 /* Emit lea manually to avoid clobbering of flags. */
13682 emit_insn (gen_rtx_SET (SImode, reg2,
13683 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13684
13685 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13686 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13687 emit_insn (gen_rtx_SET (VOIDmode, out,
13688 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13689 reg2,
13690 out)));
13691
13692 }
13693 else
13694 {
13695 rtx end_2_label = gen_label_rtx ();
13696 /* Is zero in the first two bytes? */
13697
13698 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13699 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13700 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13701 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13702 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13703 pc_rtx);
13704 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13705 JUMP_LABEL (tmp) = end_2_label;
13706
13707 /* Not in the first two. Move two bytes forward. */
13708 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13709 if (TARGET_64BIT)
13710 emit_insn (gen_adddi3 (out, out, const2_rtx));
13711 else
13712 emit_insn (gen_addsi3 (out, out, const2_rtx));
13713
13714 emit_label (end_2_label);
13715
13716 }
13717
13718 /* Avoid branch in fixing the byte. */
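  /* After the selection above OUT points two bytes past the pair that
     contains the terminating zero and the low byte of TMPREG holds the 0x80
     marker for the first byte of that pair.  Doubling that byte sets the
     carry flag exactly when the zero is the first byte of the pair, so
     OUT - 3 - carry points at the terminating zero.  */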
13719 tmpreg = gen_lowpart (QImode, tmpreg);
13720 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13721 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13722 if (TARGET_64BIT)
13723 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13724 else
13725 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13726
13727 emit_label (end_0_label);
13728}
13729
13730void
13731ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13732 rtx callarg2 ATTRIBUTE_UNUSED,
13733 rtx pop, int sibcall)
13734{
13735 rtx use = NULL, call;
13736
13737 if (pop == const0_rtx)
13738 pop = NULL;
13739 gcc_assert (!TARGET_64BIT || !pop);
13740
13741 if (TARGET_MACHO && !TARGET_64BIT)
13742 {
13743#if TARGET_MACHO
13744 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13745 fnaddr = machopic_indirect_call_target (fnaddr);
13746#endif
13747 }
13748 else
13749 {
13750 /* Static functions and indirect calls don't need the pic register. */
13751 if (! TARGET_64BIT && flag_pic
13752 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13753 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13754 use_reg (&use, pic_offset_table_rtx);
13755 }
13756
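  /* In the 64-bit ABI, calls to varargs (or unprototyped) functions pass in
     AL an upper bound on the number of SSE registers actually used for the
     arguments; CALLARG2 holds that count, and is negative when no such hint
     is needed.  */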
13757 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13758 {
13759 rtx al = gen_rtx_REG (QImode, 0);
13760 emit_move_insn (al, callarg2);
13761 use_reg (&use, al);
13762 }
13763
13764 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13765 {
13766 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13767 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13768 }
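  /* For a 64-bit sibcall through a register, force the target address into
     R11: it is call-clobbered, so a sibling call may use it freely, and it
     is not used for parameter passing, so loading it cannot clobber an
     outgoing argument register.  */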
13769 if (sibcall && TARGET_64BIT
13770 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13771 {
13772 rtx addr;
13773 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13774 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13775 emit_move_insn (fnaddr, addr);
13776 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13777 }
13778
13779 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13780 if (retval)
13781 call = gen_rtx_SET (VOIDmode, retval, call);
13782 if (pop)
13783 {
13784 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13785 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13786 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13787 }
13788
13789 call = emit_call_insn (call);
13790 if (use)
13791 CALL_INSN_FUNCTION_USAGE (call) = use;
13792}
13793
13794
13795/* Clear stack slot assignments remembered from previous functions.
13796 This is called from INIT_EXPANDERS once before RTL is emitted for each
13797 function. */
13798
13799static struct machine_function *
13800ix86_init_machine_status (void)
13801{
13802 struct machine_function *f;
13803
13804 f = ggc_alloc_cleared (sizeof (struct machine_function));
13805 f->use_fast_prologue_epilogue_nregs = -1;
13806 f->tls_descriptor_call_expanded_p = 0;
13807
13808 return f;
13809}
13810
13811/* Return a MEM corresponding to a stack slot with mode MODE.
13812 Allocate a new slot if necessary.
13813
13814 The RTL for a function can have several slots available: N is
13815 which slot to use. */
13816
13817rtx
13818assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13819{
13820 struct stack_local_entry *s;
13821
13822 gcc_assert (n < MAX_386_STACK_LOCALS);
13823
13824 /* Virtual slot is valid only before vregs are instantiated. */
13825 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
13826
13827 for (s = ix86_stack_locals; s; s = s->next)
13828 if (s->mode == mode && s->n == n)
13829 return s->rtl;
13830
13831 s = (struct stack_local_entry *)
13832 ggc_alloc (sizeof (struct stack_local_entry));
13833 s->n = n;
13834 s->mode = mode;
13835 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13836
13837 s->next = ix86_stack_locals;
13838 ix86_stack_locals = s;
13839 return s->rtl;
13840}
13841
13842/* Construct the SYMBOL_REF for the tls_get_addr function. */
13843
13844static GTY(()) rtx ix86_tls_symbol;
13845rtx
13846ix86_tls_get_addr (void)
13847{
13848
13849 if (!ix86_tls_symbol)
13850 {
13851 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13852 (TARGET_ANY_GNU_TLS
13853 && !TARGET_64BIT)
13854 ? "___tls_get_addr"
13855 : "__tls_get_addr");
13856 }
13857
13858 return ix86_tls_symbol;
13859}
13860
13861/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13862
13863static GTY(()) rtx ix86_tls_module_base_symbol;
13864rtx
13865ix86_tls_module_base (void)
13866{
13867
13868 if (!ix86_tls_module_base_symbol)
13869 {
13870 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13871 "_TLS_MODULE_BASE_");
13872 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13873 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13874 }
13875
13876 return ix86_tls_module_base_symbol;
13877}
13878
13879/* Calculate the length of the memory address in the instruction
13880 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13881
13882int
13883memory_address_length (rtx addr)
13884{
13885 struct ix86_address parts;
13886 rtx base, index, disp;
13887 int len;
13888 int ok;
13889
13890 if (GET_CODE (addr) == PRE_DEC
13891 || GET_CODE (addr) == POST_INC
13892 || GET_CODE (addr) == PRE_MODIFY
13893 || GET_CODE (addr) == POST_MODIFY)
13894 return 0;
13895
13896 ok = ix86_decompose_address (addr, &parts);
13897 gcc_assert (ok);
13898
13899 if (parts.base && GET_CODE (parts.base) == SUBREG)
13900 parts.base = SUBREG_REG (parts.base);
13901 if (parts.index && GET_CODE (parts.index) == SUBREG)
13902 parts.index = SUBREG_REG (parts.index);
13903
13904 base = parts.base;
13905 index = parts.index;
13906 disp = parts.disp;
13907 len = 0;
13908
13909 /* Rule of thumb:
13910 - esp as the base always wants an index,
13911 - ebp as the base always wants a displacement. */
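  /* In the ModRM byte a base of %esp (encoding 100) means that a SIB byte
     follows, and mod 00 with a base of %ebp (encoding 101) means "disp32,
     no base", so plain (%ebp) must be encoded with an explicit zero
     displacement.  */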
13912
13913 /* Register Indirect. */
13914 if (base && !index && !disp)
13915 {
13916 /* esp (for its index) and ebp (for its displacement) need
13917 the two-byte modrm form. */
13918 if (addr == stack_pointer_rtx
13919 || addr == arg_pointer_rtx
13920 || addr == frame_pointer_rtx
13921 || addr == hard_frame_pointer_rtx)
13922 len = 1;
13923 }
13924
13925 /* Direct Addressing. */
13926 else if (disp && !base && !index)
13927 len = 4;
13928
13929 else
13930 {
13931 /* Find the length of the displacement constant. */
13932 if (disp)
13933 {
13934 if (base && satisfies_constraint_K (disp))
13935 len = 1;
13936 else
13937 len = 4;
13938 }
13939 /* ebp always wants a displacement. */
13940 else if (base == hard_frame_pointer_rtx)
13941 len = 1;
13942
13943 /* An index requires the two-byte modrm form.... */
13944 if (index
13945 /* ...like esp, which always wants an index. */
13946 || base == stack_pointer_rtx
13947 || base == arg_pointer_rtx
13948 || base == frame_pointer_rtx)
13949 len += 1;
13950 }
13951
13952 return len;
13953}
13954
13955/* Compute the default value for the "length_immediate" attribute.  When
13956   SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */
13957int
13958ix86_attr_length_immediate_default (rtx insn, int shortform)
13959{
13960 int len = 0;
13961 int i;
13962 extract_insn_cached (insn);
13963 for (i = recog_data.n_operands - 1; i >= 0; --i)
13964 if (CONSTANT_P (recog_data.operand[i]))
13965 {
13966 gcc_assert (!len);
13967 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13968 len = 1;
13969 else
13970 {
13971 switch (get_attr_mode (insn))
13972 {
13973 case MODE_QI:
13974 len+=1;
13975 break;
13976 case MODE_HI:
13977 len+=2;
13978 break;
13979 case MODE_SI:
13980 len+=4;
13981 break;
13982	      /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
13983 case MODE_DI:
13984 len+=4;
13985 break;
13986 default:
13987 fatal_insn ("unknown insn mode", insn);
13988 }
13989 }
13990 }
13991 return len;
13992}
13993/* Compute default value for "length_address" attribute. */
13994int
13995ix86_attr_length_address_default (rtx insn)
13996{
13997 int i;
13998
13999 if (get_attr_type (insn) == TYPE_LEA)
14000 {
14001 rtx set = PATTERN (insn);
14002
14003 if (GET_CODE (set) == PARALLEL)
14004 set = XVECEXP (set, 0, 0);
14005
14006 gcc_assert (GET_CODE (set) == SET);
14007
14008 return memory_address_length (SET_SRC (set));
14009 }
14010
14011 extract_insn_cached (insn);
14012 for (i = recog_data.n_operands - 1; i >= 0; --i)
14013 if (GET_CODE (recog_data.operand[i]) == MEM)
14014 {
14015 return memory_address_length (XEXP (recog_data.operand[i], 0));
14016 break;
14017 }
14018 return 0;
14019}
14020
14021/* Return the maximum number of instructions a cpu can issue. */
14022
14023static int
14024ix86_issue_rate (void)
14025{
14026 switch (ix86_tune)
14027 {
14028 case PROCESSOR_PENTIUM:
14029 case PROCESSOR_K6:
14030 return 2;
14031
14032 case PROCESSOR_PENTIUMPRO:
14033 case PROCESSOR_PENTIUM4:
14034 case PROCESSOR_ATHLON:
14035 case PROCESSOR_K8:
14036 case PROCESSOR_AMDFAM10:
14037 case PROCESSOR_NOCONA:
14038 case PROCESSOR_GENERIC32:
14039 case PROCESSOR_GENERIC64:
14040 return 3;
14041
14042 case PROCESSOR_CORE2:
14043 return 4;
14044
14045 default:
14046 return 1;
14047 }
14048}
14049
14050/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
14051 by DEP_INSN and nothing set by DEP_INSN. */
14052
14053static int
14054ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14055{
14056 rtx set, set2;
14057
14058 /* Simplify the test for uninteresting insns. */
14059 if (insn_type != TYPE_SETCC
14060 && insn_type != TYPE_ICMOV
14061 && insn_type != TYPE_FCMOV
14062 && insn_type != TYPE_IBR)
14063 return 0;
14064
14065 if ((set = single_set (dep_insn)) != 0)
14066 {
14067 set = SET_DEST (set);
14068 set2 = NULL_RTX;
14069 }
14070 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
14071 && XVECLEN (PATTERN (dep_insn), 0) == 2
14072 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
14073 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
14074 {
14075 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
14076      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
14077 }
14078 else
14079 return 0;
14080
14081 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
14082 return 0;
14083
14084 /* This test is true if the dependent insn reads the flags but
14085 not any other potentially set register. */
14086 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
14087 return 0;
14088
14089 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
14090 return 0;
14091
14092 return 1;
14093}
14094
14095/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
14096 address with operands set by DEP_INSN. */
14097
14098static int
14099ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14100{
14101 rtx addr;
14102
14103 if (insn_type == TYPE_LEA
14104 && TARGET_PENTIUM)
14105 {
14106 addr = PATTERN (insn);
14107
14108 if (GET_CODE (addr) == PARALLEL)
14109 addr = XVECEXP (addr, 0, 0);
14110
14111 gcc_assert (GET_CODE (addr) == SET);
14112
14113 addr = SET_SRC (addr);
14114 }
14115 else
14116 {
14117 int i;
14118 extract_insn_cached (insn);
14119 for (i = recog_data.n_operands - 1; i >= 0; --i)
14120 if (GET_CODE (recog_data.operand[i]) == MEM)
14121 {
14122 addr = XEXP (recog_data.operand[i], 0);
14123 goto found;
14124 }
14125 return 0;
14126 found:;
14127 }
14128
14129 return modified_in_p (addr, dep_insn);
14130}
14131
14132static int
14133ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
14134{
14135 enum attr_type insn_type, dep_insn_type;
14136 enum attr_memory memory;
14137 rtx set, set2;
14138 int dep_insn_code_number;
14139
14140 /* Anti and output dependencies have zero cost on all CPUs. */
14141 if (REG_NOTE_KIND (link) != 0)
14142 return 0;
14143
14144 dep_insn_code_number = recog_memoized (dep_insn);
14145
14146 /* If we can't recognize the insns, we can't really do anything. */
14147 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
14148 return cost;
14149
14150 insn_type = get_attr_type (insn);
14151 dep_insn_type = get_attr_type (dep_insn);
14152
14153 switch (ix86_tune)
14154 {
14155 case PROCESSOR_PENTIUM:
14156 /* Address Generation Interlock adds a cycle of latency. */
14157 if (ix86_agi_dependent (insn, dep_insn, insn_type))
14158 cost += 1;
14159
14160 /* ??? Compares pair with jump/setcc. */
14161 if (ix86_flags_dependent (insn, dep_insn, insn_type))
14162 cost = 0;
14163
14164 /* Floating point stores require value to be ready one cycle earlier. */
14165 if (insn_type == TYPE_FMOV
14166 && get_attr_memory (insn) == MEMORY_STORE
14167 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14168 cost += 1;
14169 break;
14170
14171 case PROCESSOR_PENTIUMPRO:
14172 memory = get_attr_memory (insn);
14173
14174 /* INT->FP conversion is expensive. */
14175 if (get_attr_fp_int_src (dep_insn))
14176 cost += 5;
14177
14178 /* There is one cycle extra latency between an FP op and a store. */
14179 if (insn_type == TYPE_FMOV
14180 && (set = single_set (dep_insn)) != NULL_RTX
14181 && (set2 = single_set (insn)) != NULL_RTX
14182 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
14183 && GET_CODE (SET_DEST (set2)) == MEM)
14184 cost += 1;
14185
14186      /* Show the ability of the reorder buffer to hide the latency of a load
14187	 by executing it in parallel with the previous instruction, in case the
14188	 previous instruction is not needed to compute the address.  */
14189 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14190 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14191 {
14192	  /* Claim that moves take one cycle, as the core can issue one load
14193	     at a time and the next load can start a cycle later.  */
14194 if (dep_insn_type == TYPE_IMOV
14195 || dep_insn_type == TYPE_FMOV)
14196 cost = 1;
14197 else if (cost > 1)
14198 cost--;
14199 }
14200 break;
14201
14202 case PROCESSOR_K6:
14203 memory = get_attr_memory (insn);
14204
14205 /* The esp dependency is resolved before the instruction is really
14206 finished. */
14207 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
14208 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
14209 return 1;
14210
14211 /* INT->FP conversion is expensive. */
14212 if (get_attr_fp_int_src (dep_insn))
14213 cost += 5;
14214
14215      /* Show the ability of the reorder buffer to hide the latency of a load
14216	 by executing it in parallel with the previous instruction, in case the
14217	 previous instruction is not needed to compute the address.  */
14218 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14219 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14220 {
14221	  /* Claim that moves take one cycle, as the core can issue one load
14222	     at a time and the next load can start a cycle later.  */
14223 if (dep_insn_type == TYPE_IMOV
14224 || dep_insn_type == TYPE_FMOV)
14225 cost = 1;
14226 else if (cost > 2)
14227 cost -= 2;
14228 else
14229 cost = 1;
14230 }
14231 break;
14232
14233 case PROCESSOR_ATHLON:
14234 case PROCESSOR_K8:
14235 case PROCESSOR_AMDFAM10:
14236 case PROCESSOR_GENERIC32:
14237 case PROCESSOR_GENERIC64:
14238 memory = get_attr_memory (insn);
14239
14240      /* Show the ability of the reorder buffer to hide the latency of a load
14241	 by executing it in parallel with the previous instruction, in case the
14242	 previous instruction is not needed to compute the address.  */
14243 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14244 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14245 {
14246 enum attr_unit unit = get_attr_unit (insn);
14247 int loadcost = 3;
14248
14249 /* Because of the difference between the length of integer and
14250 floating unit pipeline preparation stages, the memory operands
14251 for floating point are cheaper.
14252
14253	     ??? For Athlon the difference is most probably 2.  */
14254 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
14255 loadcost = 3;
14256 else
14257 loadcost = TARGET_ATHLON ? 2 : 0;
14258
14259 if (cost >= loadcost)
14260 cost -= loadcost;
14261 else
14262 cost = 0;
14263 }
14264
14265 default:
14266 break;
14267 }
14268
14269 return cost;
14270}
14271
14272/* How many alternative schedules to try. This should be as wide as the
14273 scheduling freedom in the DFA, but no wider. Making this value too
14274   large results in extra work for the scheduler.  */
14275
14276static int
14277ia32_multipass_dfa_lookahead (void)
14278{
14279 if (ix86_tune == PROCESSOR_PENTIUM)
14280 return 2;
14281
14282 if (ix86_tune == PROCESSOR_PENTIUMPRO
14283 || ix86_tune == PROCESSOR_K6)
14284 return 1;
14285
14286 else
14287 return 0;
14288}
14289
14290
14291/* Compute the alignment given to a constant that is being placed in memory.
14292 EXP is the constant and ALIGN is the alignment that the object would
14293 ordinarily have.
14294 The value of this function is used instead of that alignment to align
14295 the object. */
14296
14297int
14298ix86_constant_alignment (tree exp, int align)
14299{
14300 if (TREE_CODE (exp) == REAL_CST)
14301 {
14302 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14303 return 64;
14304 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14305 return 128;
14306 }
14307 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14308 && !TARGET_NO_ALIGN_LONG_STRINGS
14309 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14310 return BITS_PER_WORD;
14311
14312 return align;
14313}
14314
14315/* Compute the alignment for a static variable.
14316 TYPE is the data type, and ALIGN is the alignment that
14317 the object would ordinarily have. The value of this function is used
14318 instead of that alignment to align the object. */
14319
14320int
14321ix86_data_alignment (tree type, int align)
14322{
14323 int max_align = optimize_size ? BITS_PER_WORD : 256;
14324
14325 if (AGGREGATE_TYPE_P (type)
14326 && TYPE_SIZE (type)
14327 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14328 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14329 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14330 && align < max_align)
14331 align = max_align;
14332
14333  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
14334     to a 16-byte boundary.  */
14335 if (TARGET_64BIT)
14336 {
14337 if (AGGREGATE_TYPE_P (type)
14338 && TYPE_SIZE (type)
14339 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14340 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14341 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14342 return 128;
14343 }
14344
14345 if (TREE_CODE (type) == ARRAY_TYPE)
14346 {
14347 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14348 return 64;
14349 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14350 return 128;
14351 }
14352 else if (TREE_CODE (type) == COMPLEX_TYPE)
14353 {
14354
14355 if (TYPE_MODE (type) == DCmode && align < 64)
14356 return 64;
14357 if (TYPE_MODE (type) == XCmode && align < 128)
14358 return 128;
14359 }
14360 else if ((TREE_CODE (type) == RECORD_TYPE
14361 || TREE_CODE (type) == UNION_TYPE
14362 || TREE_CODE (type) == QUAL_UNION_TYPE)
14363 && TYPE_FIELDS (type))
14364 {
14365 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14366 return 64;
14367 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14368 return 128;
14369 }
14370 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14371 || TREE_CODE (type) == INTEGER_TYPE)
14372 {
14373 if (TYPE_MODE (type) == DFmode && align < 64)
14374 return 64;
14375 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14376 return 128;
14377 }
14378
14379 return align;
14380}
14381
14382/* Compute the alignment for a local variable.
14383 TYPE is the data type, and ALIGN is the alignment that
14384 the object would ordinarily have. The value of this macro is used
14385 instead of that alignment to align the object. */
14386
14387int
14388ix86_local_alignment (tree type, int align)
14389{
14390  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
14391     to a 16-byte boundary.  */
14392 if (TARGET_64BIT)
14393 {
14394 if (AGGREGATE_TYPE_P (type)
14395 && TYPE_SIZE (type)
14396 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14397 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14398 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14399 return 128;
14400 }
14401 if (TREE_CODE (type) == ARRAY_TYPE)
14402 {
14403 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14404 return 64;
14405 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14406 return 128;
14407 }
14408 else if (TREE_CODE (type) == COMPLEX_TYPE)
14409 {
14410 if (TYPE_MODE (type) == DCmode && align < 64)
14411 return 64;
14412 if (TYPE_MODE (type) == XCmode && align < 128)
14413 return 128;
14414 }
14415 else if ((TREE_CODE (type) == RECORD_TYPE
14416 || TREE_CODE (type) == UNION_TYPE
14417 || TREE_CODE (type) == QUAL_UNION_TYPE)
14418 && TYPE_FIELDS (type))
14419 {
14420 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14421 return 64;
14422 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14423 return 128;
14424 }
14425 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14426 || TREE_CODE (type) == INTEGER_TYPE)
14427 {
14428
14429 if (TYPE_MODE (type) == DFmode && align < 64)
14430 return 64;
14431 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14432 return 128;
14433 }
14434 return align;
14435}
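
/* Illustrative sketch, assuming the usual i386.h wiring (not shown here):
   the local-variable counterpart is reached through the LOCAL_ALIGNMENT
   target macro, roughly

     #define LOCAL_ALIGNMENT(TYPE, ALIGN) ix86_local_alignment ((TYPE), (ALIGN))

   so stack aggregates that pass the size tests above receive 128-bit
   alignment on x86-64, keeping SSE loads and stores to them aligned.  */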
14436
14437/* Emit RTL insns to initialize the variable parts of a trampoline.
14438 FNADDR is an RTX for the address of the function's pure code.
14439 CXT is an RTX for the static chain value for the function. */
14440void
14441x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14442{
14443 if (!TARGET_64BIT)
14444 {
14445 /* Compute offset from the end of the jmp to the target function. */
14446 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14447 plus_constant (tramp, 10),
14448 NULL_RTX, 1, OPTAB_DIRECT);
14449 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14450 gen_int_mode (0xb9, QImode));
14451 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14452 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14453 gen_int_mode (0xe9, QImode));
14454 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14455 }
14456 else
14457 {
14458 int offset = 0;
14459 /* Try to load address using shorter movl instead of movabs.
14460	 We may want to support movq for kernel mode, but the kernel does not use
14461 trampolines at the moment. */
14462 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14463 {
14464 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14465 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14466 gen_int_mode (0xbb41, HImode));
14467 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14468 gen_lowpart (SImode, fnaddr));
14469 offset += 6;
14470 }
14471 else
14472 {
14473 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14474 gen_int_mode (0xbb49, HImode));
14475 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14476 fnaddr);
14477 offset += 10;
14478 }
14479 /* Load static chain using movabs to r10. */
14480 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14481 gen_int_mode (0xba49, HImode));
14482 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14483 cxt);
14484 offset += 10;
14485      /* Jump to r11.  */
14486 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14487 gen_int_mode (0xff49, HImode));
14488 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14489 gen_int_mode (0xe3, QImode));
14490 offset += 3;
14491 gcc_assert (offset <= TRAMPOLINE_SIZE);
14492 }
14493
14494#ifdef ENABLE_EXECUTE_STACK
14495 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14496 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14497#endif
14498}
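
/* Illustrative note, decoding the bytes stored above (immediates are
   little-endian):

     !TARGET_64BIT (10 bytes):
       b9 <cxt>          movl   $cxt, %ecx        load static chain
       e9 <disp>         jmp    fnaddr            disp = fnaddr - (tramp + 10)

     TARGET_64BIT (19 or 23 bytes):
       41 bb <imm32>     movl   $fnaddr, %r11d    (or 49 bb <imm64>, movabs)
       49 ba <imm64>     movabs $cxt, %r10
       49 ff e3          jmpq   *%r11

   which is why DISP above is computed relative to plus_constant (tramp, 10),
   the address of the byte following the 32-bit jmp.  */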
14499
14500/* Codes for all the SSE/MMX builtins. */
14501enum ix86_builtins
14502{
14503 IX86_BUILTIN_ADDPS,
14504 IX86_BUILTIN_ADDSS,
14505 IX86_BUILTIN_DIVPS,
14506 IX86_BUILTIN_DIVSS,
14507 IX86_BUILTIN_MULPS,
14508 IX86_BUILTIN_MULSS,
14509 IX86_BUILTIN_SUBPS,
14510 IX86_BUILTIN_SUBSS,
14511
14512 IX86_BUILTIN_CMPEQPS,
14513 IX86_BUILTIN_CMPLTPS,
14514 IX86_BUILTIN_CMPLEPS,
14515 IX86_BUILTIN_CMPGTPS,
14516 IX86_BUILTIN_CMPGEPS,
14517 IX86_BUILTIN_CMPNEQPS,
14518 IX86_BUILTIN_CMPNLTPS,
14519 IX86_BUILTIN_CMPNLEPS,
14520 IX86_BUILTIN_CMPNGTPS,
14521 IX86_BUILTIN_CMPNGEPS,
14522 IX86_BUILTIN_CMPORDPS,
14523 IX86_BUILTIN_CMPUNORDPS,
14524 IX86_BUILTIN_CMPEQSS,
14525 IX86_BUILTIN_CMPLTSS,
14526 IX86_BUILTIN_CMPLESS,
14527 IX86_BUILTIN_CMPNEQSS,
14528 IX86_BUILTIN_CMPNLTSS,
14529 IX86_BUILTIN_CMPNLESS,
14530 IX86_BUILTIN_CMPNGTSS,
14531 IX86_BUILTIN_CMPNGESS,
14532 IX86_BUILTIN_CMPORDSS,
14533 IX86_BUILTIN_CMPUNORDSS,
14534
14535 IX86_BUILTIN_COMIEQSS,
14536 IX86_BUILTIN_COMILTSS,
14537 IX86_BUILTIN_COMILESS,
14538 IX86_BUILTIN_COMIGTSS,
14539 IX86_BUILTIN_COMIGESS,
14540 IX86_BUILTIN_COMINEQSS,
14541 IX86_BUILTIN_UCOMIEQSS,
14542 IX86_BUILTIN_UCOMILTSS,
14543 IX86_BUILTIN_UCOMILESS,
14544 IX86_BUILTIN_UCOMIGTSS,
14545 IX86_BUILTIN_UCOMIGESS,
14546 IX86_BUILTIN_UCOMINEQSS,
14547
14548 IX86_BUILTIN_CVTPI2PS,
14549 IX86_BUILTIN_CVTPS2PI,
14550 IX86_BUILTIN_CVTSI2SS,
14551 IX86_BUILTIN_CVTSI642SS,
14552 IX86_BUILTIN_CVTSS2SI,
14553 IX86_BUILTIN_CVTSS2SI64,
14554 IX86_BUILTIN_CVTTPS2PI,
14555 IX86_BUILTIN_CVTTSS2SI,
14556 IX86_BUILTIN_CVTTSS2SI64,
14557
14558 IX86_BUILTIN_MAXPS,
14559 IX86_BUILTIN_MAXSS,
14560 IX86_BUILTIN_MINPS,
14561 IX86_BUILTIN_MINSS,
14562
14563 IX86_BUILTIN_LOADUPS,
14564 IX86_BUILTIN_STOREUPS,
14565 IX86_BUILTIN_MOVSS,
14566
14567 IX86_BUILTIN_MOVHLPS,
14568 IX86_BUILTIN_MOVLHPS,
14569 IX86_BUILTIN_LOADHPS,
14570 IX86_BUILTIN_LOADLPS,
14571 IX86_BUILTIN_STOREHPS,
14572 IX86_BUILTIN_STORELPS,
14573
14574 IX86_BUILTIN_MASKMOVQ,
14575 IX86_BUILTIN_MOVMSKPS,
14576 IX86_BUILTIN_PMOVMSKB,
14577
14578 IX86_BUILTIN_MOVNTPS,
14579 IX86_BUILTIN_MOVNTQ,
14580
14581 IX86_BUILTIN_LOADDQU,
14582 IX86_BUILTIN_STOREDQU,
14583
14584 IX86_BUILTIN_PACKSSWB,
14585 IX86_BUILTIN_PACKSSDW,
14586 IX86_BUILTIN_PACKUSWB,
14587
14588 IX86_BUILTIN_PADDB,
14589 IX86_BUILTIN_PADDW,
14590 IX86_BUILTIN_PADDD,
14591 IX86_BUILTIN_PADDQ,
14592 IX86_BUILTIN_PADDSB,
14593 IX86_BUILTIN_PADDSW,
14594 IX86_BUILTIN_PADDUSB,
14595 IX86_BUILTIN_PADDUSW,
14596 IX86_BUILTIN_PSUBB,
14597 IX86_BUILTIN_PSUBW,
14598 IX86_BUILTIN_PSUBD,
14599 IX86_BUILTIN_PSUBQ,
14600 IX86_BUILTIN_PSUBSB,
14601 IX86_BUILTIN_PSUBSW,
14602 IX86_BUILTIN_PSUBUSB,
14603 IX86_BUILTIN_PSUBUSW,
14604
14605 IX86_BUILTIN_PAND,
14606 IX86_BUILTIN_PANDN,
14607 IX86_BUILTIN_POR,
14608 IX86_BUILTIN_PXOR,
14609
14610 IX86_BUILTIN_PAVGB,
14611 IX86_BUILTIN_PAVGW,
14612
14613 IX86_BUILTIN_PCMPEQB,
14614 IX86_BUILTIN_PCMPEQW,
14615 IX86_BUILTIN_PCMPEQD,
14616 IX86_BUILTIN_PCMPGTB,
14617 IX86_BUILTIN_PCMPGTW,
14618 IX86_BUILTIN_PCMPGTD,
14619
14620 IX86_BUILTIN_PMADDWD,
14621
14622 IX86_BUILTIN_PMAXSW,
14623 IX86_BUILTIN_PMAXUB,
14624 IX86_BUILTIN_PMINSW,
14625 IX86_BUILTIN_PMINUB,
14626
14627 IX86_BUILTIN_PMULHUW,
14628 IX86_BUILTIN_PMULHW,
14629 IX86_BUILTIN_PMULLW,
14630
14631 IX86_BUILTIN_PSADBW,
14632 IX86_BUILTIN_PSHUFW,
14633
14634 IX86_BUILTIN_PSLLW,
14635 IX86_BUILTIN_PSLLD,
14636 IX86_BUILTIN_PSLLQ,
14637 IX86_BUILTIN_PSRAW,
14638 IX86_BUILTIN_PSRAD,
14639 IX86_BUILTIN_PSRLW,
14640 IX86_BUILTIN_PSRLD,
14641 IX86_BUILTIN_PSRLQ,
14642 IX86_BUILTIN_PSLLWI,
14643 IX86_BUILTIN_PSLLDI,
14644 IX86_BUILTIN_PSLLQI,
14645 IX86_BUILTIN_PSRAWI,
14646 IX86_BUILTIN_PSRADI,
14647 IX86_BUILTIN_PSRLWI,
14648 IX86_BUILTIN_PSRLDI,
14649 IX86_BUILTIN_PSRLQI,
14650
14651 IX86_BUILTIN_PUNPCKHBW,
14652 IX86_BUILTIN_PUNPCKHWD,
14653 IX86_BUILTIN_PUNPCKHDQ,
14654 IX86_BUILTIN_PUNPCKLBW,
14655 IX86_BUILTIN_PUNPCKLWD,
14656 IX86_BUILTIN_PUNPCKLDQ,
14657
14658 IX86_BUILTIN_SHUFPS,
14659
14660 IX86_BUILTIN_RCPPS,
14661 IX86_BUILTIN_RCPSS,
14662 IX86_BUILTIN_RSQRTPS,
14663 IX86_BUILTIN_RSQRTSS,
14664 IX86_BUILTIN_SQRTPS,
14665 IX86_BUILTIN_SQRTSS,
14666
14667 IX86_BUILTIN_UNPCKHPS,
14668 IX86_BUILTIN_UNPCKLPS,
14669
14670 IX86_BUILTIN_ANDPS,
14671 IX86_BUILTIN_ANDNPS,
14672 IX86_BUILTIN_ORPS,
14673 IX86_BUILTIN_XORPS,
14674
14675 IX86_BUILTIN_EMMS,
14676 IX86_BUILTIN_LDMXCSR,
14677 IX86_BUILTIN_STMXCSR,
14678 IX86_BUILTIN_SFENCE,
14679
14680 /* 3DNow! Original */
14681 IX86_BUILTIN_FEMMS,
14682 IX86_BUILTIN_PAVGUSB,
14683 IX86_BUILTIN_PF2ID,
14684 IX86_BUILTIN_PFACC,
14685 IX86_BUILTIN_PFADD,
14686 IX86_BUILTIN_PFCMPEQ,
14687 IX86_BUILTIN_PFCMPGE,
14688 IX86_BUILTIN_PFCMPGT,
14689 IX86_BUILTIN_PFMAX,
14690 IX86_BUILTIN_PFMIN,
14691 IX86_BUILTIN_PFMUL,
14692 IX86_BUILTIN_PFRCP,
14693 IX86_BUILTIN_PFRCPIT1,
14694 IX86_BUILTIN_PFRCPIT2,
14695 IX86_BUILTIN_PFRSQIT1,
14696 IX86_BUILTIN_PFRSQRT,
14697 IX86_BUILTIN_PFSUB,
14698 IX86_BUILTIN_PFSUBR,
14699 IX86_BUILTIN_PI2FD,
14700 IX86_BUILTIN_PMULHRW,
14701
14702 /* 3DNow! Athlon Extensions */
14703 IX86_BUILTIN_PF2IW,
14704 IX86_BUILTIN_PFNACC,
14705 IX86_BUILTIN_PFPNACC,
14706 IX86_BUILTIN_PI2FW,
14707 IX86_BUILTIN_PSWAPDSI,
14708 IX86_BUILTIN_PSWAPDSF,
14709
14710 /* SSE2 */
14711 IX86_BUILTIN_ADDPD,
14712 IX86_BUILTIN_ADDSD,
14713 IX86_BUILTIN_DIVPD,
14714 IX86_BUILTIN_DIVSD,
14715 IX86_BUILTIN_MULPD,
14716 IX86_BUILTIN_MULSD,
14717 IX86_BUILTIN_SUBPD,
14718 IX86_BUILTIN_SUBSD,
14719
14720 IX86_BUILTIN_CMPEQPD,
14721 IX86_BUILTIN_CMPLTPD,
14722 IX86_BUILTIN_CMPLEPD,
14723 IX86_BUILTIN_CMPGTPD,
14724 IX86_BUILTIN_CMPGEPD,
14725 IX86_BUILTIN_CMPNEQPD,
14726 IX86_BUILTIN_CMPNLTPD,
14727 IX86_BUILTIN_CMPNLEPD,
14728 IX86_BUILTIN_CMPNGTPD,
14729 IX86_BUILTIN_CMPNGEPD,
14730 IX86_BUILTIN_CMPORDPD,
14731 IX86_BUILTIN_CMPUNORDPD,
14732 IX86_BUILTIN_CMPNEPD,
14733 IX86_BUILTIN_CMPEQSD,
14734 IX86_BUILTIN_CMPLTSD,
14735 IX86_BUILTIN_CMPLESD,
14736 IX86_BUILTIN_CMPNEQSD,
14737 IX86_BUILTIN_CMPNLTSD,
14738 IX86_BUILTIN_CMPNLESD,
14739 IX86_BUILTIN_CMPORDSD,
14740 IX86_BUILTIN_CMPUNORDSD,
14741 IX86_BUILTIN_CMPNESD,
14742
14743 IX86_BUILTIN_COMIEQSD,
14744 IX86_BUILTIN_COMILTSD,
14745 IX86_BUILTIN_COMILESD,
14746 IX86_BUILTIN_COMIGTSD,
14747 IX86_BUILTIN_COMIGESD,
14748 IX86_BUILTIN_COMINEQSD,
14749 IX86_BUILTIN_UCOMIEQSD,
14750 IX86_BUILTIN_UCOMILTSD,
14751 IX86_BUILTIN_UCOMILESD,
14752 IX86_BUILTIN_UCOMIGTSD,
14753 IX86_BUILTIN_UCOMIGESD,
14754 IX86_BUILTIN_UCOMINEQSD,
14755
14756 IX86_BUILTIN_MAXPD,
14757 IX86_BUILTIN_MAXSD,
14758 IX86_BUILTIN_MINPD,
14759 IX86_BUILTIN_MINSD,
14760
14761 IX86_BUILTIN_ANDPD,
14762 IX86_BUILTIN_ANDNPD,
14763 IX86_BUILTIN_ORPD,
14764 IX86_BUILTIN_XORPD,
14765
14766 IX86_BUILTIN_SQRTPD,
14767 IX86_BUILTIN_SQRTSD,
14768
14769 IX86_BUILTIN_UNPCKHPD,
14770 IX86_BUILTIN_UNPCKLPD,
14771
14772 IX86_BUILTIN_SHUFPD,
14773
14774 IX86_BUILTIN_LOADUPD,
14775 IX86_BUILTIN_STOREUPD,
14776 IX86_BUILTIN_MOVSD,
14777
14778 IX86_BUILTIN_LOADHPD,
14779 IX86_BUILTIN_LOADLPD,
14780
14781 IX86_BUILTIN_CVTDQ2PD,
14782 IX86_BUILTIN_CVTDQ2PS,
14783
14784 IX86_BUILTIN_CVTPD2DQ,
14785 IX86_BUILTIN_CVTPD2PI,
14786 IX86_BUILTIN_CVTPD2PS,
14787 IX86_BUILTIN_CVTTPD2DQ,
14788 IX86_BUILTIN_CVTTPD2PI,
14789
14790 IX86_BUILTIN_CVTPI2PD,
14791 IX86_BUILTIN_CVTSI2SD,
14792 IX86_BUILTIN_CVTSI642SD,
14793
14794 IX86_BUILTIN_CVTSD2SI,
14795 IX86_BUILTIN_CVTSD2SI64,
14796 IX86_BUILTIN_CVTSD2SS,
14797 IX86_BUILTIN_CVTSS2SD,
14798 IX86_BUILTIN_CVTTSD2SI,
14799 IX86_BUILTIN_CVTTSD2SI64,
14800
14801 IX86_BUILTIN_CVTPS2DQ,
14802 IX86_BUILTIN_CVTPS2PD,
14803 IX86_BUILTIN_CVTTPS2DQ,
14804
14805 IX86_BUILTIN_MOVNTI,
14806 IX86_BUILTIN_MOVNTPD,
14807 IX86_BUILTIN_MOVNTDQ,
14808
14809 /* SSE2 MMX */
14810 IX86_BUILTIN_MASKMOVDQU,
14811 IX86_BUILTIN_MOVMSKPD,
14812 IX86_BUILTIN_PMOVMSKB128,
14813
14814 IX86_BUILTIN_PACKSSWB128,
14815 IX86_BUILTIN_PACKSSDW128,
14816 IX86_BUILTIN_PACKUSWB128,
14817
14818 IX86_BUILTIN_PADDB128,
14819 IX86_BUILTIN_PADDW128,
14820 IX86_BUILTIN_PADDD128,
14821 IX86_BUILTIN_PADDQ128,
14822 IX86_BUILTIN_PADDSB128,
14823 IX86_BUILTIN_PADDSW128,
14824 IX86_BUILTIN_PADDUSB128,
14825 IX86_BUILTIN_PADDUSW128,
14826 IX86_BUILTIN_PSUBB128,
14827 IX86_BUILTIN_PSUBW128,
14828 IX86_BUILTIN_PSUBD128,
14829 IX86_BUILTIN_PSUBQ128,
14830 IX86_BUILTIN_PSUBSB128,
14831 IX86_BUILTIN_PSUBSW128,
14832 IX86_BUILTIN_PSUBUSB128,
14833 IX86_BUILTIN_PSUBUSW128,
14834
14835 IX86_BUILTIN_PAND128,
14836 IX86_BUILTIN_PANDN128,
14837 IX86_BUILTIN_POR128,
14838 IX86_BUILTIN_PXOR128,
14839
14840 IX86_BUILTIN_PAVGB128,
14841 IX86_BUILTIN_PAVGW128,
14842
14843 IX86_BUILTIN_PCMPEQB128,
14844 IX86_BUILTIN_PCMPEQW128,
14845 IX86_BUILTIN_PCMPEQD128,
14846 IX86_BUILTIN_PCMPGTB128,
14847 IX86_BUILTIN_PCMPGTW128,
14848 IX86_BUILTIN_PCMPGTD128,
14849
14850 IX86_BUILTIN_PMADDWD128,
14851
14852 IX86_BUILTIN_PMAXSW128,
14853 IX86_BUILTIN_PMAXUB128,
14854 IX86_BUILTIN_PMINSW128,
14855 IX86_BUILTIN_PMINUB128,
14856
14857 IX86_BUILTIN_PMULUDQ,
14858 IX86_BUILTIN_PMULUDQ128,
14859 IX86_BUILTIN_PMULHUW128,
14860 IX86_BUILTIN_PMULHW128,
14861 IX86_BUILTIN_PMULLW128,
14862
14863 IX86_BUILTIN_PSADBW128,
14864 IX86_BUILTIN_PSHUFHW,
14865 IX86_BUILTIN_PSHUFLW,
14866 IX86_BUILTIN_PSHUFD,
14867
14868 IX86_BUILTIN_PSLLW128,
14869 IX86_BUILTIN_PSLLD128,
14870 IX86_BUILTIN_PSLLQ128,
14871 IX86_BUILTIN_PSRAW128,
14872 IX86_BUILTIN_PSRAD128,
14873 IX86_BUILTIN_PSRLW128,
14874 IX86_BUILTIN_PSRLD128,
14875 IX86_BUILTIN_PSRLQ128,
14876 IX86_BUILTIN_PSLLDQI128,
14877 IX86_BUILTIN_PSLLWI128,
14878 IX86_BUILTIN_PSLLDI128,
14879 IX86_BUILTIN_PSLLQI128,
14880 IX86_BUILTIN_PSRAWI128,
14881 IX86_BUILTIN_PSRADI128,
14882 IX86_BUILTIN_PSRLDQI128,
14883 IX86_BUILTIN_PSRLWI128,
14884 IX86_BUILTIN_PSRLDI128,
14885 IX86_BUILTIN_PSRLQI128,
14886
14887 IX86_BUILTIN_PUNPCKHBW128,
14888 IX86_BUILTIN_PUNPCKHWD128,
14889 IX86_BUILTIN_PUNPCKHDQ128,
14890 IX86_BUILTIN_PUNPCKHQDQ128,
14891 IX86_BUILTIN_PUNPCKLBW128,
14892 IX86_BUILTIN_PUNPCKLWD128,
14893 IX86_BUILTIN_PUNPCKLDQ128,
14894 IX86_BUILTIN_PUNPCKLQDQ128,
14895
14896 IX86_BUILTIN_CLFLUSH,
14897 IX86_BUILTIN_MFENCE,
14898 IX86_BUILTIN_LFENCE,
14899
14900 /* Prescott New Instructions. */
14901 IX86_BUILTIN_ADDSUBPS,
14902 IX86_BUILTIN_HADDPS,
14903 IX86_BUILTIN_HSUBPS,
14904 IX86_BUILTIN_MOVSHDUP,
14905 IX86_BUILTIN_MOVSLDUP,
14906 IX86_BUILTIN_ADDSUBPD,
14907 IX86_BUILTIN_HADDPD,
14908 IX86_BUILTIN_HSUBPD,
14909 IX86_BUILTIN_LDDQU,
14910
14911 IX86_BUILTIN_MONITOR,
14912 IX86_BUILTIN_MWAIT,
14913
14914 /* SSSE3. */
14915 IX86_BUILTIN_PHADDW,
14916 IX86_BUILTIN_PHADDD,
14917 IX86_BUILTIN_PHADDSW,
14918 IX86_BUILTIN_PHSUBW,
14919 IX86_BUILTIN_PHSUBD,
14920 IX86_BUILTIN_PHSUBSW,
14921 IX86_BUILTIN_PMADDUBSW,
14922 IX86_BUILTIN_PMULHRSW,
14923 IX86_BUILTIN_PSHUFB,
14924 IX86_BUILTIN_PSIGNB,
14925 IX86_BUILTIN_PSIGNW,
14926 IX86_BUILTIN_PSIGND,
14927 IX86_BUILTIN_PALIGNR,
14928 IX86_BUILTIN_PABSB,
14929 IX86_BUILTIN_PABSW,
14930 IX86_BUILTIN_PABSD,
14931
14932 IX86_BUILTIN_PHADDW128,
14933 IX86_BUILTIN_PHADDD128,
14934 IX86_BUILTIN_PHADDSW128,
14935 IX86_BUILTIN_PHSUBW128,
14936 IX86_BUILTIN_PHSUBD128,
14937 IX86_BUILTIN_PHSUBSW128,
14938 IX86_BUILTIN_PMADDUBSW128,
14939 IX86_BUILTIN_PMULHRSW128,
14940 IX86_BUILTIN_PSHUFB128,
14941 IX86_BUILTIN_PSIGNB128,
14942 IX86_BUILTIN_PSIGNW128,
14943 IX86_BUILTIN_PSIGND128,
14944 IX86_BUILTIN_PALIGNR128,
14945 IX86_BUILTIN_PABSB128,
14946 IX86_BUILTIN_PABSW128,
14947 IX86_BUILTIN_PABSD128,
14948
14949 /* AMDFAM10 - SSE4A New Instructions. */
14950 IX86_BUILTIN_MOVNTSD,
14951 IX86_BUILTIN_MOVNTSS,
14952 IX86_BUILTIN_EXTRQI,
14953 IX86_BUILTIN_EXTRQ,
14954 IX86_BUILTIN_INSERTQI,
14955 IX86_BUILTIN_INSERTQ,
14956
14747 IX86_BUILTIN_VEC_INIT_V2SI,
14748 IX86_BUILTIN_VEC_INIT_V4HI,
14749 IX86_BUILTIN_VEC_INIT_V8QI,
14750 IX86_BUILTIN_VEC_EXT_V2DF,
14751 IX86_BUILTIN_VEC_EXT_V2DI,
14752 IX86_BUILTIN_VEC_EXT_V4SF,
14753 IX86_BUILTIN_VEC_EXT_V4SI,
14754 IX86_BUILTIN_VEC_EXT_V8HI,
14755 IX86_BUILTIN_VEC_EXT_V16QI,
14756 IX86_BUILTIN_VEC_EXT_V2SI,
14757 IX86_BUILTIN_VEC_EXT_V4HI,
14758 IX86_BUILTIN_VEC_SET_V8HI,
14759 IX86_BUILTIN_VEC_SET_V4HI,
14760
14761 IX86_BUILTIN_MAX
14762};
14763
14764#define def_builtin(MASK, NAME, TYPE, CODE) \
14765do { \
14766 if ((MASK) & target_flags \
14767 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14768 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14769 NULL, NULL_TREE); \
14770} while (0)
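
/* Illustrative example of the macro above (a sketch; the actual calls
   appear later in ix86_init_mmx_sse_builtins):

     def_builtin (MASK_SSE, "__builtin_ia32_loadups",
                  v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);

   The builtin is registered only when the required ISA bits are present in
   target_flags, and MASK_64BIT entries are skipped on 32-bit targets.  */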
14771
14772/* Bits for builtin_description.flag. */
14773
14774/* Set when we don't support the comparison natively, and should
14775    swap the comparison operands in order to support it.  */
14776#define BUILTIN_DESC_SWAP_OPERANDS 1
14777
14778struct builtin_description
14779{
14780 const unsigned int mask;
14781 const enum insn_code icode;
14782 const char *const name;
14783 const enum ix86_builtins code;
14784 const enum rtx_code comparison;
14785 const unsigned int flag;
14786};
14787
14788static const struct builtin_description bdesc_comi[] =
14789{
14790 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14791 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14792 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14793 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14794 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14795 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14796 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14797 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14798 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14799 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14800 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14801 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14802 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14803 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14804 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14805 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14806 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14807 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14808 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14809 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14810 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14811 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14812 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14813 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14814};
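
/* Illustrative sketch of how these descriptor tables are consumed: the
   registration loop in ix86_init_mmx_sse_builtins is roughly

     for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
       def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

   (the real loop substitutes int_ftype_v2df_v2df for the MASK_SSE2
   entries), and the expanders later index the same tables to pick the
   insn pattern and comparison code.  */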
14815
14816static const struct builtin_description bdesc_2arg[] =
14817{
14818 /* SSE */
14819 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14820 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14821 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14822 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14823 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14824 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14825 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14826 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14827
14828 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14829 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14830 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14831 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14832 BUILTIN_DESC_SWAP_OPERANDS },
14833 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14834 BUILTIN_DESC_SWAP_OPERANDS },
14835 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14836 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14837 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14838 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14839 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14840 BUILTIN_DESC_SWAP_OPERANDS },
14841 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14842 BUILTIN_DESC_SWAP_OPERANDS },
14843 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14844 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14845 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14846 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14847 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14848 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14849 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14850 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14851 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14852 BUILTIN_DESC_SWAP_OPERANDS },
14853 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14854 BUILTIN_DESC_SWAP_OPERANDS },
14855 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
14856
14857 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14858 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14859 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14860 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14861
14862 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14863 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14864 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14865 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14866
14867 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14868 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14869 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14870 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14871 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14872
14873 /* MMX */
14874 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14875 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14876 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14877 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14878 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14879 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14880 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14881 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14882
14883 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14884 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14885 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14886 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14887 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14888 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14889 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14890 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14891
14892 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14893 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14894 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14895
14896 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14897 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14898 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14899 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14900
14901 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14902 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14903
14904 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14905 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14906 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14907 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14908 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14909 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14910
14911 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14912 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14913 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14914 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14915
14916 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14917 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14918 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14919 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14920 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14921 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14922
14923 /* Special. */
14924 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14925 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14926 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14927
14928 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14929 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14930 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14931
14932 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14933 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14934 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14935 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14936 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14937 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14938
14939 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14940 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14941 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14942 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14943 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14944 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14945
14946 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14947 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14948 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14949 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14950
14951 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14952 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14953
14954 /* SSE2 */
14955 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14956 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14957 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14958 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14959 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14960 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14961 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14962 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14963
14964 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14965 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14966 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14967 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14968 BUILTIN_DESC_SWAP_OPERANDS },
14969 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14970 BUILTIN_DESC_SWAP_OPERANDS },
14971 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14972 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14973 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14974 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14975 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14976 BUILTIN_DESC_SWAP_OPERANDS },
14977 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14978 BUILTIN_DESC_SWAP_OPERANDS },
14979 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14980 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14981 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14982 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14983 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14984 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14985 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14986 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14987 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14988
14989 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14990 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14991 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14992 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14993
14994 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14995 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14996 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14997 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14998
14999 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
15000 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
15001 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
15002
15003 /* SSE2 MMX */
15004 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
15005 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
15006 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
15007 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
15008 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
15009 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
15010 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
15011 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
15012
15013 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
15014 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
15015 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
15016 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
15017 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
15018 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
15019 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
15020 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
15021
15022 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
15023 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
15024
15025 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
15026 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
15027 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
15028 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
15029
15030 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
15031 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
15032
15033 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
15034 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
15035 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
15036 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
15037 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
15038 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
15039
15040 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
15041 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
15042 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
15043 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
15044
15045 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
15046 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
15047 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
15048 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
15049 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
15050 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
15051 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
15052 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
15053
15054 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
15055 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
15056 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
15057
15058 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
15059 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
15060
15061 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
15062 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
15063
15064 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
15065 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
15066 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
15067
15068 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
15069 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
15070 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
15071
15072 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
15073 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
15074
15075 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
15076
15077 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
15078 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
15079 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
15080 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
15081
15082 /* SSE3 MMX */
15083 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
15084 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
15085 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
15086 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
15087 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
15088 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
15089
15090 /* SSSE3 */
15091 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
15092 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
15093 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
15094 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
15095 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
15096 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
15097 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
15098 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
15099 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
15100 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
15101 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
15102 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
15103 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
15104 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
15105 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
15106 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
15107 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
15108 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
15109 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
15110 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15111 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15112 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15113 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15114 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15115};
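
/* Illustrative note on the comparison entries above: a descriptor flagged
   with BUILTIN_DESC_SWAP_OPERANDS is expanded with its two inputs
   exchanged.  For example, __builtin_ia32_cmpgtps is listed with rtx code
   LT plus the swap flag, so "a > b" is emitted as the native "b < a"
   mask compare.  */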
15116
15117static const struct builtin_description bdesc_1arg[] =
15118{
15119 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15120 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15121
15122 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15123 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15124 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15125
15126 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15127 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15128 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15129 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15130 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
15131 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
15132
15133 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
15134 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
15135
15136 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
15137
15138 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
15139 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
15140
15141 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
15142 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
15143 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
15144 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
15145 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
15146
15147 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
15148
15149 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
15150 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
15151 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
15152 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
15153
15154 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
15155 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
15156 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
15157
15158 /* SSE3 */
15159 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
15160 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
15161
15162 /* SSSE3 */
15163 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
15164 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
15165 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
15166 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
15167 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
15168 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
15169};
15170
15171static void
15172ix86_init_builtins (void)
15173{
15174 if (TARGET_MMX)
15175 ix86_init_mmx_sse_builtins ();
15176}
15177
15178/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
15179 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
15180 builtins. */
15181static void
15182ix86_init_mmx_sse_builtins (void)
15183{
15184 const struct builtin_description * d;
15185 size_t i;
15186
15187 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
15188 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15189 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
15190 tree V2DI_type_node
15191 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
15192 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
15193 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
15194 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
15195 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15196 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15197 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15198
15199 tree pchar_type_node = build_pointer_type (char_type_node);
15200 tree pcchar_type_node = build_pointer_type (
15201 build_type_variant (char_type_node, 1, 0));
15202 tree pfloat_type_node = build_pointer_type (float_type_node);
15203 tree pcfloat_type_node = build_pointer_type (
15204 build_type_variant (float_type_node, 1, 0));
15205 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15206 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15207 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15208
15209 /* Comparisons. */
15210 tree int_ftype_v4sf_v4sf
15211 = build_function_type_list (integer_type_node,
15212 V4SF_type_node, V4SF_type_node, NULL_TREE);
15213 tree v4si_ftype_v4sf_v4sf
15214 = build_function_type_list (V4SI_type_node,
15215 V4SF_type_node, V4SF_type_node, NULL_TREE);
15216 /* MMX/SSE/integer conversions. */
15217 tree int_ftype_v4sf
15218 = build_function_type_list (integer_type_node,
15219 V4SF_type_node, NULL_TREE);
15220 tree int64_ftype_v4sf
15221 = build_function_type_list (long_long_integer_type_node,
15222 V4SF_type_node, NULL_TREE);
15223 tree int_ftype_v8qi
15224 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15225 tree v4sf_ftype_v4sf_int
15226 = build_function_type_list (V4SF_type_node,
15227 V4SF_type_node, integer_type_node, NULL_TREE);
15228 tree v4sf_ftype_v4sf_int64
15229 = build_function_type_list (V4SF_type_node,
15230 V4SF_type_node, long_long_integer_type_node,
15231 NULL_TREE);
15232 tree v4sf_ftype_v4sf_v2si
15233 = build_function_type_list (V4SF_type_node,
15234 V4SF_type_node, V2SI_type_node, NULL_TREE);
15235
15236 /* Miscellaneous. */
15237 tree v8qi_ftype_v4hi_v4hi
15238 = build_function_type_list (V8QI_type_node,
15239 V4HI_type_node, V4HI_type_node, NULL_TREE);
15240 tree v4hi_ftype_v2si_v2si
15241 = build_function_type_list (V4HI_type_node,
15242 V2SI_type_node, V2SI_type_node, NULL_TREE);
15243 tree v4sf_ftype_v4sf_v4sf_int
15244 = build_function_type_list (V4SF_type_node,
15245 V4SF_type_node, V4SF_type_node,
15246 integer_type_node, NULL_TREE);
15247 tree v2si_ftype_v4hi_v4hi
15248 = build_function_type_list (V2SI_type_node,
15249 V4HI_type_node, V4HI_type_node, NULL_TREE);
15250 tree v4hi_ftype_v4hi_int
15251 = build_function_type_list (V4HI_type_node,
15252 V4HI_type_node, integer_type_node, NULL_TREE);
15253 tree v4hi_ftype_v4hi_di
15254 = build_function_type_list (V4HI_type_node,
15255 V4HI_type_node, long_long_unsigned_type_node,
15256 NULL_TREE);
15257 tree v2si_ftype_v2si_di
15258 = build_function_type_list (V2SI_type_node,
15259 V2SI_type_node, long_long_unsigned_type_node,
15260 NULL_TREE);
15261 tree void_ftype_void
15262 = build_function_type (void_type_node, void_list_node);
15263 tree void_ftype_unsigned
15264 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15265 tree void_ftype_unsigned_unsigned
15266 = build_function_type_list (void_type_node, unsigned_type_node,
15267 unsigned_type_node, NULL_TREE);
15268 tree void_ftype_pcvoid_unsigned_unsigned
15269 = build_function_type_list (void_type_node, const_ptr_type_node,
15270 unsigned_type_node, unsigned_type_node,
15271 NULL_TREE);
15272 tree unsigned_ftype_void
15273 = build_function_type (unsigned_type_node, void_list_node);
15274 tree v2si_ftype_v4sf
15275 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15276 /* Loads/stores. */
15277 tree void_ftype_v8qi_v8qi_pchar
15278 = build_function_type_list (void_type_node,
15279 V8QI_type_node, V8QI_type_node,
15280 pchar_type_node, NULL_TREE);
15281 tree v4sf_ftype_pcfloat
15282 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15283 /* @@@ the type is bogus */
15284 tree v4sf_ftype_v4sf_pv2si
15285 = build_function_type_list (V4SF_type_node,
15286 V4SF_type_node, pv2si_type_node, NULL_TREE);
15287 tree void_ftype_pv2si_v4sf
15288 = build_function_type_list (void_type_node,
15289 pv2si_type_node, V4SF_type_node, NULL_TREE);
15290 tree void_ftype_pfloat_v4sf
15291 = build_function_type_list (void_type_node,
15292 pfloat_type_node, V4SF_type_node, NULL_TREE);
15293 tree void_ftype_pdi_di
15294 = build_function_type_list (void_type_node,
15295 pdi_type_node, long_long_unsigned_type_node,
15296 NULL_TREE);
15297 tree void_ftype_pv2di_v2di
15298 = build_function_type_list (void_type_node,
15299 pv2di_type_node, V2DI_type_node, NULL_TREE);
15300 /* Normal vector unops. */
15301 tree v4sf_ftype_v4sf
15302 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15303 tree v16qi_ftype_v16qi
15304 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15305 tree v8hi_ftype_v8hi
15306 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15307 tree v4si_ftype_v4si
15308 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15309 tree v8qi_ftype_v8qi
15310 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15311 tree v4hi_ftype_v4hi
15312 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15313
15314 /* Normal vector binops. */
15315 tree v4sf_ftype_v4sf_v4sf
15316 = build_function_type_list (V4SF_type_node,
15317 V4SF_type_node, V4SF_type_node, NULL_TREE);
15318 tree v8qi_ftype_v8qi_v8qi
15319 = build_function_type_list (V8QI_type_node,
15320 V8QI_type_node, V8QI_type_node, NULL_TREE);
15321 tree v4hi_ftype_v4hi_v4hi
15322 = build_function_type_list (V4HI_type_node,
15323 V4HI_type_node, V4HI_type_node, NULL_TREE);
15324 tree v2si_ftype_v2si_v2si
15325 = build_function_type_list (V2SI_type_node,
15326 V2SI_type_node, V2SI_type_node, NULL_TREE);
15327 tree di_ftype_di_di
15328 = build_function_type_list (long_long_unsigned_type_node,
15329 long_long_unsigned_type_node,
15330 long_long_unsigned_type_node, NULL_TREE);
15331
15332 tree di_ftype_di_di_int
15333 = build_function_type_list (long_long_unsigned_type_node,
15334 long_long_unsigned_type_node,
15335 long_long_unsigned_type_node,
15336 integer_type_node, NULL_TREE);
15337
15338 tree v2si_ftype_v2sf
15339 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15340 tree v2sf_ftype_v2si
15341 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15342 tree v2si_ftype_v2si
15343 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15344 tree v2sf_ftype_v2sf
15345 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15346 tree v2sf_ftype_v2sf_v2sf
15347 = build_function_type_list (V2SF_type_node,
15348 V2SF_type_node, V2SF_type_node, NULL_TREE);
15349 tree v2si_ftype_v2sf_v2sf
15350 = build_function_type_list (V2SI_type_node,
15351 V2SF_type_node, V2SF_type_node, NULL_TREE);
15352 tree pint_type_node = build_pointer_type (integer_type_node);
15353 tree pdouble_type_node = build_pointer_type (double_type_node);
15354 tree pcdouble_type_node = build_pointer_type (
15355 build_type_variant (double_type_node, 1, 0));
15356 tree int_ftype_v2df_v2df
15357 = build_function_type_list (integer_type_node,
15358 V2DF_type_node, V2DF_type_node, NULL_TREE);
15359
15360 tree void_ftype_pcvoid
15361 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15362 tree v4sf_ftype_v4si
15363 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15364 tree v4si_ftype_v4sf
15365 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15366 tree v2df_ftype_v4si
15367 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15368 tree v4si_ftype_v2df
15369 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15370 tree v2si_ftype_v2df
15371 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15372 tree v4sf_ftype_v2df
15373 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15374 tree v2df_ftype_v2si
15375 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15376 tree v2df_ftype_v4sf
15377 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15378 tree int_ftype_v2df
15379 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15380 tree int64_ftype_v2df
15381 = build_function_type_list (long_long_integer_type_node,
15382 V2DF_type_node, NULL_TREE);
15383 tree v2df_ftype_v2df_int
15384 = build_function_type_list (V2DF_type_node,
15385 V2DF_type_node, integer_type_node, NULL_TREE);
15386 tree v2df_ftype_v2df_int64
15387 = build_function_type_list (V2DF_type_node,
15388 V2DF_type_node, long_long_integer_type_node,
15389 NULL_TREE);
15390 tree v4sf_ftype_v4sf_v2df
15391 = build_function_type_list (V4SF_type_node,
15392 V4SF_type_node, V2DF_type_node, NULL_TREE);
15393 tree v2df_ftype_v2df_v4sf
15394 = build_function_type_list (V2DF_type_node,
15395 V2DF_type_node, V4SF_type_node, NULL_TREE);
15396 tree v2df_ftype_v2df_v2df_int
15397 = build_function_type_list (V2DF_type_node,
15398 V2DF_type_node, V2DF_type_node,
15399 integer_type_node,
15400 NULL_TREE);
15401 tree v2df_ftype_v2df_pcdouble
15402 = build_function_type_list (V2DF_type_node,
15403 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15404 tree void_ftype_pdouble_v2df
15405 = build_function_type_list (void_type_node,
15406 pdouble_type_node, V2DF_type_node, NULL_TREE);
15407 tree void_ftype_pint_int
15408 = build_function_type_list (void_type_node,
15409 pint_type_node, integer_type_node, NULL_TREE);
15410 tree void_ftype_v16qi_v16qi_pchar
15411 = build_function_type_list (void_type_node,
15412 V16QI_type_node, V16QI_type_node,
15413 pchar_type_node, NULL_TREE);
15414 tree v2df_ftype_pcdouble
15415 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15416 tree v2df_ftype_v2df_v2df
15417 = build_function_type_list (V2DF_type_node,
15418 V2DF_type_node, V2DF_type_node, NULL_TREE);
15419 tree v16qi_ftype_v16qi_v16qi
15420 = build_function_type_list (V16QI_type_node,
15421 V16QI_type_node, V16QI_type_node, NULL_TREE);
15422 tree v8hi_ftype_v8hi_v8hi
15423 = build_function_type_list (V8HI_type_node,
15424 V8HI_type_node, V8HI_type_node, NULL_TREE);
15425 tree v4si_ftype_v4si_v4si
15426 = build_function_type_list (V4SI_type_node,
15427 V4SI_type_node, V4SI_type_node, NULL_TREE);
15428 tree v2di_ftype_v2di_v2di
15429 = build_function_type_list (V2DI_type_node,
15430 V2DI_type_node, V2DI_type_node, NULL_TREE);
15431 tree v2di_ftype_v2df_v2df
15432 = build_function_type_list (V2DI_type_node,
15433 V2DF_type_node, V2DF_type_node, NULL_TREE);
15434 tree v2df_ftype_v2df
15435 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15436 tree v2di_ftype_v2di_int
15437 = build_function_type_list (V2DI_type_node,
15438 V2DI_type_node, integer_type_node, NULL_TREE);
15439 tree v2di_ftype_v2di_v2di_int
15440 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15441 V2DI_type_node, integer_type_node, NULL_TREE);
15442 tree v4si_ftype_v4si_int
15443 = build_function_type_list (V4SI_type_node,
15444 V4SI_type_node, integer_type_node, NULL_TREE);
15445 tree v8hi_ftype_v8hi_int
15446 = build_function_type_list (V8HI_type_node,
15447 V8HI_type_node, integer_type_node, NULL_TREE);
15448 tree v4si_ftype_v8hi_v8hi
15449 = build_function_type_list (V4SI_type_node,
15450 V8HI_type_node, V8HI_type_node, NULL_TREE);
15451 tree di_ftype_v8qi_v8qi
15452 = build_function_type_list (long_long_unsigned_type_node,
15453 V8QI_type_node, V8QI_type_node, NULL_TREE);
15454 tree di_ftype_v2si_v2si
15455 = build_function_type_list (long_long_unsigned_type_node,
15456 V2SI_type_node, V2SI_type_node, NULL_TREE);
15457 tree v2di_ftype_v16qi_v16qi
15458 = build_function_type_list (V2DI_type_node,
15459 V16QI_type_node, V16QI_type_node, NULL_TREE);
15460 tree v2di_ftype_v4si_v4si
15461 = build_function_type_list (V2DI_type_node,
15462 V4SI_type_node, V4SI_type_node, NULL_TREE);
15463 tree int_ftype_v16qi
15464 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15465 tree v16qi_ftype_pcchar
15466 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15467 tree void_ftype_pchar_v16qi
15468 = build_function_type_list (void_type_node,
15469 pchar_type_node, V16QI_type_node, NULL_TREE);
15470
14957 IX86_BUILTIN_VEC_INIT_V2SI,
14958 IX86_BUILTIN_VEC_INIT_V4HI,
14959 IX86_BUILTIN_VEC_INIT_V8QI,
14960 IX86_BUILTIN_VEC_EXT_V2DF,
14961 IX86_BUILTIN_VEC_EXT_V2DI,
14962 IX86_BUILTIN_VEC_EXT_V4SF,
14963 IX86_BUILTIN_VEC_EXT_V4SI,
14964 IX86_BUILTIN_VEC_EXT_V8HI,
14965 IX86_BUILTIN_VEC_EXT_V16QI,
14966 IX86_BUILTIN_VEC_EXT_V2SI,
14967 IX86_BUILTIN_VEC_EXT_V4HI,
14968 IX86_BUILTIN_VEC_SET_V8HI,
14969 IX86_BUILTIN_VEC_SET_V4HI,
14970
14971 IX86_BUILTIN_MAX
14972};
14973
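/* def_builtin registers NAME as a machine-dependent builtin of type TYPE,
   bound to the ix86_builtins value CODE, but only when at least one bit of
   MASK is enabled in target_flags; builtins whose MASK includes MASK_64BIT
   are additionally skipped on 32-bit targets.  A typical use, as seen in
   ix86_init_mmx_sse_builtins below, is
     def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void,
		  IX86_BUILTIN_EMMS);  */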
14974#define def_builtin(MASK, NAME, TYPE, CODE) \
14975do { \
14976 if ((MASK) & target_flags \
14977 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14978 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14979 NULL, NULL_TREE); \
14980} while (0)
14981
14982/* Bits for builtin_description.flag. */
14983
14984/* Set when we don't support the comparison natively, and should
14985   swap the comparison operands in order to support it.  */
14986#define BUILTIN_DESC_SWAP_OPERANDS 1
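/* For example, SSE has no native packed greater-than compare, so
   __builtin_ia32_cmpgtps is described below as LT with
   BUILTIN_DESC_SWAP_OPERANDS set and is expanded with its two operands
   exchanged.  */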
14987
14988struct builtin_description
14989{
14990 const unsigned int mask;
14991 const enum insn_code icode;
14992 const char *const name;
14993 const enum ix86_builtins code;
14994 const enum rtx_code comparison;
14995 const unsigned int flag;
14996};
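/* Each entry in the tables below gives the target_flags bits a builtin
   requires, the insn pattern that implements it, its user-visible name
   (0 when the builtin is instead registered by hand with a special type
   by the def_builtin calls in ix86_init_mmx_sse_builtins), its
   IX86_BUILTIN_* code and, for compare builtins, the rtx comparison code
   and flags used when it is expanded.  */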
14997
14998static const struct builtin_description bdesc_comi[] =
14999{
15000 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
15001 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
15002 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
15003 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
15004 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
15005 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
15006 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
15007 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
15008 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
15009 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
15010 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
15011 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
15012 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
15013 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
15014 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
15015 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
15016 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
15017 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
15018 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
15019 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
15020 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
15021 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
15022 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
15023 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
15024};
15025
15026static const struct builtin_description bdesc_2arg[] =
15027{
15028 /* SSE */
15029 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
15030 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
15031 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
15032 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
15033 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
15034 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
15035 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
15036 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
15037
15038 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
15039 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
15040 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
15041 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
15042 BUILTIN_DESC_SWAP_OPERANDS },
15043 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
15044 BUILTIN_DESC_SWAP_OPERANDS },
15045 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
15046 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
15047 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
15048 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
15049 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
15050 BUILTIN_DESC_SWAP_OPERANDS },
15051 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
15052 BUILTIN_DESC_SWAP_OPERANDS },
15053 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
15054 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
15055 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
15056 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
15057 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
15058 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
15059 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
15060 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
15061 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
15062 BUILTIN_DESC_SWAP_OPERANDS },
15063 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
15064 BUILTIN_DESC_SWAP_OPERANDS },
15065 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
15066
15067 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
15068 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
15069 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
15070 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
15071
15072 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
15073 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
15074 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
15075 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
15076
15077 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
15078 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
15079 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
15080 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
15081 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
15082
15083 /* MMX */
15084 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
15085 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
15086 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
15087 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
15088 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
15089 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
15090 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
15091 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
15092
15093 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
15094 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
15095 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
15096 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
15097 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
15098 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
15099 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
15100 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
15101
15102 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
15103 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
15104 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
15105
15106 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
15107 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
15108 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
15109 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
15110
15111 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
15112 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
15113
15114 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
15115 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
15116 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
15117 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
15118 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
15119 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
15120
15121 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
15122 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
15123 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
15124 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
15125
15126 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
15127 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
15128 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
15129 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
15130 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
15131 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
15132
15133 /* Special. */
15134 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
15135 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
15136 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
15137
15138 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
15139 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
15140 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
15141
15142 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
15143 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
15144 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
15145 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
15146 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
15147 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
15148
15149 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
15150 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
15151 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
15152 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
15153 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
15154 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
15155
15156 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
15157 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
15158 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
15159 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
15160
15161 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
15162 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
15163
15164 /* SSE2 */
15165 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
15166 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
15167 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
15168 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
15169 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
15170 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
15171 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
15172 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
15173
15174 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
15175 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
15176 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
15177 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
15178 BUILTIN_DESC_SWAP_OPERANDS },
15179 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
15180 BUILTIN_DESC_SWAP_OPERANDS },
15181 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
15182 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
15183 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
15184 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
15185 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
15186 BUILTIN_DESC_SWAP_OPERANDS },
15187 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
15188 BUILTIN_DESC_SWAP_OPERANDS },
15189 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
15190 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
15191 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
15192 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
15193 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
15194 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
15195 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
15196 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
15197 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
15198
15199 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
15200 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
15201 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
15202 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
15203
15204 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
15205 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
15206 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
15207 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
15208
15209 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
15210 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
15211 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
15212
15213 /* SSE2 MMX */
15214 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
15215 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
15216 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
15217 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
15218 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
15219 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
15220 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
15221 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
15222
15223 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
15224 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
15225 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
15226 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
15227 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
15228 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
15229 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
15230 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
15231
15232 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
15233 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
15234
15235 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
15236 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
15237 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
15238 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
15239
15240 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
15241 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
15242
15243 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
15244 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
15245 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
15246 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
15247 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
15248 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
15249
15250 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
15251 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
15252 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
15253 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
15254
15255 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
15256 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
15257 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
15258 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
15259 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
15260 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
15261 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
15262 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
15263
15264 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
15265 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
15266 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
15267
15268 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
15269 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
15270
15271 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
15272 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
15273
15274 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
15275 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
15276 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
15277
15278 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
15279 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
15280 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
15281
15282 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
15283 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
15284
15285 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
15286
15287 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
15288 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
15289 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
15290 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
15291
15292 /* SSE3 MMX */
15293 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
15294 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
15295 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
15296 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
15297 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
15298 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
15299
15300 /* SSSE3 */
15301 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
15302 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
15303 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
15304 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
15305 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
15306 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
15307 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
15308 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
15309 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
15310 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
15311 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
15312 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
15313 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
15314 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
15315 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
15316 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
15317 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
15318 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
15319 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
15320 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15321 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15322 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15323 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15324 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15325};
15326
15327static const struct builtin_description bdesc_1arg[] =
15328{
15329 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15330 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15331
15332 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15333 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15334 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15335
15336 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15337 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15338 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15339 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15340 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
15341 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
15342
15343 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
15344 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
15345
15346 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
15347
15348 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
15349 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
15350
15351 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
15352 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
15353 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
15354 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
15355 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
15356
15357 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
15358
15359 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
15360 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
15361 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
15362 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
15363
15364 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
15365 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
15366 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
15367
15368 /* SSE3 */
15369 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
15370 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
15371
15372 /* SSSE3 */
15373 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
15374 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
15375 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
15376 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
15377 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
15378 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
15379};
15380
15381static void
15382ix86_init_builtins (void)
15383{
15384 if (TARGET_MMX)
15385 ix86_init_mmx_sse_builtins ();
15386}
15387
15388/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
15389 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
15390 builtins. */
15391static void
15392ix86_init_mmx_sse_builtins (void)
15393{
15394 const struct builtin_description * d;
15395 size_t i;
15396
15397 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
15398 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15399 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
15400 tree V2DI_type_node
15401 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
15402 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
15403 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
15404 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
15405 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15406 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15407 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15408
15409 tree pchar_type_node = build_pointer_type (char_type_node);
15410 tree pcchar_type_node = build_pointer_type (
15411 build_type_variant (char_type_node, 1, 0));
15412 tree pfloat_type_node = build_pointer_type (float_type_node);
15413 tree pcfloat_type_node = build_pointer_type (
15414 build_type_variant (float_type_node, 1, 0));
15415 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15416 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15417 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15418
15419 /* Comparisons. */
15420 tree int_ftype_v4sf_v4sf
15421 = build_function_type_list (integer_type_node,
15422 V4SF_type_node, V4SF_type_node, NULL_TREE);
15423 tree v4si_ftype_v4sf_v4sf
15424 = build_function_type_list (V4SI_type_node,
15425 V4SF_type_node, V4SF_type_node, NULL_TREE);
15426 /* MMX/SSE/integer conversions. */
15427 tree int_ftype_v4sf
15428 = build_function_type_list (integer_type_node,
15429 V4SF_type_node, NULL_TREE);
15430 tree int64_ftype_v4sf
15431 = build_function_type_list (long_long_integer_type_node,
15432 V4SF_type_node, NULL_TREE);
15433 tree int_ftype_v8qi
15434 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15435 tree v4sf_ftype_v4sf_int
15436 = build_function_type_list (V4SF_type_node,
15437 V4SF_type_node, integer_type_node, NULL_TREE);
15438 tree v4sf_ftype_v4sf_int64
15439 = build_function_type_list (V4SF_type_node,
15440 V4SF_type_node, long_long_integer_type_node,
15441 NULL_TREE);
15442 tree v4sf_ftype_v4sf_v2si
15443 = build_function_type_list (V4SF_type_node,
15444 V4SF_type_node, V2SI_type_node, NULL_TREE);
15445
15446 /* Miscellaneous. */
15447 tree v8qi_ftype_v4hi_v4hi
15448 = build_function_type_list (V8QI_type_node,
15449 V4HI_type_node, V4HI_type_node, NULL_TREE);
15450 tree v4hi_ftype_v2si_v2si
15451 = build_function_type_list (V4HI_type_node,
15452 V2SI_type_node, V2SI_type_node, NULL_TREE);
15453 tree v4sf_ftype_v4sf_v4sf_int
15454 = build_function_type_list (V4SF_type_node,
15455 V4SF_type_node, V4SF_type_node,
15456 integer_type_node, NULL_TREE);
15457 tree v2si_ftype_v4hi_v4hi
15458 = build_function_type_list (V2SI_type_node,
15459 V4HI_type_node, V4HI_type_node, NULL_TREE);
15460 tree v4hi_ftype_v4hi_int
15461 = build_function_type_list (V4HI_type_node,
15462 V4HI_type_node, integer_type_node, NULL_TREE);
15463 tree v4hi_ftype_v4hi_di
15464 = build_function_type_list (V4HI_type_node,
15465 V4HI_type_node, long_long_unsigned_type_node,
15466 NULL_TREE);
15467 tree v2si_ftype_v2si_di
15468 = build_function_type_list (V2SI_type_node,
15469 V2SI_type_node, long_long_unsigned_type_node,
15470 NULL_TREE);
15471 tree void_ftype_void
15472 = build_function_type (void_type_node, void_list_node);
15473 tree void_ftype_unsigned
15474 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15475 tree void_ftype_unsigned_unsigned
15476 = build_function_type_list (void_type_node, unsigned_type_node,
15477 unsigned_type_node, NULL_TREE);
15478 tree void_ftype_pcvoid_unsigned_unsigned
15479 = build_function_type_list (void_type_node, const_ptr_type_node,
15480 unsigned_type_node, unsigned_type_node,
15481 NULL_TREE);
15482 tree unsigned_ftype_void
15483 = build_function_type (unsigned_type_node, void_list_node);
15484 tree v2si_ftype_v4sf
15485 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15486 /* Loads/stores. */
15487 tree void_ftype_v8qi_v8qi_pchar
15488 = build_function_type_list (void_type_node,
15489 V8QI_type_node, V8QI_type_node,
15490 pchar_type_node, NULL_TREE);
15491 tree v4sf_ftype_pcfloat
15492 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15493 /* @@@ the type is bogus */
15494 tree v4sf_ftype_v4sf_pv2si
15495 = build_function_type_list (V4SF_type_node,
15496 V4SF_type_node, pv2si_type_node, NULL_TREE);
15497 tree void_ftype_pv2si_v4sf
15498 = build_function_type_list (void_type_node,
15499 pv2si_type_node, V4SF_type_node, NULL_TREE);
15500 tree void_ftype_pfloat_v4sf
15501 = build_function_type_list (void_type_node,
15502 pfloat_type_node, V4SF_type_node, NULL_TREE);
15503 tree void_ftype_pdi_di
15504 = build_function_type_list (void_type_node,
15505 pdi_type_node, long_long_unsigned_type_node,
15506 NULL_TREE);
15507 tree void_ftype_pv2di_v2di
15508 = build_function_type_list (void_type_node,
15509 pv2di_type_node, V2DI_type_node, NULL_TREE);
15510 /* Normal vector unops. */
15511 tree v4sf_ftype_v4sf
15512 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15513 tree v16qi_ftype_v16qi
15514 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15515 tree v8hi_ftype_v8hi
15516 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15517 tree v4si_ftype_v4si
15518 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15519 tree v8qi_ftype_v8qi
15520 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15521 tree v4hi_ftype_v4hi
15522 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15523
15524 /* Normal vector binops. */
15525 tree v4sf_ftype_v4sf_v4sf
15526 = build_function_type_list (V4SF_type_node,
15527 V4SF_type_node, V4SF_type_node, NULL_TREE);
15528 tree v8qi_ftype_v8qi_v8qi
15529 = build_function_type_list (V8QI_type_node,
15530 V8QI_type_node, V8QI_type_node, NULL_TREE);
15531 tree v4hi_ftype_v4hi_v4hi
15532 = build_function_type_list (V4HI_type_node,
15533 V4HI_type_node, V4HI_type_node, NULL_TREE);
15534 tree v2si_ftype_v2si_v2si
15535 = build_function_type_list (V2SI_type_node,
15536 V2SI_type_node, V2SI_type_node, NULL_TREE);
15537 tree di_ftype_di_di
15538 = build_function_type_list (long_long_unsigned_type_node,
15539 long_long_unsigned_type_node,
15540 long_long_unsigned_type_node, NULL_TREE);
15541
15542 tree di_ftype_di_di_int
15543 = build_function_type_list (long_long_unsigned_type_node,
15544 long_long_unsigned_type_node,
15545 long_long_unsigned_type_node,
15546 integer_type_node, NULL_TREE);
15547
15548 tree v2si_ftype_v2sf
15549 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15550 tree v2sf_ftype_v2si
15551 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15552 tree v2si_ftype_v2si
15553 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15554 tree v2sf_ftype_v2sf
15555 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15556 tree v2sf_ftype_v2sf_v2sf
15557 = build_function_type_list (V2SF_type_node,
15558 V2SF_type_node, V2SF_type_node, NULL_TREE);
15559 tree v2si_ftype_v2sf_v2sf
15560 = build_function_type_list (V2SI_type_node,
15561 V2SF_type_node, V2SF_type_node, NULL_TREE);
15562 tree pint_type_node = build_pointer_type (integer_type_node);
15563 tree pdouble_type_node = build_pointer_type (double_type_node);
15564 tree pcdouble_type_node = build_pointer_type (
15565 build_type_variant (double_type_node, 1, 0));
15566 tree int_ftype_v2df_v2df
15567 = build_function_type_list (integer_type_node,
15568 V2DF_type_node, V2DF_type_node, NULL_TREE);
15569
15570 tree void_ftype_pcvoid
15571 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15572 tree v4sf_ftype_v4si
15573 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15574 tree v4si_ftype_v4sf
15575 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15576 tree v2df_ftype_v4si
15577 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15578 tree v4si_ftype_v2df
15579 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15580 tree v2si_ftype_v2df
15581 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15582 tree v4sf_ftype_v2df
15583 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15584 tree v2df_ftype_v2si
15585 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15586 tree v2df_ftype_v4sf
15587 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15588 tree int_ftype_v2df
15589 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15590 tree int64_ftype_v2df
15591 = build_function_type_list (long_long_integer_type_node,
15592 V2DF_type_node, NULL_TREE);
15593 tree v2df_ftype_v2df_int
15594 = build_function_type_list (V2DF_type_node,
15595 V2DF_type_node, integer_type_node, NULL_TREE);
15596 tree v2df_ftype_v2df_int64
15597 = build_function_type_list (V2DF_type_node,
15598 V2DF_type_node, long_long_integer_type_node,
15599 NULL_TREE);
15600 tree v4sf_ftype_v4sf_v2df
15601 = build_function_type_list (V4SF_type_node,
15602 V4SF_type_node, V2DF_type_node, NULL_TREE);
15603 tree v2df_ftype_v2df_v4sf
15604 = build_function_type_list (V2DF_type_node,
15605 V2DF_type_node, V4SF_type_node, NULL_TREE);
15606 tree v2df_ftype_v2df_v2df_int
15607 = build_function_type_list (V2DF_type_node,
15608 V2DF_type_node, V2DF_type_node,
15609 integer_type_node,
15610 NULL_TREE);
15611 tree v2df_ftype_v2df_pcdouble
15612 = build_function_type_list (V2DF_type_node,
15613 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15614 tree void_ftype_pdouble_v2df
15615 = build_function_type_list (void_type_node,
15616 pdouble_type_node, V2DF_type_node, NULL_TREE);
15617 tree void_ftype_pint_int
15618 = build_function_type_list (void_type_node,
15619 pint_type_node, integer_type_node, NULL_TREE);
15620 tree void_ftype_v16qi_v16qi_pchar
15621 = build_function_type_list (void_type_node,
15622 V16QI_type_node, V16QI_type_node,
15623 pchar_type_node, NULL_TREE);
15624 tree v2df_ftype_pcdouble
15625 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15626 tree v2df_ftype_v2df_v2df
15627 = build_function_type_list (V2DF_type_node,
15628 V2DF_type_node, V2DF_type_node, NULL_TREE);
15629 tree v16qi_ftype_v16qi_v16qi
15630 = build_function_type_list (V16QI_type_node,
15631 V16QI_type_node, V16QI_type_node, NULL_TREE);
15632 tree v8hi_ftype_v8hi_v8hi
15633 = build_function_type_list (V8HI_type_node,
15634 V8HI_type_node, V8HI_type_node, NULL_TREE);
15635 tree v4si_ftype_v4si_v4si
15636 = build_function_type_list (V4SI_type_node,
15637 V4SI_type_node, V4SI_type_node, NULL_TREE);
15638 tree v2di_ftype_v2di_v2di
15639 = build_function_type_list (V2DI_type_node,
15640 V2DI_type_node, V2DI_type_node, NULL_TREE);
15641 tree v2di_ftype_v2df_v2df
15642 = build_function_type_list (V2DI_type_node,
15643 V2DF_type_node, V2DF_type_node, NULL_TREE);
15644 tree v2df_ftype_v2df
15645 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15646 tree v2di_ftype_v2di_int
15647 = build_function_type_list (V2DI_type_node,
15648 V2DI_type_node, integer_type_node, NULL_TREE);
15649 tree v2di_ftype_v2di_v2di_int
15650 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15651 V2DI_type_node, integer_type_node, NULL_TREE);
15652 tree v4si_ftype_v4si_int
15653 = build_function_type_list (V4SI_type_node,
15654 V4SI_type_node, integer_type_node, NULL_TREE);
15655 tree v8hi_ftype_v8hi_int
15656 = build_function_type_list (V8HI_type_node,
15657 V8HI_type_node, integer_type_node, NULL_TREE);
15658 tree v4si_ftype_v8hi_v8hi
15659 = build_function_type_list (V4SI_type_node,
15660 V8HI_type_node, V8HI_type_node, NULL_TREE);
15661 tree di_ftype_v8qi_v8qi
15662 = build_function_type_list (long_long_unsigned_type_node,
15663 V8QI_type_node, V8QI_type_node, NULL_TREE);
15664 tree di_ftype_v2si_v2si
15665 = build_function_type_list (long_long_unsigned_type_node,
15666 V2SI_type_node, V2SI_type_node, NULL_TREE);
15667 tree v2di_ftype_v16qi_v16qi
15668 = build_function_type_list (V2DI_type_node,
15669 V16QI_type_node, V16QI_type_node, NULL_TREE);
15670 tree v2di_ftype_v4si_v4si
15671 = build_function_type_list (V2DI_type_node,
15672 V4SI_type_node, V4SI_type_node, NULL_TREE);
15673 tree int_ftype_v16qi
15674 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15675 tree v16qi_ftype_pcchar
15676 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15677 tree void_ftype_pchar_v16qi
15678 = build_function_type_list (void_type_node,
15679 pchar_type_node, V16QI_type_node, NULL_TREE);
15680
15681 tree v2di_ftype_v2di_unsigned_unsigned
15682 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15683 unsigned_type_node, unsigned_type_node,
15684 NULL_TREE);
15685 tree v2di_ftype_v2di_v2di_unsigned_unsigned
15686 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
15687 unsigned_type_node, unsigned_type_node,
15688 NULL_TREE);
15689 tree v2di_ftype_v2di_v16qi
15690 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
15691 NULL_TREE);
15692
15471 tree float80_type;
15472 tree float128_type;
15473 tree ftype;
15474
15475 /* The __float80 type. */
15476 if (TYPE_MODE (long_double_type_node) == XFmode)
15477 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15478 "__float80");
15479 else
15480 {
15481 /* The __float80 type. */
15482 float80_type = make_node (REAL_TYPE);
15483 TYPE_PRECISION (float80_type) = 80;
15484 layout_type (float80_type);
15485 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15486 }
15487
15488 if (TARGET_64BIT)
15489 {
15490 float128_type = make_node (REAL_TYPE);
15491 TYPE_PRECISION (float128_type) = 128;
15492 layout_type (float128_type);
15493 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15494 }
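  /* On targets where long double already has the 80-bit XFmode,
     __float80 is simply another name for long double; otherwise a
     distinct 80-bit REAL_TYPE is laid out for it.  __float128 is only
     provided on 64-bit targets.  */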
15495
15496 /* Add all builtins that are more or less simple operations on two
15497 operands. */
15498 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15499 {
15500 /* Use one of the operands; the target can have a different mode for
15501 mask-generating compares. */
15502 enum machine_mode mode;
15503 tree type;
15504
15505 if (d->name == 0)
15506 continue;
15507 mode = insn_data[d->icode].operand[1].mode;
15508
15509 switch (mode)
15510 {
15511 case V16QImode:
15512 type = v16qi_ftype_v16qi_v16qi;
15513 break;
15514 case V8HImode:
15515 type = v8hi_ftype_v8hi_v8hi;
15516 break;
15517 case V4SImode:
15518 type = v4si_ftype_v4si_v4si;
15519 break;
15520 case V2DImode:
15521 type = v2di_ftype_v2di_v2di;
15522 break;
15523 case V2DFmode:
15524 type = v2df_ftype_v2df_v2df;
15525 break;
15526 case V4SFmode:
15527 type = v4sf_ftype_v4sf_v4sf;
15528 break;
15529 case V8QImode:
15530 type = v8qi_ftype_v8qi_v8qi;
15531 break;
15532 case V4HImode:
15533 type = v4hi_ftype_v4hi_v4hi;
15534 break;
15535 case V2SImode:
15536 type = v2si_ftype_v2si_v2si;
15537 break;
15538 case DImode:
15539 type = di_ftype_di_di;
15540 break;
15541
15542 default:
15543 gcc_unreachable ();
15544 }
15545
15546 /* Override for comparisons. */
15547 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15548 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15549 type = v4si_ftype_v4sf_v4sf;
15550
15551 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15552 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15553 type = v2di_ftype_v2df_v2df;
15554
15555 def_builtin (d->mask, d->name, type, d->code);
15556 }
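  /* For example, CODE_FOR_addv4sf3 has V4SFmode operands, so
     __builtin_ia32_addps is registered as v4sf_ftype_v4sf_v4sf, while the
     packed compares such as __builtin_ia32_cmpeqps fall under the override
     above and return the mask vector type v4si instead.  */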
15557
15558 /* Add all builtins that are more or less simple operations on 1 operand. */
15559 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15560 {
15561 enum machine_mode mode;
15562 tree type;
15563
15564 if (d->name == 0)
15565 continue;
15566 mode = insn_data[d->icode].operand[1].mode;
15567
15568 switch (mode)
15569 {
15570 case V16QImode:
15571 type = v16qi_ftype_v16qi;
15572 break;
15573 case V8HImode:
15574 type = v8hi_ftype_v8hi;
15575 break;
15576 case V4SImode:
15577 type = v4si_ftype_v4si;
15578 break;
15579 case V2DFmode:
15580 type = v2df_ftype_v2df;
15581 break;
15582 case V4SFmode:
15583 type = v4sf_ftype_v4sf;
15584 break;
15585 case V8QImode:
15586 type = v8qi_ftype_v8qi;
15587 break;
15588 case V4HImode:
15589 type = v4hi_ftype_v4hi;
15590 break;
15591 case V2SImode:
15592 type = v2si_ftype_v2si;
15593 break;
15594
15595 default:
15596 abort ();
15597 }
15598
15599 def_builtin (d->mask, d->name, type, d->code);
15600 }
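  /* For example, CODE_FOR_absv16qi2 has V16QImode operands, so
     __builtin_ia32_pabsb128 is registered as v16qi_ftype_v16qi; entries
     whose name is 0, such as the sqrt and cvt patterns, are instead given
     hand-picked types by the def_builtin calls below.  */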
15601
15602 /* Add the remaining MMX insns with somewhat more complicated types. */
15603 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15604 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15605 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15606 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15607
15608 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15609 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15610 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15611
15612 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15613 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15614
15615 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15616 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15617
15618 /* comi/ucomi insns. */
15619 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15620 if (d->mask == MASK_SSE2)
15621 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15622 else
15623 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
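  /* This gives, e.g., __builtin_ia32_comisdeq the type int (v2df, v2df)
     and __builtin_ia32_comieq the type int (v4sf, v4sf).  */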
15624
15625 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15626 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15627 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15628
15629 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15630 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15631 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15632 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15633 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15634 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15635 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15636 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15637 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15638 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15639 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15640
15641 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15642
15643 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15644 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15645
15646 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15647 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15648 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15649 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15650
15651 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15652 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15653 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15654 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15655
15656 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15657
15658 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15659
15660 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15661 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15662 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15663 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15664 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15665 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15666
15667 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15668
15669 /* Original 3DNow! */
15670 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15671 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15672 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15673 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15674 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15675 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15676 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15677 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15678 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15679 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15680 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15681 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15682 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15683 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15684 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15685 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15686 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15687 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15688 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15689 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15690
15691 /* 3DNow! extension as used in the Athlon CPU. */
15692 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15693 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15694 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15695 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15696 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15697 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15698
15699 /* SSE2 */
15700 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15701
15702 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15703 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15704
15705 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15706 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15707
15708 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15709 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15710 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15711 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15712 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15713
15714 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15715 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15716 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15717 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15718
15719 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15720 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15721
15722 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15723
15724 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15725 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15726
15727 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15728 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15729 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15730 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15731 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15732
15733 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15734
15735 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15736 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15737 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15738 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15739
15740 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15741 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15742 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15743
15744 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15745 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15746 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15747 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15748
15749 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15750 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15751 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15752
15753 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15754 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15755
15756 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15757 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15758
15759 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
15760 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
15761 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15762
15763 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
15764 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
15765 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15766
15767 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
15768 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
15769
15770 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15771 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15772 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15773 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15774
15775 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15776 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15777 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15778 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15779
15780 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15781 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15782
15783 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15784
15785 /* Prescott New Instructions. */
15786 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15787 void_ftype_pcvoid_unsigned_unsigned,
15788 IX86_BUILTIN_MONITOR);
15789 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15790 void_ftype_unsigned_unsigned,
15791 IX86_BUILTIN_MWAIT);
15792 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15793 v4sf_ftype_v4sf,
15794 IX86_BUILTIN_MOVSHDUP);
15795 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15796 v4sf_ftype_v4sf,
15797 IX86_BUILTIN_MOVSLDUP);
15798 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15799 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15800
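  /* A usage sketch (illustration only, assuming the standard <pmmintrin.h>
     wrappers that normally sit on top of these builtins): _mm_monitor and
     _mm_mwait expand to __builtin_ia32_monitor and __builtin_ia32_mwait.

	#include <pmmintrin.h>

	void
	wait_on (volatile int *flag)
	{
	  _mm_monitor ((const void *) flag, 0, 0);
	  _mm_mwait (0, 0);
	}

     Compile with -msse3 so that MASK_SSE3 is set and the builtins are
     actually defined.  */
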
15801 /* SSSE3. */
15802 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
15803 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
15804 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
15805 IX86_BUILTIN_PALIGNR);
15806
15693 tree float80_type;
15694 tree float128_type;
15695 tree ftype;
15696
15697 /* The __float80 type. */
15698 if (TYPE_MODE (long_double_type_node) == XFmode)
15699 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15700 "__float80");
15701 else
15702 {
15703 /* The __float80 type. */
15704 float80_type = make_node (REAL_TYPE);
15705 TYPE_PRECISION (float80_type) = 80;
15706 layout_type (float80_type);
15707 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15708 }
15709
15710 if (TARGET_64BIT)
15711 {
15712 float128_type = make_node (REAL_TYPE);
15713 TYPE_PRECISION (float128_type) = 128;
15714 layout_type (float128_type);
15715 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15716 }
15717
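  /* In user code the registered names behave as ordinary type names; for
     example (a sketch only, and __float128 is available only when
     TARGET_64BIT, as registered above):

	__float80  e = 1.0L;
	__float128 q = 2.0;

     On a 32-bit target whose long double is already XFmode, __float80 is
     simply another name for long double; otherwise a separate 80-bit
     REAL_TYPE is laid out above.  */
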
15718 /* Add all builtins that are more or less simple operations on two
15719 operands. */
15720 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15721 {
15722 /* Use one of the operands; the target can have a different mode for
15723 mask-generating compares. */
15724 enum machine_mode mode;
15725 tree type;
15726
15727 if (d->name == 0)
15728 continue;
15729 mode = insn_data[d->icode].operand[1].mode;
15730
15731 switch (mode)
15732 {
15733 case V16QImode:
15734 type = v16qi_ftype_v16qi_v16qi;
15735 break;
15736 case V8HImode:
15737 type = v8hi_ftype_v8hi_v8hi;
15738 break;
15739 case V4SImode:
15740 type = v4si_ftype_v4si_v4si;
15741 break;
15742 case V2DImode:
15743 type = v2di_ftype_v2di_v2di;
15744 break;
15745 case V2DFmode:
15746 type = v2df_ftype_v2df_v2df;
15747 break;
15748 case V4SFmode:
15749 type = v4sf_ftype_v4sf_v4sf;
15750 break;
15751 case V8QImode:
15752 type = v8qi_ftype_v8qi_v8qi;
15753 break;
15754 case V4HImode:
15755 type = v4hi_ftype_v4hi_v4hi;
15756 break;
15757 case V2SImode:
15758 type = v2si_ftype_v2si_v2si;
15759 break;
15760 case DImode:
15761 type = di_ftype_di_di;
15762 break;
15763
15764 default:
15765 gcc_unreachable ();
15766 }
15767
15768 /* Override for comparisons. */
15769 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15770 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15771 type = v4si_ftype_v4sf_v4sf;
15772
15773 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15774 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15775 type = v2di_ftype_v2df_v2df;
15776
15777 def_builtin (d->mask, d->name, type, d->code);
15778 }
15779
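  /* Concretely: an entry in bdesc_2arg whose insn pattern takes V4SFmode
     inputs is given the v4sf_ftype_v4sf_v4sf prototype by the loop above,
     so (taking __builtin_ia32_addps as an assumed example from that table)
     it can be called as

	typedef float __v4sf __attribute__ ((__vector_size__ (16)));

	__v4sf
	add4 (__v4sf a, __v4sf b)
	{
	  return __builtin_ia32_addps (a, b);
	}

     while the mask-generating compares are overridden above to return an
     integer vector of the same width (v4si or v2di).  */
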
15780 /* Add all builtins that are more or less simple operations on 1 operand. */
15781 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15782 {
15783 enum machine_mode mode;
15784 tree type;
15785
15786 if (d->name == 0)
15787 continue;
15788 mode = insn_data[d->icode].operand[1].mode;
15789
15790 switch (mode)
15791 {
15792 case V16QImode:
15793 type = v16qi_ftype_v16qi;
15794 break;
15795 case V8HImode:
15796 type = v8hi_ftype_v8hi;
15797 break;
15798 case V4SImode:
15799 type = v4si_ftype_v4si;
15800 break;
15801 case V2DFmode:
15802 type = v2df_ftype_v2df;
15803 break;
15804 case V4SFmode:
15805 type = v4sf_ftype_v4sf;
15806 break;
15807 case V8QImode:
15808 type = v8qi_ftype_v8qi;
15809 break;
15810 case V4HImode:
15811 type = v4hi_ftype_v4hi;
15812 break;
15813 case V2SImode:
15814 type = v2si_ftype_v2si;
15815 break;
15816
15817 default:
15818 abort ();
15819 }
15820
15821 def_builtin (d->mask, d->name, type, d->code);
15822 }
15823
15824 /* Add the remaining MMX insns with somewhat more complicated types. */
15825 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15826 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15827 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15828 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15829
15830 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15831 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15832 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15833
15834 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15835 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15836
15837 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15838 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15839
15840 /* comi/ucomi insns. */
15841 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15842 if (d->mask == MASK_SSE2)
15843 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15844 else
15845 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15846
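  /* Usage sketch (assuming the usual <xmmintrin.h> wrapper): each comi
     builtin compares only the low elements of its two vector arguments and
     returns an int; for example _mm_comieq_ss, which expands to
     __builtin_ia32_comieq.

	#include <xmmintrin.h>

	int
	low_equal (__m128 a, __m128 b)
	{
	  return _mm_comieq_ss (a, b);
	}

     Compile with -msse.  */
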
15847 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15848 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15849 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15850
15851 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15852 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15853 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15854 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15855 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15856 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15857 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15858 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15859 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15860 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15861 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15862
15863 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15864
15865 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15866 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15867
15868 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15869 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15870 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15871 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15872
15873 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15874 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15875 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15876 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15877
15878 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15879
15880 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15881
15882 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15883 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15884 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15885 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15886 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15887 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15888
15889 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15890
15891 /* Original 3DNow! */
15892 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15893 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15894 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15895 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15896 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15897 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15898 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15899 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15900 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15901 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15902 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15903 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15904 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15905 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15906 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15907 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15908 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15909 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15910 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15911 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15912
15913 /* 3DNow! extension as used in the Athlon CPU. */
15914 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15915 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15916 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15917 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15918 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15919 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15920
15921 /* SSE2 */
15922 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15923
15924 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15925 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15926
15927 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15928 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15929
15930 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15931 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15932 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15933 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15934 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15935
15936 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15937 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15938 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15939 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15940
15941 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15942 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15943
15944 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15945
15946 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15947 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15948
15949 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15950 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15951 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15952 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15953 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15954
15955 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15956
15957 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15958 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15959 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15960 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15961
15962 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15963 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15964 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15965
15966 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15967 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15968 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15969 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15970
15971 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15972 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15973 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15974
15975 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15976 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15977
15978 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15979 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15980
15981 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
15982 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
15983 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15984
15985 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
15986 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
15987 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15988
15989 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
15990 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
15991
15992 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15993 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15994 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15995 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15996
15997 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15998 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15999 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
16000 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
16001
16002 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
16003 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
16004
16005 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
16006
16007 /* Prescott New Instructions. */
16008 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
16009 void_ftype_pcvoid_unsigned_unsigned,
16010 IX86_BUILTIN_MONITOR);
16011 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
16012 void_ftype_unsigned_unsigned,
16013 IX86_BUILTIN_MWAIT);
16014 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
16015 v4sf_ftype_v4sf,
16016 IX86_BUILTIN_MOVSHDUP);
16017 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
16018 v4sf_ftype_v4sf,
16019 IX86_BUILTIN_MOVSLDUP);
16020 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
16021 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
16022
16023 /* SSSE3. */
16024 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
16025 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
16026 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
16027 IX86_BUILTIN_PALIGNR);
16028
 16029  /* AMDFAM10 SSE4A new built-ins.  */
16030 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
16031 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
16032 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
16033 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
16034 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
16035 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
16036 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
16037 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
16038 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
16039 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
16040 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
16041 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
16042
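  /* Usage sketch for the new SSE4A builtins, assuming the <ammintrin.h>
     wrappers that conventionally accompany them (_mm_stream_sd and
     _mm_stream_ss for movntsd/movntss, _mm_extract_si64/_mm_insert_si64
     and their immediate forms for extrq/insertq):

	#include <ammintrin.h>

	void
	stream_low (double *p, __m128d v)
	{
	  _mm_stream_sd (p, v);
	}

     Compile with -msse4a so that MASK_SSE4A is set.  The intrinsic names
     are assumptions about the companion header, not definitions made in
     this file.  */
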
15807 /* Access to the vec_init patterns. */
15808 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15809 integer_type_node, NULL_TREE);
15810 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15811 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15812
15813 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15814 short_integer_type_node,
15815 short_integer_type_node,
15816 short_integer_type_node, NULL_TREE);
15817 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15818 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15819
15820 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15821 char_type_node, char_type_node,
15822 char_type_node, char_type_node,
15823 char_type_node, char_type_node,
15824 char_type_node, NULL_TREE);
15825 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15826 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15827
15828 /* Access to the vec_extract patterns. */
15829 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15830 integer_type_node, NULL_TREE);
15831 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
15832 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15833
15834 ftype = build_function_type_list (long_long_integer_type_node,
15835 V2DI_type_node, integer_type_node,
15836 NULL_TREE);
15837 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
15838 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15839
15840 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15841 integer_type_node, NULL_TREE);
15842 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15843 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15844
15845 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15846 integer_type_node, NULL_TREE);
15847 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
15848 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15849
15850 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15851 integer_type_node, NULL_TREE);
15852 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
15853 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15854
15855 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15856 integer_type_node, NULL_TREE);
15857 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15858 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15859
15860 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15861 integer_type_node, NULL_TREE);
15862 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15863 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15864
15865 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15866 integer_type_node, NULL_TREE);
15867 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
15868
15869 /* Access to the vec_set patterns. */
15870 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15871 intHI_type_node,
15872 integer_type_node, NULL_TREE);
15873 def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
15874 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15875
15876 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15877 intHI_type_node,
15878 integer_type_node, NULL_TREE);
15879 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15880 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15881}
15882
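/* For reference, a sketch of how the vec_ext/vec_set builtins defined above
   are normally reached from user code via the <emmintrin.h> wrappers
   (assumed names, not defined in this file): _mm_extract_epi16 expands to
   __builtin_ia32_vec_ext_v8hi and _mm_insert_epi16 to
   __builtin_ia32_vec_set_v8hi.

	#include <emmintrin.h>

	int
	get3 (__m128i x)
	{
	  return _mm_extract_epi16 (x, 3);
	}

	__m128i
	put3 (__m128i x, int v)
	{
	  return _mm_insert_epi16 (x, v, 3);
	}

   Compile with -msse2.  */
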
15883/* Errors in the source file can cause expand_expr to return const0_rtx
15884 where we expect a vector. To avoid crashing, use one of the vector
15885 clear instructions. */
15886static rtx
15887safe_vector_operand (rtx x, enum machine_mode mode)
15888{
15889 if (x == const0_rtx)
15890 x = CONST0_RTX (mode);
15891 return x;
15892}
15893
15894/* Subroutine of ix86_expand_builtin to take care of binop insns. */
15895
15896static rtx
15897ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15898{
15899 rtx pat, xops[3];
15900 tree arg0 = TREE_VALUE (arglist);
15901 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15902 rtx op0 = expand_normal (arg0);
15903 rtx op1 = expand_normal (arg1);
15904 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15905 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15906 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15907
15908 if (VECTOR_MODE_P (mode0))
15909 op0 = safe_vector_operand (op0, mode0);
15910 if (VECTOR_MODE_P (mode1))
15911 op1 = safe_vector_operand (op1, mode1);
15912
15913 if (optimize || !target
15914 || GET_MODE (target) != tmode
15915 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15916 target = gen_reg_rtx (tmode);
15917
15918 if (GET_MODE (op1) == SImode && mode1 == TImode)
15919 {
15920 rtx x = gen_reg_rtx (V4SImode);
15921 emit_insn (gen_sse2_loadd (x, op1));
15922 op1 = gen_lowpart (TImode, x);
15923 }
15924
15925 /* The insn must want input operands in the same modes as the
15926 result. */
15927 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15928 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15929
15930 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15931 op0 = copy_to_mode_reg (mode0, op0);
15932 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15933 op1 = copy_to_mode_reg (mode1, op1);
15934
15935 /* ??? Using ix86_fixup_binary_operands is problematic when
15936 we've got mismatched modes. Fake it. */
15937
15938 xops[0] = target;
15939 xops[1] = op0;
15940 xops[2] = op1;
15941
15942 if (tmode == mode0 && tmode == mode1)
15943 {
15944 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15945 op0 = xops[1];
15946 op1 = xops[2];
15947 }
15948 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15949 {
15950 op0 = force_reg (mode0, op0);
15951 op1 = force_reg (mode1, op1);
15952 target = gen_reg_rtx (tmode);
15953 }
15954
15955 pat = GEN_FCN (icode) (target, op0, op1);
15956 if (! pat)
15957 return 0;
15958 emit_insn (pat);
15959 return target;
15960}
15961
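/* Note on the SImode/TImode conversion in ix86_expand_binop_builtin above:
   when a builtin's C prototype takes a plain int but the insn pattern wants
   that operand in TImode (apparently the vector shift counts), the int is
   first loaded into the low element of a V4SI register with sse2_loadd and
   that register is then viewed as TImode.  */
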
15962/* Subroutine of ix86_expand_builtin to take care of stores. */
15963
15964static rtx
15965ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15966{
15967 rtx pat;
15968 tree arg0 = TREE_VALUE (arglist);
15969 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15970 rtx op0 = expand_normal (arg0);
15971 rtx op1 = expand_normal (arg1);
15972 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15973 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15974
15975 if (VECTOR_MODE_P (mode1))
15976 op1 = safe_vector_operand (op1, mode1);
15977
15978 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15979 op1 = copy_to_mode_reg (mode1, op1);
15980
15981 pat = GEN_FCN (icode) (op0, op1);
15982 if (pat)
15983 emit_insn (pat);
15984 return 0;
15985}
15986
15987/* Subroutine of ix86_expand_builtin to take care of unop insns. */
15988
15989static rtx
15990ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15991 rtx target, int do_load)
15992{
15993 rtx pat;
15994 tree arg0 = TREE_VALUE (arglist);
15995 rtx op0 = expand_normal (arg0);
15996 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15997 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15998
15999 if (optimize || !target
16000 || GET_MODE (target) != tmode
16001 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16002 target = gen_reg_rtx (tmode);
16003 if (do_load)
16004 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16005 else
16006 {
16007 if (VECTOR_MODE_P (mode0))
16008 op0 = safe_vector_operand (op0, mode0);
16009
16010 if ((optimize && !register_operand (op0, mode0))
16011 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16012 op0 = copy_to_mode_reg (mode0, op0);
16013 }
16014
16015 pat = GEN_FCN (icode) (target, op0);
16016 if (! pat)
16017 return 0;
16018 emit_insn (pat);
16019 return target;
16020}
16021
16022/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16023 sqrtss, rsqrtss, rcpss. */
16024
16025static rtx
16026ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
16027{
16028 rtx pat;
16029 tree arg0 = TREE_VALUE (arglist);
16030 rtx op1, op0 = expand_normal (arg0);
16031 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16032 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16033
16034 if (optimize || !target
16035 || GET_MODE (target) != tmode
16036 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16037 target = gen_reg_rtx (tmode);
16038
16039 if (VECTOR_MODE_P (mode0))
16040 op0 = safe_vector_operand (op0, mode0);
16041
16042 if ((optimize && !register_operand (op0, mode0))
16043 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16044 op0 = copy_to_mode_reg (mode0, op0);
16045
16046 op1 = op0;
16047 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
16048 op1 = copy_to_mode_reg (mode0, op1);
16049
16050 pat = GEN_FCN (icode) (target, op0, op1);
16051 if (! pat)
16052 return 0;
16053 emit_insn (pat);
16054 return target;
16055}
16056
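/* Intent, for reference: the scalar insns behind these builtins merge the
   newly computed low element with a second input that supplies the upper
   elements unchanged, which is why the expander above passes the same
   register for both inputs.  E.g. __builtin_ia32_sqrtss (x) yields
   { sqrtf (x[0]), x[1], x[2], x[3] }.  */
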
16057/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
16058
16059static rtx
16060ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
16061 rtx target)
16062{
16063 rtx pat;
16064 tree arg0 = TREE_VALUE (arglist);
16065 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16066 rtx op0 = expand_normal (arg0);
16067 rtx op1 = expand_normal (arg1);
16068 rtx op2;
16069 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
16070 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
16071 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
16072 enum rtx_code comparison = d->comparison;
16073
16074 if (VECTOR_MODE_P (mode0))
16075 op0 = safe_vector_operand (op0, mode0);
16076 if (VECTOR_MODE_P (mode1))
16077 op1 = safe_vector_operand (op1, mode1);
16078
16079 /* Swap operands if we have a comparison that isn't available in
16080 hardware. */
16081 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16082 {
16083 rtx tmp = gen_reg_rtx (mode1);
16084 emit_move_insn (tmp, op1);
16085 op1 = op0;
16086 op0 = tmp;
16087 }
16088
16089 if (optimize || !target
16090 || GET_MODE (target) != tmode
16091 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
16092 target = gen_reg_rtx (tmode);
16093
16094 if ((optimize && !register_operand (op0, mode0))
16095 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
16096 op0 = copy_to_mode_reg (mode0, op0);
16097 if ((optimize && !register_operand (op1, mode1))
16098 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16099 op1 = copy_to_mode_reg (mode1, op1);
16100
16101 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16102 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16103 if (! pat)
16104 return 0;
16105 emit_insn (pat);
16106 return target;
16107}
16108
16109/* Subroutine of ix86_expand_builtin to take care of comi insns. */
16110
16111static rtx
16112ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16113 rtx target)
16114{
16115 rtx pat;
16116 tree arg0 = TREE_VALUE (arglist);
16117 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16118 rtx op0 = expand_normal (arg0);
16119 rtx op1 = expand_normal (arg1);
16120 rtx op2;
16121 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16122 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16123 enum rtx_code comparison = d->comparison;
16124
16125 if (VECTOR_MODE_P (mode0))
16126 op0 = safe_vector_operand (op0, mode0);
16127 if (VECTOR_MODE_P (mode1))
16128 op1 = safe_vector_operand (op1, mode1);
16129
16130 /* Swap operands if we have a comparison that isn't available in
16131 hardware. */
16132 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16133 {
16134 rtx tmp = op1;
16135 op1 = op0;
16136 op0 = tmp;
16137 }
16138
16139 target = gen_reg_rtx (SImode);
16140 emit_move_insn (target, const0_rtx);
16141 target = gen_rtx_SUBREG (QImode, target, 0);
16142
16143 if ((optimize && !register_operand (op0, mode0))
16144 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16145 op0 = copy_to_mode_reg (mode0, op0);
16146 if ((optimize && !register_operand (op1, mode1))
16147 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16148 op1 = copy_to_mode_reg (mode1, op1);
16149
16150 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16151 pat = GEN_FCN (d->icode) (op0, op1);
16152 if (! pat)
16153 return 0;
16154 emit_insn (pat);
16155 emit_insn (gen_rtx_SET (VOIDmode,
16156 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16157 gen_rtx_fmt_ee (comparison, QImode,
16158 SET_DEST (pat),
16159 const0_rtx)));
16160
16161 return SUBREG_REG (target);
16162}
16163
16164/* Return the integer constant in ARG. Constrain it to be in the range
16165 of the subparts of VEC_TYPE; issue an error if not. */
16166
16167static int
16168get_element_number (tree vec_type, tree arg)
16169{
16170 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16171
16172 if (!host_integerp (arg, 1)
16173 || (elt = tree_low_cst (arg, 1), elt > max))
16174 {
16175 error ("selector must be an integer constant in the range 0..%wi", max);
16176 return 0;
16177 }
16178
16179 return elt;
16180}
16181
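/* For example, with a 4-element vector type the selector must be a
   compile-time integer constant in the range 0..3:

	typedef float __v4sf __attribute__ ((__vector_size__ (16)));

	float ok  (__v4sf v) { return __builtin_ia32_vec_ext_v4sf (v, 2); }
	float bad (__v4sf v) { return __builtin_ia32_vec_ext_v4sf (v, 7); }

   The second call reaches the error () above ("selector must be an integer
   constant in the range 0..3") and element 0 is used instead.  */
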
16182/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16183 ix86_expand_vector_init. We DO have language-level syntax for this, in
16184 the form of (type){ init-list }. Except that since we can't place emms
16185 instructions from inside the compiler, we can't allow the use of MMX
16186 registers unless the user explicitly asks for it. So we do *not* define
16187 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16188 we have builtins invoked by mmintrin.h that gives us license to emit
16189 these sorts of instructions. */
16190
16191static rtx
16192ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16193{
16194 enum machine_mode tmode = TYPE_MODE (type);
16195 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16196 int i, n_elt = GET_MODE_NUNITS (tmode);
16197 rtvec v = rtvec_alloc (n_elt);
16198
16199 gcc_assert (VECTOR_MODE_P (tmode));
16200
16201 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16202 {
16203 rtx x = expand_normal (TREE_VALUE (arglist));
16204 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16205 }
16206
16207 gcc_assert (arglist == NULL);
16208
16209 if (!target || !register_operand (target, tmode))
16210 target = gen_reg_rtx (tmode);
16211
16212 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16213 return target;
16214}
16215
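/* In user terms (a sketch, assuming the standard <mmintrin.h> wrappers):
   writing

	(__v4hi) { a, b, c, d }

   uses the language-level initializer syntax mentioned in the comment
   above, whereas

	_mm_setr_pi16 (a, b, c, d)

   goes through __builtin_ia32_vec_init_v4hi and this expander, which is
   how MMX register use stays limited to code that has explicitly opted in
   via the intrinsics.  */
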
16216/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16217 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16218 had a language-level syntax for referencing vector elements. */
16219
16220static rtx
16221ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16222{
16223 enum machine_mode tmode, mode0;
16224 tree arg0, arg1;
16225 int elt;
16226 rtx op0;
16227
16228 arg0 = TREE_VALUE (arglist);
16229 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16230
16231 op0 = expand_normal (arg0);
16232 elt = get_element_number (TREE_TYPE (arg0), arg1);
16233
16234 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16235 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16236 gcc_assert (VECTOR_MODE_P (mode0));
16237
16238 op0 = force_reg (mode0, op0);
16239
16240 if (optimize || !target || !register_operand (target, tmode))
16241 target = gen_reg_rtx (tmode);
16242
16243 ix86_expand_vector_extract (true, target, op0, elt);
16244
16245 return target;
16246}
16247
16248/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16249 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16250 a language-level syntax for referencing vector elements. */
16251
16252static rtx
16253ix86_expand_vec_set_builtin (tree arglist)
16254{
16255 enum machine_mode tmode, mode1;
16256 tree arg0, arg1, arg2;
16257 int elt;
16258 rtx op0, op1, target;
16259
16260 arg0 = TREE_VALUE (arglist);
16261 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16262 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16263
16264 tmode = TYPE_MODE (TREE_TYPE (arg0));
16265 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16266 gcc_assert (VECTOR_MODE_P (tmode));
16267
16268 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16269 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16270 elt = get_element_number (TREE_TYPE (arg0), arg2);
16271
16272 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16273 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16274
16275 op0 = force_reg (tmode, op0);
16276 op1 = force_reg (mode1, op1);
16277
16278 /* OP0 is the source of these builtin functions and shouldn't be
16279 modified. Create a copy, use it and return it as target. */
16280 target = gen_reg_rtx (tmode);
16281 emit_move_insn (target, op0);
16282 ix86_expand_vector_set (true, target, op1, elt);
16283
16284 return target;
16285}
16286
16287/* Expand an expression EXP that calls a built-in function,
16288 with result going to TARGET if that's convenient
16289 (and in mode MODE if that's convenient).
16290 SUBTARGET may be used as the target for computing one of EXP's operands.
16291 IGNORE is nonzero if the value is to be ignored. */
16292
16293static rtx
16294ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16295 enum machine_mode mode ATTRIBUTE_UNUSED,
16296 int ignore ATTRIBUTE_UNUSED)
16297{
16298 const struct builtin_description *d;
16299 size_t i;
16300 enum insn_code icode;
16301 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16302 tree arglist = TREE_OPERAND (exp, 1);
16043 /* Access to the vec_init patterns. */
16044 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
16045 integer_type_node, NULL_TREE);
16046 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
16047 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
16048
16049 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
16050 short_integer_type_node,
16051 short_integer_type_node,
16052 short_integer_type_node, NULL_TREE);
16053 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
16054 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
16055
16056 ftype = build_function_type_list (V8QI_type_node, char_type_node,
16057 char_type_node, char_type_node,
16058 char_type_node, char_type_node,
16059 char_type_node, char_type_node,
16060 char_type_node, NULL_TREE);
16061 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
16062 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
16063
16064 /* Access to the vec_extract patterns. */
16065 ftype = build_function_type_list (double_type_node, V2DF_type_node,
16066 integer_type_node, NULL_TREE);
16067 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
16068 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
16069
16070 ftype = build_function_type_list (long_long_integer_type_node,
16071 V2DI_type_node, integer_type_node,
16072 NULL_TREE);
16073 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
16074 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
16075
16076 ftype = build_function_type_list (float_type_node, V4SF_type_node,
16077 integer_type_node, NULL_TREE);
16078 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
16079 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
16080
16081 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
16082 integer_type_node, NULL_TREE);
16083 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
16084 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
16085
16086 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
16087 integer_type_node, NULL_TREE);
16088 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
16089 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
16090
16091 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
16092 integer_type_node, NULL_TREE);
16093 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
16094 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
16095
16096 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
16097 integer_type_node, NULL_TREE);
16098 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
16099 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
16100
16101 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
16102 integer_type_node, NULL_TREE);
16103 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
16104
16105 /* Access to the vec_set patterns. */
16106 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
16107 intHI_type_node,
16108 integer_type_node, NULL_TREE);
16109 def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
16110 ftype, IX86_BUILTIN_VEC_SET_V8HI);
16111
16112 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
16113 intHI_type_node,
16114 integer_type_node, NULL_TREE);
16115 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
16116 ftype, IX86_BUILTIN_VEC_SET_V4HI);
16117}
16118
16119/* Errors in the source file can cause expand_expr to return const0_rtx
16120 where we expect a vector. To avoid crashing, use one of the vector
16121 clear instructions. */
16122static rtx
16123safe_vector_operand (rtx x, enum machine_mode mode)
16124{
16125 if (x == const0_rtx)
16126 x = CONST0_RTX (mode);
16127 return x;
16128}
16129
16130/* Subroutine of ix86_expand_builtin to take care of binop insns. */
16131
16132static rtx
16133ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
16134{
16135 rtx pat, xops[3];
16136 tree arg0 = TREE_VALUE (arglist);
16137 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16138 rtx op0 = expand_normal (arg0);
16139 rtx op1 = expand_normal (arg1);
16140 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16141 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16142 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
16143
16144 if (VECTOR_MODE_P (mode0))
16145 op0 = safe_vector_operand (op0, mode0);
16146 if (VECTOR_MODE_P (mode1))
16147 op1 = safe_vector_operand (op1, mode1);
16148
16149 if (optimize || !target
16150 || GET_MODE (target) != tmode
16151 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16152 target = gen_reg_rtx (tmode);
16153
16154 if (GET_MODE (op1) == SImode && mode1 == TImode)
16155 {
16156 rtx x = gen_reg_rtx (V4SImode);
16157 emit_insn (gen_sse2_loadd (x, op1));
16158 op1 = gen_lowpart (TImode, x);
16159 }
16160
16161 /* The insn must want input operands in the same modes as the
16162 result. */
16163 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
16164 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
16165
16166 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16167 op0 = copy_to_mode_reg (mode0, op0);
16168 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16169 op1 = copy_to_mode_reg (mode1, op1);
16170
16171 /* ??? Using ix86_fixup_binary_operands is problematic when
16172 we've got mismatched modes. Fake it. */
16173
16174 xops[0] = target;
16175 xops[1] = op0;
16176 xops[2] = op1;
16177
16178 if (tmode == mode0 && tmode == mode1)
16179 {
16180 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
16181 op0 = xops[1];
16182 op1 = xops[2];
16183 }
16184 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
16185 {
16186 op0 = force_reg (mode0, op0);
16187 op1 = force_reg (mode1, op1);
16188 target = gen_reg_rtx (tmode);
16189 }
16190
16191 pat = GEN_FCN (icode) (target, op0, op1);
16192 if (! pat)
16193 return 0;
16194 emit_insn (pat);
16195 return target;
16196}
16197
16198/* Subroutine of ix86_expand_builtin to take care of stores. */
16199
16200static rtx
16201ix86_expand_store_builtin (enum insn_code icode, tree arglist)
16202{
16203 rtx pat;
16204 tree arg0 = TREE_VALUE (arglist);
16205 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16206 rtx op0 = expand_normal (arg0);
16207 rtx op1 = expand_normal (arg1);
16208 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
16209 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
16210
16211 if (VECTOR_MODE_P (mode1))
16212 op1 = safe_vector_operand (op1, mode1);
16213
16214 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16215 op1 = copy_to_mode_reg (mode1, op1);
16216
16217 pat = GEN_FCN (icode) (op0, op1);
16218 if (pat)
16219 emit_insn (pat);
16220 return 0;
16221}
16222
16223/* Subroutine of ix86_expand_builtin to take care of unop insns. */
16224
16225static rtx
16226ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
16227 rtx target, int do_load)
16228{
16229 rtx pat;
16230 tree arg0 = TREE_VALUE (arglist);
16231 rtx op0 = expand_normal (arg0);
16232 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16233 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16234
16235 if (optimize || !target
16236 || GET_MODE (target) != tmode
16237 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16238 target = gen_reg_rtx (tmode);
16239 if (do_load)
16240 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16241 else
16242 {
16243 if (VECTOR_MODE_P (mode0))
16244 op0 = safe_vector_operand (op0, mode0);
16245
16246 if ((optimize && !register_operand (op0, mode0))
16247 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16248 op0 = copy_to_mode_reg (mode0, op0);
16249 }
16250
16251 pat = GEN_FCN (icode) (target, op0);
16252 if (! pat)
16253 return 0;
16254 emit_insn (pat);
16255 return target;
16256}
16257
16258/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16259 sqrtss, rsqrtss, rcpss. */
16260
16261static rtx
16262ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
16263{
16264 rtx pat;
16265 tree arg0 = TREE_VALUE (arglist);
16266 rtx op1, op0 = expand_normal (arg0);
16267 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16268 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16269
16270 if (optimize || !target
16271 || GET_MODE (target) != tmode
16272 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16273 target = gen_reg_rtx (tmode);
16274
16275 if (VECTOR_MODE_P (mode0))
16276 op0 = safe_vector_operand (op0, mode0);
16277
16278 if ((optimize && !register_operand (op0, mode0))
16279 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16280 op0 = copy_to_mode_reg (mode0, op0);
16281
16282 op1 = op0;
16283 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
16284 op1 = copy_to_mode_reg (mode0, op1);
16285
16286 pat = GEN_FCN (icode) (target, op0, op1);
16287 if (! pat)
16288 return 0;
16289 emit_insn (pat);
16290 return target;
16291}
16292
16293/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
16294
16295static rtx
16296ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
16297 rtx target)
16298{
16299 rtx pat;
16300 tree arg0 = TREE_VALUE (arglist);
16301 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16302 rtx op0 = expand_normal (arg0);
16303 rtx op1 = expand_normal (arg1);
16304 rtx op2;
16305 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
16306 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
16307 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
16308 enum rtx_code comparison = d->comparison;
16309
16310 if (VECTOR_MODE_P (mode0))
16311 op0 = safe_vector_operand (op0, mode0);
16312 if (VECTOR_MODE_P (mode1))
16313 op1 = safe_vector_operand (op1, mode1);
16314
16315 /* Swap operands if we have a comparison that isn't available in
16316 hardware. */
16317 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16318 {
16319 rtx tmp = gen_reg_rtx (mode1);
16320 emit_move_insn (tmp, op1);
16321 op1 = op0;
16322 op0 = tmp;
16323 }
16324
16325 if (optimize || !target
16326 || GET_MODE (target) != tmode
16327 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
16328 target = gen_reg_rtx (tmode);
16329
16330 if ((optimize && !register_operand (op0, mode0))
16331 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
16332 op0 = copy_to_mode_reg (mode0, op0);
16333 if ((optimize && !register_operand (op1, mode1))
16334 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16335 op1 = copy_to_mode_reg (mode1, op1);
16336
16337 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16338 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16339 if (! pat)
16340 return 0;
16341 emit_insn (pat);
16342 return target;
16343}
16344
16345/* Subroutine of ix86_expand_builtin to take care of comi insns. */
16346
16347static rtx
16348ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16349 rtx target)
16350{
16351 rtx pat;
16352 tree arg0 = TREE_VALUE (arglist);
16353 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16354 rtx op0 = expand_normal (arg0);
16355 rtx op1 = expand_normal (arg1);
16356 rtx op2;
16357 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16358 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16359 enum rtx_code comparison = d->comparison;
16360
16361 if (VECTOR_MODE_P (mode0))
16362 op0 = safe_vector_operand (op0, mode0);
16363 if (VECTOR_MODE_P (mode1))
16364 op1 = safe_vector_operand (op1, mode1);
16365
16366 /* Swap operands if we have a comparison that isn't available in
16367 hardware. */
16368 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16369 {
16370 rtx tmp = op1;
16371 op1 = op0;
16372 op0 = tmp;
16373 }
16374
16375 target = gen_reg_rtx (SImode);
16376 emit_move_insn (target, const0_rtx);
16377 target = gen_rtx_SUBREG (QImode, target, 0);
16378
16379 if ((optimize && !register_operand (op0, mode0))
16380 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16381 op0 = copy_to_mode_reg (mode0, op0);
16382 if ((optimize && !register_operand (op1, mode1))
16383 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16384 op1 = copy_to_mode_reg (mode1, op1);
16385
16386 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16387 pat = GEN_FCN (d->icode) (op0, op1);
16388 if (! pat)
16389 return 0;
16390 emit_insn (pat);
16391 emit_insn (gen_rtx_SET (VOIDmode,
16392 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16393 gen_rtx_fmt_ee (comparison, QImode,
16394 SET_DEST (pat),
16395 const0_rtx)));
16396
16397 return SUBREG_REG (target);
16398}
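/* Illustrative sketch (not part of the original file): the comi expander
   above backs the scalar compare intrinsics that return a plain int.  A
   separate user translation unit, compiled with -msse, might use it like
   this; _mm_comieq_ss is the usual xmmintrin.h wrapper.  */
#if 0
#include <xmmintrin.h>

int
scalars_equal (__m128 a, __m128 b)
{
  /* Expands to a comiss followed by a setcc of the flags, written into
     the low byte of the zeroed SImode pseudo created above.  */
  return _mm_comieq_ss (a, b);
}
#endif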
16399
16400/* Return the integer constant in ARG. Constrain it to be in the range
16401 of the subparts of VEC_TYPE; issue an error if not. */
16402
16403static int
16404get_element_number (tree vec_type, tree arg)
16405{
16406 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16407
16408 if (!host_integerp (arg, 1)
16409 || (elt = tree_low_cst (arg, 1), elt > max))
16410 {
16411 error ("selector must be an integer constant in the range 0..%wi", max);
16412 return 0;
16413 }
16414
16415 return elt;
16416}
16417
16418/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16419 ix86_expand_vector_init. We DO have language-level syntax for this, in
16420 the form of (type){ init-list }. Except that since we can't place emms
16421 instructions from inside the compiler, we can't allow the use of MMX
16422 registers unless the user explicitly asks for it. So we do *not* define
16423 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16424   we have builtins invoked by mmintrin.h that give us license to emit
16425 these sorts of instructions. */
16426
16427static rtx
16428ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16429{
16430 enum machine_mode tmode = TYPE_MODE (type);
16431 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16432 int i, n_elt = GET_MODE_NUNITS (tmode);
16433 rtvec v = rtvec_alloc (n_elt);
16434
16435 gcc_assert (VECTOR_MODE_P (tmode));
16436
16437 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16438 {
16439 rtx x = expand_normal (TREE_VALUE (arglist));
16440 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16441 }
16442
16443 gcc_assert (arglist == NULL);
16444
16445 if (!target || !register_operand (target, tmode))
16446 target = gen_reg_rtx (tmode);
16447
16448 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16449 return target;
16450}
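/* Illustrative sketch (not part of the original file): mmintrin.h wraps the
   vec_init builtins mentioned above; for example _mm_set_pi16 is normally a
   thin wrapper over __builtin_ia32_vec_init_v4hi, which is routed here.  A
   separate user translation unit, compiled with -mmmx, might look like
   this.  */
#if 0
#include <mmintrin.h>

__m64
make_v4hi (short a, short b, short c, short d)
{
  /* Lands in ix86_expand_vec_init_builtin with tmode == V4HImode.  */
  return _mm_set_pi16 (a, b, c, d);
}
#endif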
16451
16452/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16453 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16454 had a language-level syntax for referencing vector elements. */
16455
16456static rtx
16457ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16458{
16459 enum machine_mode tmode, mode0;
16460 tree arg0, arg1;
16461 int elt;
16462 rtx op0;
16463
16464 arg0 = TREE_VALUE (arglist);
16465 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16466
16467 op0 = expand_normal (arg0);
16468 elt = get_element_number (TREE_TYPE (arg0), arg1);
16469
16470 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16471 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16472 gcc_assert (VECTOR_MODE_P (mode0));
16473
16474 op0 = force_reg (mode0, op0);
16475
16476 if (optimize || !target || !register_operand (target, tmode))
16477 target = gen_reg_rtx (tmode);
16478
16479 ix86_expand_vector_extract (true, target, op0, elt);
16480
16481 return target;
16482}
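/* Illustrative sketch (not part of the original file): the vec_ext builtins
   require a compile-time-constant selector, which get_element_number above
   validates.  _mm_extract_pi16 from xmmintrin.h is typically a thin wrapper
   over __builtin_ia32_vec_ext_v4hi; a separate user translation unit,
   compiled with -msse, might look like this.  */
#if 0
#include <xmmintrin.h>

int
third_lane (__m64 v)
{
  /* Selector 2 is within 0..3 for V4HImode.  A non-constant or
     out-of-range selector would hit the "selector must be an integer
     constant in the range 0..N" error above.  */
  return _mm_extract_pi16 (v, 2);
}
#endif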
16483
16484/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16485 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16486 a language-level syntax for referencing vector elements. */
16487
16488static rtx
16489ix86_expand_vec_set_builtin (tree arglist)
16490{
16491 enum machine_mode tmode, mode1;
16492 tree arg0, arg1, arg2;
16493 int elt;
16494 rtx op0, op1, target;
16495
16496 arg0 = TREE_VALUE (arglist);
16497 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16498 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16499
16500 tmode = TYPE_MODE (TREE_TYPE (arg0));
16501 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16502 gcc_assert (VECTOR_MODE_P (tmode));
16503
16504 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16505 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16506 elt = get_element_number (TREE_TYPE (arg0), arg2);
16507
16508 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16509 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16510
16511 op0 = force_reg (tmode, op0);
16512 op1 = force_reg (mode1, op1);
16513
16514 /* OP0 is the source of these builtin functions and shouldn't be
16515 modified. Create a copy, use it and return it as target. */
16516 target = gen_reg_rtx (tmode);
16517 emit_move_insn (target, op0);
16518 ix86_expand_vector_set (true, target, op1, elt);
16519
16520 return target;
16521}
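/* Illustrative sketch (not part of the original file): the matching vec_set
   builtins (e.g. __builtin_ia32_vec_set_v4hi behind _mm_insert_pi16) are
   routed here; note that the expander copies OP0 into a fresh register, so
   the source vector itself is left untouched.  */
#if 0
#include <xmmintrin.h>

__m64
replace_third_lane (__m64 v, int w)
{
  /* The lane index must again be an in-range integer constant.  */
  return _mm_insert_pi16 (v, w, 2);
}
#endif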
16522
16523/* Expand an expression EXP that calls a built-in function,
16524 with result going to TARGET if that's convenient
16525 (and in mode MODE if that's convenient).
16526 SUBTARGET may be used as the target for computing one of EXP's operands.
16527 IGNORE is nonzero if the value is to be ignored. */
16528
16529static rtx
16530ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16531 enum machine_mode mode ATTRIBUTE_UNUSED,
16532 int ignore ATTRIBUTE_UNUSED)
16533{
16534 const struct builtin_description *d;
16535 size_t i;
16536 enum insn_code icode;
16537 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16538 tree arglist = TREE_OPERAND (exp, 1);
16539 tree arg0, arg1, arg2, arg3;
16540 rtx op0, op1, op2, op3, pat;
16541 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
16542 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16543
16544 switch (fcode)
16545 {
16546 case IX86_BUILTIN_EMMS:
16547 emit_insn (gen_mmx_emms ());
16548 return 0;
16549
16550 case IX86_BUILTIN_SFENCE:
16551 emit_insn (gen_sse_sfence ());
16552 return 0;
16553
16554 case IX86_BUILTIN_MASKMOVQ:
16555 case IX86_BUILTIN_MASKMOVDQU:
16556 icode = (fcode == IX86_BUILTIN_MASKMOVQ
16557 ? CODE_FOR_mmx_maskmovq
16558 : CODE_FOR_sse2_maskmovdqu);
16559 /* Note the arg order is different from the operand order. */
16560 arg1 = TREE_VALUE (arglist);
16561 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16562 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16563 op0 = expand_normal (arg0);
16564 op1 = expand_normal (arg1);
16565 op2 = expand_normal (arg2);
16566 mode0 = insn_data[icode].operand[0].mode;
16567 mode1 = insn_data[icode].operand[1].mode;
16568 mode2 = insn_data[icode].operand[2].mode;
16569
16570 op0 = force_reg (Pmode, op0);
16571 op0 = gen_rtx_MEM (mode1, op0);
16572
16573 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16574 op0 = copy_to_mode_reg (mode0, op0);
16575 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16576 op1 = copy_to_mode_reg (mode1, op1);
16577 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16578 op2 = copy_to_mode_reg (mode2, op2);
16579 pat = GEN_FCN (icode) (op0, op1, op2);
16580 if (! pat)
16581 return 0;
16582 emit_insn (pat);
16583 return 0;
16584
16585 case IX86_BUILTIN_SQRTSS:
16586 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16587 case IX86_BUILTIN_RSQRTSS:
16588 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16589 case IX86_BUILTIN_RCPSS:
16590 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16591
16592 case IX86_BUILTIN_LOADUPS:
16593 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16594
16595 case IX86_BUILTIN_STOREUPS:
16596 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16597
16598 case IX86_BUILTIN_LOADHPS:
16599 case IX86_BUILTIN_LOADLPS:
16600 case IX86_BUILTIN_LOADHPD:
16601 case IX86_BUILTIN_LOADLPD:
16602 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16603 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16604 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16605 : CODE_FOR_sse2_loadlpd);
16606 arg0 = TREE_VALUE (arglist);
16607 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16608 op0 = expand_normal (arg0);
16609 op1 = expand_normal (arg1);
16610 tmode = insn_data[icode].operand[0].mode;
16611 mode0 = insn_data[icode].operand[1].mode;
16612 mode1 = insn_data[icode].operand[2].mode;
16613
16614 op0 = force_reg (mode0, op0);
16615 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16616 if (optimize || target == 0
16617 || GET_MODE (target) != tmode
16618 || !register_operand (target, tmode))
16619 target = gen_reg_rtx (tmode);
16620 pat = GEN_FCN (icode) (target, op0, op1);
16621 if (! pat)
16622 return 0;
16623 emit_insn (pat);
16624 return target;
16625
16626 case IX86_BUILTIN_STOREHPS:
16627 case IX86_BUILTIN_STORELPS:
16628 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16629 : CODE_FOR_sse_storelps);
16630 arg0 = TREE_VALUE (arglist);
16631 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16632 op0 = expand_normal (arg0);
16633 op1 = expand_normal (arg1);
16634 mode0 = insn_data[icode].operand[0].mode;
16635 mode1 = insn_data[icode].operand[1].mode;
16636
16637 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16638 op1 = force_reg (mode1, op1);
16639
16640 pat = GEN_FCN (icode) (op0, op1);
16641 if (! pat)
16642 return 0;
16643 emit_insn (pat);
16644 return const0_rtx;
16645
16646 case IX86_BUILTIN_MOVNTPS:
16647 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16648 case IX86_BUILTIN_MOVNTQ:
16649 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16650
16651 case IX86_BUILTIN_LDMXCSR:
16652 op0 = expand_normal (TREE_VALUE (arglist));
16653 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16654 emit_move_insn (target, op0);
16655 emit_insn (gen_sse_ldmxcsr (target));
16656 return 0;
16657
16658 case IX86_BUILTIN_STMXCSR:
16659 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16660 emit_insn (gen_sse_stmxcsr (target));
16661 return copy_to_mode_reg (SImode, target);
16662
16663 case IX86_BUILTIN_SHUFPS:
16664 case IX86_BUILTIN_SHUFPD:
16665 icode = (fcode == IX86_BUILTIN_SHUFPS
16666 ? CODE_FOR_sse_shufps
16667 : CODE_FOR_sse2_shufpd);
16668 arg0 = TREE_VALUE (arglist);
16669 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16670 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16671 op0 = expand_normal (arg0);
16672 op1 = expand_normal (arg1);
16673 op2 = expand_normal (arg2);
16674 tmode = insn_data[icode].operand[0].mode;
16675 mode0 = insn_data[icode].operand[1].mode;
16676 mode1 = insn_data[icode].operand[2].mode;
16677 mode2 = insn_data[icode].operand[3].mode;
16678
16679 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16680 op0 = copy_to_mode_reg (mode0, op0);
16681 if ((optimize && !register_operand (op1, mode1))
16682 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16683 op1 = copy_to_mode_reg (mode1, op1);
16684 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16685 {
16686 /* @@@ better error message */
16687 error ("mask must be an immediate");
16688 return gen_reg_rtx (tmode);
16689 }
16690 if (optimize || target == 0
16691 || GET_MODE (target) != tmode
16692 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16693 target = gen_reg_rtx (tmode);
16694 pat = GEN_FCN (icode) (target, op0, op1, op2);
16695 if (! pat)
16696 return 0;
16697 emit_insn (pat);
16698 return target;
16699
16700 case IX86_BUILTIN_PSHUFW:
16701 case IX86_BUILTIN_PSHUFD:
16702 case IX86_BUILTIN_PSHUFHW:
16703 case IX86_BUILTIN_PSHUFLW:
16704 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16705 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16706 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16707 : CODE_FOR_mmx_pshufw);
16708 arg0 = TREE_VALUE (arglist);
16709 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16710 op0 = expand_normal (arg0);
16711 op1 = expand_normal (arg1);
16712 tmode = insn_data[icode].operand[0].mode;
16713 mode1 = insn_data[icode].operand[1].mode;
16714 mode2 = insn_data[icode].operand[2].mode;
16715
16716 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16717 op0 = copy_to_mode_reg (mode1, op0);
16718 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16719 {
16720 /* @@@ better error message */
16721 error ("mask must be an immediate");
16722 return const0_rtx;
16723 }
16724 if (target == 0
16725 || GET_MODE (target) != tmode
16726 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16727 target = gen_reg_rtx (tmode);
16728 pat = GEN_FCN (icode) (target, op0, op1);
16729 if (! pat)
16730 return 0;
16731 emit_insn (pat);
16732 return target;
16733
16734 case IX86_BUILTIN_PSLLWI128:
16735 icode = CODE_FOR_ashlv8hi3;
16736 goto do_pshifti;
16737 case IX86_BUILTIN_PSLLDI128:
16738 icode = CODE_FOR_ashlv4si3;
16739 goto do_pshifti;
16740 case IX86_BUILTIN_PSLLQI128:
16741 icode = CODE_FOR_ashlv2di3;
16742 goto do_pshifti;
16743 case IX86_BUILTIN_PSRAWI128:
16744 icode = CODE_FOR_ashrv8hi3;
16745 goto do_pshifti;
16746 case IX86_BUILTIN_PSRADI128:
16747 icode = CODE_FOR_ashrv4si3;
16748 goto do_pshifti;
16749 case IX86_BUILTIN_PSRLWI128:
16750 icode = CODE_FOR_lshrv8hi3;
16751 goto do_pshifti;
16752 case IX86_BUILTIN_PSRLDI128:
16753 icode = CODE_FOR_lshrv4si3;
16754 goto do_pshifti;
16755 case IX86_BUILTIN_PSRLQI128:
16756 icode = CODE_FOR_lshrv2di3;
16757 goto do_pshifti;
16758 do_pshifti:
16759 arg0 = TREE_VALUE (arglist);
16760 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16761 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16762 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16763
16764 if (GET_CODE (op1) != CONST_INT)
16765 {
16766 error ("shift must be an immediate");
16767 return const0_rtx;
16768 }
16769 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
16770 op1 = GEN_INT (255);
16771
16772 tmode = insn_data[icode].operand[0].mode;
16773 mode1 = insn_data[icode].operand[1].mode;
16774 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16775 op0 = copy_to_reg (op0);
16776
16777 target = gen_reg_rtx (tmode);
16778 pat = GEN_FCN (icode) (target, op0, op1);
16779 if (!pat)
16780 return 0;
16781 emit_insn (pat);
16782 return target;
16783
16784 case IX86_BUILTIN_PSLLW128:
16785 icode = CODE_FOR_ashlv8hi3;
16786 goto do_pshift;
16787 case IX86_BUILTIN_PSLLD128:
16788 icode = CODE_FOR_ashlv4si3;
16789 goto do_pshift;
16790 case IX86_BUILTIN_PSLLQ128:
16791 icode = CODE_FOR_ashlv2di3;
16792 goto do_pshift;
16793 case IX86_BUILTIN_PSRAW128:
16794 icode = CODE_FOR_ashrv8hi3;
16795 goto do_pshift;
16796 case IX86_BUILTIN_PSRAD128:
16797 icode = CODE_FOR_ashrv4si3;
16798 goto do_pshift;
16799 case IX86_BUILTIN_PSRLW128:
16800 icode = CODE_FOR_lshrv8hi3;
16801 goto do_pshift;
16802 case IX86_BUILTIN_PSRLD128:
16803 icode = CODE_FOR_lshrv4si3;
16804 goto do_pshift;
16805 case IX86_BUILTIN_PSRLQ128:
16806 icode = CODE_FOR_lshrv2di3;
16807 goto do_pshift;
16808 do_pshift:
16809 arg0 = TREE_VALUE (arglist);
16810 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16811 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16812 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16813
16814 tmode = insn_data[icode].operand[0].mode;
16815 mode1 = insn_data[icode].operand[1].mode;
16816
16817 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16818 op0 = copy_to_reg (op0);
16819
16820 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
16821 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
16822 op1 = copy_to_reg (op1);
16823
16824 target = gen_reg_rtx (tmode);
16825 pat = GEN_FCN (icode) (target, op0, op1);
16826 if (!pat)
16827 return 0;
16828 emit_insn (pat);
16829 return target;
16830
16831 case IX86_BUILTIN_PSLLDQI128:
16832 case IX86_BUILTIN_PSRLDQI128:
16833 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16834 : CODE_FOR_sse2_lshrti3);
16835 arg0 = TREE_VALUE (arglist);
16836 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16837 op0 = expand_normal (arg0);
16838 op1 = expand_normal (arg1);
16839 tmode = insn_data[icode].operand[0].mode;
16840 mode1 = insn_data[icode].operand[1].mode;
16841 mode2 = insn_data[icode].operand[2].mode;
16842
16843 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16844 {
16845 op0 = copy_to_reg (op0);
16846 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16847 }
16848 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16849 {
16850 error ("shift must be an immediate");
16851 return const0_rtx;
16852 }
16853 target = gen_reg_rtx (V2DImode);
16854 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
16855 op0, op1);
16856 if (! pat)
16857 return 0;
16858 emit_insn (pat);
16859 return target;
16860
16861 case IX86_BUILTIN_FEMMS:
16862 emit_insn (gen_mmx_femms ());
16863 return NULL_RTX;
16864
16865 case IX86_BUILTIN_PAVGUSB:
16866 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16867
16868 case IX86_BUILTIN_PF2ID:
16869 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16870
16871 case IX86_BUILTIN_PFACC:
16872 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16873
16874 case IX86_BUILTIN_PFADD:
16875 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16876
16877 case IX86_BUILTIN_PFCMPEQ:
16878 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16879
16880 case IX86_BUILTIN_PFCMPGE:
16881 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16882
16883 case IX86_BUILTIN_PFCMPGT:
16884 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16885
16886 case IX86_BUILTIN_PFMAX:
16887 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16888
16889 case IX86_BUILTIN_PFMIN:
16890 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16891
16892 case IX86_BUILTIN_PFMUL:
16893 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16894
16895 case IX86_BUILTIN_PFRCP:
16896 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16897
16898 case IX86_BUILTIN_PFRCPIT1:
16899 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16900
16901 case IX86_BUILTIN_PFRCPIT2:
16902 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16903
16904 case IX86_BUILTIN_PFRSQIT1:
16905 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16906
16907 case IX86_BUILTIN_PFRSQRT:
16908 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16909
16910 case IX86_BUILTIN_PFSUB:
16911 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16912
16913 case IX86_BUILTIN_PFSUBR:
16914 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16915
16916 case IX86_BUILTIN_PI2FD:
16917 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16918
16919 case IX86_BUILTIN_PMULHRW:
16920 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16921
16922 case IX86_BUILTIN_PF2IW:
16923 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16924
16925 case IX86_BUILTIN_PFNACC:
16926 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16927
16928 case IX86_BUILTIN_PFPNACC:
16929 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16930
16931 case IX86_BUILTIN_PI2FW:
16932 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16933
16934 case IX86_BUILTIN_PSWAPDSI:
16935 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16936
16937 case IX86_BUILTIN_PSWAPDSF:
16938 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16939
16940 case IX86_BUILTIN_SQRTSD:
16941 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16942 case IX86_BUILTIN_LOADUPD:
16943 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16944 case IX86_BUILTIN_STOREUPD:
16945 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16946
16947 case IX86_BUILTIN_MFENCE:
16948 emit_insn (gen_sse2_mfence ());
16949 return 0;
16950 case IX86_BUILTIN_LFENCE:
16951 emit_insn (gen_sse2_lfence ());
16952 return 0;
16953
16954 case IX86_BUILTIN_CLFLUSH:
16955 arg0 = TREE_VALUE (arglist);
16956 op0 = expand_normal (arg0);
16957 icode = CODE_FOR_sse2_clflush;
16958 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16959 op0 = copy_to_mode_reg (Pmode, op0);
16960
16961 emit_insn (gen_sse2_clflush (op0));
16962 return 0;
16963
16964 case IX86_BUILTIN_MOVNTPD:
16965 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16966 case IX86_BUILTIN_MOVNTDQ:
16967 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16968 case IX86_BUILTIN_MOVNTI:
16969 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16970
16971 case IX86_BUILTIN_LOADDQU:
16972 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16973 case IX86_BUILTIN_STOREDQU:
16974 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16975
16976 case IX86_BUILTIN_MONITOR:
16977 arg0 = TREE_VALUE (arglist);
16978 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16979 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16980 op0 = expand_normal (arg0);
16981 op1 = expand_normal (arg1);
16982 op2 = expand_normal (arg2);
16983 if (!REG_P (op0))
16984 op0 = copy_to_mode_reg (Pmode, op0);
16985 if (!REG_P (op1))
16986 op1 = copy_to_mode_reg (SImode, op1);
16987 if (!REG_P (op2))
16988 op2 = copy_to_mode_reg (SImode, op2);
16989 if (!TARGET_64BIT)
16990 emit_insn (gen_sse3_monitor (op0, op1, op2));
16991 else
16992 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16993 return 0;
16994
16995 case IX86_BUILTIN_MWAIT:
16996 arg0 = TREE_VALUE (arglist);
16997 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16998 op0 = expand_normal (arg0);
16999 op1 = expand_normal (arg1);
17000 if (!REG_P (op0))
17001 op0 = copy_to_mode_reg (SImode, op0);
17002 if (!REG_P (op1))
17003 op1 = copy_to_mode_reg (SImode, op1);
17004 emit_insn (gen_sse3_mwait (op0, op1));
17005 return 0;
17006
17007 case IX86_BUILTIN_LDDQU:
17008 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
17009 target, 1);
17010
17011 case IX86_BUILTIN_PALIGNR:
17012 case IX86_BUILTIN_PALIGNR128:
17013 if (fcode == IX86_BUILTIN_PALIGNR)
17014 {
17015 icode = CODE_FOR_ssse3_palignrdi;
17016 mode = DImode;
17017 }
17018 else
17019 {
17020 icode = CODE_FOR_ssse3_palignrti;
17021 mode = V2DImode;
17022 }
17023 arg0 = TREE_VALUE (arglist);
17024 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17025 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17026 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
17027 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
17028 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
17029 tmode = insn_data[icode].operand[0].mode;
17030 mode1 = insn_data[icode].operand[1].mode;
17031 mode2 = insn_data[icode].operand[2].mode;
17032 mode3 = insn_data[icode].operand[3].mode;
17033
17034 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17035 {
17036 op0 = copy_to_reg (op0);
17037 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17038 }
17039 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17040 {
17041 op1 = copy_to_reg (op1);
17042 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
17043 }
17044 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17045 {
17046 error ("shift must be an immediate");
17047 return const0_rtx;
17048 }
17049 target = gen_reg_rtx (mode);
17050 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
17051 op0, op1, op2);
17052 if (! pat)
17053 return 0;
17054 emit_insn (pat);
17055 return target;
17056
17057 case IX86_BUILTIN_MOVNTSD:
17058 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, arglist);
17059
17060 case IX86_BUILTIN_MOVNTSS:
17061 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, arglist);
17062
17063 case IX86_BUILTIN_INSERTQ:
17064 case IX86_BUILTIN_EXTRQ:
17065 icode = (fcode == IX86_BUILTIN_EXTRQ
17066 ? CODE_FOR_sse4a_extrq
17067 : CODE_FOR_sse4a_insertq);
17068 arg0 = TREE_VALUE (arglist);
17069 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17070 op0 = expand_normal (arg0);
17071 op1 = expand_normal (arg1);
17072 tmode = insn_data[icode].operand[0].mode;
17073 mode1 = insn_data[icode].operand[1].mode;
17074 mode2 = insn_data[icode].operand[2].mode;
17075 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17076 op0 = copy_to_mode_reg (mode1, op0);
17077 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17078 op1 = copy_to_mode_reg (mode2, op1);
17079 if (optimize || target == 0
17080 || GET_MODE (target) != tmode
17081 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17082 target = gen_reg_rtx (tmode);
17083 pat = GEN_FCN (icode) (target, op0, op1);
17084 if (! pat)
17085 return NULL_RTX;
17086 emit_insn (pat);
17087 return target;
17088
17089 case IX86_BUILTIN_EXTRQI:
17090 icode = CODE_FOR_sse4a_extrqi;
17091 arg0 = TREE_VALUE (arglist);
17092 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17093 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17094 op0 = expand_normal (arg0);
17095 op1 = expand_normal (arg1);
17096 op2 = expand_normal (arg2);
17097 tmode = insn_data[icode].operand[0].mode;
17098 mode1 = insn_data[icode].operand[1].mode;
17099 mode2 = insn_data[icode].operand[2].mode;
17100 mode3 = insn_data[icode].operand[3].mode;
17101 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17102 op0 = copy_to_mode_reg (mode1, op0);
17103 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17104 {
17105 error ("index mask must be an immediate");
17106 return gen_reg_rtx (tmode);
17107 }
17108 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17109 {
17110 error ("length mask must be an immediate");
17111 return gen_reg_rtx (tmode);
17112 }
17113 if (optimize || target == 0
17114 || GET_MODE (target) != tmode
17115 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17116 target = gen_reg_rtx (tmode);
17117 pat = GEN_FCN (icode) (target, op0, op1, op2);
17118 if (! pat)
17119 return NULL_RTX;
17120 emit_insn (pat);
17121 return target;
17122
17123 case IX86_BUILTIN_INSERTQI:
17124 icode = CODE_FOR_sse4a_insertqi;
17125 arg0 = TREE_VALUE (arglist);
17126 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17127 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17128 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
17129 op0 = expand_normal (arg0);
17130 op1 = expand_normal (arg1);
17131 op2 = expand_normal (arg2);
17132 op3 = expand_normal (arg3);
17133 tmode = insn_data[icode].operand[0].mode;
17134 mode1 = insn_data[icode].operand[1].mode;
17135 mode2 = insn_data[icode].operand[2].mode;
17136 mode3 = insn_data[icode].operand[3].mode;
17137 mode4 = insn_data[icode].operand[4].mode;
17138
17139 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17140 op0 = copy_to_mode_reg (mode1, op0);
17141
17142 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17143 op1 = copy_to_mode_reg (mode2, op1);
17144
17145 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17146 {
17147 error ("index mask must be an immediate");
17148 return gen_reg_rtx (tmode);
17149 }
17150 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
17151 {
17152 error ("length mask must be an immediate");
17153 return gen_reg_rtx (tmode);
17154 }
17155 if (optimize || target == 0
17156 || GET_MODE (target) != tmode
17157 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17158 target = gen_reg_rtx (tmode);
17159 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
17160 if (! pat)
17161 return NULL_RTX;
17162 emit_insn (pat);
17163 return target;
17164
16821 case IX86_BUILTIN_VEC_INIT_V2SI:
16822 case IX86_BUILTIN_VEC_INIT_V4HI:
16823 case IX86_BUILTIN_VEC_INIT_V8QI:
16824 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16825
16826 case IX86_BUILTIN_VEC_EXT_V2DF:
16827 case IX86_BUILTIN_VEC_EXT_V2DI:
16828 case IX86_BUILTIN_VEC_EXT_V4SF:
16829 case IX86_BUILTIN_VEC_EXT_V4SI:
16830 case IX86_BUILTIN_VEC_EXT_V8HI:
16831 case IX86_BUILTIN_VEC_EXT_V16QI:
16832 case IX86_BUILTIN_VEC_EXT_V2SI:
16833 case IX86_BUILTIN_VEC_EXT_V4HI:
16834 return ix86_expand_vec_ext_builtin (arglist, target);
16835
16836 case IX86_BUILTIN_VEC_SET_V8HI:
16837 case IX86_BUILTIN_VEC_SET_V4HI:
16838 return ix86_expand_vec_set_builtin (arglist);
16839
16840 default:
16841 break;
16842 }
16843
16844 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16845 if (d->code == fcode)
16846 {
16847 /* Compares are treated specially. */
16848 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16849 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16850 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16851 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16852 return ix86_expand_sse_compare (d, arglist, target);
16853
16854 return ix86_expand_binop_builtin (d->icode, arglist, target);
16855 }
16856
16857 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16858 if (d->code == fcode)
16859 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16860
16861 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16862 if (d->code == fcode)
16863 return ix86_expand_sse_comi (d, arglist, target);
16864
16865 gcc_unreachable ();
16866}
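/* Illustrative sketch (not part of the original file): several cases above
   (SHUFPS/SHUFPD, the PSHUF* family, PSLLDQI128/PSRLDQI128, PALIGNR and the
   SSE4A EXTRQI/INSERTQI forms) insist on an immediate mask or shift count
   and report an error otherwise.  A separate user translation unit,
   compiled with -msse, shows both the accepted and the rejected shape.  */
#if 0
#include <xmmintrin.h>

__m128
shuffle_ok (__m128 a, __m128 b)
{
  /* Literal mask: accepted by the IX86_BUILTIN_SHUFPS case.  */
  return _mm_shuffle_ps (a, b, 0x1b);
}

__m128
shuffle_bad (__m128 a, __m128 b, int mask)
{
  /* Non-constant mask: rejected with "mask must be an immediate".  */
  return _mm_shuffle_ps (a, b, mask);
}
#endif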
16867
16868/* Store OPERAND to memory after reload is completed.  This means
16869   that we can't easily use assign_stack_local.  */
16870rtx
16871ix86_force_to_memory (enum machine_mode mode, rtx operand)
16872{
16873 rtx result;
16874
16875 gcc_assert (reload_completed);
16876 if (TARGET_RED_ZONE)
16877 {
16878 result = gen_rtx_MEM (mode,
16879 gen_rtx_PLUS (Pmode,
16880 stack_pointer_rtx,
16881 GEN_INT (-RED_ZONE_SIZE)));
16882 emit_move_insn (result, operand);
16883 }
16884 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16885 {
16886 switch (mode)
16887 {
16888 case HImode:
16889 case SImode:
16890 operand = gen_lowpart (DImode, operand);
16891 /* FALLTHRU */
16892 case DImode:
16893 emit_insn (
16894 gen_rtx_SET (VOIDmode,
16895 gen_rtx_MEM (DImode,
16896 gen_rtx_PRE_DEC (DImode,
16897 stack_pointer_rtx)),
16898 operand));
16899 break;
16900 default:
16901 gcc_unreachable ();
16902 }
16903 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16904 }
16905 else
16906 {
16907 switch (mode)
16908 {
16909 case DImode:
16910 {
16911 rtx operands[2];
16912 split_di (&operand, 1, operands, operands + 1);
16913 emit_insn (
16914 gen_rtx_SET (VOIDmode,
16915 gen_rtx_MEM (SImode,
16916 gen_rtx_PRE_DEC (Pmode,
16917 stack_pointer_rtx)),
16918 operands[1]));
16919 emit_insn (
16920 gen_rtx_SET (VOIDmode,
16921 gen_rtx_MEM (SImode,
16922 gen_rtx_PRE_DEC (Pmode,
16923 stack_pointer_rtx)),
16924 operands[0]));
16925 }
16926 break;
16927 case HImode:
16928 /* Store HImodes as SImodes. */
16929 operand = gen_lowpart (SImode, operand);
16930 /* FALLTHRU */
16931 case SImode:
16932 emit_insn (
16933 gen_rtx_SET (VOIDmode,
16934 gen_rtx_MEM (GET_MODE (operand),
16935 gen_rtx_PRE_DEC (SImode,
16936 stack_pointer_rtx)),
16937 operand));
16938 break;
16939 default:
16940 gcc_unreachable ();
16941 }
16942 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16943 }
16944 return result;
16945}
16946
16947/* Free the operand from memory.  */
16948void
16949ix86_free_from_memory (enum machine_mode mode)
16950{
16951 if (!TARGET_RED_ZONE)
16952 {
16953 int size;
16954
16955 if (mode == DImode || TARGET_64BIT)
16956 size = 8;
16957 else
16958 size = 4;
16959      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
16960	 to a pop or add instruction if registers are available.  */
16961 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16962 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16963 GEN_INT (size))));
16964 }
16965}
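/* Illustrative sketch (not from the original file): callers are expected to
   pair the two helpers above, using the returned MEM while the value lives
   in the temporary stack slot and then releasing that slot.  The consuming
   pattern below is hypothetical; real callers sit in the machine
   description.  */
#if 0
  rtx slot = ix86_force_to_memory (DImode, operand);   /* push the operand */
  emit_insn (gen_hypothetical_consumer (dest, slot));  /* use it from memory */
  ix86_free_from_memory (DImode);                      /* deallocate the slot */
#endif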
16966
16967/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16968 QImode must go into class Q_REGS.
16969 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16970 movdf to do mem-to-mem moves through integer regs. */
16971enum reg_class
16972ix86_preferred_reload_class (rtx x, enum reg_class class)
16973{
16974 enum machine_mode mode = GET_MODE (x);
16975
16976 /* We're only allowed to return a subclass of CLASS. Many of the
16977 following checks fail for NO_REGS, so eliminate that early. */
16978 if (class == NO_REGS)
16979 return NO_REGS;
16980
16981 /* All classes can load zeros. */
16982 if (x == CONST0_RTX (mode))
16983 return class;
16984
16985 /* Force constants into memory if we are loading a (nonzero) constant into
16986 an MMX or SSE register. This is because there are no MMX/SSE instructions
16987 to load from a constant. */
16988 if (CONSTANT_P (x)
16989 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16990 return NO_REGS;
16991
16992 /* Prefer SSE regs only, if we can use them for math. */
16993 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16994 return SSE_CLASS_P (class) ? class : NO_REGS;
16995
16996 /* Floating-point constants need more complex checks. */
16997 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16998 {
16999 /* General regs can load everything. */
17000 if (reg_class_subset_p (class, GENERAL_REGS))
17001 return class;
17002
17003 /* Floats can load 0 and 1 plus some others. Note that we eliminated
17004 zero above. We only want to wind up preferring 80387 registers if
17005 we plan on doing computation with them. */
17006 if (TARGET_80387
17007 && standard_80387_constant_p (x))
17008 {
17009 /* Limit class to non-sse. */
17010 if (class == FLOAT_SSE_REGS)
17011 return FLOAT_REGS;
17012 if (class == FP_TOP_SSE_REGS)
17013 return FP_TOP_REG;
17014 if (class == FP_SECOND_SSE_REGS)
17015 return FP_SECOND_REG;
17016 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
17017 return class;
17018 }
17019
17020 return NO_REGS;
17021 }
17022
17023  /* Generally when we see PLUS here, it's the function invariant
17024     (plus soft-fp const_int), which can only be computed into general
17025     regs.  */
17026 if (GET_CODE (x) == PLUS)
17027 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
17028
17029 /* QImode constants are easy to load, but non-constant QImode data
17030 must go into Q_REGS. */
17031 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
17032 {
17033 if (reg_class_subset_p (class, Q_REGS))
17034 return class;
17035 if (reg_class_subset_p (Q_REGS, class))
17036 return Q_REGS;
17037 return NO_REGS;
17038 }
17039
17040 return class;
17041}
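/* Illustrative sketch (not part of the original file): one visible effect
   of the preferences above is that a nonzero floating-point constant never
   gets reloaded directly into an SSE (or MMX) register, while zero is fine
   for every class.  In a user translation unit compiled with SSE math, a
   compiler honoring these preferences would typically clear a register for
   the zero and load the nonzero constant from the constant pool; the exact
   instructions depend on the rest of the port.  */
#if 0
double
constants (void)
{
  double zero = 0.0;       /* CONST0_RTX: allowed in any class.  */
  double pi   = 3.14159;   /* nonzero constant: forced to memory for SSE.  */
  return zero + pi;
}
#endif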
17042
17043/* Discourage putting floating-point values in SSE registers unless
17044 SSE math is being used, and likewise for the 387 registers. */
17045enum reg_class
17046ix86_preferred_output_reload_class (rtx x, enum reg_class class)
17047{
17048 enum machine_mode mode = GET_MODE (x);
17049
17050 /* Restrict the output reload class to the register bank that we are doing
17051 math on. If we would like not to return a subset of CLASS, reject this
17052 alternative: if reload cannot do this, it will still use its choice. */
17053 mode = GET_MODE (x);
17054 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17055 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
17056
17057 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
17058 {
17059 if (class == FP_TOP_SSE_REGS)
17060 return FP_TOP_REG;
17061 else if (class == FP_SECOND_SSE_REGS)
17062 return FP_SECOND_REG;
17063 else
17064 return FLOAT_CLASS_P (class) ? class : NO_REGS;
17065 }
17066
17067 return class;
17068}
17069
17070/* If we are copying between general and FP registers, we need a memory
17071 location. The same is true for SSE and MMX registers.
17072
17073   The macro can't work reliably when one of the CLASSES is a class containing
17074   registers from multiple units (SSE, MMX, integer).  We avoid this by never
17075   combining those units in a single alternative in the machine description.
17076 Ensure that this constraint holds to avoid unexpected surprises.
17077
17078 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
17079 enforce these sanity checks. */
17080
17081int
17082ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
17083 enum machine_mode mode, int strict)
17084{
17085 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
17086 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
17087 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
17088 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
17089 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
17090 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
17091 {
17092 gcc_assert (!strict);
17093 return true;
17094 }
17095
17096 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
17097 return true;
17098
17099 /* ??? This is a lie. We do have moves between mmx/general, and for
17100 mmx/sse2. But by saying we need secondary memory we discourage the
17101 register allocator from using the mmx registers unless needed. */
17102 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
17103 return true;
17104
17105 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17106 {
17107 /* SSE1 doesn't have any direct moves from other classes. */
17108 if (!TARGET_SSE2)
17109 return true;
17110
17111 /* If the target says that inter-unit moves are more expensive
17112 than moving through memory, then don't generate them. */
17113 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
17114 return true;
17115
17116 /* Between SSE and general, we have moves no larger than word size. */
17117 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
17118 return true;
17119
17120 /* ??? For the cost of one register reformat penalty, we could use
17121 the same instructions to move SFmode and DFmode data, but the
17122 relevant move patterns don't support those alternatives. */
17123 if (mode == SFmode || mode == DFmode)
17124 return true;
17125 }
17126
17127 return false;
17128}
17129
17130/* Return true if the registers in CLASS cannot represent the change from
17131 modes FROM to TO. */
17132
17133bool
17134ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
17135 enum reg_class class)
17136{
17137 if (from == to)
17138 return false;
17139
17140 /* x87 registers can't do subreg at all, as all values are reformatted
17141 to extended precision. */
17142 if (MAYBE_FLOAT_CLASS_P (class))
17143 return true;
17144
17145 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17146 {
17147 /* Vector registers do not support QI or HImode loads. If we don't
17148 disallow a change to these modes, reload will assume it's ok to
17149 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
17150 the vec_dupv4hi pattern. */
17151 if (GET_MODE_SIZE (from) < 4)
17152 return true;
17153
17154 /* Vector registers do not support subreg with nonzero offsets, which
17155 are otherwise valid for integer registers. Since we can't see
17156 whether we have a nonzero offset from here, prohibit all
17157 nonparadoxical subregs changing size. */
17158 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
17159 return true;
17160 }
17161
17162 return false;
17163}
17164
17165/* Return the cost of moving data from a register in class CLASS1 to
17166 one in class CLASS2.
17167
17168 It is not required that the cost always equal 2 when FROM is the same as TO;
17169 on some machines it is expensive to move between registers if they are not
17170 general registers. */
17171
17172int
17173ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
17174 enum reg_class class2)
17175{
17176 /* In case we require secondary memory, compute cost of the store followed
17177 by load. In order to avoid bad register allocation choices, we need
17178 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
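/* Worked example (illustrative numbers only): if the DFmode fp_load/fp_store
   and sse_load/sse_store entries all cost 8, an x87 <-> SSE copy is priced
   at 1 + 8 + 8 = 17, comfortably above a plain memory move, which steers
   the allocator away from such cross-unit copies. */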
17179
17180 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
17181 {
17182 int cost = 1;
17183
17184 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
17185 MEMORY_MOVE_COST (mode, class1, 1));
17186 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
17187 MEMORY_MOVE_COST (mode, class2, 1));
17188
17189 /* In the case of copying from a general purpose register we may emit multiple
17190 stores followed by a single load, causing a memory size mismatch stall.
17191 Count this as an arbitrarily high cost of 20. */
17192 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
17193 cost += 20;
17194
17195 /* In the case of FP/MMX moves, the registers actually overlap, and we
17196 have to switch modes in order to treat them differently. */
17197 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
17198 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
17199 cost += 20;
17200
17201 return cost;
17202 }
17203
17204 /* Moves between SSE/MMX and integer unit are expensive. */
17205 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17206 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17207 return ix86_cost->mmxsse_to_integer;
17208 if (MAYBE_FLOAT_CLASS_P (class1))
17209 return ix86_cost->fp_move;
17210 if (MAYBE_SSE_CLASS_P (class1))
17211 return ix86_cost->sse_move;
17212 if (MAYBE_MMX_CLASS_P (class1))
17213 return ix86_cost->mmx_move;
17214 return 2;
17215}
17216
17217/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
17218
17219bool
17220ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17221{
17222 /* Flags and only flags can only hold CCmode values. */
17223 if (CC_REGNO_P (regno))
17224 return GET_MODE_CLASS (mode) == MODE_CC;
17225 if (GET_MODE_CLASS (mode) == MODE_CC
17226 || GET_MODE_CLASS (mode) == MODE_RANDOM
17227 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17228 return 0;
17229 if (FP_REGNO_P (regno))
17230 return VALID_FP_MODE_P (mode);
17231 if (SSE_REGNO_P (regno))
17232 {
17233 /* We implement the move patterns for all vector modes into and
17234 out of SSE registers, even when no operation instructions
17235 are available. */
17236 return (VALID_SSE_REG_MODE (mode)
17237 || VALID_SSE2_REG_MODE (mode)
17238 || VALID_MMX_REG_MODE (mode)
17239 || VALID_MMX_REG_MODE_3DNOW (mode));
17240 }
17241 if (MMX_REGNO_P (regno))
17242 {
17243 /* We implement the move patterns for 3DNOW modes even in MMX mode,
17244 so if the register is available at all, then we can move data of
17245 the given mode into or out of it. */
17246 return (VALID_MMX_REG_MODE (mode)
17247 || VALID_MMX_REG_MODE_3DNOW (mode));
17248 }
17249
17250 if (mode == QImode)
17251 {
17252 /* Take care for QImode values - they can be in non-QI regs,
17253 but then they do cause partial register stalls. */
17254 if (regno < 4 || TARGET_64BIT)
17255 return 1;
17256 if (!TARGET_PARTIAL_REG_STALL)
17257 return 1;
17258 return reload_in_progress || reload_completed;
17259 }
17260 /* We handle both integer and floats in the general purpose registers. */
17261 else if (VALID_INT_MODE_P (mode))
17262 return 1;
17263 else if (VALID_FP_MODE_P (mode))
17264 return 1;
17265 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
17266 on to use that value in smaller contexts, this can easily force a
17267 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
17268 supporting DImode, allow it. */
17269 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17270 return 1;
17271
17272 return 0;
17273}
17274
17275/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17276 tieable integer mode. */
17277
17278static bool
17279ix86_tieable_integer_mode_p (enum machine_mode mode)
17280{
17281 switch (mode)
17282 {
17283 case HImode:
17284 case SImode:
17285 return true;
17286
17287 case QImode:
17288 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17289
17290 case DImode:
17291 return TARGET_64BIT;
17292
17293 default:
17294 return false;
17295 }
17296}
17297
17298/* Return true if MODE1 is accessible in a register that can hold MODE2
17299 without copying. That is, all register classes that can hold MODE2
17300 can also hold MODE1. */
17301
17302bool
17303ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17304{
17305 if (mode1 == mode2)
17306 return true;
17307
17308 if (ix86_tieable_integer_mode_p (mode1)
17309 && ix86_tieable_integer_mode_p (mode2))
17310 return true;
17311
17312 /* MODE2 being XFmode implies fp stack or general regs, which means we
17313 can tie any smaller floating point modes to it. Note that we do not
17314 tie this with TFmode. */
17315 if (mode2 == XFmode)
17316 return mode1 == SFmode || mode1 == DFmode;
17317
17318 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17319 that we can tie it with SFmode. */
17320 if (mode2 == DFmode)
17321 return mode1 == SFmode;
17322
17323 /* If MODE2 is only appropriate for an SSE register, then tie with
17324 any other mode acceptable to SSE registers. */
17325 if (GET_MODE_SIZE (mode2) >= 8
17326 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17327 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17328
17329 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17330 with any other mode acceptable to MMX registers. */
17331 if (GET_MODE_SIZE (mode2) == 8
17332 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17333 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17334
17335 return false;
17336}
17337
17338/* Return the cost of moving data of mode M between a
17339 register and memory. A value of 2 is the default; this cost is
17340 relative to those in `REGISTER_MOVE_COST'.
17341
17342 If moving between registers and memory is more expensive than
17343 between two registers, you should define this macro to express the
17344 relative cost.
17345
17346 Also model the increased cost of moving QImode registers in
17347 non-Q_REGS classes.
17348 */
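/* Example (illustrative, assuming a 32-bit target): an SImode access in a
   general register costs int_load[2]/int_store[2]; a QImode load into a
   register outside Q_REGS is charged the pricier movzbl_load instead, and a
   QImode store outside Q_REGS gets int_store[0] + 4. */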
17349int
17350ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17351{
17352 if (FLOAT_CLASS_P (class))
17353 {
17354 int index;
17355 switch (mode)
17356 {
17357 case SFmode:
17358 index = 0;
17359 break;
17360 case DFmode:
17361 index = 1;
17362 break;
17363 case XFmode:
17364 index = 2;
17365 break;
17366 default:
17367 return 100;
17368 }
17369 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17370 }
17371 if (SSE_CLASS_P (class))
17372 {
17373 int index;
17374 switch (GET_MODE_SIZE (mode))
17375 {
17376 case 4:
17377 index = 0;
17378 break;
17379 case 8:
17380 index = 1;
17381 break;
17382 case 16:
17383 index = 2;
17384 break;
17385 default:
17386 return 100;
17387 }
17388 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17389 }
17390 if (MMX_CLASS_P (class))
17391 {
17392 int index;
17393 switch (GET_MODE_SIZE (mode))
17394 {
17395 case 4:
17396 index = 0;
17397 break;
17398 case 8:
17399 index = 1;
17400 break;
17401 default:
17402 return 100;
17403 }
17404 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17405 }
17406 switch (GET_MODE_SIZE (mode))
17407 {
17408 case 1:
17409 if (in)
17410 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17411 : ix86_cost->movzbl_load);
17412 else
17413 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17414 : ix86_cost->int_store[0] + 4);
17415 break;
17416 case 2:
17417 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17418 default:
17419 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17420 if (mode == TFmode)
17421 mode = XFmode;
17422 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17423 * (((int) GET_MODE_SIZE (mode)
17424 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17425 }
17426}
17427
17428/* Compute a (partial) cost for rtx X. Return true if the complete
17429 cost has been computed, and false if subexpressions should be
17430 scanned. In either case, *TOTAL contains the cost result. */
17431
17432static bool
17433ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17434{
17435 enum machine_mode mode = GET_MODE (x);
17436
17437 switch (code)
17438 {
17439 case CONST_INT:
17440 case CONST:
17441 case LABEL_REF:
17442 case SYMBOL_REF:
17443 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17444 *total = 3;
17445 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17446 *total = 2;
17447 else if (flag_pic && SYMBOLIC_CONST (x)
17448 && (!TARGET_64BIT
17449 || (GET_CODE (x) != LABEL_REF
17450 && (GET_CODE (x) != SYMBOL_REF
17451 || !SYMBOL_REF_LOCAL_P (x)))))
17452 *total = 1;
17453 else
17454 *total = 0;
17455 return true;
17456
17457 case CONST_DOUBLE:
17458 if (mode == VOIDmode)
17459 *total = 0;
17460 else
17461 switch (standard_80387_constant_p (x))
17462 {
17463 case 1: /* 0.0 */
17464 *total = 1;
17465 break;
17466 default: /* Other constants */
17467 *total = 2;
17468 break;
17469 case 0:
17470 case -1:
17471 /* Start with (MEM (SYMBOL_REF)), since that's where
17472 it'll probably end up. Add a penalty for size. */
17473 *total = (COSTS_N_INSNS (1)
17474 + (flag_pic != 0 && !TARGET_64BIT)
17475 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17476 break;
17477 }
17478 return true;
17479
17480 case ZERO_EXTEND:
17481 /* The zero extension is often completely free on x86_64, so make
17482 it as cheap as possible. */
17483 if (TARGET_64BIT && mode == DImode
17484 && GET_MODE (XEXP (x, 0)) == SImode)
17485 *total = 1;
17486 else if (TARGET_ZERO_EXTEND_WITH_AND)
17487 *total = ix86_cost->add;
17488 else
17489 *total = ix86_cost->movzx;
17490 return false;
17491
17492 case SIGN_EXTEND:
17493 *total = ix86_cost->movsx;
17494 return false;
17495
17496 case ASHIFT:
17497 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17498 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17499 {
17500 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17501 if (value == 1)
17502 {
17503 *total = ix86_cost->add;
17504 return false;
17505 }
17506 if ((value == 2 || value == 3)
17507 && ix86_cost->lea <= ix86_cost->shift_const)
17508 {
17509 *total = ix86_cost->lea;
17510 return false;
17511 }
17512 }
17513 /* FALLTHRU */
17514
17515 case ROTATE:
17516 case ASHIFTRT:
17517 case LSHIFTRT:
17518 case ROTATERT:
17519 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17520 {
17521 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17522 {
17523 if (INTVAL (XEXP (x, 1)) > 32)
17524 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17525 else
17526 *total = ix86_cost->shift_const * 2;
17527 }
17528 else
17529 {
17530 if (GET_CODE (XEXP (x, 1)) == AND)
17531 *total = ix86_cost->shift_var * 2;
17532 else
17533 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17534 }
17535 }
17536 else
17537 {
17538 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17539 *total = ix86_cost->shift_const;
17540 else
17541 *total = ix86_cost->shift_var;
17542 }
17543 return false;
17544
17545 case MULT:
17546 if (FLOAT_MODE_P (mode))
17547 {
17548 *total = ix86_cost->fmul;
17549 return false;
17550 }
17551 else
17552 {
17553 rtx op0 = XEXP (x, 0);
17554 rtx op1 = XEXP (x, 1);
17555 int nbits;
17556 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17557 {
17558 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17559 for (nbits = 0; value != 0; value &= value - 1)
17560 nbits++;
17561 }
17562 else
17563 /* This is arbitrary. */
17564 nbits = 7;
17565
17566 /* Compute costs correctly for widening multiplication. */
17567 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
17568 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17569 == GET_MODE_SIZE (mode))
17570 {
17571 int is_mulwiden = 0;
17572 enum machine_mode inner_mode = GET_MODE (op0);
17573
17574 if (GET_CODE (op0) == GET_CODE (op1))
17575 is_mulwiden = 1, op1 = XEXP (op1, 0);
17576 else if (GET_CODE (op1) == CONST_INT)
17577 {
17578 if (GET_CODE (op0) == SIGN_EXTEND)
17579 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17580 == INTVAL (op1);
17581 else
17582 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17583 }
17584
17585 if (is_mulwiden)
17586 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17587 }
17588
17589 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17590 + nbits * ix86_cost->mult_bit
17591 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
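/* E.g. (illustrative) a multiply by the constant 10 (binary 1010, two set
   bits) is charged mult_init[MODE] + 2 * mult_bit on top of the operand
   costs; a non-constant multiplier uses the fixed estimate of 7 bits set
   above. */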
17592
17593 return true;
17594 }
17595
17596 case DIV:
17597 case UDIV:
17598 case MOD:
17599 case UMOD:
17600 if (FLOAT_MODE_P (mode))
17601 *total = ix86_cost->fdiv;
17602 else
17603 *total = ix86_cost->divide[MODE_INDEX (mode)];
17604 return false;
17605
17606 case PLUS:
17607 if (FLOAT_MODE_P (mode))
17608 *total = ix86_cost->fadd;
17609 else if (GET_MODE_CLASS (mode) == MODE_INT
17610 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17611 {
17612 if (GET_CODE (XEXP (x, 0)) == PLUS
17613 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17614 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17615 && CONSTANT_P (XEXP (x, 1)))
17616 {
17617 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17618 if (val == 2 || val == 4 || val == 8)
17619 {
17620 *total = ix86_cost->lea;
17621 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17622 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17623 outer_code);
17624 *total += rtx_cost (XEXP (x, 1), outer_code);
17625 return true;
17626 }
17627 }
17628 else if (GET_CODE (XEXP (x, 0)) == MULT
17629 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17630 {
17631 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17632 if (val == 2 || val == 4 || val == 8)
17633 {
17634 *total = ix86_cost->lea;
17635 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17636 *total += rtx_cost (XEXP (x, 1), outer_code);
17637 return true;
17638 }
17639 }
17640 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17641 {
17642 *total = ix86_cost->lea;
17643 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17644 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17645 *total += rtx_cost (XEXP (x, 1), outer_code);
17646 return true;
17647 }
17648 }
17649 /* FALLTHRU */
17650
17651 case MINUS:
17652 if (FLOAT_MODE_P (mode))
17653 {
17654 *total = ix86_cost->fadd;
17655 return false;
17656 }
17657 /* FALLTHRU */
17658
17659 case AND:
17660 case IOR:
17661 case XOR:
17662 if (!TARGET_64BIT && mode == DImode)
17663 {
17664 *total = (ix86_cost->add * 2
17665 + (rtx_cost (XEXP (x, 0), outer_code)
17666 << (GET_MODE (XEXP (x, 0)) != DImode))
17667 + (rtx_cost (XEXP (x, 1), outer_code)
17668 << (GET_MODE (XEXP (x, 1)) != DImode)));
17669 return true;
17670 }
17671 /* FALLTHRU */
17672
17673 case NEG:
17674 if (FLOAT_MODE_P (mode))
17675 {
17676 *total = ix86_cost->fchs;
17677 return false;
17678 }
17679 /* FALLTHRU */
17680
17681 case NOT:
17682 if (!TARGET_64BIT && mode == DImode)
17683 *total = ix86_cost->add * 2;
17684 else
17685 *total = ix86_cost->add;
17686 return false;
17687
17688 case COMPARE:
17689 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17690 && XEXP (XEXP (x, 0), 1) == const1_rtx
17691 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17692 && XEXP (x, 1) == const0_rtx)
17693 {
17694 /* This kind of construct is implemented using test[bwl].
17695 Treat it as if we had an AND. */
17696 *total = (ix86_cost->add
17697 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17698 + rtx_cost (const1_rtx, outer_code));
17699 return true;
17700 }
17701 return false;
17702
17703 case FLOAT_EXTEND:
17704 if (!TARGET_SSE_MATH
17705 || mode == XFmode
17706 || (mode == DFmode && !TARGET_SSE2))
17707 /* For standard 80387 constants, raise the cost to prevent
17708 compress_float_constant() from generating a load from memory. */
17709 switch (standard_80387_constant_p (XEXP (x, 0)))
17710 {
17711 case -1:
17712 case 0:
17713 *total = 0;
17714 break;
17715 case 1: /* 0.0 */
17716 *total = 1;
17717 break;
17718 default:
17719 *total = (x86_ext_80387_constants & TUNEMASK
17720 || optimize_size
17721 ? 1 : 0);
17722 }
17723 return false;
17724
17725 case ABS:
17726 if (FLOAT_MODE_P (mode))
17727 *total = ix86_cost->fabs;
17728 return false;
17729
17730 case SQRT:
17731 if (FLOAT_MODE_P (mode))
17732 *total = ix86_cost->fsqrt;
17733 return false;
17734
17735 case UNSPEC:
17736 if (XINT (x, 1) == UNSPEC_TP)
17737 *total = 0;
17738 return false;
17739
17740 default:
17741 return false;
17742 }
17743}
17744
17745#if TARGET_MACHO
17746
17747static int current_machopic_label_num;
17748
17749/* Given a symbol name and its associated stub, write out the
17750 definition of the stub. */
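/* Rough sketch of the output (illustrative names only): for a non-PIC
   (non-MACHOPIC_PURE) stub the routine emits roughly

	"stub":
		.indirect_symbol "symbol"
		jmp	*L<n>$lz
	"binder":
		pushl	$L<n>$lz
		jmp	dyld_stub_binding_helper
	L<n>$lz:
		.indirect_symbol "symbol"
		.long	"binder"

   while the pure (PIC) variant loads the lazy pointer PC-relatively through
   %eax/%edx instead. */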
17751
17752void
17753machopic_output_stub (FILE *file, const char *symb, const char *stub)
17754{
17755 unsigned int length;
17756 char *binder_name, *symbol_name, lazy_ptr_name[32];
17757 int label = ++current_machopic_label_num;
17758
17759 /* For 64-bit we shouldn't get here. */
17760 gcc_assert (!TARGET_64BIT);
17761
17762 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17763 symb = (*targetm.strip_name_encoding) (symb);
17764
17765 length = strlen (stub);
17766 binder_name = alloca (length + 32);
17767 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17768
17769 length = strlen (symb);
17770 symbol_name = alloca (length + 32);
17771 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17772
17773 sprintf (lazy_ptr_name, "L%d$lz", label);
17774
17775 if (MACHOPIC_PURE)
17776 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17777 else
17778 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17779
17780 fprintf (file, "%s:\n", stub);
17781 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17782
17783 if (MACHOPIC_PURE)
17784 {
17785 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17786 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17787 fprintf (file, "\tjmp\t*%%edx\n");
17788 }
17789 else
17790 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17791
17792 fprintf (file, "%s:\n", binder_name);
17793
17794 if (MACHOPIC_PURE)
17795 {
17796 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17797 fprintf (file, "\tpushl\t%%eax\n");
17798 }
17799 else
17800 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17801
17802 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17803
17804 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17805 fprintf (file, "%s:\n", lazy_ptr_name);
17806 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17807 fprintf (file, "\t.long %s\n", binder_name);
17808}
17809
17810void
17811darwin_x86_file_end (void)
17812{
17813 darwin_file_end ();
17814 ix86_file_end ();
17815}
17816#endif /* TARGET_MACHO */
17817
17818/* Order the registers for register allocator. */
17819
17820void
17821x86_order_regs_for_local_alloc (void)
17822{
17823 int pos = 0;
17824 int i;
17825
17826 /* First allocate the local general purpose registers. */
17827 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17828 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17829 reg_alloc_order [pos++] = i;
17830
17831 /* Global general purpose registers. */
17832 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17833 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17834 reg_alloc_order [pos++] = i;
17835
17836 /* x87 registers come first in case we are doing FP math
17837 using them. */
17838 if (!TARGET_SSE_MATH)
17839 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17840 reg_alloc_order [pos++] = i;
17841
17842 /* SSE registers. */
17843 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17844 reg_alloc_order [pos++] = i;
17845 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17846 reg_alloc_order [pos++] = i;
17847
17848 /* x87 registers. */
17849 if (TARGET_SSE_MATH)
17850 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17851 reg_alloc_order [pos++] = i;
17852
17853 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17854 reg_alloc_order [pos++] = i;
17855
17856 /* Initialize the rest of the array, as we do not allocate some registers
17857 at all. */
17858 while (pos < FIRST_PSEUDO_REGISTER)
17859 reg_alloc_order [pos++] = 0;
17860}
17861
17862/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17863 struct attribute_spec.handler. */
17864static tree
17865ix86_handle_struct_attribute (tree *node, tree name,
17866 tree args ATTRIBUTE_UNUSED,
17867 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17868{
17869 tree *type = NULL;
17870 if (DECL_P (*node))
17871 {
17872 if (TREE_CODE (*node) == TYPE_DECL)
17873 type = &TREE_TYPE (*node);
17874 }
17875 else
17876 type = node;
17877
17878 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17879 || TREE_CODE (*type) == UNION_TYPE)))
17880 {
17881 warning (OPT_Wattributes, "%qs attribute ignored",
17882 IDENTIFIER_POINTER (name));
17883 *no_add_attrs = true;
17884 }
17885
17886 else if ((is_attribute_p ("ms_struct", name)
17887 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17888 || ((is_attribute_p ("gcc_struct", name)
17889 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17890 {
17891 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17892 IDENTIFIER_POINTER (name));
17893 *no_add_attrs = true;
17894 }
17895
17896 return NULL_TREE;
17897}
17898
17899static bool
17900ix86_ms_bitfield_layout_p (tree record_type)
17901{
17902 return (TARGET_MS_BITFIELD_LAYOUT &&
17903 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17904 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17905}
17906
17907/* Returns an expression indicating where the this parameter is
17908 located on entry to the FUNCTION. */
17909
17910static rtx
17911x86_this_parameter (tree function)
17912{
17913 tree type = TREE_TYPE (function);
17914
17915 if (TARGET_64BIT)
17916 {
17917 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17918 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17919 }
17920
17921 if (ix86_function_regparm (type, function) > 0)
17922 {
17923 tree parm;
17924
17925 parm = TYPE_ARG_TYPES (type);
17926 /* Figure out whether or not the function has a variable number of
17927 arguments. */
17928 for (; parm; parm = TREE_CHAIN (parm))
17929 if (TREE_VALUE (parm) == void_type_node)
17930 break;
17931 /* If not, the this parameter is in the first argument. */
17932 if (parm)
17933 {
17934 int regno = 0;
17935 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17936 regno = 2;
17937 return gen_rtx_REG (SImode, regno);
17938 }
17939 }
17940
17941 if (aggregate_value_p (TREE_TYPE (type), type))
17942 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17943 else
17944 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17945}
17946
17947/* Determine whether x86_output_mi_thunk can succeed. */
17948
17949static bool
17950x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17951 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17952 HOST_WIDE_INT vcall_offset, tree function)
17953{
17954 /* 64-bit can handle anything. */
17955 if (TARGET_64BIT)
17956 return true;
17957
17958 /* For 32-bit, everything's fine if we have one free register. */
17959 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17960 return true;
17961
17962 /* Need a free register for vcall_offset. */
17963 if (vcall_offset)
17964 return false;
17965
17966 /* Need a free register for GOT references. */
17967 if (flag_pic && !(*targetm.binds_local_p) (function))
17968 return false;
17969
17970 /* Otherwise ok. */
17971 return true;
17972}
17973
17974/* Output the assembler code for a thunk function. THUNK_DECL is the
17975 declaration for the thunk function itself, FUNCTION is the decl for
17976 the target function. DELTA is an immediate constant offset to be
17977 added to THIS. If VCALL_OFFSET is nonzero, the word at
17978 *(*this + vcall_offset) should be added to THIS. */
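/* Illustrative example (not part of the original comment): for 32-bit,
   non-PIC code where `this' is passed on the stack, the result is not
   returned in memory, DELTA == 4, VCALL_OFFSET == 0 and FUNCTION binds
   locally, the emitted thunk reduces to

	addl	$4, 4(%esp)
	jmp	function  */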
17979
17980static void
17981x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17982 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17983 HOST_WIDE_INT vcall_offset, tree function)
17984{
17985 rtx xops[3];
17986 rtx this = x86_this_parameter (function);
17987 rtx this_reg, tmp;
17988
17989 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17990 pull it in now and let DELTA benefit. */
17991 if (REG_P (this))
17992 this_reg = this;
17993 else if (vcall_offset)
17994 {
17995 /* Put the this parameter into %eax. */
17996 xops[0] = this;
17997 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17998 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17999 }
18000 else
18001 this_reg = NULL_RTX;
18002
18003 /* Adjust the this parameter by a fixed constant. */
18004 if (delta)
18005 {
18006 xops[0] = GEN_INT (delta);
18007 xops[1] = this_reg ? this_reg : this;
18008 if (TARGET_64BIT)
18009 {
18010 if (!x86_64_general_operand (xops[0], DImode))
18011 {
18012 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18013 xops[1] = tmp;
18014 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
18015 xops[0] = tmp;
18016 xops[1] = this;
18017 }
18018 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18019 }
18020 else
18021 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18022 }
18023
18024 /* Adjust the this parameter by a value stored in the vtable. */
18025 if (vcall_offset)
18026 {
18027 if (TARGET_64BIT)
18028 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18029 else
18030 {
18031 int tmp_regno = 2 /* ECX */;
18032 if (lookup_attribute ("fastcall",
18033 TYPE_ATTRIBUTES (TREE_TYPE (function))))
18034 tmp_regno = 0 /* EAX */;
18035 tmp = gen_rtx_REG (SImode, tmp_regno);
18036 }
18037
18038 xops[0] = gen_rtx_MEM (Pmode, this_reg);
18039 xops[1] = tmp;
18040 if (TARGET_64BIT)
18041 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18042 else
18043 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18044
18045 /* Adjust the this parameter. */
18046 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
18047 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
18048 {
18049 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
18050 xops[0] = GEN_INT (vcall_offset);
18051 xops[1] = tmp2;
18052 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18053 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
18054 }
18055 xops[1] = this_reg;
18056 if (TARGET_64BIT)
18057 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18058 else
18059 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18060 }
18061
18062 /* If necessary, drop THIS back to its stack slot. */
18063 if (this_reg && this_reg != this)
18064 {
18065 xops[0] = this_reg;
18066 xops[1] = this;
18067 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18068 }
18069
18070 xops[0] = XEXP (DECL_RTL (function), 0);
18071 if (TARGET_64BIT)
18072 {
18073 if (!flag_pic || (*targetm.binds_local_p) (function))
18074 output_asm_insn ("jmp\t%P0", xops);
18075 else
18076 {
18077 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
18078 tmp = gen_rtx_CONST (Pmode, tmp);
18079 tmp = gen_rtx_MEM (QImode, tmp);
18080 xops[0] = tmp;
18081 output_asm_insn ("jmp\t%A0", xops);
18082 }
18083 }
18084 else
18085 {
18086 if (!flag_pic || (*targetm.binds_local_p) (function))
18087 output_asm_insn ("jmp\t%P0", xops);
18088 else
18089#if TARGET_MACHO
18090 if (TARGET_MACHO)
18091 {
18092 rtx sym_ref = XEXP (DECL_RTL (function), 0);
18093 tmp = (gen_rtx_SYMBOL_REF
18094 (Pmode,
18095 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
18096 tmp = gen_rtx_MEM (QImode, tmp);
18097 xops[0] = tmp;
18098 output_asm_insn ("jmp\t%0", xops);
18099 }
18100 else
18101#endif /* TARGET_MACHO */
18102 {
18103 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
18104 output_set_got (tmp, NULL_RTX);
18105
18106 xops[1] = tmp;
18107 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
18108 output_asm_insn ("jmp\t{*}%1", xops);
18109 }
18110 }
18111}
18112
18113static void
18114x86_file_start (void)
18115{
18116 default_file_start ();
18117#if TARGET_MACHO
18118 darwin_file_start ();
18119#endif
18120 if (X86_FILE_START_VERSION_DIRECTIVE)
18121 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
18122 if (X86_FILE_START_FLTUSED)
18123 fputs ("\t.global\t__fltused\n", asm_out_file);
18124 if (ix86_asm_dialect == ASM_INTEL)
18125 fputs ("\t.intel_syntax\n", asm_out_file);
18126}
18127
18128int
18129x86_field_alignment (tree field, int computed)
18130{
18131 enum machine_mode mode;
18132 tree type = TREE_TYPE (field);
18133
18134 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
18135 return computed;
18136 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
18137 ? get_inner_array_type (type) : type);
18138 if (mode == DFmode || mode == DCmode
18139 || GET_MODE_CLASS (mode) == MODE_INT
18140 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
18141 return MIN (32, computed);
18142 return computed;
18143}
18144
18145/* Output assembler code to FILE to increment profiler label # LABELNO
18146 for profiling a function entry. */
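/* Example of the emitted sequence (illustrative; the exact names come from
   MCOUNT_NAME, LPREFIX and PROFILE_COUNT_REGISTER): on a 32-bit non-PIC
   target with profile counters enabled this is roughly

	movl	$LP<labelno>, %edx
	call	mcount  */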
18147void
18148x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
18149{
18150 if (TARGET_64BIT)
18151 if (flag_pic)
18152 {
18153#ifndef NO_PROFILE_COUNTERS
18154 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
18155#endif
18156 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
18157 }
18158 else
18159 {
18160#ifndef NO_PROFILE_COUNTERS
18161 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
18162#endif
18163 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18164 }
18165 else if (flag_pic)
18166 {
18167#ifndef NO_PROFILE_COUNTERS
18168 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
18169 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
18170#endif
18171 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
18172 }
18173 else
18174 {
18175#ifndef NO_PROFILE_COUNTERS
18176 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
18177 PROFILE_COUNT_REGISTER);
18178#endif
18179 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18180 }
18181}
18182
18183/* We don't have exact information about the insn sizes, but we may assume
18184 quite safely that we are informed about all 1 byte insns and memory
18185 address sizes. This is enough to eliminate unnecessary padding in
18186 99% of cases. */
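/* Illustrative summary of the estimate below: direct calls count as their
   full 5 bytes, instructions the length attributes already know to be a
   single byte count as 1, and everything else is guessed as one byte plus
   its address length (at least four when it mentions a symbol), or two
   bytes when it has no address at all. */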
18187
18188static int
18189min_insn_size (rtx insn)
18190{
18191 int l = 0;
18192
18193 if (!INSN_P (insn) || !active_insn_p (insn))
18194 return 0;
18195
18196 /* Discard alignments we've emitted and jump tables. */
18197 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
18198 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
18199 return 0;
18200 if (GET_CODE (insn) == JUMP_INSN
18201 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18202 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18203 return 0;
18204
18205 /* Important case - calls are always 5 bytes.
18206 It is common to have many calls in a row. */
18207 if (GET_CODE (insn) == CALL_INSN
18208 && symbolic_reference_mentioned_p (PATTERN (insn))
18209 && !SIBLING_CALL_P (insn))
18210 return 5;
18211 if (get_attr_length (insn) <= 1)
18212 return 1;
18213
18214 /* For normal instructions we may rely on the sizes of addresses
18215 and the presence of a symbol to require 4 bytes of encoding.
18216 This is not the case for jumps, where references are PC relative. */
18217 if (GET_CODE (insn) != JUMP_INSN)
18218 {
18219 l = get_attr_length_address (insn);
18220 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18221 l = 4;
18222 }
18223 if (l)
18224 return 1+l;
18225 else
18226 return 2;
18227}
18228
18229/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
18230 window. */
18231
18232static void
18233ix86_avoid_jump_misspredicts (void)
18234{
18235 rtx insn, start = get_insns ();
18236 int nbytes = 0, njumps = 0;
18237 int isjump = 0;
18238
18239 /* Look for all minimal intervals of instructions containing 4 jumps.
18240 The intervals are bounded by START and INSN. NBYTES is the total
18241 size of instructions in the interval including INSN and not including
18242 START. When the NBYTES is smaller than 16 bytes, it is possible
18243 that the end of START and INSN ends up in the same 16byte page.
18244
18245 The smallest offset in the page INSN can start is the case where START
18246 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
18247 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
18248 */
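/* Worked example (illustrative): if the three previous jumps plus INSN add
   up to NBYTES == 12 and INSN itself is 2 bytes, the code below emits a
   p2align with max-skip 15 - 12 + 2 = 5 in front of INSN, enough to push it
   into the next 16-byte window when the four jumps would otherwise share
   one. */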
18249 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18250 {
18251
18252 nbytes += min_insn_size (insn);
18253 if (dump_file)
18254 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
18255 INSN_UID (insn), min_insn_size (insn));
18256 if ((GET_CODE (insn) == JUMP_INSN
18257 && GET_CODE (PATTERN (insn)) != ADDR_VEC
18258 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
18259 || GET_CODE (insn) == CALL_INSN)
18260 njumps++;
18261 else
18262 continue;
18263
18264 while (njumps > 3)
18265 {
18266 start = NEXT_INSN (start);
18267 if ((GET_CODE (start) == JUMP_INSN
18268 && GET_CODE (PATTERN (start)) != ADDR_VEC
18269 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
18270 || GET_CODE (start) == CALL_INSN)
18271 njumps--, isjump = 1;
18272 else
18273 isjump = 0;
18274 nbytes -= min_insn_size (start);
18275 }
18276 gcc_assert (njumps >= 0);
18277 if (dump_file)
18278 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18279 INSN_UID (start), INSN_UID (insn), nbytes);
18280
18281 if (njumps == 3 && isjump && nbytes < 16)
18282 {
18283 int padsize = 15 - nbytes + min_insn_size (insn);
18284
18285 if (dump_file)
18286 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18287 INSN_UID (insn), padsize);
18288 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
18289 }
18290 }
18291}
18292
18293 /* AMD Athlon works faster
18294 when RET is not the destination of a conditional jump or directly preceded
18295 by another jump instruction. We avoid the penalty by inserting a NOP just
18296 before the RET instructions in such cases. */
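/* (Descriptive note: the replacement emitted below is the target's "long"
   return pattern; on the affected AMD parts this is typically the two-byte
   rep;ret sequence, though the exact expansion lives in the machine
   description.) */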
18297static void
18298ix86_pad_returns (void)
18299{
18300 edge e;
18301 edge_iterator ei;
18302
18303 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18304 {
18305 basic_block bb = e->src;
18306 rtx ret = BB_END (bb);
18307 rtx prev;
18308 bool replace = false;
18309
18310 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18311 || !maybe_hot_bb_p (bb))
18312 continue;
18313 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18314 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18315 break;
18316 if (prev && GET_CODE (prev) == CODE_LABEL)
18317 {
18318 edge e;
18319 edge_iterator ei;
18320
18321 FOR_EACH_EDGE (e, ei, bb->preds)
18322 if (EDGE_FREQUENCY (e) && e->src->index >= 0
18323 && !(e->flags & EDGE_FALLTHRU))
18324 replace = true;
18325 }
18326 if (!replace)
18327 {
18328 prev = prev_active_insn (ret);
18329 if (prev
18330 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18331 || GET_CODE (prev) == CALL_INSN))
18332 replace = true;
18333 /* Empty functions get a branch mispredict even when the jump destination
18334 is not visible to us. */
18335 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18336 replace = true;
18337 }
18338 if (replace)
18339 {
18340 emit_insn_before (gen_return_internal_long (), ret);
18341 delete_insn (ret);
18342 }
18343 }
18344}
18345
18346/* Implement machine specific optimizations. We implement padding of returns
18347 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
18348static void
18349ix86_reorg (void)
18350{
18351 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18352 ix86_pad_returns ();
18353 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18354 ix86_avoid_jump_misspredicts ();
18355}
18356
18357 /* Return nonzero when a QImode register that must be represented via a REX
18358 prefix is used. */
18359bool
18360x86_extended_QIreg_mentioned_p (rtx insn)
18361{
18362 int i;
18363 extract_insn_cached (insn);
18364 for (i = 0; i < recog_data.n_operands; i++)
18365 if (REG_P (recog_data.operand[i])
18366 && REGNO (recog_data.operand[i]) >= 4)
18367 return true;
18368 return false;
18369}
18370
18371 /* Return nonzero when P points to a register encoded via a REX prefix.
18372 Called via for_each_rtx. */
18373static int
18374extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18375{
18376 unsigned int regno;
18377 if (!REG_P (*p))
18378 return 0;
18379 regno = REGNO (*p);
18380 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18381}
18382
18383 /* Return true when INSN mentions a register that must be encoded using a
18384 REX prefix. */
18385bool
18386x86_extended_reg_mentioned_p (rtx insn)
18387{
18388 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18389}
18390
18391/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18392 optabs would emit if we didn't have TFmode patterns. */
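/* Sketch of the idea (descriptive note): values with the top bit clear are
   converted with the ordinary signed FLOAT. Otherwise the input is halved
   as (x >> 1) | (x & 1) - keeping the low bit so rounding is unaffected -
   converted, and then doubled with f0 + f0. */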
18393
18394void
18395x86_emit_floatuns (rtx operands[2])
18396{
18397 rtx neglab, donelab, i0, i1, f0, in, out;
18398 enum machine_mode mode, inmode;
18399
18400 inmode = GET_MODE (operands[1]);
18401 gcc_assert (inmode == SImode || inmode == DImode);
18402
18403 out = operands[0];
18404 in = force_reg (inmode, operands[1]);
18405 mode = GET_MODE (out);
18406 neglab = gen_label_rtx ();
18407 donelab = gen_label_rtx ();
18408 i1 = gen_reg_rtx (Pmode);
18409 f0 = gen_reg_rtx (mode);
18410
18411 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18412
18413 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18414 emit_jump_insn (gen_jump (donelab));
18415 emit_barrier ();
18416
18417 emit_label (neglab);
18418
18419 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18420 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18421 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18422 expand_float (f0, i0, 0);
18423 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18424
18425 emit_label (donelab);
18426}
18427
18428/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18429 with all elements equal to VAR. Return true if successful. */
18430
18431static bool
18432ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18433 rtx target, rtx val)
18434{
18435 enum machine_mode smode, wsmode, wvmode;
18436 rtx x;
18437
18438 switch (mode)
18439 {
18440 case V2SImode:
18441 case V2SFmode:
18442 if (!mmx_ok)
18443 return false;
18444 /* FALLTHRU */
18445
18446 case V2DFmode:
18447 case V2DImode:
18448 case V4SFmode:
18449 case V4SImode:
18450 val = force_reg (GET_MODE_INNER (mode), val);
18451 x = gen_rtx_VEC_DUPLICATE (mode, val);
18452 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18453 return true;
18454
18455 case V4HImode:
18456 if (!mmx_ok)
18457 return false;
18458 if (TARGET_SSE || TARGET_3DNOW_A)
18459 {
18460 val = gen_lowpart (SImode, val);
18461 x = gen_rtx_TRUNCATE (HImode, val);
18462 x = gen_rtx_VEC_DUPLICATE (mode, x);
18463 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18464 return true;
18465 }
18466 else
18467 {
18468 smode = HImode;
18469 wsmode = SImode;
18470 wvmode = V2SImode;
18471 goto widen;
18472 }
18473
18474 case V8QImode:
18475 if (!mmx_ok)
18476 return false;
18477 smode = QImode;
18478 wsmode = HImode;
18479 wvmode = V4HImode;
18480 goto widen;
18481 case V8HImode:
18482 if (TARGET_SSE2)
18483 {
18484 rtx tmp1, tmp2;
18485 /* Extend HImode to SImode using a paradoxical SUBREG. */
18486 tmp1 = gen_reg_rtx (SImode);
18487 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18488 /* Insert the SImode value as low element of V4SImode vector. */
18489 tmp2 = gen_reg_rtx (V4SImode);
18490 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18491 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18492 CONST0_RTX (V4SImode),
18493 const1_rtx);
18494 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18495 /* Cast the V4SImode vector back to a V8HImode vector. */
18496 tmp1 = gen_reg_rtx (V8HImode);
18497 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18498 /* Duplicate the low short through the whole low SImode word. */
18499 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18500 /* Cast the V8HImode vector back to a V4SImode vector. */
18501 tmp2 = gen_reg_rtx (V4SImode);
18502 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18503 /* Replicate the low element of the V4SImode vector. */
18504 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18505 /* Cast the V4SImode vector back to V8HImode, and store in target. */
18506 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18507 return true;
18508 }
18509 smode = HImode;
18510 wsmode = SImode;
18511 wvmode = V4SImode;
18512 goto widen;
18513 case V16QImode:
18514 if (TARGET_SSE2)
18515 {
18516 rtx tmp1, tmp2;
18517 /* Extend QImode to SImode using a paradoxical SUBREG. */
18518 tmp1 = gen_reg_rtx (SImode);
18519 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18520 /* Insert the SImode value as low element of V4SImode vector. */
18521 tmp2 = gen_reg_rtx (V4SImode);
18522 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18523 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18524 CONST0_RTX (V4SImode),
18525 const1_rtx);
18526 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18527 /* Cast the V4SImode vector back to a V16QImode vector. */
18528 tmp1 = gen_reg_rtx (V16QImode);
18529 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18530 /* Duplicate the low byte through the whole low SImode word. */
18531 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18532 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18533 /* Cast the V16QImode vector back to a V4SImode vector. */
18534 tmp2 = gen_reg_rtx (V4SImode);
18535 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18536 /* Replicate the low element of the V4SImode vector. */
18537 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18538 /* Cast the V4SImode vector back to V16QImode, and store in target. */
18539 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18540 return true;
18541 }
18542 smode = QImode;
18543 wsmode = HImode;
18544 wvmode = V8HImode;
18545 goto widen;
18546 widen:
18547 /* Replicate the value once into the next wider mode and recurse. */
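/* (E.g., for V8QImode a QImode value 0xab is first turned into the HImode
   value 0xabab and we recurse with V4HImode.) */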
18548 val = convert_modes (wsmode, smode, val, true);
18549 x = expand_simple_binop (wsmode, ASHIFT, val,
18550 GEN_INT (GET_MODE_BITSIZE (smode)),
18551 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18552 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18553
18554 x = gen_reg_rtx (wvmode);
18555 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18556 gcc_unreachable ();
18557 emit_move_insn (target, gen_lowpart (mode, x));
18558 return true;
18559
18560 default:
18561 return false;
18562 }
18563}
18564
18565/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18566 whose ONE_VAR element is VAR, and other elements are zero. Return true
18567 if successful. */
18568
18569static bool
18570ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18571 rtx target, rtx var, int one_var)
18572{
18573 enum machine_mode vsimode;
18574 rtx new_target;
18575 rtx x, tmp;
18576
18577 switch (mode)
18578 {
18579 case V2SFmode:
18580 case V2SImode:
18581 if (!mmx_ok)
18582 return false;
18583 /* FALLTHRU */
18584
18585 case V2DFmode:
18586 case V2DImode:
18587 if (one_var != 0)
18588 return false;
18589 var = force_reg (GET_MODE_INNER (mode), var);
18590 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18591 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18592 return true;
18593
18594 case V4SFmode:
18595 case V4SImode:
18596 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18597 new_target = gen_reg_rtx (mode);
18598 else
18599 new_target = target;
18600 var = force_reg (GET_MODE_INNER (mode), var);
18601 x = gen_rtx_VEC_DUPLICATE (mode, var);
18602 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18603 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18604 if (one_var != 0)
18605 {
18606 /* We need to shuffle the value to the correct position, so
18607 create a new pseudo to store the intermediate result. */
18608
18609 /* With SSE2, we can use the integer shuffle insns. */
18610 if (mode != V4SFmode && TARGET_SSE2)
18611 {
18612 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18613 GEN_INT (1),
18614 GEN_INT (one_var == 1 ? 0 : 1),
18615 GEN_INT (one_var == 2 ? 0 : 1),
18616 GEN_INT (one_var == 3 ? 0 : 1)));
18617 if (target != new_target)
18618 emit_move_insn (target, new_target);
18619 return true;
18620 }
18621
18622 /* Otherwise convert the intermediate result to V4SFmode and
18623 use the SSE1 shuffle instructions. */
18624 if (mode != V4SFmode)
18625 {
18626 tmp = gen_reg_rtx (V4SFmode);
18627 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18628 }
18629 else
18630 tmp = new_target;
18631
18632 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18633 GEN_INT (1),
18634 GEN_INT (one_var == 1 ? 0 : 1),
18635 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18636 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18637
18638 if (mode != V4SFmode)
18639 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18640 else if (tmp != target)
18641 emit_move_insn (target, tmp);
18642 }
18643 else if (target != new_target)
18644 emit_move_insn (target, new_target);
18645 return true;
18646
18647 case V8HImode:
18648 case V16QImode:
18649 vsimode = V4SImode;
18650 goto widen;
18651 case V4HImode:
18652 case V8QImode:
18653 if (!mmx_ok)
18654 return false;
18655 vsimode = V2SImode;
18656 goto widen;
18657 widen:
18658 if (one_var != 0)
18659 return false;
18660
18661 /* Zero extend the variable element to SImode and recurse. */
18662 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18663
18664 x = gen_reg_rtx (vsimode);
18665 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18666 var, one_var))
18667 gcc_unreachable ();
18668
18669 emit_move_insn (target, gen_lowpart (mode, x));
18670 return true;
18671
18672 default:
18673 return false;
18674 }
18675}
18676
18677/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18678 consisting of the values in VALS. It is known that all elements
18679 except ONE_VAR are constants. Return true if successful. */
18680
18681static bool
18682ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18683 rtx target, rtx vals, int one_var)
18684{
18685 rtx var = XVECEXP (vals, 0, one_var);
18686 enum machine_mode wmode;
18687 rtx const_vec, x;
18688
18689 const_vec = copy_rtx (vals);
18690 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18691 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18692
18693 switch (mode)
18694 {
18695 case V2DFmode:
18696 case V2DImode:
18697 case V2SFmode:
18698 case V2SImode:
18699 /* For the two element vectors, it's just as easy to use
18700 the general case. */
18701 return false;
18702
18703 case V4SFmode:
18704 case V4SImode:
18705 case V8HImode:
18706 case V4HImode:
18707 break;
18708
18709 case V16QImode:
18710 wmode = V8HImode;
18711 goto widen;
18712 case V8QImode:
18713 wmode = V4HImode;
18714 goto widen;
18715 widen:
18716 /* There's no way to set one QImode entry easily. Combine
18717 the variable value with its adjacent constant value, and
18718 promote to an HImode set. */
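/* (Illustrative case: for element 5 of a V16QImode vector, the variable
   byte is merged with constant element 4 into one HImode value, which is
   then inserted as element 2 of the V8HImode view.) */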
18719 x = XVECEXP (vals, 0, one_var ^ 1);
18720 if (one_var & 1)
18721 {
18722 var = convert_modes (HImode, QImode, var, true);
18723 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18724 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18725 x = GEN_INT (INTVAL (x) & 0xff);
18726 }
18727 else
18728 {
18729 var = convert_modes (HImode, QImode, var, true);
18730 x = gen_int_mode (INTVAL (x) << 8, HImode);
18731 }
18732 if (x != const0_rtx)
18733 var = expand_simple_binop (HImode, IOR, var, x, var,
18734 1, OPTAB_LIB_WIDEN);
18735
18736 x = gen_reg_rtx (wmode);
18737 emit_move_insn (x, gen_lowpart (wmode, const_vec));
18738 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18739
18740 emit_move_insn (target, gen_lowpart (mode, x));
18741 return true;
18742
18743 default:
18744 return false;
18745 }
18746
18747 emit_move_insn (target, const_vec);
18748 ix86_expand_vector_set (mmx_ok, target, var, one_var);
18749 return true;
18750}
18751
18752/* A subroutine of ix86_expand_vector_init. Handle the most general case:
18753 all values variable, and none identical. */
18754
18755static void
18756ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18757 rtx target, rtx vals)
18758{
18759 enum machine_mode half_mode = GET_MODE_INNER (mode);
18760 rtx op0 = NULL, op1 = NULL;
18761 bool use_vec_concat = false;
18762
18763 switch (mode)
18764 {
18765 case V2SFmode:
18766 case V2SImode:
18767 if (!mmx_ok && !TARGET_SSE)
18768 break;
18769 /* FALLTHRU */
18770
18771 case V2DFmode:
18772 case V2DImode:
18773 /* For the two element vectors, we always implement VEC_CONCAT. */
18774 op0 = XVECEXP (vals, 0, 0);
18775 op1 = XVECEXP (vals, 0, 1);
18776 use_vec_concat = true;
18777 break;
18778
18779 case V4SFmode:
18780 half_mode = V2SFmode;
18781 goto half;
18782 case V4SImode:
18783 half_mode = V2SImode;
18784 goto half;
18785 half:
18786 {
18787 rtvec v;
18788
18789 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18790 Recurse to load the two halves. */
18791
18792 op0 = gen_reg_rtx (half_mode);
18793 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18794 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18795
18796 op1 = gen_reg_rtx (half_mode);
18797 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18798 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18799
18800 use_vec_concat = true;
18801 }
18802 break;
18803
18804 case V8HImode:
18805 case V16QImode:
18806 case V4HImode:
18807 case V8QImode:
18808 break;
18809
18810 default:
18811 gcc_unreachable ();
18812 }
18813
18814 if (use_vec_concat)
18815 {
18816 if (!register_operand (op0, half_mode))
18817 op0 = force_reg (half_mode, op0);
18818 if (!register_operand (op1, half_mode))
18819 op1 = force_reg (half_mode, op1);
18820
18821 emit_insn (gen_rtx_SET (VOIDmode, target,
18822 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18823 }
18824 else
18825 {
18826 int i, j, n_elts, n_words, n_elt_per_word;
18827 enum machine_mode inner_mode;
18828 rtx words[4], shift;
18829
18830 inner_mode = GET_MODE_INNER (mode);
18831 n_elts = GET_MODE_NUNITS (mode);
18832 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18833 n_elt_per_word = n_elts / n_words;
18834 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18835
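/* Descriptive note: each word-sized chunk is packed by starting with the
   word's most significant element and repeatedly shifting left by the
   element width while OR-ing in the next lower element; the resulting
   words are then pasted together below. */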
18836 for (i = 0; i < n_words; ++i)
18837 {
18838 rtx word = NULL_RTX;
18839
18840 for (j = 0; j < n_elt_per_word; ++j)
18841 {
18842 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18843 elt = convert_modes (word_mode, inner_mode, elt, true);
18844
18845 if (j == 0)
18846 word = elt;
18847 else
18848 {
18849 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18850 word, 1, OPTAB_LIB_WIDEN);
18851 word = expand_simple_binop (word_mode, IOR, word, elt,
18852 word, 1, OPTAB_LIB_WIDEN);
18853 }
18854 }
18855
18856 words[i] = word;
18857 }
18858
18859 if (n_words == 1)
18860 emit_move_insn (target, gen_lowpart (mode, words[0]));
18861 else if (n_words == 2)
18862 {
18863 rtx tmp = gen_reg_rtx (mode);
18864 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18865 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18866 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18867 emit_move_insn (target, tmp);
18868 }
18869 else if (n_words == 4)
18870 {
18871 rtx tmp = gen_reg_rtx (V4SImode);
18872 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18873 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18874 emit_move_insn (target, gen_lowpart (mode, tmp));
18875 }
18876 else
18877 gcc_unreachable ();
18878 }
18879}
18880
18881/* Initialize vector TARGET via VALS. Suppress the use of MMX
18882 instructions unless MMX_OK is true. */
18883
18884void
18885ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18886{
18887 enum machine_mode mode = GET_MODE (target);
18888 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18889 int n_elts = GET_MODE_NUNITS (mode);
18890 int n_var = 0, one_var = -1;
18891 bool all_same = true, all_const_zero = true;
18892 int i;
18893 rtx x;
18894
18895 for (i = 0; i < n_elts; ++i)
18896 {
18897 x = XVECEXP (vals, 0, i);
18898 if (!CONSTANT_P (x))
18899 n_var++, one_var = i;
18900 else if (x != CONST0_RTX (inner_mode))
18901 all_const_zero = false;
18902 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18903 all_same = false;
18904 }
18905
18906 /* Constants are best loaded from the constant pool. */
18907 if (n_var == 0)
18908 {
18909 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18910 return;
18911 }
18912
18913 /* If all values are identical, broadcast the value. */
18914 if (all_same
18915 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18916 XVECEXP (vals, 0, 0)))
18917 return;
18918
18919 /* Values where only one field is non-constant are best loaded from
18920 the pool and overwritten via move later. */
18921 if (n_var == 1)
18922 {
18923 if (all_const_zero
18924 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18925 XVECEXP (vals, 0, one_var),
18926 one_var))
18927 return;
18928
18929 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18930 return;
18931 }
18932
18933 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18934}
18935
18936void
18937ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18938{
18939 enum machine_mode mode = GET_MODE (target);
18940 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18941 bool use_vec_merge = false;
18942 rtx tmp;
18943
18944 switch (mode)
18945 {
18946 case V2SFmode:
18947 case V2SImode:
18948 if (mmx_ok)
18949 {
18950 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18951 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18952 if (elt == 0)
18953 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18954 else
18955 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18956 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18957 return;
18958 }
18959 break;
18960
18961 case V2DFmode:
18962 case V2DImode:
18963 {
18964 rtx op0, op1;
18965
18966 /* For the two element vectors, we implement a VEC_CONCAT with
18967 the extraction of the other element. */
18968
18969 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18970 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18971
18972 if (elt == 0)
18973 op0 = val, op1 = tmp;
18974 else
18975 op0 = tmp, op1 = val;
18976
18977 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18978 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18979 }
18980 return;
18981
18982 case V4SFmode:
18983 switch (elt)
18984 {
18985 case 0:
18986 use_vec_merge = true;
18987 break;
18988
18989 case 1:
18990 /* tmp = target = A B C D */
18991 tmp = copy_to_reg (target);
18992 /* target = A A B B */
18993 emit_insn (gen_sse_unpcklps (target, target, target));
18994 /* target = X A B B */
18995 ix86_expand_vector_set (false, target, val, 0);
18996 /* target = A X C D */
18997 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18998 GEN_INT (1), GEN_INT (0),
18999 GEN_INT (2+4), GEN_INT (3+4)));
19000 return;
19001
19002 case 2:
19003 /* tmp = target = A B C D */
19004 tmp = copy_to_reg (target);
19005 /* tmp = X B C D */
19006 ix86_expand_vector_set (false, tmp, val, 0);
19007 /* target = A B X D */
19008 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19009 GEN_INT (0), GEN_INT (1),
19010 GEN_INT (0+4), GEN_INT (3+4)));
19011 return;
19012
19013 case 3:
19014 /* tmp = target = A B C D */
19015 tmp = copy_to_reg (target);
19016 /* tmp = X B C D */
19017 ix86_expand_vector_set (false, tmp, val, 0);
19018	  /* target = A B C X */
19019 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19020 GEN_INT (0), GEN_INT (1),
19021 GEN_INT (2+4), GEN_INT (0+4)));
19022 return;
19023
19024 default:
19025 gcc_unreachable ();
19026 }
19027 break;
19028
19029 case V4SImode:
19030 /* Element 0 handled by vec_merge below. */
19031 if (elt == 0)
19032 {
19033 use_vec_merge = true;
19034 break;
19035 }
19036
19037 if (TARGET_SSE2)
19038 {
19039 /* With SSE2, use integer shuffles to swap element 0 and ELT,
19040 store into element 0, then shuffle them back. */
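	  /* For ELT == 2, for example, the permutation is { 2, 1, 0, 3 },
	     i.e. lanes 0 and 2 are exchanged.  Each such permutation is
	     its own inverse, so reapplying it after the element 0 store
	     puts the new value into lane ELT and restores the rest.  */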
19041
19042 rtx order[4];
19043
19044 order[0] = GEN_INT (elt);
19045 order[1] = const1_rtx;
19046 order[2] = const2_rtx;
19047 order[3] = GEN_INT (3);
19048 order[elt] = const0_rtx;
19049
19050 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19051 order[1], order[2], order[3]));
19052
19053 ix86_expand_vector_set (false, target, val, 0);
19054
19055 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19056 order[1], order[2], order[3]));
19057 }
19058 else
19059 {
19060 /* For SSE1, we have to reuse the V4SF code. */
19061 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
19062 gen_lowpart (SFmode, val), elt);
19063 }
19064 return;
19065
19066 case V8HImode:
19067 use_vec_merge = TARGET_SSE2;
19068 break;
19069 case V4HImode:
19070 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19071 break;
19072
19073 case V16QImode:
19074 case V8QImode:
19075 default:
19076 break;
19077 }
19078
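  /* (vec_merge (vec_duplicate VAL) TARGET (1 << ELT)) takes lane ELT
     from the duplicated VAL and all other lanes from TARGET, so only
     the selected element is rewritten.  The fallback path below spills
     the vector to a stack temporary, stores the scalar into the right
     slot and reloads the whole vector.  */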
19079 if (use_vec_merge)
19080 {
19081 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
19082 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
19083 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19084 }
19085 else
19086 {
19087 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19088
19089 emit_move_insn (mem, target);
19090
19091 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19092 emit_move_insn (tmp, val);
19093
19094 emit_move_insn (target, mem);
19095 }
19096}
19097
19098void
19099ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
19100{
19101 enum machine_mode mode = GET_MODE (vec);
19102 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19103 bool use_vec_extr = false;
19104 rtx tmp;
19105
19106 switch (mode)
19107 {
19108 case V2SImode:
19109 case V2SFmode:
19110 if (!mmx_ok)
19111 break;
19112 /* FALLTHRU */
19113
19114 case V2DFmode:
19115 case V2DImode:
19116 use_vec_extr = true;
19117 break;
19118
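    /* For V4SF we first move the wanted element into lane 0 (shufps
       for lanes 1 and 3, unpckhps for lane 2, nothing for lane 0) and
       then extract lane 0 with a plain vec_select below.  */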
19119 case V4SFmode:
19120 switch (elt)
19121 {
19122 case 0:
19123 tmp = vec;
19124 break;
19125
19126 case 1:
19127 case 3:
19128 tmp = gen_reg_rtx (mode);
19129 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
19130 GEN_INT (elt), GEN_INT (elt),
19131 GEN_INT (elt+4), GEN_INT (elt+4)));
19132 break;
19133
19134 case 2:
19135 tmp = gen_reg_rtx (mode);
19136 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
19137 break;
19138
19139 default:
19140 gcc_unreachable ();
19141 }
19142 vec = tmp;
19143 use_vec_extr = true;
19144 elt = 0;
19145 break;
19146
19147 case V4SImode:
19148 if (TARGET_SSE2)
19149 {
19150 switch (elt)
19151 {
19152 case 0:
19153 tmp = vec;
19154 break;
19155
19156 case 1:
19157 case 3:
19158 tmp = gen_reg_rtx (mode);
19159 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
19160 GEN_INT (elt), GEN_INT (elt),
19161 GEN_INT (elt), GEN_INT (elt)));
19162 break;
19163
19164 case 2:
19165 tmp = gen_reg_rtx (mode);
19166 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
19167 break;
19168
19169 default:
19170 gcc_unreachable ();
19171 }
19172 vec = tmp;
19173 use_vec_extr = true;
19174 elt = 0;
19175 }
19176 else
19177 {
19178 /* For SSE1, we have to reuse the V4SF code. */
19179 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
19180 gen_lowpart (V4SFmode, vec), elt);
19181 return;
19182 }
19183 break;
19184
19185 case V8HImode:
19186 use_vec_extr = TARGET_SSE2;
19187 break;
19188 case V4HImode:
19189 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19190 break;
19191
19192 case V16QImode:
19193 case V8QImode:
19194 /* ??? Could extract the appropriate HImode element and shift. */
19195 default:
19196 break;
19197 }
19198
19199 if (use_vec_extr)
19200 {
19201 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19202 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19203
19204 /* Let the rtl optimizers know about the zero extension performed. */
19205 if (inner_mode == HImode)
19206 {
19207 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19208 target = gen_lowpart (SImode, target);
19209 }
19210
19211 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19212 }
19213 else
19214 {
19215 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19216
19217 emit_move_insn (mem, vec);
19218
19219 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19220 emit_move_insn (target, tmp);
19221 }
19222}
19223
19224/* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
19225 pattern to reduce; DEST is the destination; IN is the input vector. */
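/* Sketch of the sequence for IN = { a, b, c, d } and a binary op <op>:
     movhlps  tmp1 = { c, d, c, d }
     FN       tmp2 = { a<op>c, b<op>d, ... }
     shufps   tmp3 = broadcast of tmp2 lane 1, i.e. b<op>d
     FN       dest = { (a<op>c)<op>(b<op>d), ... }
   so lane 0 of DEST holds the reduction of all four elements.  */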
19226
19227void
19228ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19229{
19230 rtx tmp1, tmp2, tmp3;
19231
19232 tmp1 = gen_reg_rtx (V4SFmode);
19233 tmp2 = gen_reg_rtx (V4SFmode);
19234 tmp3 = gen_reg_rtx (V4SFmode);
19235
19236 emit_insn (gen_sse_movhlps (tmp1, in, in));
19237 emit_insn (fn (tmp2, tmp1, in));
19238
19239 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19240 GEN_INT (1), GEN_INT (1),
19241 GEN_INT (1+4), GEN_INT (1+4)));
19242 emit_insn (fn (dest, tmp2, tmp3));
19243}
19244
19245/* Target hook for scalar_mode_supported_p. */
19246static bool
19247ix86_scalar_mode_supported_p (enum machine_mode mode)
19248{
19249 if (DECIMAL_FLOAT_MODE_P (mode))
19250 return true;
19251 else
19252 return default_scalar_mode_supported_p (mode);
19253}
19254
19255/* Implements target hook vector_mode_supported_p. */
19256static bool
19257ix86_vector_mode_supported_p (enum machine_mode mode)
19258{
19259 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19260 return true;
19261 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19262 return true;
19263 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
19264 return true;
19265 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
19266 return true;
19267 return false;
19268}
19269
19270/* Worker function for TARGET_MD_ASM_CLOBBERS.
19271
19272 We do this in the new i386 backend to maintain source compatibility
19273 with the old cc0-based compiler. */
19274
19275static tree
19276ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
19277 tree inputs ATTRIBUTE_UNUSED,
19278 tree clobbers)
19279{
19280 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
19281 clobbers);
19282 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
19283 clobbers);
19284 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
19285 clobbers);
19286 return clobbers;
19287}
19288
19289/* Return true if this goes in large data/bss.  */
19290
19291static bool
19292ix86_in_large_data_p (tree exp)
19293{
19294 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
19295 return false;
19296
19297 /* Functions are never large data. */
19298 if (TREE_CODE (exp) == FUNCTION_DECL)
19299 return false;
19300
19301 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
19302 {
19303 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
19304 if (strcmp (section, ".ldata") == 0
19305 || strcmp (section, ".lbss") == 0)
19306 return true;
19307 return false;
19308 }
19309 else
19310 {
19311 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
19312
19313 /* If this is an incomplete type with size 0, then we can't put it
19314 in data because it might be too big when completed. */
19315 if (!size || size > ix86_section_threshold)
19316 return true;
19317 }
19318
19319 return false;
19320}
19321static void
19322ix86_encode_section_info (tree decl, rtx rtl, int first)
19323{
19324 default_encode_section_info (decl, rtl, first);
19325
19326 if (TREE_CODE (decl) == VAR_DECL
19327 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19328 && ix86_in_large_data_p (decl))
19329 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19330}
19331
19332/* Worker function for REVERSE_CONDITION. */
19333
19334enum rtx_code
19335ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19336{
19337 return (mode != CCFPmode && mode != CCFPUmode
19338 ? reverse_condition (code)
19339 : reverse_condition_maybe_unordered (code));
19340}
19341
19342/* Output code to perform an x87 FP register move, from OPERANDS[1]
19343 to OPERANDS[0]. */
19344
19345const char *
19346output_387_reg_move (rtx insn, rtx *operands)
19347{
19348 if (REG_P (operands[1])
19349 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19350 {
19351 if (REGNO (operands[0]) == FIRST_STACK_REG)
19352 return output_387_ffreep (operands, 0);
19353 return "fstp\t%y0";
19354 }
19355 if (STACK_TOP_P (operands[0]))
19356 return "fld%z1\t%y1";
19357 return "fst\t%y0";
19358}
19359
19360/* Output code to perform a conditional jump to LABEL, if the C2 flag in
19361   the FP status register is set.  */
19362
19363void
19364ix86_emit_fp_unordered_jump (rtx label)
19365{
19366 rtx reg = gen_reg_rtx (HImode);
19367 rtx temp;
19368
19369 emit_insn (gen_x86_fnstsw_1 (reg));
19370
19371 if (TARGET_USE_SAHF)
19372 {
19373 emit_insn (gen_x86_sahf_1 (reg));
19374
19375 temp = gen_rtx_REG (CCmode, FLAGS_REG);
19376 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19377 }
19378 else
19379 {
19380 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19381
19382 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19383 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19384 }
19385
19386 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19387 gen_rtx_LABEL_REF (VOIDmode, label),
19388 pc_rtx);
19389 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19390 emit_jump_insn (temp);
19391}
19392
19393/* Output code to perform a log1p XFmode calculation. */
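/* fyl2xp1 computes y * log2(x + 1), but is only specified for
   |x| < 1 - sqrt(2)/2 (about 0.2928932..., the constant tested below).
   Inside that range we use it directly; otherwise we compute
   y * log2(1 + x) with fyl2x applied to 1 + x.  In both cases y is
   loaded with fldln2 (ln 2), so the result is ln(2) * log2(1 + x)
   = log1p(x).  */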
19394
19395void ix86_emit_i387_log1p (rtx op0, rtx op1)
19396{
19397 rtx label1 = gen_label_rtx ();
19398 rtx label2 = gen_label_rtx ();
19399
19400 rtx tmp = gen_reg_rtx (XFmode);
19401 rtx tmp2 = gen_reg_rtx (XFmode);
19402
19403 emit_insn (gen_absxf2 (tmp, op1));
19404 emit_insn (gen_cmpxf (tmp,
19405 CONST_DOUBLE_FROM_REAL_VALUE (
19406 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19407 XFmode)));
19408 emit_jump_insn (gen_bge (label1));
19409
19410 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19411 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19412 emit_jump (label2);
19413
19414 emit_label (label1);
19415 emit_move_insn (tmp, CONST1_RTX (XFmode));
19416 emit_insn (gen_addxf3 (tmp, op1, tmp));
19417 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19418 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19419
19420 emit_label (label2);
19421}
19422
19423/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19424
19425static void
19426i386_solaris_elf_named_section (const char *name, unsigned int flags,
19427 tree decl)
19428{
19429 /* With Binutils 2.15, the "@unwind" marker must be specified on
19430 every occurrence of the ".eh_frame" section, not just the first
19431 one. */
19432 if (TARGET_64BIT
19433 && strcmp (name, ".eh_frame") == 0)
19434 {
19435 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19436 flags & SECTION_WRITE ? "aw" : "a");
19437 return;
19438 }
19439 default_elf_asm_named_section (name, flags, decl);
19440}
19441
19442/* Return the mangling of TYPE if it is an extended fundamental type. */
19443
19444static const char *
19445ix86_mangle_fundamental_type (tree type)
19446{
19447 switch (TYPE_MODE (type))
19448 {
19449 case TFmode:
19450 /* __float128 is "g". */
19451 return "g";
19452 case XFmode:
19453 /* "long double" or __float80 is "e". */
19454 return "e";
19455 default:
19456 return NULL;
19457 }
19458}
19459
19460/* For 32-bit code we can save PIC register setup by using
19461 __stack_chk_fail_local hidden function instead of calling
19462   __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
19463 register, so it is better to call __stack_chk_fail directly. */
19464
19465static tree
19466ix86_stack_protect_fail (void)
19467{
19468 return TARGET_64BIT
19469 ? default_external_stack_protect_fail ()
19470 : default_hidden_stack_protect_fail ();
19471}
19472
19473/* Select a format to encode pointers in exception handling data. CODE
19474 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19475 true if the symbol may be affected by dynamic relocations.
19476
19477 ??? All x86 object file formats are capable of representing this.
19478 After all, the relocation needed is the same as for the call insn.
19479 Whether or not a particular assembler allows us to enter such, I
19480 guess we'll have to see. */
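/* For example, 32-bit PIC code encodes a global symbol as
   DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4, while non-PIC
   code uses DW_EH_PE_udata4 under the small code model and
   DW_EH_PE_absptr otherwise.  */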
19481int
19482asm_preferred_eh_data_format (int code, int global)
19483{
19484 if (flag_pic)
19485 {
19486 int type = DW_EH_PE_sdata8;
19487 if (!TARGET_64BIT
19488 || ix86_cmodel == CM_SMALL_PIC
19489 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19490 type = DW_EH_PE_sdata4;
19491 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19492 }
19493 if (ix86_cmodel == CM_SMALL
19494 || (ix86_cmodel == CM_MEDIUM && code))
19495 return DW_EH_PE_udata4;
19496 return DW_EH_PE_absptr;
19497}
19498
19499#include "gt-i386.h"
17165 case IX86_BUILTIN_VEC_INIT_V2SI:
17166 case IX86_BUILTIN_VEC_INIT_V4HI:
17167 case IX86_BUILTIN_VEC_INIT_V8QI:
17168 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
17169
17170 case IX86_BUILTIN_VEC_EXT_V2DF:
17171 case IX86_BUILTIN_VEC_EXT_V2DI:
17172 case IX86_BUILTIN_VEC_EXT_V4SF:
17173 case IX86_BUILTIN_VEC_EXT_V4SI:
17174 case IX86_BUILTIN_VEC_EXT_V8HI:
17175 case IX86_BUILTIN_VEC_EXT_V16QI:
17176 case IX86_BUILTIN_VEC_EXT_V2SI:
17177 case IX86_BUILTIN_VEC_EXT_V4HI:
17178 return ix86_expand_vec_ext_builtin (arglist, target);
17179
17180 case IX86_BUILTIN_VEC_SET_V8HI:
17181 case IX86_BUILTIN_VEC_SET_V4HI:
17182 return ix86_expand_vec_set_builtin (arglist);
17183
17184 default:
17185 break;
17186 }
17187
17188 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17189 if (d->code == fcode)
17190 {
17191 /* Compares are treated specially. */
17192 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17193 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
17194 || d->icode == CODE_FOR_sse2_maskcmpv2df3
17195 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17196 return ix86_expand_sse_compare (d, arglist, target);
17197
17198 return ix86_expand_binop_builtin (d->icode, arglist, target);
17199 }
17200
17201 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17202 if (d->code == fcode)
17203 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
17204
17205 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17206 if (d->code == fcode)
17207 return ix86_expand_sse_comi (d, arglist, target);
17208
17209 gcc_unreachable ();
17210}
17211
17212/* Store OPERAND to the memory after reload is completed. This means
17213 that we can't easily use assign_stack_local. */
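/* With a red zone (x86-64) we can store below the stack pointer
   without adjusting it, since the ABI reserves that area for
   leaf-function use.  Without a red zone the value is pushed instead,
   and ix86_free_from_memory later releases the slot by adjusting the
   stack pointer.  */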
17214rtx
17215ix86_force_to_memory (enum machine_mode mode, rtx operand)
17216{
17217 rtx result;
17218
17219 gcc_assert (reload_completed);
17220 if (TARGET_RED_ZONE)
17221 {
17222 result = gen_rtx_MEM (mode,
17223 gen_rtx_PLUS (Pmode,
17224 stack_pointer_rtx,
17225 GEN_INT (-RED_ZONE_SIZE)));
17226 emit_move_insn (result, operand);
17227 }
17228 else if (!TARGET_RED_ZONE && TARGET_64BIT)
17229 {
17230 switch (mode)
17231 {
17232 case HImode:
17233 case SImode:
17234 operand = gen_lowpart (DImode, operand);
17235 /* FALLTHRU */
17236 case DImode:
17237 emit_insn (
17238 gen_rtx_SET (VOIDmode,
17239 gen_rtx_MEM (DImode,
17240 gen_rtx_PRE_DEC (DImode,
17241 stack_pointer_rtx)),
17242 operand));
17243 break;
17244 default:
17245 gcc_unreachable ();
17246 }
17247 result = gen_rtx_MEM (mode, stack_pointer_rtx);
17248 }
17249 else
17250 {
17251 switch (mode)
17252 {
17253 case DImode:
17254 {
17255 rtx operands[2];
17256 split_di (&operand, 1, operands, operands + 1);
17257 emit_insn (
17258 gen_rtx_SET (VOIDmode,
17259 gen_rtx_MEM (SImode,
17260 gen_rtx_PRE_DEC (Pmode,
17261 stack_pointer_rtx)),
17262 operands[1]));
17263 emit_insn (
17264 gen_rtx_SET (VOIDmode,
17265 gen_rtx_MEM (SImode,
17266 gen_rtx_PRE_DEC (Pmode,
17267 stack_pointer_rtx)),
17268 operands[0]));
17269 }
17270 break;
17271 case HImode:
17272 /* Store HImodes as SImodes. */
17273 operand = gen_lowpart (SImode, operand);
17274 /* FALLTHRU */
17275 case SImode:
17276 emit_insn (
17277 gen_rtx_SET (VOIDmode,
17278 gen_rtx_MEM (GET_MODE (operand),
17279 gen_rtx_PRE_DEC (SImode,
17280 stack_pointer_rtx)),
17281 operand));
17282 break;
17283 default:
17284 gcc_unreachable ();
17285 }
17286 result = gen_rtx_MEM (mode, stack_pointer_rtx);
17287 }
17288 return result;
17289}
17290
17291/* Free operand from the memory. */
17292void
17293ix86_free_from_memory (enum machine_mode mode)
17294{
17295 if (!TARGET_RED_ZONE)
17296 {
17297 int size;
17298
17299 if (mode == DImode || TARGET_64BIT)
17300 size = 8;
17301 else
17302 size = 4;
17303 /* Use LEA to deallocate stack space. In peephole2 it will be converted
17304 to pop or add instruction if registers are available. */
17305 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17306 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
17307 GEN_INT (size))));
17308 }
17309}
17310
17311/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
17312 QImode must go into class Q_REGS.
17313 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
17314 movdf to do mem-to-mem moves through integer regs. */
17315enum reg_class
17316ix86_preferred_reload_class (rtx x, enum reg_class class)
17317{
17318 enum machine_mode mode = GET_MODE (x);
17319
17320 /* We're only allowed to return a subclass of CLASS. Many of the
17321 following checks fail for NO_REGS, so eliminate that early. */
17322 if (class == NO_REGS)
17323 return NO_REGS;
17324
17325 /* All classes can load zeros. */
17326 if (x == CONST0_RTX (mode))
17327 return class;
17328
17329 /* Force constants into memory if we are loading a (nonzero) constant into
17330 an MMX or SSE register. This is because there are no MMX/SSE instructions
17331 to load from a constant. */
17332 if (CONSTANT_P (x)
17333 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
17334 return NO_REGS;
17335
17336 /* Prefer SSE regs only, if we can use them for math. */
17337 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
17338 return SSE_CLASS_P (class) ? class : NO_REGS;
17339
17340 /* Floating-point constants need more complex checks. */
17341 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
17342 {
17343 /* General regs can load everything. */
17344 if (reg_class_subset_p (class, GENERAL_REGS))
17345 return class;
17346
17347 /* Floats can load 0 and 1 plus some others. Note that we eliminated
17348 zero above. We only want to wind up preferring 80387 registers if
17349 we plan on doing computation with them. */
17350 if (TARGET_80387
17351 && standard_80387_constant_p (x))
17352 {
17353 /* Limit class to non-sse. */
17354 if (class == FLOAT_SSE_REGS)
17355 return FLOAT_REGS;
17356 if (class == FP_TOP_SSE_REGS)
17357 return FP_TOP_REG;
17358 if (class == FP_SECOND_SSE_REGS)
17359 return FP_SECOND_REG;
17360 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
17361 return class;
17362 }
17363
17364 return NO_REGS;
17365 }
17366
17367 /* Generally when we see PLUS here, it's the function invariant
17368     (plus soft-fp const_int), which can only be computed into general
17369     regs.  */
17370 if (GET_CODE (x) == PLUS)
17371 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
17372
17373 /* QImode constants are easy to load, but non-constant QImode data
17374 must go into Q_REGS. */
17375 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
17376 {
17377 if (reg_class_subset_p (class, Q_REGS))
17378 return class;
17379 if (reg_class_subset_p (Q_REGS, class))
17380 return Q_REGS;
17381 return NO_REGS;
17382 }
17383
17384 return class;
17385}
17386
17387/* Discourage putting floating-point values in SSE registers unless
17388 SSE math is being used, and likewise for the 387 registers. */
17389enum reg_class
17390ix86_preferred_output_reload_class (rtx x, enum reg_class class)
17391{
17392 enum machine_mode mode = GET_MODE (x);
17393
17394 /* Restrict the output reload class to the register bank that we are doing
17395 math on. If we would like not to return a subset of CLASS, reject this
17396 alternative: if reload cannot do this, it will still use its choice. */
17397 mode = GET_MODE (x);
17398 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17399 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
17400
17401 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
17402 {
17403 if (class == FP_TOP_SSE_REGS)
17404 return FP_TOP_REG;
17405 else if (class == FP_SECOND_SSE_REGS)
17406 return FP_SECOND_REG;
17407 else
17408 return FLOAT_CLASS_P (class) ? class : NO_REGS;
17409 }
17410
17411 return class;
17412}
17413
17414/* If we are copying between general and FP registers, we need a memory
17415 location. The same is true for SSE and MMX registers.
17416
17417   The macro can't work reliably when one of the CLASSES is a class containing
17418   registers from multiple units (SSE, MMX, integer).  We avoid this by never
17419   combining those units in a single alternative in the machine description.
17420 Ensure that this constraint holds to avoid unexpected surprises.
17421
17422 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
17423 enforce these sanity checks. */
17424
17425int
17426ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
17427 enum machine_mode mode, int strict)
17428{
17429 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
17430 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
17431 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
17432 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
17433 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
17434 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
17435 {
17436 gcc_assert (!strict);
17437 return true;
17438 }
17439
17440 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
17441 return true;
17442
17443 /* ??? This is a lie. We do have moves between mmx/general, and for
17444 mmx/sse2. But by saying we need secondary memory we discourage the
17445 register allocator from using the mmx registers unless needed. */
17446 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
17447 return true;
17448
17449 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17450 {
17451 /* SSE1 doesn't have any direct moves from other classes. */
17452 if (!TARGET_SSE2)
17453 return true;
17454
17455 /* If the target says that inter-unit moves are more expensive
17456 than moving through memory, then don't generate them. */
17457 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
17458 return true;
17459
17460 /* Between SSE and general, we have moves no larger than word size. */
17461 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
17462 return true;
17463
17464 /* ??? For the cost of one register reformat penalty, we could use
17465 the same instructions to move SFmode and DFmode data, but the
17466 relevant move patterns don't support those alternatives. */
17467 if (mode == SFmode || mode == DFmode)
17468 return true;
17469 }
17470
17471 return false;
17472}
17473
17474/* Return true if the registers in CLASS cannot represent the change from
17475 modes FROM to TO. */
17476
17477bool
17478ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
17479 enum reg_class class)
17480{
17481 if (from == to)
17482 return false;
17483
17484 /* x87 registers can't do subreg at all, as all values are reformatted
17485 to extended precision. */
17486 if (MAYBE_FLOAT_CLASS_P (class))
17487 return true;
17488
17489 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17490 {
17491 /* Vector registers do not support QI or HImode loads. If we don't
17492 disallow a change to these modes, reload will assume it's ok to
17493 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
17494 the vec_dupv4hi pattern. */
17495 if (GET_MODE_SIZE (from) < 4)
17496 return true;
17497
17498 /* Vector registers do not support subreg with nonzero offsets, which
17499 are otherwise valid for integer registers. Since we can't see
17500 whether we have a nonzero offset from here, prohibit all
17501 nonparadoxical subregs changing size. */
17502 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
17503 return true;
17504 }
17505
17506 return false;
17507}
17508
17509/* Return the cost of moving data from a register in class CLASS1 to
17510 one in class CLASS2.
17511
17512 It is not required that the cost always equal 2 when FROM is the same as TO;
17513 on some machines it is expensive to move between registers if they are not
17514 general registers. */
17515
17516int
17517ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
17518 enum reg_class class2)
17519{
17520 /* In case we require secondary memory, compute cost of the store followed
17521 by load. In order to avoid bad register allocation choices, we need
17522 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
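  /* Roughly: cost = 1 + max (load, store cost of CLASS1 in MODE)
                       + max (load, store cost of CLASS2 in MODE),
     plus the penalties added below.  */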
17523
17524 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
17525 {
17526 int cost = 1;
17527
17528 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
17529 MEMORY_MOVE_COST (mode, class1, 1));
17530 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
17531 MEMORY_MOVE_COST (mode, class2, 1));
17532
17533      /* In case of copying from a general purpose register we may emit
17534	 multiple stores followed by a single load, causing a memory size
17535	 mismatch stall.  Count this as an arbitrarily high cost of 20.  */
17536 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
17537 cost += 20;
17538
17539 /* In the case of FP/MMX moves, the registers actually overlap, and we
17540 have to switch modes in order to treat them differently. */
17541 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
17542 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
17543 cost += 20;
17544
17545 return cost;
17546 }
17547
17548 /* Moves between SSE/MMX and integer unit are expensive. */
17549 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17550 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17551 return ix86_cost->mmxsse_to_integer;
17552 if (MAYBE_FLOAT_CLASS_P (class1))
17553 return ix86_cost->fp_move;
17554 if (MAYBE_SSE_CLASS_P (class1))
17555 return ix86_cost->sse_move;
17556 if (MAYBE_MMX_CLASS_P (class1))
17557 return ix86_cost->mmx_move;
17558 return 2;
17559}
17560
17561/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
17562
17563bool
17564ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17565{
17566 /* Flags and only flags can only hold CCmode values. */
17567 if (CC_REGNO_P (regno))
17568 return GET_MODE_CLASS (mode) == MODE_CC;
17569 if (GET_MODE_CLASS (mode) == MODE_CC
17570 || GET_MODE_CLASS (mode) == MODE_RANDOM
17571 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17572 return 0;
17573 if (FP_REGNO_P (regno))
17574 return VALID_FP_MODE_P (mode);
17575 if (SSE_REGNO_P (regno))
17576 {
17577 /* We implement the move patterns for all vector modes into and
17578 out of SSE registers, even when no operation instructions
17579 are available. */
17580 return (VALID_SSE_REG_MODE (mode)
17581 || VALID_SSE2_REG_MODE (mode)
17582 || VALID_MMX_REG_MODE (mode)
17583 || VALID_MMX_REG_MODE_3DNOW (mode));
17584 }
17585 if (MMX_REGNO_P (regno))
17586 {
17587 /* We implement the move patterns for 3DNOW modes even in MMX mode,
17588 so if the register is available at all, then we can move data of
17589 the given mode into or out of it. */
17590 return (VALID_MMX_REG_MODE (mode)
17591 || VALID_MMX_REG_MODE_3DNOW (mode));
17592 }
17593
17594 if (mode == QImode)
17595 {
17596 /* Take care for QImode values - they can be in non-QI regs,
17597 but then they do cause partial register stalls. */
17598 if (regno < 4 || TARGET_64BIT)
17599 return 1;
17600 if (!TARGET_PARTIAL_REG_STALL)
17601 return 1;
17602 return reload_in_progress || reload_completed;
17603 }
17604 /* We handle both integer and floats in the general purpose registers. */
17605 else if (VALID_INT_MODE_P (mode))
17606 return 1;
17607 else if (VALID_FP_MODE_P (mode))
17608 return 1;
17609 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
17610 on to use that value in smaller contexts, this can easily force a
17611 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
17612 supporting DImode, allow it. */
17613 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17614 return 1;
17615
17616 return 0;
17617}
17618
17619/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17620 tieable integer mode. */
17621
17622static bool
17623ix86_tieable_integer_mode_p (enum machine_mode mode)
17624{
17625 switch (mode)
17626 {
17627 case HImode:
17628 case SImode:
17629 return true;
17630
17631 case QImode:
17632 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17633
17634 case DImode:
17635 return TARGET_64BIT;
17636
17637 default:
17638 return false;
17639 }
17640}
17641
17642/* Return true if MODE1 is accessible in a register that can hold MODE2
17643 without copying. That is, all register classes that can hold MODE2
17644 can also hold MODE1. */
17645
17646bool
17647ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17648{
17649 if (mode1 == mode2)
17650 return true;
17651
17652 if (ix86_tieable_integer_mode_p (mode1)
17653 && ix86_tieable_integer_mode_p (mode2))
17654 return true;
17655
17656 /* MODE2 being XFmode implies fp stack or general regs, which means we
17657 can tie any smaller floating point modes to it. Note that we do not
17658 tie this with TFmode. */
17659 if (mode2 == XFmode)
17660 return mode1 == SFmode || mode1 == DFmode;
17661
17662 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17663 that we can tie it with SFmode. */
17664 if (mode2 == DFmode)
17665 return mode1 == SFmode;
17666
17667 /* If MODE2 is only appropriate for an SSE register, then tie with
17668 any other mode acceptable to SSE registers. */
17669 if (GET_MODE_SIZE (mode2) >= 8
17670 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17671 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17672
17673 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17674 with any other mode acceptable to MMX registers. */
17675 if (GET_MODE_SIZE (mode2) == 8
17676 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17677 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17678
17679 return false;
17680}
17681
17682/* Return the cost of moving data of mode M between a
17683 register and memory. A value of 2 is the default; this cost is
17684 relative to those in `REGISTER_MOVE_COST'.
17685
17686 If moving between registers and memory is more expensive than
17687 between two registers, you should define this macro to express the
17688 relative cost.
17689
17690   Also model the increased cost of moving QImode registers in
17691   non-Q_REGS classes.
17692 */
17693int
17694ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17695{
17696 if (FLOAT_CLASS_P (class))
17697 {
17698 int index;
17699 switch (mode)
17700 {
17701 case SFmode:
17702 index = 0;
17703 break;
17704 case DFmode:
17705 index = 1;
17706 break;
17707 case XFmode:
17708 index = 2;
17709 break;
17710 default:
17711 return 100;
17712 }
17713 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17714 }
17715 if (SSE_CLASS_P (class))
17716 {
17717 int index;
17718 switch (GET_MODE_SIZE (mode))
17719 {
17720 case 4:
17721 index = 0;
17722 break;
17723 case 8:
17724 index = 1;
17725 break;
17726 case 16:
17727 index = 2;
17728 break;
17729 default:
17730 return 100;
17731 }
17732 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17733 }
17734 if (MMX_CLASS_P (class))
17735 {
17736 int index;
17737 switch (GET_MODE_SIZE (mode))
17738 {
17739 case 4:
17740 index = 0;
17741 break;
17742 case 8:
17743 index = 1;
17744 break;
17745 default:
17746 return 100;
17747 }
17748 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17749 }
17750 switch (GET_MODE_SIZE (mode))
17751 {
17752 case 1:
17753 if (in)
17754 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17755 : ix86_cost->movzbl_load);
17756 else
17757 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17758 : ix86_cost->int_store[0] + 4);
17759 break;
17760 case 2:
17761 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17762 default:
17763 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17764 if (mode == TFmode)
17765 mode = XFmode;
17766 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17767 * (((int) GET_MODE_SIZE (mode)
17768 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17769 }
17770}
17771
17772/* Compute a (partial) cost for rtx X. Return true if the complete
17773 cost has been computed, and false if subexpressions should be
17774 scanned. In either case, *TOTAL contains the cost result. */
17775
17776static bool
17777ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17778{
17779 enum machine_mode mode = GET_MODE (x);
17780
17781 switch (code)
17782 {
17783 case CONST_INT:
17784 case CONST:
17785 case LABEL_REF:
17786 case SYMBOL_REF:
17787 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17788 *total = 3;
17789 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17790 *total = 2;
17791 else if (flag_pic && SYMBOLIC_CONST (x)
17792 && (!TARGET_64BIT
17793	       || (GET_CODE (x) != LABEL_REF
17794 && (GET_CODE (x) != SYMBOL_REF
17795 || !SYMBOL_REF_LOCAL_P (x)))))
17796 *total = 1;
17797 else
17798 *total = 0;
17799 return true;
17800
17801 case CONST_DOUBLE:
17802 if (mode == VOIDmode)
17803 *total = 0;
17804 else
17805 switch (standard_80387_constant_p (x))
17806 {
17807 case 1: /* 0.0 */
17808 *total = 1;
17809 break;
17810 default: /* Other constants */
17811 *total = 2;
17812 break;
17813 case 0:
17814 case -1:
17815 /* Start with (MEM (SYMBOL_REF)), since that's where
17816 it'll probably end up. Add a penalty for size. */
17817 *total = (COSTS_N_INSNS (1)
17818 + (flag_pic != 0 && !TARGET_64BIT)
17819 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17820 break;
17821 }
17822 return true;
17823
17824 case ZERO_EXTEND:
17825      /* The zero extension is often completely free on x86_64, so make
17826 it as cheap as possible. */
17827 if (TARGET_64BIT && mode == DImode
17828 && GET_MODE (XEXP (x, 0)) == SImode)
17829 *total = 1;
17830 else if (TARGET_ZERO_EXTEND_WITH_AND)
17831 *total = ix86_cost->add;
17832 else
17833 *total = ix86_cost->movzx;
17834 return false;
17835
17836 case SIGN_EXTEND:
17837 *total = ix86_cost->movsx;
17838 return false;
17839
17840 case ASHIFT:
17841 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17842 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17843 {
17844 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17845 if (value == 1)
17846 {
17847 *total = ix86_cost->add;
17848 return false;
17849 }
17850 if ((value == 2 || value == 3)
17851 && ix86_cost->lea <= ix86_cost->shift_const)
17852 {
17853 *total = ix86_cost->lea;
17854 return false;
17855 }
17856 }
17857 /* FALLTHRU */
17858
17859 case ROTATE:
17860 case ASHIFTRT:
17861 case LSHIFTRT:
17862 case ROTATERT:
17863 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17864 {
17865 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17866 {
17867 if (INTVAL (XEXP (x, 1)) > 32)
17868 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17869 else
17870 *total = ix86_cost->shift_const * 2;
17871 }
17872 else
17873 {
17874 if (GET_CODE (XEXP (x, 1)) == AND)
17875 *total = ix86_cost->shift_var * 2;
17876 else
17877 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17878 }
17879 }
17880 else
17881 {
17882 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17883 *total = ix86_cost->shift_const;
17884 else
17885 *total = ix86_cost->shift_var;
17886 }
17887 return false;
17888
17889 case MULT:
17890 if (FLOAT_MODE_P (mode))
17891 {
17892 *total = ix86_cost->fmul;
17893 return false;
17894 }
17895 else
17896 {
17897 rtx op0 = XEXP (x, 0);
17898 rtx op1 = XEXP (x, 1);
17899 int nbits;
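	  /* For a CONST_INT multiplier, approximate the cost by its
	     number of set bits: the loop clears the lowest set bit on
	     each iteration (value &= value - 1), so e.g. a multiplier
	     of 0x14 gives nbits = 2.  */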
17900 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17901 {
17902 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17903 for (nbits = 0; value != 0; value &= value - 1)
17904 nbits++;
17905 }
17906 else
17907 /* This is arbitrary. */
17908 nbits = 7;
17909
17910 /* Compute costs correctly for widening multiplication. */
17911	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
17912 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17913 == GET_MODE_SIZE (mode))
17914 {
17915 int is_mulwiden = 0;
17916 enum machine_mode inner_mode = GET_MODE (op0);
17917
17918 if (GET_CODE (op0) == GET_CODE (op1))
17919 is_mulwiden = 1, op1 = XEXP (op1, 0);
17920 else if (GET_CODE (op1) == CONST_INT)
17921 {
17922 if (GET_CODE (op0) == SIGN_EXTEND)
17923 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17924 == INTVAL (op1);
17925 else
17926 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17927 }
17928
17929 if (is_mulwiden)
17930 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17931 }
17932
17933 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17934 + nbits * ix86_cost->mult_bit
17935 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17936
17937 return true;
17938 }
17939
17940 case DIV:
17941 case UDIV:
17942 case MOD:
17943 case UMOD:
17944 if (FLOAT_MODE_P (mode))
17945 *total = ix86_cost->fdiv;
17946 else
17947 *total = ix86_cost->divide[MODE_INDEX (mode)];
17948 return false;
17949
17950 case PLUS:
17951 if (FLOAT_MODE_P (mode))
17952 *total = ix86_cost->fadd;
17953 else if (GET_MODE_CLASS (mode) == MODE_INT
17954 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17955 {
17956 if (GET_CODE (XEXP (x, 0)) == PLUS
17957 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17958 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17959 && CONSTANT_P (XEXP (x, 1)))
17960 {
17961 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17962 if (val == 2 || val == 4 || val == 8)
17963 {
17964 *total = ix86_cost->lea;
17965 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17966 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17967 outer_code);
17968 *total += rtx_cost (XEXP (x, 1), outer_code);
17969 return true;
17970 }
17971 }
17972 else if (GET_CODE (XEXP (x, 0)) == MULT
17973 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17974 {
17975 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17976 if (val == 2 || val == 4 || val == 8)
17977 {
17978 *total = ix86_cost->lea;
17979 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17980 *total += rtx_cost (XEXP (x, 1), outer_code);
17981 return true;
17982 }
17983 }
17984 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17985 {
17986 *total = ix86_cost->lea;
17987 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17988 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17989 *total += rtx_cost (XEXP (x, 1), outer_code);
17990 return true;
17991 }
17992 }
17993 /* FALLTHRU */
17994
17995 case MINUS:
17996 if (FLOAT_MODE_P (mode))
17997 {
17998 *total = ix86_cost->fadd;
17999 return false;
18000 }
18001 /* FALLTHRU */
18002
18003 case AND:
18004 case IOR:
18005 case XOR:
18006 if (!TARGET_64BIT && mode == DImode)
18007 {
18008 *total = (ix86_cost->add * 2
18009 + (rtx_cost (XEXP (x, 0), outer_code)
18010 << (GET_MODE (XEXP (x, 0)) != DImode))
18011 + (rtx_cost (XEXP (x, 1), outer_code)
18012 << (GET_MODE (XEXP (x, 1)) != DImode)));
18013 return true;
18014 }
18015 /* FALLTHRU */
18016
18017 case NEG:
18018 if (FLOAT_MODE_P (mode))
18019 {
18020 *total = ix86_cost->fchs;
18021 return false;
18022 }
18023 /* FALLTHRU */
18024
18025 case NOT:
18026 if (!TARGET_64BIT && mode == DImode)
18027 *total = ix86_cost->add * 2;
18028 else
18029 *total = ix86_cost->add;
18030 return false;
18031
18032 case COMPARE:
18033 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
18034 && XEXP (XEXP (x, 0), 1) == const1_rtx
18035 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
18036 && XEXP (x, 1) == const0_rtx)
18037 {
18038 /* This kind of construct is implemented using test[bwl].
18039 Treat it as if we had an AND. */
18040 *total = (ix86_cost->add
18041 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
18042 + rtx_cost (const1_rtx, outer_code));
18043 return true;
18044 }
18045 return false;
18046
18047 case FLOAT_EXTEND:
18048 if (!TARGET_SSE_MATH
18049 || mode == XFmode
18050 || (mode == DFmode && !TARGET_SSE2))
18051 /* For standard 80387 constants, raise the cost to prevent
18052	 compress_float_constant() from generating a load from memory.  */
18053 switch (standard_80387_constant_p (XEXP (x, 0)))
18054 {
18055 case -1:
18056 case 0:
18057 *total = 0;
18058 break;
18059 case 1: /* 0.0 */
18060 *total = 1;
18061 break;
18062 default:
18063 *total = (x86_ext_80387_constants & TUNEMASK
18064 || optimize_size
18065 ? 1 : 0);
18066 }
18067 return false;
18068
18069 case ABS:
18070 if (FLOAT_MODE_P (mode))
18071 *total = ix86_cost->fabs;
18072 return false;
18073
18074 case SQRT:
18075 if (FLOAT_MODE_P (mode))
18076 *total = ix86_cost->fsqrt;
18077 return false;
18078
18079 case UNSPEC:
18080 if (XINT (x, 1) == UNSPEC_TP)
18081 *total = 0;
18082 return false;
18083
18084 default:
18085 return false;
18086 }
18087}
18088
18089#if TARGET_MACHO
18090
18091static int current_machopic_label_num;
18092
18093/* Given a symbol name and its associated stub, write out the
18094 definition of the stub. */
18095
18096void
18097machopic_output_stub (FILE *file, const char *symb, const char *stub)
18098{
18099 unsigned int length;
18100 char *binder_name, *symbol_name, lazy_ptr_name[32];
18101 int label = ++current_machopic_label_num;
18102
18103 /* For 64-bit we shouldn't get here. */
18104 gcc_assert (!TARGET_64BIT);
18105
18106 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
18107 symb = (*targetm.strip_name_encoding) (symb);
18108
18109 length = strlen (stub);
18110 binder_name = alloca (length + 32);
18111 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
18112
18113 length = strlen (symb);
18114 symbol_name = alloca (length + 32);
18115 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
18116
18117 sprintf (lazy_ptr_name, "L%d$lz", label);
18118
18119 if (MACHOPIC_PURE)
18120 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
18121 else
18122 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
18123
18124 fprintf (file, "%s:\n", stub);
18125 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18126
18127 if (MACHOPIC_PURE)
18128 {
18129 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
18130 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
18131 fprintf (file, "\tjmp\t*%%edx\n");
18132 }
18133 else
18134 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
18135
18136 fprintf (file, "%s:\n", binder_name);
18137
18138 if (MACHOPIC_PURE)
18139 {
18140 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
18141 fprintf (file, "\tpushl\t%%eax\n");
18142 }
18143 else
18144 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
18145
18146 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
18147
18148 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
18149 fprintf (file, "%s:\n", lazy_ptr_name);
18150 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18151 fprintf (file, "\t.long %s\n", binder_name);
18152}
18153
18154void
18155darwin_x86_file_end (void)
18156{
18157 darwin_file_end ();
18158 ix86_file_end ();
18159}
18160#endif /* TARGET_MACHO */
18161
18162/* Order the registers for register allocator. */
18163
18164void
18165x86_order_regs_for_local_alloc (void)
18166{
18167 int pos = 0;
18168 int i;
18169
18170 /* First allocate the local general purpose registers. */
18171 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18172 if (GENERAL_REGNO_P (i) && call_used_regs[i])
18173 reg_alloc_order [pos++] = i;
18174
18175 /* Global general purpose registers. */
18176 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18177 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
18178 reg_alloc_order [pos++] = i;
18179
18180 /* x87 registers come first in case we are doing FP math
18181 using them. */
18182 if (!TARGET_SSE_MATH)
18183 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18184 reg_alloc_order [pos++] = i;
18185
18186 /* SSE registers. */
18187 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18188 reg_alloc_order [pos++] = i;
18189 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18190 reg_alloc_order [pos++] = i;
18191
18192 /* x87 registers. */
18193 if (TARGET_SSE_MATH)
18194 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18195 reg_alloc_order [pos++] = i;
18196
18197 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
18198 reg_alloc_order [pos++] = i;
18199
18200   /* Initialize the rest of the array, as we do not allocate some registers
18201 at all. */
18202 while (pos < FIRST_PSEUDO_REGISTER)
18203 reg_alloc_order [pos++] = 0;
18204}
18205
18206/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
18207 struct attribute_spec.handler. */
18208static tree
18209ix86_handle_struct_attribute (tree *node, tree name,
18210 tree args ATTRIBUTE_UNUSED,
18211 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
18212{
18213 tree *type = NULL;
18214 if (DECL_P (*node))
18215 {
18216 if (TREE_CODE (*node) == TYPE_DECL)
18217 type = &TREE_TYPE (*node);
18218 }
18219 else
18220 type = node;
18221
18222 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
18223 || TREE_CODE (*type) == UNION_TYPE)))
18224 {
18225 warning (OPT_Wattributes, "%qs attribute ignored",
18226 IDENTIFIER_POINTER (name));
18227 *no_add_attrs = true;
18228 }
18229
18230 else if ((is_attribute_p ("ms_struct", name)
18231 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
18232 || ((is_attribute_p ("gcc_struct", name)
18233 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
18234 {
18235 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
18236 IDENTIFIER_POINTER (name));
18237 *no_add_attrs = true;
18238 }
18239
18240 return NULL_TREE;
18241}
18242
18243static bool
18244ix86_ms_bitfield_layout_p (tree record_type)
18245{
18246 return (TARGET_MS_BITFIELD_LAYOUT &&
18247 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
18248 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
18249}
18250
18251/* Returns an expression indicating where the this parameter is
18252 located on entry to the FUNCTION. */
18253
18254static rtx
18255x86_this_parameter (tree function)
18256{
18257 tree type = TREE_TYPE (function);
18258
18259 if (TARGET_64BIT)
18260 {
18261 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
18262 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
18263 }
18264
18265 if (ix86_function_regparm (type, function) > 0)
18266 {
18267 tree parm;
18268
18269 parm = TYPE_ARG_TYPES (type);
18270 /* Figure out whether or not the function has a variable number of
18271 arguments. */
18272 for (; parm; parm = TREE_CHAIN (parm))
18273 if (TREE_VALUE (parm) == void_type_node)
18274 break;
18275 /* If not, the this parameter is in the first argument. */
18276 if (parm)
18277 {
18278 int regno = 0;
18279 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
18280 regno = 2;
18281 return gen_rtx_REG (SImode, regno);
18282 }
18283 }
18284
18285 if (aggregate_value_p (TREE_TYPE (type), type))
18286 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
18287 else
18288 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
18289}
18290
18291/* Determine whether x86_output_mi_thunk can succeed. */
18292
18293static bool
18294x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
18295 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
18296 HOST_WIDE_INT vcall_offset, tree function)
18297{
18298 /* 64-bit can handle anything. */
18299 if (TARGET_64BIT)
18300 return true;
18301
18302 /* For 32-bit, everything's fine if we have one free register. */
18303 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
18304 return true;
18305
18306 /* Need a free register for vcall_offset. */
18307 if (vcall_offset)
18308 return false;
18309
18310 /* Need a free register for GOT references. */
18311 if (flag_pic && !(*targetm.binds_local_p) (function))
18312 return false;
18313
18314 /* Otherwise ok. */
18315 return true;
18316}
18317
18318/* Output the assembler code for a thunk function. THUNK_DECL is the
18319 declaration for the thunk function itself, FUNCTION is the decl for
18320 the target function. DELTA is an immediate constant offset to be
18321 added to THIS. If VCALL_OFFSET is nonzero, the word at
18322 *(*this + vcall_offset) should be added to THIS. */
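/* In C-like pseudo code the emitted thunk is roughly:
     this += DELTA;
     if (VCALL_OFFSET)
       this += *(intptr_t *) (*(char **) this + VCALL_OFFSET);
     goto FUNCTION;
   using only registers that are free at function entry.  */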
18323
18324static void
18325x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
18326 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
18327 HOST_WIDE_INT vcall_offset, tree function)
18328{
18329 rtx xops[3];
18330 rtx this = x86_this_parameter (function);
18331 rtx this_reg, tmp;
18332
18333 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
18334 pull it in now and let DELTA benefit. */
18335 if (REG_P (this))
18336 this_reg = this;
18337 else if (vcall_offset)
18338 {
18339 /* Put the this parameter into %eax. */
18340 xops[0] = this;
18341 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
18342 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18343 }
18344 else
18345 this_reg = NULL_RTX;
18346
18347 /* Adjust the this parameter by a fixed constant. */
18348 if (delta)
18349 {
18350 xops[0] = GEN_INT (delta);
18351 xops[1] = this_reg ? this_reg : this;
18352 if (TARGET_64BIT)
18353 {
18354 if (!x86_64_general_operand (xops[0], DImode))
18355 {
18356 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18357 xops[1] = tmp;
18358 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
18359 xops[0] = tmp;
18360 xops[1] = this;
18361 }
18362 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18363 }
18364 else
18365 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18366 }
18367
18368 /* Adjust the this parameter by a value stored in the vtable. */
18369 if (vcall_offset)
18370 {
18371 if (TARGET_64BIT)
18372 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18373 else
18374 {
18375 int tmp_regno = 2 /* ECX */;
18376 if (lookup_attribute ("fastcall",
18377 TYPE_ATTRIBUTES (TREE_TYPE (function))))
18378 tmp_regno = 0 /* EAX */;
18379 tmp = gen_rtx_REG (SImode, tmp_regno);
18380 }
18381
18382 xops[0] = gen_rtx_MEM (Pmode, this_reg);
18383 xops[1] = tmp;
18384 if (TARGET_64BIT)
18385 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18386 else
18387 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18388
18389 /* Adjust the this parameter. */
18390 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
18391 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
18392 {
18393 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
18394 xops[0] = GEN_INT (vcall_offset);
18395 xops[1] = tmp2;
18396 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18397 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
18398 }
18399 xops[1] = this_reg;
18400 if (TARGET_64BIT)
18401 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18402 else
18403 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18404 }
18405
18406 /* If necessary, drop THIS back to its stack slot. */
18407 if (this_reg && this_reg != this)
18408 {
18409 xops[0] = this_reg;
18410 xops[1] = this;
18411 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18412 }
18413
18414 xops[0] = XEXP (DECL_RTL (function), 0);
18415 if (TARGET_64BIT)
18416 {
18417 if (!flag_pic || (*targetm.binds_local_p) (function))
18418 output_asm_insn ("jmp\t%P0", xops);
18419 else
18420 {
18421 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
18422 tmp = gen_rtx_CONST (Pmode, tmp);
18423 tmp = gen_rtx_MEM (QImode, tmp);
18424 xops[0] = tmp;
18425 output_asm_insn ("jmp\t%A0", xops);
18426 }
18427 }
18428 else
18429 {
18430 if (!flag_pic || (*targetm.binds_local_p) (function))
18431 output_asm_insn ("jmp\t%P0", xops);
18432 else
18433#if TARGET_MACHO
18434 if (TARGET_MACHO)
18435 {
18436 rtx sym_ref = XEXP (DECL_RTL (function), 0);
18437 tmp = (gen_rtx_SYMBOL_REF
18438 (Pmode,
18439 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
18440 tmp = gen_rtx_MEM (QImode, tmp);
18441 xops[0] = tmp;
18442 output_asm_insn ("jmp\t%0", xops);
18443 }
18444 else
18445#endif /* TARGET_MACHO */
18446 {
18447 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
18448 output_set_got (tmp, NULL_RTX);
18449
18450 xops[1] = tmp;
18451 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
18452 output_asm_insn ("jmp\t{*}%1", xops);
18453 }
18454 }
18455}
18456
18457static void
18458x86_file_start (void)
18459{
18460 default_file_start ();
18461#if TARGET_MACHO
18462 darwin_file_start ();
18463#endif
18464 if (X86_FILE_START_VERSION_DIRECTIVE)
18465 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
18466 if (X86_FILE_START_FLTUSED)
18467 fputs ("\t.global\t__fltused\n", asm_out_file);
18468 if (ix86_asm_dialect == ASM_INTEL)
18469 fputs ("\t.intel_syntax\n", asm_out_file);
18470}
18471
18472int
18473x86_field_alignment (tree field, int computed)
18474{
18475 enum machine_mode mode;
18476 tree type = TREE_TYPE (field);
18477
18478 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
18479 return computed;
18480 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
18481 ? get_inner_array_type (type) : type);
18482 if (mode == DFmode || mode == DCmode
18483 || GET_MODE_CLASS (mode) == MODE_INT
18484 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
18485 return MIN (32, computed);
18486 return computed;
18487}
18488
18489/* Output assembler code to FILE to increment profiler label # LABELNO
18490 for profiling a function entry. */
18491void
18492x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
18493{
18494 if (TARGET_64BIT)
18495 if (flag_pic)
18496 {
18497#ifndef NO_PROFILE_COUNTERS
18498      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
18499#endif
18500 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
18501 }
18502 else
18503 {
18504#ifndef NO_PROFILE_COUNTERS
18505 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
18506#endif
18507 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18508 }
18509 else if (flag_pic)
18510 {
18511#ifndef NO_PROFILE_COUNTERS
18512 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
18513 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
18514#endif
18515 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
18516 }
18517 else
18518 {
18519#ifndef NO_PROFILE_COUNTERS
18520 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
18521 PROFILE_COUNT_REGISTER);
18522#endif
18523 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18524 }
18525}
18526
18527/* We don't have exact information about the insn sizes, but we may assume
18528   quite safely that we are informed about all 1-byte insns and memory
18529 address sizes. This is enough to eliminate unnecessary padding in
18530 99% of cases. */
18531
18532static int
18533min_insn_size (rtx insn)
18534{
18535 int l = 0;
18536
18537 if (!INSN_P (insn) || !active_insn_p (insn))
18538 return 0;
18539
18540  /* Discard alignments we've emitted and jump table insns.  */
18541 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
18542 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
18543 return 0;
18544 if (GET_CODE (insn) == JUMP_INSN
18545 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18546 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18547 return 0;
18548
18549 /* Important case - calls are always 5 bytes.
18550     It is common to have many calls in a row.  */
18551 if (GET_CODE (insn) == CALL_INSN
18552 && symbolic_reference_mentioned_p (PATTERN (insn))
18553 && !SIBLING_CALL_P (insn))
18554 return 5;
18555 if (get_attr_length (insn) <= 1)
18556 return 1;
18557
18558  /* For normal instructions we rely on the sizes of addresses and on the
18559     presence of a symbolic reference to require 4 bytes of encoding.
18560     This is not the case for jumps, where references are PC relative.  */
18561 if (GET_CODE (insn) != JUMP_INSN)
18562 {
18563 l = get_attr_length_address (insn);
18564 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18565 l = 4;
18566 }
18567 if (l)
18568 return 1+l;
18569 else
18570 return 2;
18571}
18572
18573/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
18574 window. */
18575
18576static void
18577ix86_avoid_jump_misspredicts (void)
18578{
18579 rtx insn, start = get_insns ();
18580 int nbytes = 0, njumps = 0;
18581 int isjump = 0;
18582
18583  /* Look for all minimal intervals of instructions containing 4 jumps.
18584     The intervals are bounded by START and INSN.  NBYTES is the total
18585     size of the instructions in the interval, including INSN but not
18586     including START.  When NBYTES is smaller than 16, it is possible
18587     that the end of START and INSN end up in the same 16-byte window.
18588
18589     The smallest offset at which INSN can start within that window is the
18590     case where START ends at offset 0; the offset of INSN is then
18591     NBYTES - sizeof (INSN).  We add a p2align to the 16-byte window with
18592     maxskip 17 - NBYTES + sizeof (INSN).  */
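  /* Illustrative example: four consecutive call insns are each estimated
     at 5 bytes.  When the fourth call becomes INSN, the window is shrunk
     until it holds only the last three calls (njumps == 3, isjump set,
     nbytes == 15 < 16), so a padding insn of 15 - nbytes
     + min_insn_size (insn) = 5 bytes is emitted before the fourth call,
     pushing it into the next 16-byte block.  */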
18593 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18594 {
18595
18596 nbytes += min_insn_size (insn);
18597 if (dump_file)
18598 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
18599 INSN_UID (insn), min_insn_size (insn));
18600 if ((GET_CODE (insn) == JUMP_INSN
18601 && GET_CODE (PATTERN (insn)) != ADDR_VEC
18602 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
18603 || GET_CODE (insn) == CALL_INSN)
18604 njumps++;
18605 else
18606 continue;
18607
18608 while (njumps > 3)
18609 {
18610 start = NEXT_INSN (start);
18611 if ((GET_CODE (start) == JUMP_INSN
18612 && GET_CODE (PATTERN (start)) != ADDR_VEC
18613 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
18614 || GET_CODE (start) == CALL_INSN)
18615 njumps--, isjump = 1;
18616 else
18617 isjump = 0;
18618 nbytes -= min_insn_size (start);
18619 }
18620 gcc_assert (njumps >= 0);
18621 if (dump_file)
18622 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18623 INSN_UID (start), INSN_UID (insn), nbytes);
18624
18625 if (njumps == 3 && isjump && nbytes < 16)
18626 {
18627 int padsize = 15 - nbytes + min_insn_size (insn);
18628
18629 if (dump_file)
18630 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18631 INSN_UID (insn), padsize);
18632 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
18633 }
18634 }
18635}
18636
18637/* The AMD Athlon works faster
18638   when RET is not the destination of a conditional jump and is not directly
18639   preceded by another jump instruction.  We avoid the penalty by inserting
18640   a NOP just before the RET instruction in such cases.  */
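/* When one of these situations is detected, the code below deletes the
   plain RET and re-emits it as the return_internal_long pattern, a longer
   return sequence that avoids the penalty described above.  */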
18641static void
18642ix86_pad_returns (void)
18643{
18644 edge e;
18645 edge_iterator ei;
18646
18647 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18648 {
18649 basic_block bb = e->src;
18650 rtx ret = BB_END (bb);
18651 rtx prev;
18652 bool replace = false;
18653
18654 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18655 || !maybe_hot_bb_p (bb))
18656 continue;
18657 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18658 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18659 break;
18660 if (prev && GET_CODE (prev) == CODE_LABEL)
18661 {
18662 edge e;
18663 edge_iterator ei;
18664
18665 FOR_EACH_EDGE (e, ei, bb->preds)
18666 if (EDGE_FREQUENCY (e) && e->src->index >= 0
18667 && !(e->flags & EDGE_FALLTHRU))
18668 replace = true;
18669 }
18670 if (!replace)
18671 {
18672 prev = prev_active_insn (ret);
18673 if (prev
18674 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18675 || GET_CODE (prev) == CALL_INSN))
18676 replace = true;
18677	  /* Empty functions get a branch mispredict even when the jump destination
18678	     is not visible to us.  */
18679 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18680 replace = true;
18681 }
18682 if (replace)
18683 {
18684 emit_insn_before (gen_return_internal_long (), ret);
18685 delete_insn (ret);
18686 }
18687 }
18688}
18689
18690/* Implement machine-specific optimizations.  We implement padding of returns
18691   for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
18692static void
18693ix86_reorg (void)
18694{
18695 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18696 ix86_pad_returns ();
18697 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18698 ix86_avoid_jump_misspredicts ();
18699}
18700
18701/* Return nonzero when a QImode register that must be represented via a REX
18702   prefix is used.  */
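/* In the i386 register numbering the first four integer registers are AX,
   DX, CX and BX, whose low bytes are addressable without a REX prefix; the
   QImode form of any higher-numbered hard register (SIL, DIL, BPL, SPL,
   R8B-R15B) requires a REX prefix, which is what the REGNO >= 4 check below
   detects.  */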
18703bool
18704x86_extended_QIreg_mentioned_p (rtx insn)
18705{
18706 int i;
18707 extract_insn_cached (insn);
18708 for (i = 0; i < recog_data.n_operands; i++)
18709 if (REG_P (recog_data.operand[i])
18710 && REGNO (recog_data.operand[i]) >= 4)
18711 return true;
18712 return false;
18713}
18714
18715/* Return nonzero when P points to a register encoded via a REX prefix.
18716   Called via for_each_rtx.  */
18717static int
18718extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18719{
18720 unsigned int regno;
18721 if (!REG_P (*p))
18722 return 0;
18723 regno = REGNO (*p);
18724 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18725}
18726
18727/* Return true when INSN mentions a register that must be encoded using a
18728   REX prefix.  */
18729bool
18730x86_extended_reg_mentioned_p (rtx insn)
18731{
18732 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18733}
18734
18735/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18736 optabs would emit if we didn't have TFmode patterns. */
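/* A sketch of the generated sequence: when IN is non-negative, the plain
   signed int-to-float conversion is used directly.  Otherwise we compute
     i0 = (IN >> 1) | (IN & 1);
   convert i0 with a signed conversion, and double the result
   (OUT = f0 + f0).  Halving first keeps the value within signed range, and
   OR-ing in the low bit preserves correct rounding for odd values.  */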
18737
18738void
18739x86_emit_floatuns (rtx operands[2])
18740{
18741 rtx neglab, donelab, i0, i1, f0, in, out;
18742 enum machine_mode mode, inmode;
18743
18744 inmode = GET_MODE (operands[1]);
18745 gcc_assert (inmode == SImode || inmode == DImode);
18746
18747 out = operands[0];
18748 in = force_reg (inmode, operands[1]);
18749 mode = GET_MODE (out);
18750 neglab = gen_label_rtx ();
18751 donelab = gen_label_rtx ();
18752 i1 = gen_reg_rtx (Pmode);
18753 f0 = gen_reg_rtx (mode);
18754
18755 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18756
18757 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18758 emit_jump_insn (gen_jump (donelab));
18759 emit_barrier ();
18760
18761 emit_label (neglab);
18762
18763 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18764 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18765 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18766 expand_float (f0, i0, 0);
18767 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18768
18769 emit_label (donelab);
18770}
18771
18772/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18773   with all elements equal to VAL.  Return true if successful.  */
18774
18775static bool
18776ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18777 rtx target, rtx val)
18778{
18779 enum machine_mode smode, wsmode, wvmode;
18780 rtx x;
18781
18782 switch (mode)
18783 {
18784 case V2SImode:
18785 case V2SFmode:
18786 if (!mmx_ok)
18787 return false;
18788 /* FALLTHRU */
18789
18790 case V2DFmode:
18791 case V2DImode:
18792 case V4SFmode:
18793 case V4SImode:
18794 val = force_reg (GET_MODE_INNER (mode), val);
18795 x = gen_rtx_VEC_DUPLICATE (mode, val);
18796 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18797 return true;
18798
18799 case V4HImode:
18800 if (!mmx_ok)
18801 return false;
18802 if (TARGET_SSE || TARGET_3DNOW_A)
18803 {
18804 val = gen_lowpart (SImode, val);
18805 x = gen_rtx_TRUNCATE (HImode, val);
18806 x = gen_rtx_VEC_DUPLICATE (mode, x);
18807 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18808 return true;
18809 }
18810 else
18811 {
18812 smode = HImode;
18813 wsmode = SImode;
18814 wvmode = V2SImode;
18815 goto widen;
18816 }
18817
18818 case V8QImode:
18819 if (!mmx_ok)
18820 return false;
18821 smode = QImode;
18822 wsmode = HImode;
18823 wvmode = V4HImode;
18824 goto widen;
18825 case V8HImode:
18826 if (TARGET_SSE2)
18827 {
18828 rtx tmp1, tmp2;
18829 /* Extend HImode to SImode using a paradoxical SUBREG. */
18830 tmp1 = gen_reg_rtx (SImode);
18831 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18832 /* Insert the SImode value as low element of V4SImode vector. */
18833 tmp2 = gen_reg_rtx (V4SImode);
18834 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18835 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18836 CONST0_RTX (V4SImode),
18837 const1_rtx);
18838 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18839 /* Cast the V4SImode vector back to a V8HImode vector. */
18840 tmp1 = gen_reg_rtx (V8HImode);
18841 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18842 /* Duplicate the low short through the whole low SImode word. */
18843 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18844 /* Cast the V8HImode vector back to a V4SImode vector. */
18845 tmp2 = gen_reg_rtx (V4SImode);
18846 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18847 /* Replicate the low element of the V4SImode vector. */
18848 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18849	  /* Cast the V4SImode vector back to V8HImode, and store in target.  */
18850 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18851 return true;
18852 }
18853 smode = HImode;
18854 wsmode = SImode;
18855 wvmode = V4SImode;
18856 goto widen;
18857 case V16QImode:
18858 if (TARGET_SSE2)
18859 {
18860 rtx tmp1, tmp2;
18861 /* Extend QImode to SImode using a paradoxical SUBREG. */
18862 tmp1 = gen_reg_rtx (SImode);
18863 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18864 /* Insert the SImode value as low element of V4SImode vector. */
18865 tmp2 = gen_reg_rtx (V4SImode);
18866 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18867 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18868 CONST0_RTX (V4SImode),
18869 const1_rtx);
18870 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18871 /* Cast the V4SImode vector back to a V16QImode vector. */
18872 tmp1 = gen_reg_rtx (V16QImode);
18873 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18874 /* Duplicate the low byte through the whole low SImode word. */
18875 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18876 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18877 /* Cast the V16QImode vector back to a V4SImode vector. */
18878 tmp2 = gen_reg_rtx (V4SImode);
18879 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18880 /* Replicate the low element of the V4SImode vector. */
18881 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18882	  /* Cast the V4SImode vector back to V16QImode, and store in target.  */
18883 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18884 return true;
18885 }
18886 smode = QImode;
18887 wsmode = HImode;
18888 wvmode = V8HImode;
18889 goto widen;
18890 widen:
18891 /* Replicate the value once into the next wider mode and recurse. */
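      /* For example, broadcasting the QImode value 0xAB first forms the
	 HImode value 0xABAB via (val << GET_MODE_BITSIZE (QImode)) | val,
	 and the V8QImode broadcast then proceeds as a V4HImode broadcast
	 of that wider value.  */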
18892 val = convert_modes (wsmode, smode, val, true);
18893 x = expand_simple_binop (wsmode, ASHIFT, val,
18894 GEN_INT (GET_MODE_BITSIZE (smode)),
18895 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18896 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18897
18898 x = gen_reg_rtx (wvmode);
18899 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18900 gcc_unreachable ();
18901 emit_move_insn (target, gen_lowpart (mode, x));
18902 return true;
18903
18904 default:
18905 return false;
18906 }
18907}
18908
18909/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18910   whose ONE_VAR element is VAR and whose other elements are zero.  Return
18911   true if successful.  */
18912
18913static bool
18914ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18915 rtx target, rtx var, int one_var)
18916{
18917 enum machine_mode vsimode;
18918 rtx new_target;
18919 rtx x, tmp;
18920
18921 switch (mode)
18922 {
18923 case V2SFmode:
18924 case V2SImode:
18925 if (!mmx_ok)
18926 return false;
18927 /* FALLTHRU */
18928
18929 case V2DFmode:
18930 case V2DImode:
18931 if (one_var != 0)
18932 return false;
18933 var = force_reg (GET_MODE_INNER (mode), var);
18934 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18935 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18936 return true;
18937
18938 case V4SFmode:
18939 case V4SImode:
18940 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18941 new_target = gen_reg_rtx (mode);
18942 else
18943 new_target = target;
18944 var = force_reg (GET_MODE_INNER (mode), var);
18945 x = gen_rtx_VEC_DUPLICATE (mode, var);
18946 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18947 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18948 if (one_var != 0)
18949 {
18950 /* We need to shuffle the value to the correct position, so
18951 create a new pseudo to store the intermediate result. */
18952
18953 /* With SSE2, we can use the integer shuffle insns. */
18954 if (mode != V4SFmode && TARGET_SSE2)
18955 {
18956 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18957 GEN_INT (1),
18958 GEN_INT (one_var == 1 ? 0 : 1),
18959 GEN_INT (one_var == 2 ? 0 : 1),
18960 GEN_INT (one_var == 3 ? 0 : 1)));
18961 if (target != new_target)
18962 emit_move_insn (target, new_target);
18963 return true;
18964 }
18965
18966 /* Otherwise convert the intermediate result to V4SFmode and
18967 use the SSE1 shuffle instructions. */
18968 if (mode != V4SFmode)
18969 {
18970 tmp = gen_reg_rtx (V4SFmode);
18971 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18972 }
18973 else
18974 tmp = new_target;
18975
18976 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18977 GEN_INT (1),
18978 GEN_INT (one_var == 1 ? 0 : 1),
18979 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18980 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18981
18982 if (mode != V4SFmode)
18983 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18984 else if (tmp != target)
18985 emit_move_insn (target, tmp);
18986 }
18987 else if (target != new_target)
18988 emit_move_insn (target, new_target);
18989 return true;
18990
18991 case V8HImode:
18992 case V16QImode:
18993 vsimode = V4SImode;
18994 goto widen;
18995 case V4HImode:
18996 case V8QImode:
18997 if (!mmx_ok)
18998 return false;
18999 vsimode = V2SImode;
19000 goto widen;
19001 widen:
19002 if (one_var != 0)
19003 return false;
19004
19005 /* Zero extend the variable element to SImode and recurse. */
19006 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
19007
19008 x = gen_reg_rtx (vsimode);
19009 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
19010 var, one_var))
19011 gcc_unreachable ();
19012
19013 emit_move_insn (target, gen_lowpart (mode, x));
19014 return true;
19015
19016 default:
19017 return false;
19018 }
19019}
19020
19021/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19022 consisting of the values in VALS. It is known that all elements
19023 except ONE_VAR are constants. Return true if successful. */
19024
19025static bool
19026ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
19027 rtx target, rtx vals, int one_var)
19028{
19029 rtx var = XVECEXP (vals, 0, one_var);
19030 enum machine_mode wmode;
19031 rtx const_vec, x;
19032
19033 const_vec = copy_rtx (vals);
19034 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
19035 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
19036
19037 switch (mode)
19038 {
19039 case V2DFmode:
19040 case V2DImode:
19041 case V2SFmode:
19042 case V2SImode:
19043 /* For the two element vectors, it's just as easy to use
19044 the general case. */
19045 return false;
19046
19047 case V4SFmode:
19048 case V4SImode:
19049 case V8HImode:
19050 case V4HImode:
19051 break;
19052
19053 case V16QImode:
19054 wmode = V8HImode;
19055 goto widen;
19056 case V8QImode:
19057 wmode = V4HImode;
19058 goto widen;
19059 widen:
19060 /* There's no way to set one QImode entry easily. Combine
19061 the variable value with its adjacent constant value, and
19062 promote to an HImode set. */
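      /* For example, with ONE_VAR == 5 in V16QImode, byte 5 (the variable
	 element) and byte 4 (a constant) are merged into one HImode value,
	 which is then stored as element ONE_VAR >> 1 == 2 of the V8HImode
	 view of the constant vector.  */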
19063 x = XVECEXP (vals, 0, one_var ^ 1);
19064 if (one_var & 1)
19065 {
19066 var = convert_modes (HImode, QImode, var, true);
19067 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
19068 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19069 x = GEN_INT (INTVAL (x) & 0xff);
19070 }
19071 else
19072 {
19073 var = convert_modes (HImode, QImode, var, true);
19074 x = gen_int_mode (INTVAL (x) << 8, HImode);
19075 }
19076 if (x != const0_rtx)
19077 var = expand_simple_binop (HImode, IOR, var, x, var,
19078 1, OPTAB_LIB_WIDEN);
19079
19080 x = gen_reg_rtx (wmode);
19081 emit_move_insn (x, gen_lowpart (wmode, const_vec));
19082 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
19083
19084 emit_move_insn (target, gen_lowpart (mode, x));
19085 return true;
19086
19087 default:
19088 return false;
19089 }
19090
19091 emit_move_insn (target, const_vec);
19092 ix86_expand_vector_set (mmx_ok, target, var, one_var);
19093 return true;
19094}
19095
19096/* A subroutine of ix86_expand_vector_init. Handle the most general case:
19097 all values variable, and none identical. */
19098
19099static void
19100ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
19101 rtx target, rtx vals)
19102{
19103 enum machine_mode half_mode = GET_MODE_INNER (mode);
19104 rtx op0 = NULL, op1 = NULL;
19105 bool use_vec_concat = false;
19106
19107 switch (mode)
19108 {
19109 case V2SFmode:
19110 case V2SImode:
19111 if (!mmx_ok && !TARGET_SSE)
19112 break;
19113 /* FALLTHRU */
19114
19115 case V2DFmode:
19116 case V2DImode:
19117 /* For the two element vectors, we always implement VEC_CONCAT. */
19118 op0 = XVECEXP (vals, 0, 0);
19119 op1 = XVECEXP (vals, 0, 1);
19120 use_vec_concat = true;
19121 break;
19122
19123 case V4SFmode:
19124 half_mode = V2SFmode;
19125 goto half;
19126 case V4SImode:
19127 half_mode = V2SImode;
19128 goto half;
19129 half:
19130 {
19131 rtvec v;
19132
19133 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
19134 Recurse to load the two halves. */
19135
19136 op0 = gen_reg_rtx (half_mode);
19137 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
19138 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
19139
19140 op1 = gen_reg_rtx (half_mode);
19141 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
19142 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
19143
19144 use_vec_concat = true;
19145 }
19146 break;
19147
19148 case V8HImode:
19149 case V16QImode:
19150 case V4HImode:
19151 case V8QImode:
19152 break;
19153
19154 default:
19155 gcc_unreachable ();
19156 }
19157
19158 if (use_vec_concat)
19159 {
19160 if (!register_operand (op0, half_mode))
19161 op0 = force_reg (half_mode, op0);
19162 if (!register_operand (op1, half_mode))
19163 op1 = force_reg (half_mode, op1);
19164
19165 emit_insn (gen_rtx_SET (VOIDmode, target,
19166 gen_rtx_VEC_CONCAT (mode, op0, op1)));
19167 }
19168 else
19169 {
19170 int i, j, n_elts, n_words, n_elt_per_word;
19171 enum machine_mode inner_mode;
19172 rtx words[4], shift;
19173
19174 inner_mode = GET_MODE_INNER (mode);
19175 n_elts = GET_MODE_NUNITS (mode);
19176 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
19177 n_elt_per_word = n_elts / n_words;
19178 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
19179
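      /* Build each word_mode word from its vector elements: within a word
	 we start with the highest-numbered element and repeatedly shift
	 left and OR in the next lower one, so element 0 ends up in the
	 least significant bits, matching the little-endian element layout.
	 The resulting words are then assembled into the vector directly,
	 via low/high part moves, or by recursing as a V4SImode build.  */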
19180 for (i = 0; i < n_words; ++i)
19181 {
19182 rtx word = NULL_RTX;
19183
19184 for (j = 0; j < n_elt_per_word; ++j)
19185 {
19186 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
19187 elt = convert_modes (word_mode, inner_mode, elt, true);
19188
19189 if (j == 0)
19190 word = elt;
19191 else
19192 {
19193 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
19194 word, 1, OPTAB_LIB_WIDEN);
19195 word = expand_simple_binop (word_mode, IOR, word, elt,
19196 word, 1, OPTAB_LIB_WIDEN);
19197 }
19198 }
19199
19200 words[i] = word;
19201 }
19202
19203 if (n_words == 1)
19204 emit_move_insn (target, gen_lowpart (mode, words[0]));
19205 else if (n_words == 2)
19206 {
19207 rtx tmp = gen_reg_rtx (mode);
19208 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
19209 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
19210 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
19211 emit_move_insn (target, tmp);
19212 }
19213 else if (n_words == 4)
19214 {
19215 rtx tmp = gen_reg_rtx (V4SImode);
19216 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
19217 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
19218 emit_move_insn (target, gen_lowpart (mode, tmp));
19219 }
19220 else
19221 gcc_unreachable ();
19222 }
19223}
19224
19225/* Initialize vector TARGET via VALS. Suppress the use of MMX
19226 instructions unless MMX_OK is true. */
19227
19228void
19229ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
19230{
19231 enum machine_mode mode = GET_MODE (target);
19232 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19233 int n_elts = GET_MODE_NUNITS (mode);
19234 int n_var = 0, one_var = -1;
19235 bool all_same = true, all_const_zero = true;
19236 int i;
19237 rtx x;
19238
19239 for (i = 0; i < n_elts; ++i)
19240 {
19241 x = XVECEXP (vals, 0, i);
19242 if (!CONSTANT_P (x))
19243 n_var++, one_var = i;
19244 else if (x != CONST0_RTX (inner_mode))
19245 all_const_zero = false;
19246 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
19247 all_same = false;
19248 }
19249
19250 /* Constants are best loaded from the constant pool. */
19251 if (n_var == 0)
19252 {
19253 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
19254 return;
19255 }
19256
19257 /* If all values are identical, broadcast the value. */
19258 if (all_same
19259 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
19260 XVECEXP (vals, 0, 0)))
19261 return;
19262
19263 /* Values where only one field is non-constant are best loaded from
19264 the pool and overwritten via move later. */
19265 if (n_var == 1)
19266 {
19267 if (all_const_zero
19268 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
19269 XVECEXP (vals, 0, one_var),
19270 one_var))
19271 return;
19272
19273 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
19274 return;
19275 }
19276
19277 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
19278}
19279
19280void
19281ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
19282{
19283 enum machine_mode mode = GET_MODE (target);
19284 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19285 bool use_vec_merge = false;
19286 rtx tmp;
19287
19288 switch (mode)
19289 {
19290 case V2SFmode:
19291 case V2SImode:
19292 if (mmx_ok)
19293 {
19294 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
19295 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
19296 if (elt == 0)
19297 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
19298 else
19299 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
19300 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19301 return;
19302 }
19303 break;
19304
19305 case V2DFmode:
19306 case V2DImode:
19307 {
19308 rtx op0, op1;
19309
19310 /* For the two element vectors, we implement a VEC_CONCAT with
19311 the extraction of the other element. */
19312
19313 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
19314 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
19315
19316 if (elt == 0)
19317 op0 = val, op1 = tmp;
19318 else
19319 op0 = tmp, op1 = val;
19320
19321 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
19322 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19323 }
19324 return;
19325
19326 case V4SFmode:
19327 switch (elt)
19328 {
19329 case 0:
19330 use_vec_merge = true;
19331 break;
19332
19333 case 1:
19334 /* tmp = target = A B C D */
19335 tmp = copy_to_reg (target);
19336 /* target = A A B B */
19337 emit_insn (gen_sse_unpcklps (target, target, target));
19338 /* target = X A B B */
19339 ix86_expand_vector_set (false, target, val, 0);
19340 /* target = A X C D */
19341 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19342 GEN_INT (1), GEN_INT (0),
19343 GEN_INT (2+4), GEN_INT (3+4)));
19344 return;
19345
19346 case 2:
19347 /* tmp = target = A B C D */
19348 tmp = copy_to_reg (target);
19349 /* tmp = X B C D */
19350 ix86_expand_vector_set (false, tmp, val, 0);
19351 /* target = A B X D */
19352 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19353 GEN_INT (0), GEN_INT (1),
19354 GEN_INT (0+4), GEN_INT (3+4)));
19355 return;
19356
19357 case 3:
19358 /* tmp = target = A B C D */
19359 tmp = copy_to_reg (target);
19360 /* tmp = X B C D */
19361 ix86_expand_vector_set (false, tmp, val, 0);
19362	  /* target = A B C X */
19363 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19364 GEN_INT (0), GEN_INT (1),
19365 GEN_INT (2+4), GEN_INT (0+4)));
19366 return;
19367
19368 default:
19369 gcc_unreachable ();
19370 }
19371 break;
19372
19373 case V4SImode:
19374 /* Element 0 handled by vec_merge below. */
19375 if (elt == 0)
19376 {
19377 use_vec_merge = true;
19378 break;
19379 }
19380
19381 if (TARGET_SSE2)
19382 {
19383 /* With SSE2, use integer shuffles to swap element 0 and ELT,
19384 store into element 0, then shuffle them back. */
19385
19386 rtx order[4];
19387
19388 order[0] = GEN_INT (elt);
19389 order[1] = const1_rtx;
19390 order[2] = const2_rtx;
19391 order[3] = GEN_INT (3);
19392 order[elt] = const0_rtx;
19393
19394 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19395 order[1], order[2], order[3]));
19396
19397 ix86_expand_vector_set (false, target, val, 0);
19398
19399 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19400 order[1], order[2], order[3]));
19401 }
19402 else
19403 {
19404 /* For SSE1, we have to reuse the V4SF code. */
19405 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
19406 gen_lowpart (SFmode, val), elt);
19407 }
19408 return;
19409
19410 case V8HImode:
19411 use_vec_merge = TARGET_SSE2;
19412 break;
19413 case V4HImode:
19414 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19415 break;
19416
19417 case V16QImode:
19418 case V8QImode:
19419 default:
19420 break;
19421 }
19422
19423 if (use_vec_merge)
19424 {
19425 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
19426 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
19427 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19428 }
19429 else
19430 {
19431 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19432
19433 emit_move_insn (mem, target);
19434
19435 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19436 emit_move_insn (tmp, val);
19437
19438 emit_move_insn (target, mem);
19439 }
19440}
19441
19442void
19443ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
19444{
19445 enum machine_mode mode = GET_MODE (vec);
19446 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19447 bool use_vec_extr = false;
19448 rtx tmp;
19449
19450 switch (mode)
19451 {
19452 case V2SImode:
19453 case V2SFmode:
19454 if (!mmx_ok)
19455 break;
19456 /* FALLTHRU */
19457
19458 case V2DFmode:
19459 case V2DImode:
19460 use_vec_extr = true;
19461 break;
19462
19463 case V4SFmode:
19464 switch (elt)
19465 {
19466 case 0:
19467 tmp = vec;
19468 break;
19469
19470 case 1:
19471 case 3:
19472 tmp = gen_reg_rtx (mode);
19473 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
19474 GEN_INT (elt), GEN_INT (elt),
19475 GEN_INT (elt+4), GEN_INT (elt+4)));
19476 break;
19477
19478 case 2:
19479 tmp = gen_reg_rtx (mode);
19480 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
19481 break;
19482
19483 default:
19484 gcc_unreachable ();
19485 }
19486 vec = tmp;
19487 use_vec_extr = true;
19488 elt = 0;
19489 break;
19490
19491 case V4SImode:
19492 if (TARGET_SSE2)
19493 {
19494 switch (elt)
19495 {
19496 case 0:
19497 tmp = vec;
19498 break;
19499
19500 case 1:
19501 case 3:
19502 tmp = gen_reg_rtx (mode);
19503 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
19504 GEN_INT (elt), GEN_INT (elt),
19505 GEN_INT (elt), GEN_INT (elt)));
19506 break;
19507
19508 case 2:
19509 tmp = gen_reg_rtx (mode);
19510 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
19511 break;
19512
19513 default:
19514 gcc_unreachable ();
19515 }
19516 vec = tmp;
19517 use_vec_extr = true;
19518 elt = 0;
19519 }
19520 else
19521 {
19522 /* For SSE1, we have to reuse the V4SF code. */
19523 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
19524 gen_lowpart (V4SFmode, vec), elt);
19525 return;
19526 }
19527 break;
19528
19529 case V8HImode:
19530 use_vec_extr = TARGET_SSE2;
19531 break;
19532 case V4HImode:
19533 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19534 break;
19535
19536 case V16QImode:
19537 case V8QImode:
19538 /* ??? Could extract the appropriate HImode element and shift. */
19539 default:
19540 break;
19541 }
19542
19543 if (use_vec_extr)
19544 {
19545 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19546 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19547
19548 /* Let the rtl optimizers know about the zero extension performed. */
19549 if (inner_mode == HImode)
19550 {
19551 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19552 target = gen_lowpart (SImode, target);
19553 }
19554
19555 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19556 }
19557 else
19558 {
19559 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19560
19561 emit_move_insn (mem, vec);
19562
19563 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19564 emit_move_insn (target, tmp);
19565 }
19566}
19567
19568/* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
19569 pattern to reduce; DEST is the destination; IN is the input vector. */
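/* The sequence below works in two steps: movhlps copies the high two
   elements over the low two, FN combines the halves element-wise, shufps
   then broadcasts element 1 of that partial result, and a final FN leaves
   the fully reduced value in element 0 of DEST (the remaining elements of
   DEST are not meaningful).  */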
19570
19571void
19572ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19573{
19574 rtx tmp1, tmp2, tmp3;
19575
19576 tmp1 = gen_reg_rtx (V4SFmode);
19577 tmp2 = gen_reg_rtx (V4SFmode);
19578 tmp3 = gen_reg_rtx (V4SFmode);
19579
19580 emit_insn (gen_sse_movhlps (tmp1, in, in));
19581 emit_insn (fn (tmp2, tmp1, in));
19582
19583 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19584 GEN_INT (1), GEN_INT (1),
19585 GEN_INT (1+4), GEN_INT (1+4)));
19586 emit_insn (fn (dest, tmp2, tmp3));
19587}
19588
19589/* Target hook for scalar_mode_supported_p. */
19590static bool
19591ix86_scalar_mode_supported_p (enum machine_mode mode)
19592{
19593 if (DECIMAL_FLOAT_MODE_P (mode))
19594 return true;
19595 else
19596 return default_scalar_mode_supported_p (mode);
19597}
19598
19599/* Implements target hook vector_mode_supported_p. */
19600static bool
19601ix86_vector_mode_supported_p (enum machine_mode mode)
19602{
19603 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19604 return true;
19605 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19606 return true;
19607 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
19608 return true;
19609 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
19610 return true;
19611 return false;
19612}
19613
19614/* Worker function for TARGET_MD_ASM_CLOBBERS.
19615
19616 We do this in the new i386 backend to maintain source compatibility
19617 with the old cc0-based compiler. */
19618
19619static tree
19620ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
19621 tree inputs ATTRIBUTE_UNUSED,
19622 tree clobbers)
19623{
19624 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
19625 clobbers);
19626 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
19627 clobbers);
19628 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
19629 clobbers);
19630 return clobbers;
19631}
19632
19633/* Return true if this goes in large data/bss.  */
19634
19635static bool
19636ix86_in_large_data_p (tree exp)
19637{
19638 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
19639 return false;
19640
19641 /* Functions are never large data. */
19642 if (TREE_CODE (exp) == FUNCTION_DECL)
19643 return false;
19644
19645 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
19646 {
19647 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
19648 if (strcmp (section, ".ldata") == 0
19649 || strcmp (section, ".lbss") == 0)
19650 return true;
19651 return false;
19652 }
19653 else
19654 {
19655 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
19656
19657 /* If this is an incomplete type with size 0, then we can't put it
19658 in data because it might be too big when completed. */
19659 if (!size || size > ix86_section_threshold)
19660 return true;
19661 }
19662
19663 return false;
19664}
19665static void
19666ix86_encode_section_info (tree decl, rtx rtl, int first)
19667{
19668 default_encode_section_info (decl, rtl, first);
19669
19670 if (TREE_CODE (decl) == VAR_DECL
19671 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19672 && ix86_in_large_data_p (decl))
19673 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19674}
19675
19676/* Worker function for REVERSE_CONDITION. */
19677
19678enum rtx_code
19679ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19680{
19681 return (mode != CCFPmode && mode != CCFPUmode
19682 ? reverse_condition (code)
19683 : reverse_condition_maybe_unordered (code));
19684}
19685
19686/* Output code to perform an x87 FP register move, from OPERANDS[1]
19687 to OPERANDS[0]. */
19688
19689const char *
19690output_387_reg_move (rtx insn, rtx *operands)
19691{
19692 if (REG_P (operands[1])
19693 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19694 {
19695 if (REGNO (operands[0]) == FIRST_STACK_REG)
19696 return output_387_ffreep (operands, 0);
19697 return "fstp\t%y0";
19698 }
19699 if (STACK_TOP_P (operands[0]))
19700 return "fld%z1\t%y1";
19701 return "fst\t%y0";
19702}
19703
19704/* Output code to perform a conditional jump to LABEL, if C2 flag in
19705 FP status register is set. */
19706
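/* fnstsw stores the x87 status word, whose high byte holds the condition
   bits C0, C2 and C3.  When SAHF is available, that byte is loaded into
   EFLAGS (C0 -> CF, C2 -> PF, C3 -> ZF) and the unordered result becomes a
   simple UNORDERED test on the flags; otherwise bit 2 of the high byte (C2)
   is tested directly with the 0x04 mask.  */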
19707void
19708ix86_emit_fp_unordered_jump (rtx label)
19709{
19710 rtx reg = gen_reg_rtx (HImode);
19711 rtx temp;
19712
19713 emit_insn (gen_x86_fnstsw_1 (reg));
19714
19715 if (TARGET_USE_SAHF)
19716 {
19717 emit_insn (gen_x86_sahf_1 (reg));
19718
19719 temp = gen_rtx_REG (CCmode, FLAGS_REG);
19720 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19721 }
19722 else
19723 {
19724 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19725
19726 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19727 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19728 }
19729
19730 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19731 gen_rtx_LABEL_REF (VOIDmode, label),
19732 pc_rtx);
19733 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19734 emit_jump_insn (temp);
19735}
19736
19737/* Output code to perform a log1p XFmode calculation. */
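/* The expansion below compares |op1| against 1 - sqrt(2)/2 (about 0.2929).
   For small inputs it uses fyl2xp1, which computes y * log2(x + 1)
   accurately near zero, with y = ln(2) (the fldln2 constant), yielding
   log1p directly.  For larger inputs it falls back to fyl2x applied to
   1 + op1, where the explicit addition no longer costs precision.  */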
19738
19739void ix86_emit_i387_log1p (rtx op0, rtx op1)
19740{
19741 rtx label1 = gen_label_rtx ();
19742 rtx label2 = gen_label_rtx ();
19743
19744 rtx tmp = gen_reg_rtx (XFmode);
19745 rtx tmp2 = gen_reg_rtx (XFmode);
19746
19747 emit_insn (gen_absxf2 (tmp, op1));
19748 emit_insn (gen_cmpxf (tmp,
19749 CONST_DOUBLE_FROM_REAL_VALUE (
19750 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19751 XFmode)));
19752 emit_jump_insn (gen_bge (label1));
19753
19754 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19755 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19756 emit_jump (label2);
19757
19758 emit_label (label1);
19759 emit_move_insn (tmp, CONST1_RTX (XFmode));
19760 emit_insn (gen_addxf3 (tmp, op1, tmp));
19761 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19762 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19763
19764 emit_label (label2);
19765}
19766
19767/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19768
19769static void
19770i386_solaris_elf_named_section (const char *name, unsigned int flags,
19771 tree decl)
19772{
19773 /* With Binutils 2.15, the "@unwind" marker must be specified on
19774 every occurrence of the ".eh_frame" section, not just the first
19775 one. */
19776 if (TARGET_64BIT
19777 && strcmp (name, ".eh_frame") == 0)
19778 {
19779 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19780 flags & SECTION_WRITE ? "aw" : "a");
19781 return;
19782 }
19783 default_elf_asm_named_section (name, flags, decl);
19784}
19785
19786/* Return the mangling of TYPE if it is an extended fundamental type. */
19787
19788static const char *
19789ix86_mangle_fundamental_type (tree type)
19790{
19791 switch (TYPE_MODE (type))
19792 {
19793 case TFmode:
19794 /* __float128 is "g". */
19795 return "g";
19796 case XFmode:
19797 /* "long double" or __float80 is "e". */
19798 return "e";
19799 default:
19800 return NULL;
19801 }
19802}
19803
19804/* For 32-bit code we can save the PIC register setup by using the
19805   __stack_chk_fail_local hidden function instead of calling
19806   __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
19807   register, so it is better to call __stack_chk_fail directly.  */
19808
19809static tree
19810ix86_stack_protect_fail (void)
19811{
19812 return TARGET_64BIT
19813 ? default_external_stack_protect_fail ()
19814 : default_hidden_stack_protect_fail ();
19815}
19816
19817/* Select a format to encode pointers in exception handling data. CODE
19818 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19819 true if the symbol may be affected by dynamic relocations.
19820
19821 ??? All x86 object file formats are capable of representing this.
19822 After all, the relocation needed is the same as for the call insn.
19823 Whether or not a particular assembler allows us to enter such, I
19824 guess we'll have to see. */
19825int
19826asm_preferred_eh_data_format (int code, int global)
19827{
19828 if (flag_pic)
19829 {
19830 int type = DW_EH_PE_sdata8;
19831 if (!TARGET_64BIT
19832 || ix86_cmodel == CM_SMALL_PIC
19833 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19834 type = DW_EH_PE_sdata4;
19835 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19836 }
19837 if (ix86_cmodel == CM_SMALL
19838 || (ix86_cmodel == CM_MEDIUM && code))
19839 return DW_EH_PE_udata4;
19840 return DW_EH_PE_absptr;
19841}
19842
19843#include "gt-i386.h"