1/* Subroutines used for code generation on IA-32.
2   Copyright (C) 1988-2020 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 3, or (at your option)
9any later version.
10
11GCC is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3.  If not see
18<http://www.gnu.org/licenses/>.  */
19
20#define IN_TARGET_CODE 1
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "backend.h"
26#include "rtl.h"
27#include "tree.h"
28#include "memmodel.h"
29#include "gimple.h"
30#include "cfghooks.h"
31#include "cfgloop.h"
32#include "df.h"
33#include "tm_p.h"
34#include "stringpool.h"
35#include "expmed.h"
36#include "optabs.h"
37#include "regs.h"
38#include "emit-rtl.h"
39#include "recog.h"
40#include "cgraph.h"
41#include "diagnostic.h"
42#include "cfgbuild.h"
43#include "alias.h"
44#include "fold-const.h"
45#include "attribs.h"
46#include "calls.h"
47#include "stor-layout.h"
48#include "varasm.h"
49#include "output.h"
50#include "insn-attr.h"
51#include "flags.h"
52#include "except.h"
53#include "explow.h"
54#include "expr.h"
55#include "cfgrtl.h"
56#include "common/common-target.h"
57#include "langhooks.h"
58#include "reload.h"
59#include "gimplify.h"
60#include "dwarf2.h"
61#include "tm-constrs.h"
62#include "cselib.h"
63#include "sched-int.h"
64#include "opts.h"
65#include "tree-pass.h"
66#include "context.h"
67#include "pass_manager.h"
68#include "target-globals.h"
69#include "gimple-iterator.h"
70#include "tree-vectorizer.h"
71#include "shrink-wrap.h"
72#include "builtins.h"
73#include "rtl-iter.h"
74#include "tree-iterator.h"
75#include "dbgcnt.h"
76#include "case-cfn-macros.h"
77#include "dojump.h"
78#include "fold-const-call.h"
79#include "tree-vrp.h"
80#include "tree-ssanames.h"
81#include "selftest.h"
82#include "selftest-rtl.h"
83#include "print-rtl.h"
84#include "intl.h"
85#include "ifcvt.h"
86#include "symbol-summary.h"
87#include "ipa-prop.h"
88#include "ipa-fnsummary.h"
89#include "wide-int-bitmask.h"
90#include "tree-vector-builder.h"
91#include "debug.h"
92#include "dwarf2out.h"
93#include "i386-options.h"
94#include "i386-builtins.h"
95#include "i386-expand.h"
96#include "i386-features.h"
97#include "function-abi.h"
98
99/* This file should be included last.  */
100#include "target-def.h"
101
102static rtx legitimize_dllimport_symbol (rtx, bool);
103static rtx legitimize_pe_coff_extern_decl (rtx, bool);
104static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
105static void ix86_emit_restore_reg_using_pop (rtx);
106
107
108#ifndef CHECK_STACK_LIMIT
109#define CHECK_STACK_LIMIT (-1)
110#endif
111
112/* Return index of given mode in mult and division cost tables.  */
113#define MODE_INDEX(mode)					\
114  ((mode) == QImode ? 0						\
115   : (mode) == HImode ? 1					\
116   : (mode) == SImode ? 2					\
117   : (mode) == DImode ? 3					\
118   : 4)
119
120
121/* Set by -mtune.  */
122const struct processor_costs *ix86_tune_cost = NULL;
123
124/* Set by -mtune or -Os.  */
125const struct processor_costs *ix86_cost = NULL;
126
127/* In case the average insn count for single function invocation is
128   lower than this constant, emit fast (but longer) prologue and
129   epilogue code.  */
130#define FAST_PROLOGUE_INSN_COUNT 20
131
132/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
133static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
134static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
135static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
136
137/* Array of the smallest class containing reg number REGNO, indexed by
138   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
139
140enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
141{
142  /* ax, dx, cx, bx */
143  AREG, DREG, CREG, BREG,
144  /* si, di, bp, sp */
145  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
146  /* FP registers */
147  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
148  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
149  /* arg pointer, flags, fpsr, frame */
150  NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
151  /* SSE registers */
152  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
153  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
154  /* MMX registers */
155  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
156  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
157  /* REX registers */
158  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
159  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
160  /* SSE REX registers */
161  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
162  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
163  /* AVX-512 SSE registers */
164  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
165  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
166  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
167  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
168  /* Mask registers.  */
169  ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
170  MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS
171};
172
173/* The "default" register map used in 32bit mode.  */
174
175int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
176{
177  /* general regs */
178  0, 2, 1, 3, 6, 7, 4, 5,
179  /* fp regs */
180  12, 13, 14, 15, 16, 17, 18, 19,
181  /* arg, flags, fpsr, frame */
182  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
183  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
184  /* SSE */
185  21, 22, 23, 24, 25, 26, 27, 28,
186  /* MMX */
187  29, 30, 31, 32, 33, 34, 35, 36,
188  /* extended integer registers */
189  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
190  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
191  /* extended sse registers */
192  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
193  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
194  /* AVX-512 registers 16-23 */
195  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
196  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
197  /* AVX-512 registers 24-31 */
198  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
199  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
200  /* Mask registers */
201  93, 94, 95, 96, 97, 98, 99, 100
202};
203
204/* The "default" register map used in 64bit mode.  */
205
206int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
207{
208  /* general regs */
209  0, 1, 2, 3, 4, 5, 6, 7,
210  /* fp regs */
211  33, 34, 35, 36, 37, 38, 39, 40,
212  /* arg, flags, fpsr, frame */
213  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
214  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
215  /* SSE */
216  17, 18, 19, 20, 21, 22, 23, 24,
217  /* MMX */
218  41, 42, 43, 44, 45, 46, 47, 48,
219  /* extended integer registers */
220  8, 9, 10, 11, 12, 13, 14, 15,
221  /* extended SSE registers */
222  25, 26, 27, 28, 29, 30, 31, 32,
223  /* AVX-512 registers 16-23 */
224  67, 68, 69, 70, 71, 72, 73, 74,
225  /* AVX-512 registers 24-31 */
226  75, 76, 77, 78, 79, 80, 81, 82,
227  /* Mask registers */
228  118, 119, 120, 121, 122, 123, 124, 125
229};
230
231/* Define the register numbers to be used in Dwarf debugging information.
232   The SVR4 reference port C compiler uses the following register numbers
233   in its Dwarf output code:
234	0 for %eax (gcc regno = 0)
235	1 for %ecx (gcc regno = 2)
236	2 for %edx (gcc regno = 1)
237	3 for %ebx (gcc regno = 3)
238	4 for %esp (gcc regno = 7)
239	5 for %ebp (gcc regno = 6)
240	6 for %esi (gcc regno = 4)
241	7 for %edi (gcc regno = 5)
242   The following three DWARF register numbers are never generated by
243   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
244   believed these numbers have these meanings.
245	8  for %eip    (no gcc equivalent)
246	9  for %eflags (gcc regno = 17)
247	10 for %trapno (no gcc equivalent)
248   It is not at all clear how we should number the FP stack registers
249   for the x86 architecture.  If the version of SDB on x86/svr4 were
250   a bit less brain dead with respect to floating-point then we would
251   have a precedent to follow with respect to DWARF register numbers
252   for x86 FP registers, but the SDB on x86/svr4 was so completely
253   broken with respect to FP registers that it is hardly worth thinking
254   of it as something to strive for compatibility with.
255   The version of x86/svr4 SDB I had does (partially)
256   seem to believe that DWARF register number 11 is associated with
257   the x86 register %st(0), but that's about all.  Higher DWARF
258   register numbers don't seem to be associated with anything in
259   particular, and even for DWARF regno 11, SDB only seemed to under-
260   stand that it should say that a variable lives in %st(0) (when
261   asked via an `=' command) if we said it was in DWARF regno 11,
262   but SDB still printed garbage when asked for the value of the
263   variable in question (via a `/' command).
264   (Also note that the labels SDB printed for various FP stack regs
265   when doing an `x' command were all wrong.)
266   Note that these problems generally don't affect the native SVR4
267   C compiler because it doesn't allow the use of -O with -g and
268   because when it is *not* optimizing, it allocates a memory
269   location for each floating-point variable, and the memory
270   location is what gets described in the DWARF AT_location
271   attribute for the variable in question.
272   Regardless of the severe mental illness of the x86/svr4 SDB, we
273   do something sensible here and we use the following DWARF
274   register numbers.  Note that these are all stack-top-relative
275   numbers.
276	11 for %st(0) (gcc regno = 8)
277	12 for %st(1) (gcc regno = 9)
278	13 for %st(2) (gcc regno = 10)
279	14 for %st(3) (gcc regno = 11)
280	15 for %st(4) (gcc regno = 12)
281	16 for %st(5) (gcc regno = 13)
282	17 for %st(6) (gcc regno = 14)
283	18 for %st(7) (gcc regno = 15)
284*/
285int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
286{
287  /* general regs */
288  0, 2, 1, 3, 6, 7, 5, 4,
289  /* fp regs */
290  11, 12, 13, 14, 15, 16, 17, 18,
291  /* arg, flags, fpsr, frame */
292  IGNORED_DWARF_REGNUM, 9,
293  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
294  /* SSE registers */
295  21, 22, 23, 24, 25, 26, 27, 28,
296  /* MMX registers */
297  29, 30, 31, 32, 33, 34, 35, 36,
298  /* extended integer registers */
299  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
300  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
301  /* extended sse registers */
302  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
303  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
304  /* AVX-512 registers 16-23 */
305  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
306  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
307  /* AVX-512 registers 24-31 */
308  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
309  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
310  /* Mask registers */
311  93, 94, 95, 96, 97, 98, 99, 100
312};
313
314/* Define parameter passing and return registers.  */
315
316static int const x86_64_int_parameter_registers[6] =
317{
318  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
319};
320
321static int const x86_64_ms_abi_int_parameter_registers[4] =
322{
323  CX_REG, DX_REG, R8_REG, R9_REG
324};
325
326static int const x86_64_int_return_registers[4] =
327{
328  AX_REG, DX_REG, DI_REG, SI_REG
329};
330
331/* Define the structure for the machine field in struct function.  */
332
333struct GTY(()) stack_local_entry {
334  unsigned short mode;
335  unsigned short n;
336  rtx rtl;
337  struct stack_local_entry *next;
338};
339
340/* Which cpu are we scheduling for.  */
341enum attr_cpu ix86_schedule;
342
343/* Which cpu are we optimizing for.  */
344enum processor_type ix86_tune;
345
346/* Which instruction set architecture to use.  */
347enum processor_type ix86_arch;
348
349/* True if processor has SSE prefetch instruction.  */
350unsigned char x86_prefetch_sse;
351
352/* Preferred alignment for stack boundary in bits.  */
353unsigned int ix86_preferred_stack_boundary;
354
355/* Alignment for incoming stack boundary in bits specified at
356   command line.  */
357unsigned int ix86_user_incoming_stack_boundary;
358
359/* Default alignment for incoming stack boundary in bits.  */
360unsigned int ix86_default_incoming_stack_boundary;
361
362/* Alignment for incoming stack boundary in bits.  */
363unsigned int ix86_incoming_stack_boundary;
364
365/* Calling abi specific va_list type nodes.  */
366tree sysv_va_list_type_node;
367tree ms_va_list_type_node;
368
369/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
370char internal_label_prefix[16];
371int internal_label_prefix_len;
372
373/* Fence to use after loop using movnt.  */
374tree x86_mfence;
375
376/* Register class used for passing given 64bit part of the argument.
377   These represent classes as documented by the PS ABI, with the exception
378   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
379   use SF or DFmode move instead of DImode to avoid reformatting penalties.
380
381   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
382   whenever possible (upper half does contain padding).  */
383enum x86_64_reg_class
384  {
385    X86_64_NO_CLASS,
386    X86_64_INTEGER_CLASS,
387    X86_64_INTEGERSI_CLASS,
388    X86_64_SSE_CLASS,
389    X86_64_SSESF_CLASS,
390    X86_64_SSEDF_CLASS,
391    X86_64_SSEUP_CLASS,
392    X86_64_X87_CLASS,
393    X86_64_X87UP_CLASS,
394    X86_64_COMPLEX_X87_CLASS,
395    X86_64_MEMORY_CLASS
396  };
397
398#define MAX_CLASSES 8
399
400/* Table of constants used by fldpi, fldln2, etc....  */
401static REAL_VALUE_TYPE ext_80387_constants_table [5];
402static bool ext_80387_constants_init;
403
404
405static rtx ix86_function_value (const_tree, const_tree, bool);
406static bool ix86_function_value_regno_p (const unsigned int);
407static unsigned int ix86_function_arg_boundary (machine_mode,
408						const_tree);
409static rtx ix86_static_chain (const_tree, bool);
410static int ix86_function_regparm (const_tree, const_tree);
411static void ix86_compute_frame_layout (void);
412static tree ix86_canonical_va_list_type (tree);
413static unsigned int split_stack_prologue_scratch_regno (void);
414static bool i386_asm_output_addr_const_extra (FILE *, rtx);
415
416static bool ix86_can_inline_p (tree, tree);
417static unsigned int ix86_minimum_incoming_stack_boundary (bool);
418
419
420/* Whether -mtune= or -march= were specified */
421int ix86_tune_defaulted;
422int ix86_arch_specified;
423
424/* Return true if a red-zone is in use.  We can't use red-zone when
425   there are local indirect jumps, like "indirect_jump" or "tablejump",
426   which jumps to another place in the function, since "call" in the
427   indirect thunk pushes the return address onto stack, destroying
428   red-zone.
429
430   TODO: If we can reserve the first 2 WORDs, for PUSH and, another
431   for CALL, in red-zone, we can allow local indirect jumps with
432   indirect thunk.  */
433
434bool
435ix86_using_red_zone (void)
436{
437  return (TARGET_RED_ZONE
438	  && !TARGET_64BIT_MS_ABI
439	  && (!cfun->machine->has_local_indirect_jump
440	      || cfun->machine->indirect_branch_type == indirect_branch_keep));
441}
442
443/* Return true, if profiling code should be emitted before
444   prologue. Otherwise it returns false.
445   Note: For x86 with "hotfix" it is sorried.  */
446static bool
447ix86_profile_before_prologue (void)
448{
449  return flag_fentry != 0;
450}
451
452/* Update register usage after having seen the compiler flags.  */
453
454static void
455ix86_conditional_register_usage (void)
456{
457  int i, c_mask;
458
459  /* If there are no caller-saved registers, preserve all registers.
460     except fixed_regs and registers used for function return value
461     since aggregate_value_p checks call_used_regs[regno] on return
462     value.  */
463  if (cfun && cfun->machine->no_caller_saved_registers)
464    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
465      if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
466	call_used_regs[i] = 0;
467
468  /* For 32-bit targets, disable the REX registers.  */
469  if (! TARGET_64BIT)
470    {
471      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
472	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
473      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
474	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
475      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
476	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
477    }
478
479  /*  See the definition of CALL_USED_REGISTERS in i386.h.  */
480  c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
481
482  CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
483
484  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
485    {
486      /* Set/reset conditionally defined registers from
487	 CALL_USED_REGISTERS initializer.  */
488      if (call_used_regs[i] > 1)
489	call_used_regs[i] = !!(call_used_regs[i] & c_mask);
490
491      /* Calculate registers of CLOBBERED_REGS register set
492	 as call used registers from GENERAL_REGS register set.  */
493      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
494	  && call_used_regs[i])
495	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
496    }
497
498  /* If MMX is disabled, disable the registers.  */
499  if (! TARGET_MMX)
500    accessible_reg_set &= ~reg_class_contents[MMX_REGS];
501
502  /* If SSE is disabled, disable the registers.  */
503  if (! TARGET_SSE)
504    accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];
505
506  /* If the FPU is disabled, disable the registers.  */
507  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
508    accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
509
510  /* If AVX512F is disabled, disable the registers.  */
511  if (! TARGET_AVX512F)
512    {
513      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
514	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
515
516      accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
517    }
518}
519
520/* Canonicalize a comparison from one we don't have to one we do have.  */
521
522static void
523ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
524			      bool op0_preserve_value)
525{
526  /* The order of operands in x87 ficom compare is forced by combine in
527     simplify_comparison () function. Float operator is treated as RTX_OBJ
528     with a precedence over other operators and is always put in the first
529     place. Swap condition and operands to match ficom instruction.  */
530  if (!op0_preserve_value
531      && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
532    {
533      enum rtx_code scode = swap_condition ((enum rtx_code) *code);
534
535      /* We are called only for compares that are split to SAHF instruction.
536	 Ensure that we have setcc/jcc insn for the swapped condition.  */
537      if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
538	{
539	  std::swap (*op0, *op1);
540	  *code = (int) scode;
541	}
542    }
543}
544
545
546/* Hook to determine if one function can safely inline another.  */
547
548static bool
549ix86_can_inline_p (tree caller, tree callee)
550{
551  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
552  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
553
554  /* Changes of those flags can be tolerated for always inlines. Lets hope
555     user knows what he is doing.  */
556  const unsigned HOST_WIDE_INT always_inline_safe_mask
557	 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
558	    | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
559	    | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
560	    | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
561	    | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
562	    | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
563	    | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
564
565
566  if (!callee_tree)
567    callee_tree = target_option_default_node;
568  if (!caller_tree)
569    caller_tree = target_option_default_node;
570  if (callee_tree == caller_tree)
571    return true;
572
573  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
574  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
575  bool ret = false;
576  bool always_inline
577    = (DECL_DISREGARD_INLINE_LIMITS (callee)
578       && lookup_attribute ("always_inline",
579			    DECL_ATTRIBUTES (callee)));
580
581  cgraph_node *callee_node = cgraph_node::get (callee);
582  /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
583     function can inline a SSE2 function but a SSE2 function can't inline
584     a SSE4 function.  */
585  if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
586       != callee_opts->x_ix86_isa_flags)
587      || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
588	  != callee_opts->x_ix86_isa_flags2))
589    ret = false;
590
591  /* See if we have the same non-isa options.  */
592  else if ((!always_inline
593	    && caller_opts->x_target_flags != callee_opts->x_target_flags)
594	   || (caller_opts->x_target_flags & ~always_inline_safe_mask)
595	       != (callee_opts->x_target_flags & ~always_inline_safe_mask))
596    ret = false;
597
598  /* See if arch, tune, etc. are the same.  */
599  else if (caller_opts->arch != callee_opts->arch)
600    ret = false;
601
602  else if (!always_inline && caller_opts->tune != callee_opts->tune)
603    ret = false;
604
605  else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
606	   /* If the calle doesn't use FP expressions differences in
607	      ix86_fpmath can be ignored.  We are called from FEs
608	      for multi-versioning call optimization, so beware of
609	      ipa_fn_summaries not available.  */
610	   && (! ipa_fn_summaries
611	       || ipa_fn_summaries->get (callee_node) == NULL
612	       || ipa_fn_summaries->get (callee_node)->fp_expressions))
613    ret = false;
614
615  else if (!always_inline
616	   && caller_opts->branch_cost != callee_opts->branch_cost)
617    ret = false;
618
619  else
620    ret = true;
621
622  return ret;
623}
624
625/* Return true if this goes in large data/bss.  */
626
627static bool
628ix86_in_large_data_p (tree exp)
629{
630  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
631    return false;
632
633  if (exp == NULL_TREE)
634    return false;
635
636  /* Functions are never large data.  */
637  if (TREE_CODE (exp) == FUNCTION_DECL)
638    return false;
639
640  /* Automatic variables are never large data.  */
641  if (VAR_P (exp) && !is_global_var (exp))
642    return false;
643
644  if (VAR_P (exp) && DECL_SECTION_NAME (exp))
645    {
646      const char *section = DECL_SECTION_NAME (exp);
647      if (strcmp (section, ".ldata") == 0
648	  || strcmp (section, ".lbss") == 0)
649	return true;
650      return false;
651    }
652  else
653    {
654      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
655
656      /* If this is an incomplete type with size 0, then we can't put it
657	 in data because it might be too big when completed.  Also,
658	 int_size_in_bytes returns -1 if size can vary or is larger than
659	 an integer in which case also it is safer to assume that it goes in
660	 large data.  */
661      if (size <= 0 || size > ix86_section_threshold)
662	return true;
663    }
664
665  return false;
666}
667
668/* i386-specific section flag to mark large sections.  */
669#define SECTION_LARGE SECTION_MACH_DEP
670
671/* Switch to the appropriate section for output of DECL.
672   DECL is either a `VAR_DECL' node or a constant of some sort.
673   RELOC indicates whether forming the initial value of DECL requires
674   link-time relocations.  */
675
676ATTRIBUTE_UNUSED static section *
677x86_64_elf_select_section (tree decl, int reloc,
678			   unsigned HOST_WIDE_INT align)
679{
680  if (ix86_in_large_data_p (decl))
681    {
682      const char *sname = NULL;
683      unsigned int flags = SECTION_WRITE | SECTION_LARGE;
684      switch (categorize_decl_for_section (decl, reloc))
685	{
686	case SECCAT_DATA:
687	  sname = ".ldata";
688	  break;
689	case SECCAT_DATA_REL:
690	  sname = ".ldata.rel";
691	  break;
692	case SECCAT_DATA_REL_LOCAL:
693	  sname = ".ldata.rel.local";
694	  break;
695	case SECCAT_DATA_REL_RO:
696	  sname = ".ldata.rel.ro";
697	  break;
698	case SECCAT_DATA_REL_RO_LOCAL:
699	  sname = ".ldata.rel.ro.local";
700	  break;
701	case SECCAT_BSS:
702	  sname = ".lbss";
703	  flags |= SECTION_BSS;
704	  break;
705	case SECCAT_RODATA:
706	case SECCAT_RODATA_MERGE_STR:
707	case SECCAT_RODATA_MERGE_STR_INIT:
708	case SECCAT_RODATA_MERGE_CONST:
709	  sname = ".lrodata";
710	  flags &= ~SECTION_WRITE;
711	  break;
712	case SECCAT_SRODATA:
713	case SECCAT_SDATA:
714	case SECCAT_SBSS:
715	  gcc_unreachable ();
716	case SECCAT_TEXT:
717	case SECCAT_TDATA:
718	case SECCAT_TBSS:
719	  /* We don't split these for medium model.  Place them into
720	     default sections and hope for best.  */
721	  break;
722	}
723      if (sname)
724	{
725	  /* We might get called with string constants, but get_named_section
726	     doesn't like them as they are not DECLs.  Also, we need to set
727	     flags in that case.  */
728	  if (!DECL_P (decl))
729	    return get_section (sname, flags, NULL);
730	  return get_named_section (decl, sname, reloc);
731	}
732    }
733  return default_elf_select_section (decl, reloc, align);
734}
735
736/* Select a set of attributes for section NAME based on the properties
737   of DECL and whether or not RELOC indicates that DECL's initializer
738   might contain runtime relocations.  */
739
740static unsigned int ATTRIBUTE_UNUSED
741x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
742{
743  unsigned int flags = default_section_type_flags (decl, name, reloc);
744
745  if (ix86_in_large_data_p (decl))
746    flags |= SECTION_LARGE;
747
748  if (decl == NULL_TREE
749      && (strcmp (name, ".ldata.rel.ro") == 0
750	  || strcmp (name, ".ldata.rel.ro.local") == 0))
751    flags |= SECTION_RELRO;
752
753  if (strcmp (name, ".lbss") == 0
754      || strncmp (name, ".lbss.", sizeof (".lbss.") - 1) == 0
755      || strncmp (name, ".gnu.linkonce.lb.",
756		  sizeof (".gnu.linkonce.lb.") - 1) == 0)
757    flags |= SECTION_BSS;
758
759  return flags;
760}
761
762/* Build up a unique section name, expressed as a
763   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
764   RELOC indicates whether the initial value of EXP requires
765   link-time relocations.  */
766
767static void ATTRIBUTE_UNUSED
768x86_64_elf_unique_section (tree decl, int reloc)
769{
770  if (ix86_in_large_data_p (decl))
771    {
772      const char *prefix = NULL;
773      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
774      bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
775
776      switch (categorize_decl_for_section (decl, reloc))
777	{
778	case SECCAT_DATA:
779	case SECCAT_DATA_REL:
780	case SECCAT_DATA_REL_LOCAL:
781	case SECCAT_DATA_REL_RO:
782	case SECCAT_DATA_REL_RO_LOCAL:
783          prefix = one_only ? ".ld" : ".ldata";
784	  break;
785	case SECCAT_BSS:
786          prefix = one_only ? ".lb" : ".lbss";
787	  break;
788	case SECCAT_RODATA:
789	case SECCAT_RODATA_MERGE_STR:
790	case SECCAT_RODATA_MERGE_STR_INIT:
791	case SECCAT_RODATA_MERGE_CONST:
792          prefix = one_only ? ".lr" : ".lrodata";
793	  break;
794	case SECCAT_SRODATA:
795	case SECCAT_SDATA:
796	case SECCAT_SBSS:
797	  gcc_unreachable ();
798	case SECCAT_TEXT:
799	case SECCAT_TDATA:
800	case SECCAT_TBSS:
801	  /* We don't split these for medium model.  Place them into
802	     default sections and hope for best.  */
803	  break;
804	}
805      if (prefix)
806	{
807	  const char *name, *linkonce;
808	  char *string;
809
810	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
811	  name = targetm.strip_name_encoding (name);
812
813	  /* If we're using one_only, then there needs to be a .gnu.linkonce
814     	     prefix to the section name.  */
815	  linkonce = one_only ? ".gnu.linkonce" : "";
816
817	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));
818
819	  set_decl_section_name (decl, string);
820	  return;
821	}
822    }
823  default_unique_section (decl, reloc);
824}
825
826#ifdef COMMON_ASM_OP
827
828#ifndef LARGECOMM_SECTION_ASM_OP
829#define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
830#endif
831
832/* This says how to output assembler code to declare an
833   uninitialized external linkage data object.
834
835   For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for
836   large objects.  */
837void
838x86_elf_aligned_decl_common (FILE *file, tree decl,
839			const char *name, unsigned HOST_WIDE_INT size,
840			int align)
841{
842  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
843      && size > (unsigned int)ix86_section_threshold)
844    {
845      switch_to_section (get_named_section (decl, ".lbss", 0));
846      fputs (LARGECOMM_SECTION_ASM_OP, file);
847    }
848  else
849    fputs (COMMON_ASM_OP, file);
850  assemble_name (file, name);
851  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
852	   size, align / BITS_PER_UNIT);
853}
854#endif
855
856/* Utility function for targets to use in implementing
857   ASM_OUTPUT_ALIGNED_BSS.  */
858
859void
860x86_output_aligned_bss (FILE *file, tree decl, const char *name,
861		       	unsigned HOST_WIDE_INT size, int align)
862{
863  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
864      && size > (unsigned int)ix86_section_threshold)
865    switch_to_section (get_named_section (decl, ".lbss", 0));
866  else
867    switch_to_section (bss_section);
868  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
869#ifdef ASM_DECLARE_OBJECT_NAME
870  last_assemble_variable_decl = decl;
871  ASM_DECLARE_OBJECT_NAME (file, name, decl);
872#else
873  /* Standard thing is just output label for the object.  */
874  ASM_OUTPUT_LABEL (file, name);
875#endif /* ASM_DECLARE_OBJECT_NAME */
876  ASM_OUTPUT_SKIP (file, size ? size : 1);
877}
878
879/* Decide whether we must probe the stack before any space allocation
880   on this target.  It's essentially TARGET_STACK_PROBE except when
881   -fstack-check causes the stack to be already probed differently.  */
882
883bool
884ix86_target_stack_probe (void)
885{
886  /* Do not probe the stack twice if static stack checking is enabled.  */
887  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
888    return false;
889
890  return TARGET_STACK_PROBE;
891}
892
893/* Decide whether we can make a sibling call to a function.  DECL is the
894   declaration of the function being targeted by the call and EXP is the
895   CALL_EXPR representing the call.  */
896
897static bool
898ix86_function_ok_for_sibcall (tree decl, tree exp)
899{
900  tree type, decl_or_type;
901  rtx a, b;
902  bool bind_global = decl && !targetm.binds_local_p (decl);
903
904  if (ix86_function_naked (current_function_decl))
905    return false;
906
907  /* Sibling call isn't OK if there are no caller-saved registers
908     since all registers must be preserved before return.  */
909  if (cfun->machine->no_caller_saved_registers)
910    return false;
911
912  /* If we are generating position-independent code, we cannot sibcall
913     optimize direct calls to global functions, as the PLT requires
914     %ebx be live. (Darwin does not have a PLT.)  */
915  if (!TARGET_MACHO
916      && !TARGET_64BIT
917      && flag_pic
918      && flag_plt
919      && bind_global)
920    return false;
921
922  /* If we need to align the outgoing stack, then sibcalling would
923     unalign the stack, which may break the called function.  */
924  if (ix86_minimum_incoming_stack_boundary (true)
925      < PREFERRED_STACK_BOUNDARY)
926    return false;
927
928  if (decl)
929    {
930      decl_or_type = decl;
931      type = TREE_TYPE (decl);
932    }
933  else
934    {
935      /* We're looking at the CALL_EXPR, we need the type of the function.  */
936      type = CALL_EXPR_FN (exp);		/* pointer expression */
937      type = TREE_TYPE (type);			/* pointer type */
938      type = TREE_TYPE (type);			/* function type */
939      decl_or_type = type;
940    }
941
942  /* Check that the return value locations are the same.  Like
943     if we are returning floats on the 80387 register stack, we cannot
944     make a sibcall from a function that doesn't return a float to a
945     function that does or, conversely, from a function that does return
946     a float to a function that doesn't; the necessary stack adjustment
947     would not be executed.  This is also the place we notice
948     differences in the return value ABI.  Note that it is ok for one
949     of the functions to have void return type as long as the return
950     value of the other is passed in a register.  */
951  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
952  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
953			   cfun->decl, false);
954  if (STACK_REG_P (a) || STACK_REG_P (b))
955    {
956      if (!rtx_equal_p (a, b))
957	return false;
958    }
959  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
960    ;
961  else if (!rtx_equal_p (a, b))
962    return false;
963
964  if (TARGET_64BIT)
965    {
966      /* The SYSV ABI has more call-clobbered registers;
967	 disallow sibcalls from MS to SYSV.  */
968      if (cfun->machine->call_abi == MS_ABI
969	  && ix86_function_type_abi (type) == SYSV_ABI)
970	return false;
971    }
972  else
973    {
974      /* If this call is indirect, we'll need to be able to use a
975	 call-clobbered register for the address of the target function.
976	 Make sure that all such registers are not used for passing
977	 parameters.  Note that DLLIMPORT functions and call to global
978	 function via GOT slot are indirect.  */
979      if (!decl
980	  || (bind_global && flag_pic && !flag_plt)
981	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
982	  || flag_force_indirect_call)
983	{
984	  /* Check if regparm >= 3 since arg_reg_available is set to
985	     false if regparm == 0.  If regparm is 1 or 2, there is
986	     always a call-clobbered register available.
987
988	     ??? The symbol indirect call doesn't need a call-clobbered
989	     register.  But we don't know if this is a symbol indirect
990	     call or not here.  */
991	  if (ix86_function_regparm (type, decl) >= 3
992	      && !cfun->machine->arg_reg_available)
993	    return false;
994	}
995    }
996
997  /* Otherwise okay.  That also includes certain types of indirect calls.  */
998  return true;
999}
1000
1001/* This function determines from TYPE the calling-convention.  */
1002
1003unsigned int
1004ix86_get_callcvt (const_tree type)
1005{
1006  unsigned int ret = 0;
1007  bool is_stdarg;
1008  tree attrs;
1009
1010  if (TARGET_64BIT)
1011    return IX86_CALLCVT_CDECL;
1012
1013  attrs = TYPE_ATTRIBUTES (type);
1014  if (attrs != NULL_TREE)
1015    {
1016      if (lookup_attribute ("cdecl", attrs))
1017	ret |= IX86_CALLCVT_CDECL;
1018      else if (lookup_attribute ("stdcall", attrs))
1019	ret |= IX86_CALLCVT_STDCALL;
1020      else if (lookup_attribute ("fastcall", attrs))
1021	ret |= IX86_CALLCVT_FASTCALL;
1022      else if (lookup_attribute ("thiscall", attrs))
1023	ret |= IX86_CALLCVT_THISCALL;
1024
1025      /* Regparam isn't allowed for thiscall and fastcall.  */
1026      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
1027	{
1028	  if (lookup_attribute ("regparm", attrs))
1029	    ret |= IX86_CALLCVT_REGPARM;
1030	  if (lookup_attribute ("sseregparm", attrs))
1031	    ret |= IX86_CALLCVT_SSEREGPARM;
1032	}
1033
1034      if (IX86_BASE_CALLCVT(ret) != 0)
1035	return ret;
1036    }
1037
1038  is_stdarg = stdarg_p (type);
1039  if (TARGET_RTD && !is_stdarg)
1040    return IX86_CALLCVT_STDCALL | ret;
1041
1042  if (ret != 0
1043      || is_stdarg
1044      || TREE_CODE (type) != METHOD_TYPE
1045      || ix86_function_type_abi (type) != MS_ABI)
1046    return IX86_CALLCVT_CDECL | ret;
1047
1048  return IX86_CALLCVT_THISCALL;
1049}
1050
1051/* Return 0 if the attributes for two types are incompatible, 1 if they
1052   are compatible, and 2 if they are nearly compatible (which causes a
1053   warning to be generated).  */
1054
1055static int
1056ix86_comp_type_attributes (const_tree type1, const_tree type2)
1057{
1058  unsigned int ccvt1, ccvt2;
1059
1060  if (TREE_CODE (type1) != FUNCTION_TYPE
1061      && TREE_CODE (type1) != METHOD_TYPE)
1062    return 1;
1063
1064  ccvt1 = ix86_get_callcvt (type1);
1065  ccvt2 = ix86_get_callcvt (type2);
1066  if (ccvt1 != ccvt2)
1067    return 0;
1068  if (ix86_function_regparm (type1, NULL)
1069      != ix86_function_regparm (type2, NULL))
1070    return 0;
1071
1072  return 1;
1073}
1074
1075/* Return the regparm value for a function with the indicated TYPE and DECL.
1076   DECL may be NULL when calling function indirectly
1077   or considering a libcall.  */
1078
1079static int
1080ix86_function_regparm (const_tree type, const_tree decl)
1081{
1082  tree attr;
1083  int regparm;
1084  unsigned int ccvt;
1085
1086  if (TARGET_64BIT)
1087    return (ix86_function_type_abi (type) == SYSV_ABI
1088	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
1089  ccvt = ix86_get_callcvt (type);
1090  regparm = ix86_regparm;
1091
1092  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
1093    {
1094      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1095      if (attr)
1096	{
1097	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1098	  return regparm;
1099	}
1100    }
1101  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1102    return 2;
1103  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1104    return 1;
1105
1106  /* Use register calling convention for local functions when possible.  */
1107  if (decl
1108      && TREE_CODE (decl) == FUNCTION_DECL)
1109    {
1110      cgraph_node *target = cgraph_node::get (decl);
1111      if (target)
1112	target = target->function_symbol ();
1113
1114      /* Caller and callee must agree on the calling convention, so
1115	 checking here just optimize means that with
1116	 __attribute__((optimize (...))) caller could use regparm convention
1117	 and callee not, or vice versa.  Instead look at whether the callee
1118	 is optimized or not.  */
1119      if (target && opt_for_fn (target->decl, optimize)
1120	  && !(profile_flag && !flag_fentry))
1121	{
1122	  if (target->local && target->can_change_signature)
1123	    {
1124	      int local_regparm, globals = 0, regno;
1125
1126	      /* Make sure no regparm register is taken by a
1127		 fixed register variable.  */
1128	      for (local_regparm = 0; local_regparm < REGPARM_MAX;
1129		   local_regparm++)
1130		if (fixed_regs[local_regparm])
1131		  break;
1132
1133	      /* We don't want to use regparm(3) for nested functions as
1134		 these use a static chain pointer in the third argument.  */
1135	      if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
1136		local_regparm = 2;
1137
1138	      /* Save a register for the split stack.  */
1139	      if (flag_split_stack)
1140		{
1141		  if (local_regparm == 3)
1142		    local_regparm = 2;
1143		  else if (local_regparm == 2
1144			   && DECL_STATIC_CHAIN (target->decl))
1145		    local_regparm = 1;
1146		}
1147
1148	      /* Each fixed register usage increases register pressure,
1149		 so less registers should be used for argument passing.
1150		 This functionality can be overriden by an explicit
1151		 regparm value.  */
1152	      for (regno = AX_REG; regno <= DI_REG; regno++)
1153		if (fixed_regs[regno])
1154		  globals++;
1155
1156	      local_regparm
1157		= globals < local_regparm ? local_regparm - globals : 0;
1158
1159	      if (local_regparm > regparm)
1160		regparm = local_regparm;
1161	    }
1162	}
1163    }
1164
1165  return regparm;
1166}
1167
1168/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
1169   DFmode (2) arguments in SSE registers for a function with the
1170   indicated TYPE and DECL.  DECL may be NULL when calling function
1171   indirectly or considering a libcall.  Return -1 if any FP parameter
1172   should be rejected by error.  This is used in siutation we imply SSE
1173   calling convetion but the function is called from another function with
1174   SSE disabled. Otherwise return 0.  */
1175
1176static int
1177ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
1178{
1179  gcc_assert (!TARGET_64BIT);
1180
1181  /* Use SSE registers to pass SFmode and DFmode arguments if requested
1182     by the sseregparm attribute.  */
1183  if (TARGET_SSEREGPARM
1184      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
1185    {
1186      if (!TARGET_SSE)
1187	{
1188	  if (warn)
1189	    {
1190	      if (decl)
1191		error ("calling %qD with attribute sseregparm without "
1192		       "SSE/SSE2 enabled", decl);
1193	      else
1194		error ("calling %qT with attribute sseregparm without "
1195		       "SSE/SSE2 enabled", type);
1196	    }
1197	  return 0;
1198	}
1199
1200      return 2;
1201    }
1202
1203  if (!decl)
1204    return 0;
1205
1206  cgraph_node *target = cgraph_node::get (decl);
1207  if (target)
1208    target = target->function_symbol ();
1209
1210  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1211     (and DFmode for SSE2) arguments in SSE registers.  */
1212  if (target
1213      /* TARGET_SSE_MATH */
1214      && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
1215      && opt_for_fn (target->decl, optimize)
1216      && !(profile_flag && !flag_fentry))
1217    {
1218      if (target->local && target->can_change_signature)
1219	{
1220	  /* Refuse to produce wrong code when local function with SSE enabled
1221	     is called from SSE disabled function.
1222	     FIXME: We need a way to detect these cases cross-ltrans partition
1223	     and avoid using SSE calling conventions on local functions called
1224	     from function with SSE disabled.  For now at least delay the
1225	     warning until we know we are going to produce wrong code.
1226	     See PR66047  */
1227	  if (!TARGET_SSE && warn)
1228	    return -1;
1229	  return TARGET_SSE2_P (target_opts_for_fn (target->decl)
1230				->x_ix86_isa_flags) ? 2 : 1;
1231	}
1232    }
1233
1234  return 0;
1235}
1236
1237/* Return true if EAX is live at the start of the function.  Used by
1238   ix86_expand_prologue to determine if we need special help before
1239   calling allocate_stack_worker.  */
1240
1241static bool
1242ix86_eax_live_at_start_p (void)
1243{
1244  /* Cheat.  Don't bother working forward from ix86_function_regparm
1245     to the function type to whether an actual argument is located in
1246     eax.  Instead just look at cfg info, which is still close enough
1247     to correct at this point.  This gives false positives for broken
1248     functions that might use uninitialized data that happens to be
1249     allocated in eax, but who cares?  */
1250  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
1251}
1252
1253static bool
1254ix86_keep_aggregate_return_pointer (tree fntype)
1255{
1256  tree attr;
1257
1258  if (!TARGET_64BIT)
1259    {
1260      attr = lookup_attribute ("callee_pop_aggregate_return",
1261			       TYPE_ATTRIBUTES (fntype));
1262      if (attr)
1263	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1264
1265      /* For 32-bit MS-ABI the default is to keep aggregate
1266         return pointer.  */
1267      if (ix86_function_type_abi (fntype) == MS_ABI)
1268	return true;
1269    }
1270  return KEEP_AGGREGATE_RETURN_POINTER != 0;
1271}
1272
1273/* Value is the number of bytes of arguments automatically
1274   popped when returning from a subroutine call.
1275   FUNDECL is the declaration node of the function (as a tree),
1276   FUNTYPE is the data type of the function (as a tree),
1277   or for a library call it is an identifier node for the subroutine name.
1278   SIZE is the number of bytes of arguments passed on the stack.
1279
1280   On the 80386, the RTD insn may be used to pop them if the number
1281     of args is fixed, but if the number is variable then the caller
1282     must pop them all.  RTD can't be used for library calls now
1283     because the library is compiled with the Unix compiler.
1284   Use of RTD is a selectable option, since it is incompatible with
1285   standard Unix calling sequences.  If the option is not selected,
1286   the caller must always pop the args.
1287
1288   The attribute stdcall is equivalent to RTD on a per module basis.  */
1289
1290static poly_int64
1291ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
1292{
1293  unsigned int ccvt;
1294
1295  /* None of the 64-bit ABIs pop arguments.  */
1296  if (TARGET_64BIT)
1297    return 0;
1298
1299  ccvt = ix86_get_callcvt (funtype);
1300
1301  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
1302	       | IX86_CALLCVT_THISCALL)) != 0
1303      && ! stdarg_p (funtype))
1304    return size;
1305
1306  /* Lose any fake structure return argument if it is passed on the stack.  */
1307  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1308      && !ix86_keep_aggregate_return_pointer (funtype))
1309    {
1310      int nregs = ix86_function_regparm (funtype, fundecl);
1311      if (nregs == 0)
1312	return GET_MODE_SIZE (Pmode);
1313    }
1314
1315  return 0;
1316}
1317
1318/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */
1319
1320static bool
1321ix86_legitimate_combined_insn (rtx_insn *insn)
1322{
1323  int i;
1324
1325  /* Check operand constraints in case hard registers were propagated
1326     into insn pattern.  This check prevents combine pass from
1327     generating insn patterns with invalid hard register operands.
1328     These invalid insns can eventually confuse reload to error out
1329     with a spill failure.  See also PRs 46829 and 46843.  */
1330
1331  gcc_assert (INSN_CODE (insn) >= 0);
1332
1333  extract_insn (insn);
1334  preprocess_constraints (insn);
1335
1336  int n_operands = recog_data.n_operands;
1337  int n_alternatives = recog_data.n_alternatives;
1338  for (i = 0; i < n_operands; i++)
1339    {
1340      rtx op = recog_data.operand[i];
1341      machine_mode mode = GET_MODE (op);
1342      const operand_alternative *op_alt;
1343      int offset = 0;
1344      bool win;
1345      int j;
1346
1347      /* A unary operator may be accepted by the predicate, but it
1348	 is irrelevant for matching constraints.  */
1349      if (UNARY_P (op))
1350	op = XEXP (op, 0);
1351
1352      if (SUBREG_P (op))
1353	{
1354	  if (REG_P (SUBREG_REG (op))
1355	      && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
1356	    offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
1357					  GET_MODE (SUBREG_REG (op)),
1358					  SUBREG_BYTE (op),
1359					  GET_MODE (op));
1360	  op = SUBREG_REG (op);
1361	}
1362
1363      if (!(REG_P (op) && HARD_REGISTER_P (op)))
1364	continue;
1365
1366      op_alt = recog_op_alt;
1367
1368      /* Operand has no constraints, anything is OK.  */
1369      win = !n_alternatives;
1370
1371      alternative_mask preferred = get_preferred_alternatives (insn);
1372      for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
1373	{
1374	  if (!TEST_BIT (preferred, j))
1375	    continue;
1376	  if (op_alt[i].anything_ok
1377	      || (op_alt[i].matches != -1
1378		  && operands_match_p
1379		  (recog_data.operand[i],
1380		   recog_data.operand[op_alt[i].matches]))
1381	      || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
1382	    {
1383	      win = true;
1384	      break;
1385	    }
1386	}
1387
1388      if (!win)
1389	return false;
1390    }
1391
1392  return true;
1393}
1394
1395/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
1396
1397static unsigned HOST_WIDE_INT
1398ix86_asan_shadow_offset (void)
1399{
1400  return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
1401				     : HOST_WIDE_INT_C (0x7fff8000))
1402		     : (HOST_WIDE_INT_1 << X86_32_ASAN_BIT_OFFSET);
1403}
1404
1405/* Argument support functions.  */
1406
1407/* Return true when register may be used to pass function parameters.  */
1408bool
1409ix86_function_arg_regno_p (int regno)
1410{
1411  int i;
1412  enum calling_abi call_abi;
1413  const int *parm_regs;
1414
1415  if (!TARGET_64BIT)
1416    {
1417      if (TARGET_MACHO)
1418        return (regno < REGPARM_MAX
1419                || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1420      else
1421        return (regno < REGPARM_MAX
1422	        || (TARGET_MMX && MMX_REGNO_P (regno)
1423	  	    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
1424	        || (TARGET_SSE && SSE_REGNO_P (regno)
1425		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
1426    }
1427
1428  if (TARGET_SSE && SSE_REGNO_P (regno)
1429      && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
1430    return true;
1431
1432  /* TODO: The function should depend on current function ABI but
1433     builtins.c would need updating then. Therefore we use the
1434     default ABI.  */
1435  call_abi = ix86_cfun_abi ();
1436
1437  /* RAX is used as hidden argument to va_arg functions.  */
1438  if (call_abi == SYSV_ABI && regno == AX_REG)
1439    return true;
1440
1441  if (call_abi == MS_ABI)
1442    parm_regs = x86_64_ms_abi_int_parameter_registers;
1443  else
1444    parm_regs = x86_64_int_parameter_registers;
1445
1446  for (i = 0; i < (call_abi == MS_ABI
1447		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
1448    if (regno == parm_regs[i])
1449      return true;
1450  return false;
1451}
1452
1453/* Return if we do not know how to pass ARG solely in registers.  */
1454
1455static bool
1456ix86_must_pass_in_stack (const function_arg_info &arg)
1457{
1458  if (must_pass_in_stack_var_size_or_pad (arg))
1459    return true;
1460
1461  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
1462     The layout_type routine is crafty and tries to trick us into passing
1463     currently unsupported vector types on the stack by using TImode.  */
1464  return (!TARGET_64BIT && arg.mode == TImode
1465	  && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
1466}
1467
1468/* It returns the size, in bytes, of the area reserved for arguments passed
1469   in registers for the function represented by fndecl dependent to the used
1470   abi format.  */
1471int
1472ix86_reg_parm_stack_space (const_tree fndecl)
1473{
1474  enum calling_abi call_abi = SYSV_ABI;
1475  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1476    call_abi = ix86_function_abi (fndecl);
1477  else
1478    call_abi = ix86_function_type_abi (fndecl);
1479  if (TARGET_64BIT && call_abi == MS_ABI)
1480    return 32;
1481  return 0;
1482}
1483
1484/* We add this as a workaround in order to use libc_has_function
1485   hook in i386.md.  */
1486bool
1487ix86_libc_has_function (enum function_class fn_class)
1488{
1489  return targetm.libc_has_function (fn_class);
1490}
1491
1492/* Returns value SYSV_ABI, MS_ABI dependent on fntype,
1493   specifying the call abi used.  */
1494enum calling_abi
1495ix86_function_type_abi (const_tree fntype)
1496{
1497  enum calling_abi abi = ix86_abi;
1498
1499  if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
1500    return abi;
1501
1502  if (abi == SYSV_ABI
1503      && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
1504    {
1505      static int warned;
1506      if (TARGET_X32 && !warned)
1507	{
1508	  error ("X32 does not support %<ms_abi%> attribute");
1509	  warned = 1;
1510	}
1511
1512      abi = MS_ABI;
1513    }
1514  else if (abi == MS_ABI
1515	   && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
1516    abi = SYSV_ABI;
1517
1518  return abi;
1519}
1520
1521enum calling_abi
1522ix86_function_abi (const_tree fndecl)
1523{
1524  return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
1525}
1526
1527/* Returns value SYSV_ABI, MS_ABI dependent on cfun,
1528   specifying the call abi used.  */
1529enum calling_abi
1530ix86_cfun_abi (void)
1531{
1532  return cfun ? cfun->machine->call_abi : ix86_abi;
1533}
1534
1535bool
1536ix86_function_ms_hook_prologue (const_tree fn)
1537{
1538  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
1539    {
1540      if (decl_function_context (fn) != NULL_TREE)
1541	error_at (DECL_SOURCE_LOCATION (fn),
1542		  "%<ms_hook_prologue%> attribute is not compatible "
1543		  "with nested function");
1544      else
1545        return true;
1546    }
1547  return false;
1548}
1549
1550bool
1551ix86_function_naked (const_tree fn)
1552{
1553  if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
1554    return true;
1555
1556  return false;
1557}
1558
1559/* Write the extra assembler code needed to declare a function properly.  */
1560
1561void
1562ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
1563				tree decl)
1564{
1565  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
1566
1567  if (is_ms_hook)
1568    {
1569      int i, filler_count = (TARGET_64BIT ? 32 : 16);
1570      unsigned int filler_cc = 0xcccccccc;
1571
1572      for (i = 0; i < filler_count; i += 4)
1573        fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
1574    }
1575
1576#ifdef SUBTARGET_ASM_UNWIND_INIT
1577  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
1578#endif
1579
1580  ASM_OUTPUT_LABEL (asm_out_file, fname);
1581
1582  /* Output magic byte marker, if hot-patch attribute is set.  */
1583  if (is_ms_hook)
1584    {
1585      if (TARGET_64BIT)
1586	{
1587	  /* leaq [%rsp + 0], %rsp  */
1588	  fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1589		 asm_out_file);
1590	}
1591      else
1592	{
1593          /* movl.s %edi, %edi
1594	     push   %ebp
1595	     movl.s %esp, %ebp */
1596	  fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file);
1597	}
1598    }
1599}
1600
1601/* Implementation of call abi switching target hook. Specific to FNDECL
1602   the specific call register sets are set.  See also
1603   ix86_conditional_register_usage for more details.  */
1604void
1605ix86_call_abi_override (const_tree fndecl)
1606{
1607  cfun->machine->call_abi = ix86_function_abi (fndecl);
1608}
1609
1610/* Return 1 if pseudo register should be created and used to hold
1611   GOT address for PIC code.  */
1612bool
1613ix86_use_pseudo_pic_reg (void)
1614{
1615  if ((TARGET_64BIT
1616       && (ix86_cmodel == CM_SMALL_PIC
1617	   || TARGET_PECOFF))
1618      || !flag_pic)
1619    return false;
1620  return true;
1621}
1622
1623/* Initialize large model PIC register.  */
1624
1625static void
1626ix86_init_large_pic_reg (unsigned int tmp_regno)
1627{
1628  rtx_code_label *label;
1629  rtx tmp_reg;
1630
1631  gcc_assert (Pmode == DImode);
1632  label = gen_label_rtx ();
1633  emit_label (label);
1634  LABEL_PRESERVE_P (label) = 1;
1635  tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
1636  gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
1637  emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
1638				label));
1639  emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
1640  emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
1641  const char *name = LABEL_NAME (label);
1642  PUT_CODE (label, NOTE);
1643  NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
1644  NOTE_DELETED_LABEL_NAME (label) = name;
1645}
1646
1647/* Create and initialize PIC register if required.  */
1648static void
1649ix86_init_pic_reg (void)
1650{
1651  edge entry_edge;
1652  rtx_insn *seq;
1653
1654  if (!ix86_use_pseudo_pic_reg ())
1655    return;
1656
1657  start_sequence ();
1658
1659  if (TARGET_64BIT)
1660    {
1661      if (ix86_cmodel == CM_LARGE_PIC)
1662	ix86_init_large_pic_reg (R11_REG);
1663      else
1664	emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
1665    }
1666  else
1667    {
1668      /*  If there is future mcount call in the function it is more profitable
1669	  to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
1670      rtx reg = crtl->profile
1671		? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
1672		: pic_offset_table_rtx;
1673      rtx_insn *insn = emit_insn (gen_set_got (reg));
1674      RTX_FRAME_RELATED_P (insn) = 1;
1675      if (crtl->profile)
1676        emit_move_insn (pic_offset_table_rtx, reg);
1677      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
1678    }
1679
1680  seq = get_insns ();
1681  end_sequence ();
1682
1683  entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1684  insert_insn_on_edge (seq, entry_edge);
1685  commit_one_edge_insertion (entry_edge);
1686}
1687
1688/* Initialize a variable CUM of type CUMULATIVE_ARGS
1689   for a call to a function whose data type is FNTYPE.
1690   For a library call, FNTYPE is 0.  */
1691
1692void
1693init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
1694		      tree fntype,	/* tree ptr for function decl */
1695		      rtx libname,	/* SYMBOL_REF of library name or 0 */
1696		      tree fndecl,
1697		      int caller)
1698{
1699  struct cgraph_node *local_info_node = NULL;
1700  struct cgraph_node *target = NULL;
1701
1702  memset (cum, 0, sizeof (*cum));
1703
1704  if (fndecl)
1705    {
1706      target = cgraph_node::get (fndecl);
1707      if (target)
1708	{
1709	  target = target->function_symbol ();
1710	  local_info_node = cgraph_node::local_info_node (target->decl);
1711	  cum->call_abi = ix86_function_abi (target->decl);
1712	}
1713      else
1714	cum->call_abi = ix86_function_abi (fndecl);
1715    }
1716  else
1717    cum->call_abi = ix86_function_type_abi (fntype);
1718
1719  cum->caller = caller;
1720
1721  /* Set up the number of registers to use for passing arguments.  */
1722  cum->nregs = ix86_regparm;
1723  if (TARGET_64BIT)
1724    {
1725      cum->nregs = (cum->call_abi == SYSV_ABI
1726                   ? X86_64_REGPARM_MAX
1727                   : X86_64_MS_REGPARM_MAX);
1728    }
1729  if (TARGET_SSE)
1730    {
1731      cum->sse_nregs = SSE_REGPARM_MAX;
1732      if (TARGET_64BIT)
1733        {
1734          cum->sse_nregs = (cum->call_abi == SYSV_ABI
1735                           ? X86_64_SSE_REGPARM_MAX
1736                           : X86_64_MS_SSE_REGPARM_MAX);
1737        }
1738    }
1739  if (TARGET_MMX)
1740    cum->mmx_nregs = MMX_REGPARM_MAX;
1741  cum->warn_avx512f = true;
1742  cum->warn_avx = true;
1743  cum->warn_sse = true;
1744  cum->warn_mmx = true;
1745
  /* Because the type might mismatch between caller and callee, we need to
     use the actual type of the function for local calls.
     FIXME: cgraph_analyze can be told to actually record if a function uses
     va_start, so for local functions maybe_vaarg can be made more aggressive,
     helping K&R code.
     FIXME: once the type system is fixed, we won't need this code anymore.  */
1752  if (local_info_node && local_info_node->local
1753      && local_info_node->can_change_signature)
1754    fntype = TREE_TYPE (target->decl);
1755  cum->stdarg = stdarg_p (fntype);
1756  cum->maybe_vaarg = (fntype
1757		      ? (!prototype_p (fntype) || stdarg_p (fntype))
1758		      : !libname);
1759
1760  cum->decl = fndecl;
1761
1762  cum->warn_empty = !warn_abi || cum->stdarg;
1763  if (!cum->warn_empty && fntype)
1764    {
1765      function_args_iterator iter;
1766      tree argtype;
1767      bool seen_empty_type = false;
1768      FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
1769	{
1770	  if (argtype == error_mark_node || VOID_TYPE_P (argtype))
1771	    break;
1772	  if (TYPE_EMPTY_P (argtype))
1773	    seen_empty_type = true;
1774	  else if (seen_empty_type)
1775	    {
1776	      cum->warn_empty = true;
1777	      break;
1778	    }
1779	}
1780    }
1781
1782  if (!TARGET_64BIT)
1783    {
1784      /* If there are variable arguments, then we won't pass anything
1785         in registers in 32-bit mode. */
1786      if (stdarg_p (fntype))
1787	{
1788	  cum->nregs = 0;
	  /* Since in 32-bit mode variable arguments are always passed on
	     the stack, there is a scratch register available for an
	     indirect sibcall.  */
1792	  cfun->machine->arg_reg_available = true;
1793	  cum->sse_nregs = 0;
1794	  cum->mmx_nregs = 0;
1795	  cum->warn_avx512f = false;
1796	  cum->warn_avx = false;
1797	  cum->warn_sse = false;
1798	  cum->warn_mmx = false;
1799	  return;
1800	}
1801
1802      /* Use ecx and edx registers if function has fastcall attribute,
1803	 else look for regparm information.  */
1804      if (fntype)
1805	{
1806	  unsigned int ccvt = ix86_get_callcvt (fntype);
1807	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1808	    {
1809	      cum->nregs = 1;
1810	      cum->fastcall = 1; /* Same first register as in fastcall.  */
1811	    }
1812	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1813	    {
1814	      cum->nregs = 2;
1815	      cum->fastcall = 1;
1816	    }
1817	  else
1818	    cum->nregs = ix86_function_regparm (fntype, fndecl);
1819	}
1820
1821      /* Set up the number of SSE registers used for passing SFmode
1822	 and DFmode arguments.  Warn for mismatching ABI.  */
1823      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
1824    }
1825
1826  cfun->machine->arg_reg_available = (cum->nregs > 0);
1827}
1828
1829/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
1830   But in the case of vector types, it is some vector mode.
1831
1832   When we have only some of our vector isa extensions enabled, then there
1833   are some modes for which vector_mode_supported_p is false.  For these
1834   modes, the generic vector support in gcc will choose some non-vector mode
1835   in order to implement the type.  By computing the natural mode, we'll
1836   select the proper ABI location for the operand and not depend on whatever
1837   the middle-end decides to do with these vector types.
1838
   The middle-end can't deal with vector types larger than 16 bytes.  In
   this case, we return the original mode and warn about the ABI change if
   CUM isn't NULL.

   If IN_RETURN is true, warn about the ABI change if the vector mode isn't
   available for the function return value.  */
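
/* For example (an illustrative sketch): for

     typedef int v4si __attribute__ ((vector_size (16)));

   the natural mode is V4SImode.  With SSE disabled the middle-end gives
   the type some non-vector TYPE_MODE, but this function still returns
   V4SImode (after a -Wpsabi warning), so the ABI location of the operand
   does not depend on which ISA extensions happen to be enabled.  The
   larger-than-16-byte cases differ: there the original TYPE_MODE is
   returned instead.  */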
1845
1846static machine_mode
1847type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
1848		   bool in_return)
1849{
1850  machine_mode mode = TYPE_MODE (type);
1851
1852  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
1853    {
1854      HOST_WIDE_INT size = int_size_in_bytes (type);
1855      if ((size == 8 || size == 16 || size == 32 || size == 64)
1856	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
1857	  && TYPE_VECTOR_SUBPARTS (type) > 1)
1858	{
1859	  machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
1860
1861	  /* There are no XFmode vector modes.  */
1862	  if (innermode == XFmode)
1863	    return mode;
1864
1865	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
1866	    mode = MIN_MODE_VECTOR_FLOAT;
1867	  else
1868	    mode = MIN_MODE_VECTOR_INT;
1869
1870	  /* Get the mode which has this inner mode and number of units.  */
1871	  FOR_EACH_MODE_FROM (mode, mode)
1872	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
1873		&& GET_MODE_INNER (mode) == innermode)
1874	      {
1875		if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
1876		  {
1877		    static bool warnedavx512f;
1878		    static bool warnedavx512f_ret;
1879
1880		    if (cum && cum->warn_avx512f && !warnedavx512f)
1881		      {
1882			if (warning (OPT_Wpsabi, "AVX512F vector argument "
1883				     "without AVX512F enabled changes the ABI"))
1884			  warnedavx512f = true;
1885		      }
1886		    else if (in_return && !warnedavx512f_ret)
1887		      {
1888			if (warning (OPT_Wpsabi, "AVX512F vector return "
1889				     "without AVX512F enabled changes the ABI"))
1890			  warnedavx512f_ret = true;
1891		      }
1892
1893		    return TYPE_MODE (type);
1894		  }
1895		else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
1896		  {
1897		    static bool warnedavx;
1898		    static bool warnedavx_ret;
1899
1900		    if (cum && cum->warn_avx && !warnedavx)
1901		      {
1902			if (warning (OPT_Wpsabi, "AVX vector argument "
1903				     "without AVX enabled changes the ABI"))
1904			  warnedavx = true;
1905		      }
1906		    else if (in_return && !warnedavx_ret)
1907		      {
1908			if (warning (OPT_Wpsabi, "AVX vector return "
1909				     "without AVX enabled changes the ABI"))
1910			  warnedavx_ret = true;
1911		      }
1912
1913		    return TYPE_MODE (type);
1914		  }
1915		else if (((size == 8 && TARGET_64BIT) || size == 16)
1916			 && !TARGET_SSE
1917			 && !TARGET_IAMCU)
1918		  {
1919		    static bool warnedsse;
1920		    static bool warnedsse_ret;
1921
1922		    if (cum && cum->warn_sse && !warnedsse)
1923		      {
1924			if (warning (OPT_Wpsabi, "SSE vector argument "
1925				     "without SSE enabled changes the ABI"))
1926			  warnedsse = true;
1927		      }
1928		    else if (!TARGET_64BIT && in_return && !warnedsse_ret)
1929		      {
1930			if (warning (OPT_Wpsabi, "SSE vector return "
1931				     "without SSE enabled changes the ABI"))
1932			  warnedsse_ret = true;
1933		      }
1934		  }
1935		else if ((size == 8 && !TARGET_64BIT)
1936			 && (!cfun
1937			     || cfun->machine->func_type == TYPE_NORMAL)
1938			 && !TARGET_MMX
1939			 && !TARGET_IAMCU)
1940		  {
1941		    static bool warnedmmx;
1942		    static bool warnedmmx_ret;
1943
1944		    if (cum && cum->warn_mmx && !warnedmmx)
1945		      {
1946			if (warning (OPT_Wpsabi, "MMX vector argument "
1947				     "without MMX enabled changes the ABI"))
1948			  warnedmmx = true;
1949		      }
1950		    else if (in_return && !warnedmmx_ret)
1951		      {
1952			if (warning (OPT_Wpsabi, "MMX vector return "
1953				     "without MMX enabled changes the ABI"))
1954			  warnedmmx_ret = true;
1955		      }
1956		  }
1957		return mode;
1958	      }
1959
1960	  gcc_unreachable ();
1961	}
1962    }
1963
1964  return mode;
1965}
1966
1967/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
1968   this may not agree with the mode that the type system has chosen for the
1969   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
1970   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
1971
1972static rtx
1973gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
1974		     unsigned int regno)
1975{
1976  rtx tmp;
1977
1978  if (orig_mode != BLKmode)
1979    tmp = gen_rtx_REG (orig_mode, regno);
1980  else
1981    {
1982      tmp = gen_rtx_REG (mode, regno);
1983      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
1984      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
1985    }
1986
1987  return tmp;
1988}
1989
/* x86-64 register passing implementation.  See the x86-64 psABI for details.
   The goal of this code is to classify each eightbyte of an incoming argument
   by register class and assign registers accordingly.  */
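
/* As a brief orientation (a summary of the SysV x86-64 convention rather
   than of this file's internals): INTEGER-class eightbytes are assigned to
   %rdi, %rsi, %rdx, %rcx, %r8 and %r9 in order, SSE-class eightbytes to
   %xmm0-%xmm7, and anything that classifies as MEMORY (or does not fit in
   the remaining registers) is passed on the stack.  */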
1993
1994/* Return the union class of CLASS1 and CLASS2.
1995   See the x86-64 PS ABI for details.  */
1996
1997static enum x86_64_reg_class
1998merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1999{
2000  /* Rule #1: If both classes are equal, this is the resulting class.  */
2001  if (class1 == class2)
2002    return class1;
2003
2004  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2005     the other class.  */
2006  if (class1 == X86_64_NO_CLASS)
2007    return class2;
2008  if (class2 == X86_64_NO_CLASS)
2009    return class1;
2010
2011  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
2012  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2013    return X86_64_MEMORY_CLASS;
2014
2015  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
2016  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2017      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2018    return X86_64_INTEGERSI_CLASS;
2019  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2020      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2021    return X86_64_INTEGER_CLASS;
2022
2023  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2024     MEMORY is used.  */
2025  if (class1 == X86_64_X87_CLASS
2026      || class1 == X86_64_X87UP_CLASS
2027      || class1 == X86_64_COMPLEX_X87_CLASS
2028      || class2 == X86_64_X87_CLASS
2029      || class2 == X86_64_X87UP_CLASS
2030      || class2 == X86_64_COMPLEX_X87_CLASS)
2031    return X86_64_MEMORY_CLASS;
2032
2033  /* Rule #6: Otherwise class SSE is used.  */
2034  return X86_64_SSE_CLASS;
2035}
2036
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled with the register class used to pass each word
   of the operand.  The number of words is returned.  If the parameter
   should be passed in memory, 0 is returned.  As a special case for
   zero-sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies the
   offset in bits modulo 512 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */
2048
2049static int
2050classify_argument (machine_mode mode, const_tree type,
2051		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2052{
2053  HOST_WIDE_INT bytes
2054    = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2055  int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
2056
2057  /* Variable sized entities are always passed/returned in memory.  */
2058  if (bytes < 0)
2059    return 0;
2060
2061  if (mode != VOIDmode)
2062    {
2063      /* The value of "named" doesn't matter.  */
2064      function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
2065      if (targetm.calls.must_pass_in_stack (arg))
2066	return 0;
2067    }
2068
2069  if (type && AGGREGATE_TYPE_P (type))
2070    {
2071      int i;
2072      tree field;
2073      enum x86_64_reg_class subclasses[MAX_CLASSES];
2074
2075      /* On x86-64 we pass structures larger than 64 bytes on the stack.  */
2076      if (bytes > 64)
2077	return 0;
2078
2079      for (i = 0; i < words; i++)
2080	classes[i] = X86_64_NO_CLASS;
2081
      /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
	 signal the memory class, so handle them as a special case.  */
2084      if (!words)
2085	{
2086	  classes[0] = X86_64_NO_CLASS;
2087	  return 1;
2088	}
2089
2090      /* Classify each field of record and merge classes.  */
2091      switch (TREE_CODE (type))
2092	{
2093	case RECORD_TYPE:
2094	  /* And now merge the fields of structure.  */
2095	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2096	    {
2097	      if (TREE_CODE (field) == FIELD_DECL)
2098		{
2099		  int num;
2100
2101		  if (TREE_TYPE (field) == error_mark_node)
2102		    continue;
2103
2104		  /* Bitfields are always classified as integer.  Handle them
2105		     early, since later code would consider them to be
2106		     misaligned integers.  */
2107		  if (DECL_BIT_FIELD (field))
2108		    {
2109		      for (i = (int_bit_position (field)
2110				+ (bit_offset % 64)) / 8 / 8;
2111			   i < ((int_bit_position (field) + (bit_offset % 64))
2112			        + tree_to_shwi (DECL_SIZE (field))
2113				+ 63) / 8 / 8; i++)
2114			classes[i]
2115			  = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
2116		    }
2117		  else
2118		    {
2119		      int pos;
2120
2121		      type = TREE_TYPE (field);
2122
2123		      /* Flexible array member is ignored.  */
2124		      if (TYPE_MODE (type) == BLKmode
2125			  && TREE_CODE (type) == ARRAY_TYPE
2126			  && TYPE_SIZE (type) == NULL_TREE
2127			  && TYPE_DOMAIN (type) != NULL_TREE
2128			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
2129			      == NULL_TREE))
2130			{
2131			  static bool warned;
2132
2133			  if (!warned && warn_psabi)
2134			    {
2135			      warned = true;
2136			      inform (input_location,
2137				      "the ABI of passing struct with"
2138				      " a flexible array member has"
2139				      " changed in GCC 4.4");
2140			    }
2141			  continue;
2142			}
2143		      num = classify_argument (TYPE_MODE (type), type,
2144					       subclasses,
2145					       (int_bit_position (field)
2146						+ bit_offset) % 512);
2147		      if (!num)
2148			return 0;
2149		      pos = (int_bit_position (field)
2150			     + (bit_offset % 64)) / 8 / 8;
2151		      for (i = 0; i < num && (i + pos) < words; i++)
2152			classes[i + pos]
2153			  = merge_classes (subclasses[i], classes[i + pos]);
2154		    }
2155		}
2156	    }
2157	  break;
2158
2159	case ARRAY_TYPE:
2160	  /* Arrays are handled as small records.  */
2161	  {
2162	    int num;
2163	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2164				     TREE_TYPE (type), subclasses, bit_offset);
2165	    if (!num)
2166	      return 0;
2167
2168	    /* The partial classes are now full classes.  */
2169	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2170	      subclasses[0] = X86_64_SSE_CLASS;
2171	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
2172		&& !((bit_offset % 64) == 0 && bytes == 4))
2173	      subclasses[0] = X86_64_INTEGER_CLASS;
2174
2175	    for (i = 0; i < words; i++)
2176	      classes[i] = subclasses[i % num];
2177
2178	    break;
2179	  }
2180	case UNION_TYPE:
2181	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
2184	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2185	    {
2186	      if (TREE_CODE (field) == FIELD_DECL)
2187		{
2188		  int num;
2189
2190		  if (TREE_TYPE (field) == error_mark_node)
2191		    continue;
2192
2193		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2194					   TREE_TYPE (field), subclasses,
2195					   bit_offset);
2196		  if (!num)
2197		    return 0;
2198		  for (i = 0; i < num && i < words; i++)
2199		    classes[i] = merge_classes (subclasses[i], classes[i]);
2200		}
2201	    }
2202	  break;
2203
2204	default:
2205	  gcc_unreachable ();
2206	}
2207
2208      if (words > 2)
2209	{
	  /* When the size exceeds 16 bytes, everything should be passed
	     in memory unless the first class is X86_64_SSE_CLASS and all
	     the remaining ones are X86_64_SSEUP_CLASS.  */
2214	  if (classes[0] != X86_64_SSE_CLASS)
2215	      return 0;
2216
2217	  for (i = 1; i < words; i++)
2218	    if (classes[i] != X86_64_SSEUP_CLASS)
2219	      return 0;
2220	}
2221
2222      /* Final merger cleanup.  */
2223      for (i = 0; i < words; i++)
2224	{
2225	  /* If one class is MEMORY, everything should be passed in
2226	     memory.  */
2227	  if (classes[i] == X86_64_MEMORY_CLASS)
2228	    return 0;
2229
	  /* X86_64_SSEUP_CLASS should always be preceded by
	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
2232	  if (classes[i] == X86_64_SSEUP_CLASS
2233	      && classes[i - 1] != X86_64_SSE_CLASS
2234	      && classes[i - 1] != X86_64_SSEUP_CLASS)
2235	    {
2236	      /* The first one should never be X86_64_SSEUP_CLASS.  */
2237	      gcc_assert (i != 0);
2238	      classes[i] = X86_64_SSE_CLASS;
2239	    }
2240
	  /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
	     everything should be passed in memory.  */
2243	  if (classes[i] == X86_64_X87UP_CLASS
2244	      && (classes[i - 1] != X86_64_X87_CLASS))
2245	    {
2246	      static bool warned;
2247
2248	      /* The first one should never be X86_64_X87UP_CLASS.  */
2249	      gcc_assert (i != 0);
2250	      if (!warned && warn_psabi)
2251		{
2252		  warned = true;
2253		  inform (input_location,
2254			  "the ABI of passing union with %<long double%>"
2255			  " has changed in GCC 4.4");
2256		}
2257	      return 0;
2258	    }
2259	}
2260      return words;
2261    }
2262
  /* Compute the alignment needed.  We align all types to their natural
     boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
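  /* For instance, a DImode field that starts at bit offset 32 within a
     record is misaligned, so we return 0 below and the whole argument is
     passed in memory.  */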
2265  if (mode != VOIDmode && mode != BLKmode)
2266    {
2267      int mode_alignment = GET_MODE_BITSIZE (mode);
2268
2269      if (mode == XFmode)
2270	mode_alignment = 128;
2271      else if (mode == XCmode)
2272	mode_alignment = 256;
2273      if (COMPLEX_MODE_P (mode))
2274	mode_alignment /= 2;
2275      /* Misaligned fields are always returned in memory.  */
2276      if (bit_offset % mode_alignment)
2277	return 0;
2278    }
2279
  /* For V1xx modes, just use the base mode.  */
2281  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
2282      && GET_MODE_UNIT_SIZE (mode) == bytes)
2283    mode = GET_MODE_INNER (mode);
2284
2285  /* Classification of atomic types.  */
2286  switch (mode)
2287    {
2288    case E_SDmode:
2289    case E_DDmode:
2290      classes[0] = X86_64_SSE_CLASS;
2291      return 1;
2292    case E_TDmode:
2293      classes[0] = X86_64_SSE_CLASS;
2294      classes[1] = X86_64_SSEUP_CLASS;
2295      return 2;
2296    case E_DImode:
2297    case E_SImode:
2298    case E_HImode:
2299    case E_QImode:
2300    case E_CSImode:
2301    case E_CHImode:
2302    case E_CQImode:
2303      {
2304	int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
2305
2306	/* Analyze last 128 bits only.  */
2307	size = (size - 1) & 0x7f;
2308
2309	if (size < 32)
2310	  {
2311	    classes[0] = X86_64_INTEGERSI_CLASS;
2312	    return 1;
2313	  }
2314	else if (size < 64)
2315	  {
2316	    classes[0] = X86_64_INTEGER_CLASS;
2317	    return 1;
2318	  }
2319	else if (size < 64+32)
2320	  {
2321	    classes[0] = X86_64_INTEGER_CLASS;
2322	    classes[1] = X86_64_INTEGERSI_CLASS;
2323	    return 2;
2324	  }
2325	else if (size < 64+64)
2326	  {
2327	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2328	    return 2;
2329	  }
2330	else
2331	  gcc_unreachable ();
2332      }
2333    case E_CDImode:
2334    case E_TImode:
2335      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2336      return 2;
2337    case E_COImode:
2338    case E_OImode:
2339      /* OImode shouldn't be used directly.  */
2340      gcc_unreachable ();
2341    case E_CTImode:
2342      return 0;
2343    case E_SFmode:
2344      if (!(bit_offset % 64))
2345	classes[0] = X86_64_SSESF_CLASS;
2346      else
2347	classes[0] = X86_64_SSE_CLASS;
2348      return 1;
2349    case E_DFmode:
2350      classes[0] = X86_64_SSEDF_CLASS;
2351      return 1;
2352    case E_XFmode:
2353      classes[0] = X86_64_X87_CLASS;
2354      classes[1] = X86_64_X87UP_CLASS;
2355      return 2;
2356    case E_TFmode:
2357      classes[0] = X86_64_SSE_CLASS;
2358      classes[1] = X86_64_SSEUP_CLASS;
2359      return 2;
2360    case E_SCmode:
2361      classes[0] = X86_64_SSE_CLASS;
2362      if (!(bit_offset % 64))
2363	return 1;
2364      else
2365	{
2366	  static bool warned;
2367
2368	  if (!warned && warn_psabi)
2369	    {
2370	      warned = true;
2371	      inform (input_location,
2372		      "the ABI of passing structure with %<complex float%>"
2373		      " member has changed in GCC 4.4");
2374	    }
2375	  classes[1] = X86_64_SSESF_CLASS;
2376	  return 2;
2377	}
2378    case E_DCmode:
2379      classes[0] = X86_64_SSEDF_CLASS;
2380      classes[1] = X86_64_SSEDF_CLASS;
2381      return 2;
2382    case E_XCmode:
2383      classes[0] = X86_64_COMPLEX_X87_CLASS;
2384      return 1;
2385    case E_TCmode:
      /* This mode is larger than 16 bytes.  */
2387      return 0;
2388    case E_V8SFmode:
2389    case E_V8SImode:
2390    case E_V32QImode:
2391    case E_V16HImode:
2392    case E_V4DFmode:
2393    case E_V4DImode:
2394      classes[0] = X86_64_SSE_CLASS;
2395      classes[1] = X86_64_SSEUP_CLASS;
2396      classes[2] = X86_64_SSEUP_CLASS;
2397      classes[3] = X86_64_SSEUP_CLASS;
2398      return 4;
2399    case E_V8DFmode:
2400    case E_V16SFmode:
2401    case E_V8DImode:
2402    case E_V16SImode:
2403    case E_V32HImode:
2404    case E_V64QImode:
2405      classes[0] = X86_64_SSE_CLASS;
2406      classes[1] = X86_64_SSEUP_CLASS;
2407      classes[2] = X86_64_SSEUP_CLASS;
2408      classes[3] = X86_64_SSEUP_CLASS;
2409      classes[4] = X86_64_SSEUP_CLASS;
2410      classes[5] = X86_64_SSEUP_CLASS;
2411      classes[6] = X86_64_SSEUP_CLASS;
2412      classes[7] = X86_64_SSEUP_CLASS;
2413      return 8;
2414    case E_V4SFmode:
2415    case E_V4SImode:
2416    case E_V16QImode:
2417    case E_V8HImode:
2418    case E_V2DFmode:
2419    case E_V2DImode:
2420      classes[0] = X86_64_SSE_CLASS;
2421      classes[1] = X86_64_SSEUP_CLASS;
2422      return 2;
2423    case E_V1TImode:
2424    case E_V1DImode:
2425    case E_V2SFmode:
2426    case E_V2SImode:
2427    case E_V4HImode:
2428    case E_V8QImode:
2429      classes[0] = X86_64_SSE_CLASS;
2430      return 1;
2431    case E_BLKmode:
2432    case E_VOIDmode:
2433      return 0;
2434    default:
2435      gcc_assert (VECTOR_MODE_P (mode));
2436
2437      if (bytes > 16)
2438	return 0;
2439
2440      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2441
2442      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2443	classes[0] = X86_64_INTEGERSI_CLASS;
2444      else
2445	classes[0] = X86_64_INTEGER_CLASS;
2446      classes[1] = X86_64_INTEGER_CLASS;
2447      return 1 + (bytes > 8);
2448    }
2449}
2450
/* Examine the argument and set the number of registers required in each
   class.  Return true iff the parameter should be passed in memory.  */
2453
2454static bool
2455examine_argument (machine_mode mode, const_tree type, int in_return,
2456		  int *int_nregs, int *sse_nregs)
2457{
2458  enum x86_64_reg_class regclass[MAX_CLASSES];
2459  int n = classify_argument (mode, type, regclass, 0);
2460
2461  *int_nregs = 0;
2462  *sse_nregs = 0;
2463
2464  if (!n)
2465    return true;
2466  for (n--; n >= 0; n--)
2467    switch (regclass[n])
2468      {
2469      case X86_64_INTEGER_CLASS:
2470      case X86_64_INTEGERSI_CLASS:
2471	(*int_nregs)++;
2472	break;
2473      case X86_64_SSE_CLASS:
2474      case X86_64_SSESF_CLASS:
2475      case X86_64_SSEDF_CLASS:
2476	(*sse_nregs)++;
2477	break;
2478      case X86_64_NO_CLASS:
2479      case X86_64_SSEUP_CLASS:
2480	break;
2481      case X86_64_X87_CLASS:
2482      case X86_64_X87UP_CLASS:
2483      case X86_64_COMPLEX_X87_CLASS:
2484	if (!in_return)
2485	  return true;
2486	break;
2487      case X86_64_MEMORY_CLASS:
2488	gcc_unreachable ();
2489      }
2490
2491  return false;
2492}
2493
/* Construct a container for the argument used by the GCC interface.  See
   FUNCTION_ARG for the detailed description.  */
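
/* For example (a rough sketch of a typical result): a 16-byte struct
   classified as { INTEGER, SSEDF } yields a PARALLEL of two EXPR_LISTs,
   roughly

     (parallel [(expr_list (reg:DI di) (const_int 0))
		(expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the register carrying each eightbyte together with its byte offset
   within the argument.  */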
2496
2497static rtx
2498construct_container (machine_mode mode, machine_mode orig_mode,
2499		     const_tree type, int in_return, int nintregs, int nsseregs,
2500		     const int *intreg, int sse_regno)
2501{
2502  /* The following variables hold the static issued_error state.  */
2503  static bool issued_sse_arg_error;
2504  static bool issued_sse_ret_error;
2505  static bool issued_x87_ret_error;
2506
2507  machine_mode tmpmode;
2508  int bytes
2509    = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2510  enum x86_64_reg_class regclass[MAX_CLASSES];
2511  int n;
2512  int i;
2513  int nexps = 0;
2514  int needed_sseregs, needed_intregs;
2515  rtx exp[MAX_CLASSES];
2516  rtx ret;
2517
2518  n = classify_argument (mode, type, regclass, 0);
2519  if (!n)
2520    return NULL;
2521  if (examine_argument (mode, type, in_return, &needed_intregs,
2522			&needed_sseregs))
2523    return NULL;
2524  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2525    return NULL;
2526
2527  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
2528     some less clueful developer tries to use floating-point anyway.  */
2529  if (needed_sseregs && !TARGET_SSE)
2530    {
2531      if (in_return)
2532	{
2533	  if (!issued_sse_ret_error)
2534	    {
2535	      error ("SSE register return with SSE disabled");
2536	      issued_sse_ret_error = true;
2537	    }
2538	}
2539      else if (!issued_sse_arg_error)
2540	{
2541	  error ("SSE register argument with SSE disabled");
2542	  issued_sse_arg_error = true;
2543	}
2544      return NULL;
2545    }
2546
2547  /* Likewise, error if the ABI requires us to return values in the
2548     x87 registers and the user specified -mno-80387.  */
2549  if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
2550    for (i = 0; i < n; i++)
2551      if (regclass[i] == X86_64_X87_CLASS
2552	  || regclass[i] == X86_64_X87UP_CLASS
2553	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
2554	{
2555	  if (!issued_x87_ret_error)
2556	    {
2557	      error ("x87 register return with x87 disabled");
2558	      issued_x87_ret_error = true;
2559	    }
2560	  return NULL;
2561	}
2562
  /* First construct the simple cases.  Avoid SCmode, since we want to use
     a single register to pass this type.  */
2565  if (n == 1 && mode != SCmode)
2566    switch (regclass[0])
2567      {
2568      case X86_64_INTEGER_CLASS:
2569      case X86_64_INTEGERSI_CLASS:
2570	return gen_rtx_REG (mode, intreg[0]);
2571      case X86_64_SSE_CLASS:
2572      case X86_64_SSESF_CLASS:
2573      case X86_64_SSEDF_CLASS:
2574	if (mode != BLKmode)
2575	  return gen_reg_or_parallel (mode, orig_mode,
2576				      GET_SSE_REGNO (sse_regno));
2577	break;
2578      case X86_64_X87_CLASS:
2579      case X86_64_COMPLEX_X87_CLASS:
2580	return gen_rtx_REG (mode, FIRST_STACK_REG);
2581      case X86_64_NO_CLASS:
2582	/* Zero sized array, struct or class.  */
2583	return NULL;
2584      default:
2585	gcc_unreachable ();
2586      }
2587  if (n == 2
2588      && regclass[0] == X86_64_SSE_CLASS
2589      && regclass[1] == X86_64_SSEUP_CLASS
2590      && mode != BLKmode)
2591    return gen_reg_or_parallel (mode, orig_mode,
2592				GET_SSE_REGNO (sse_regno));
2593  if (n == 4
2594      && regclass[0] == X86_64_SSE_CLASS
2595      && regclass[1] == X86_64_SSEUP_CLASS
2596      && regclass[2] == X86_64_SSEUP_CLASS
2597      && regclass[3] == X86_64_SSEUP_CLASS
2598      && mode != BLKmode)
2599    return gen_reg_or_parallel (mode, orig_mode,
2600				GET_SSE_REGNO (sse_regno));
2601  if (n == 8
2602      && regclass[0] == X86_64_SSE_CLASS
2603      && regclass[1] == X86_64_SSEUP_CLASS
2604      && regclass[2] == X86_64_SSEUP_CLASS
2605      && regclass[3] == X86_64_SSEUP_CLASS
2606      && regclass[4] == X86_64_SSEUP_CLASS
2607      && regclass[5] == X86_64_SSEUP_CLASS
2608      && regclass[6] == X86_64_SSEUP_CLASS
2609      && regclass[7] == X86_64_SSEUP_CLASS
2610      && mode != BLKmode)
2611    return gen_reg_or_parallel (mode, orig_mode,
2612				GET_SSE_REGNO (sse_regno));
2613  if (n == 2
2614      && regclass[0] == X86_64_X87_CLASS
2615      && regclass[1] == X86_64_X87UP_CLASS)
2616    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2617
2618  if (n == 2
2619      && regclass[0] == X86_64_INTEGER_CLASS
2620      && regclass[1] == X86_64_INTEGER_CLASS
2621      && (mode == CDImode || mode == TImode || mode == BLKmode)
2622      && intreg[0] + 1 == intreg[1])
2623    {
2624      if (mode == BLKmode)
2625	{
2626	  /* Use TImode for BLKmode values in 2 integer registers.  */
2627	  exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
2628				      gen_rtx_REG (TImode, intreg[0]),
2629				      GEN_INT (0));
2630	  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
2631	  XVECEXP (ret, 0, 0) = exp[0];
2632	  return ret;
2633	}
2634      else
2635	return gen_rtx_REG (mode, intreg[0]);
2636    }
2637
2638  /* Otherwise figure out the entries of the PARALLEL.  */
2639  for (i = 0; i < n; i++)
2640    {
2641      int pos;
2642
2643      switch (regclass[i])
2644        {
2645	  case X86_64_NO_CLASS:
2646	    break;
2647	  case X86_64_INTEGER_CLASS:
2648	  case X86_64_INTEGERSI_CLASS:
2649	    /* Merge TImodes on aligned occasions here too.  */
2650	    if (i * 8 + 8 > bytes)
2651	      {
2652		unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
2653		if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
		  /* We've requested a number of bits (e.g. 24) for which
		     there is no integer mode.  Use DImode.  */
2656		  tmpmode = DImode;
2657	      }
2658	    else if (regclass[i] == X86_64_INTEGERSI_CLASS)
2659	      tmpmode = SImode;
2660	    else
2661	      tmpmode = DImode;
2662	    exp [nexps++]
2663	      = gen_rtx_EXPR_LIST (VOIDmode,
2664				   gen_rtx_REG (tmpmode, *intreg),
2665				   GEN_INT (i*8));
2666	    intreg++;
2667	    break;
2668	  case X86_64_SSESF_CLASS:
2669	    exp [nexps++]
2670	      = gen_rtx_EXPR_LIST (VOIDmode,
2671				   gen_rtx_REG (SFmode,
2672						GET_SSE_REGNO (sse_regno)),
2673				   GEN_INT (i*8));
2674	    sse_regno++;
2675	    break;
2676	  case X86_64_SSEDF_CLASS:
2677	    exp [nexps++]
2678	      = gen_rtx_EXPR_LIST (VOIDmode,
2679				   gen_rtx_REG (DFmode,
2680						GET_SSE_REGNO (sse_regno)),
2681				   GEN_INT (i*8));
2682	    sse_regno++;
2683	    break;
2684	  case X86_64_SSE_CLASS:
2685	    pos = i;
2686	    switch (n)
2687	      {
2688	      case 1:
2689		tmpmode = DImode;
2690		break;
2691	      case 2:
2692		if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
2693		  {
2694		    tmpmode = TImode;
2695		    i++;
2696		  }
2697		else
2698		  tmpmode = DImode;
2699		break;
2700	      case 4:
2701		gcc_assert (i == 0
2702			    && regclass[1] == X86_64_SSEUP_CLASS
2703			    && regclass[2] == X86_64_SSEUP_CLASS
2704			    && regclass[3] == X86_64_SSEUP_CLASS);
2705		tmpmode = OImode;
2706		i += 3;
2707		break;
2708	      case 8:
2709		gcc_assert (i == 0
2710			    && regclass[1] == X86_64_SSEUP_CLASS
2711			    && regclass[2] == X86_64_SSEUP_CLASS
2712			    && regclass[3] == X86_64_SSEUP_CLASS
2713			    && regclass[4] == X86_64_SSEUP_CLASS
2714			    && regclass[5] == X86_64_SSEUP_CLASS
2715			    && regclass[6] == X86_64_SSEUP_CLASS
2716			    && regclass[7] == X86_64_SSEUP_CLASS);
2717		tmpmode = XImode;
2718		i += 7;
2719		break;
2720	      default:
2721		gcc_unreachable ();
2722	      }
2723	    exp [nexps++]
2724	      = gen_rtx_EXPR_LIST (VOIDmode,
2725				   gen_rtx_REG (tmpmode,
2726						GET_SSE_REGNO (sse_regno)),
2727				   GEN_INT (pos*8));
2728	    sse_regno++;
2729	    break;
2730	  default:
2731	    gcc_unreachable ();
2732	}
2733    }
2734
2735  /* Empty aligned struct, union or class.  */
2736  if (nexps == 0)
2737    return NULL;
2738
  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2740  for (i = 0; i < nexps; i++)
2741    XVECEXP (ret, 0, i) = exp [i];
2742  return ret;
2743}
2744
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)

   Return the number of integer registers advanced over.  */
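
/* For instance (an illustrative sketch of the 32-bit case): with
   -mregparm=3, three int arguments consume the three available integer
   registers, so cum->nregs drops from 3 to 0 and any further arguments go
   on the stack.  */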
2750
2751static int
2752function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2753			 const_tree type, HOST_WIDE_INT bytes,
2754			 HOST_WIDE_INT words)
2755{
2756  int res = 0;
2757  bool error_p = false;
2758
2759  if (TARGET_IAMCU)
2760    {
2761      /* Intel MCU psABI passes scalars and aggregates no larger than 8
2762	 bytes in registers.  */
2763      if (!VECTOR_MODE_P (mode) && bytes <= 8)
2764	goto pass_in_reg;
2765      return res;
2766    }
2767
2768  switch (mode)
2769    {
2770    default:
2771      break;
2772
2773    case E_BLKmode:
2774      if (bytes < 0)
2775	break;
2776      /* FALLTHRU */
2777
2778    case E_DImode:
2779    case E_SImode:
2780    case E_HImode:
2781    case E_QImode:
2782pass_in_reg:
2783      cum->words += words;
2784      cum->nregs -= words;
2785      cum->regno += words;
2786      if (cum->nregs >= 0)
2787	res = words;
2788      if (cum->nregs <= 0)
2789	{
2790	  cum->nregs = 0;
2791	  cfun->machine->arg_reg_available = false;
2792	  cum->regno = 0;
2793	}
2794      break;
2795
2796    case E_OImode:
2797      /* OImode shouldn't be used directly.  */
2798      gcc_unreachable ();
2799
2800    case E_DFmode:
2801      if (cum->float_in_sse == -1)
2802	error_p = true;
2803      if (cum->float_in_sse < 2)
2804	break;
2805      /* FALLTHRU */
2806    case E_SFmode:
2807      if (cum->float_in_sse == -1)
2808	error_p = true;
2809      if (cum->float_in_sse < 1)
2810	break;
2811      /* FALLTHRU */
2812
2813    case E_V8SFmode:
2814    case E_V8SImode:
2815    case E_V64QImode:
2816    case E_V32HImode:
2817    case E_V16SImode:
2818    case E_V8DImode:
2819    case E_V16SFmode:
2820    case E_V8DFmode:
2821    case E_V32QImode:
2822    case E_V16HImode:
2823    case E_V4DFmode:
2824    case E_V4DImode:
2825    case E_TImode:
2826    case E_V16QImode:
2827    case E_V8HImode:
2828    case E_V4SImode:
2829    case E_V2DImode:
2830    case E_V4SFmode:
2831    case E_V2DFmode:
2832      if (!type || !AGGREGATE_TYPE_P (type))
2833	{
2834	  cum->sse_words += words;
2835	  cum->sse_nregs -= 1;
2836	  cum->sse_regno += 1;
2837	  if (cum->sse_nregs <= 0)
2838	    {
2839	      cum->sse_nregs = 0;
2840	      cum->sse_regno = 0;
2841	    }
2842	}
2843      break;
2844
2845    case E_V8QImode:
2846    case E_V4HImode:
2847    case E_V2SImode:
2848    case E_V2SFmode:
2849    case E_V1TImode:
2850    case E_V1DImode:
2851      if (!type || !AGGREGATE_TYPE_P (type))
2852	{
2853	  cum->mmx_words += words;
2854	  cum->mmx_nregs -= 1;
2855	  cum->mmx_regno += 1;
2856	  if (cum->mmx_nregs <= 0)
2857	    {
2858	      cum->mmx_nregs = 0;
2859	      cum->mmx_regno = 0;
2860	    }
2861	}
2862      break;
2863    }
2864  if (error_p)
2865    {
2866      cum->float_in_sse = 0;
2867      error ("calling %qD with SSE calling convention without "
2868	     "SSE/SSE2 enabled", cum->decl);
2869      sorry ("this is a GCC bug that can be worked around by adding "
2870	     "attribute used to function called");
2871    }
2872
2873  return res;
2874}
2875
2876static int
2877function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
2878			 const_tree type, HOST_WIDE_INT words, bool named)
2879{
2880  int int_nregs, sse_nregs;
2881
  /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the
     stack.  */
2883  if (!named && (VALID_AVX512F_REG_MODE (mode)
2884		 || VALID_AVX256_REG_MODE (mode)))
2885    return 0;
2886
2887  if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
2888      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2889    {
2890      cum->nregs -= int_nregs;
2891      cum->sse_nregs -= sse_nregs;
2892      cum->regno += int_nregs;
2893      cum->sse_regno += sse_nregs;
2894      return int_nregs;
2895    }
2896  else
2897    {
2898      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
2899      cum->words = ROUND_UP (cum->words, align);
2900      cum->words += words;
2901      return 0;
2902    }
2903}
2904
2905static int
2906function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
2907			    HOST_WIDE_INT words)
2908{
  /* Otherwise, this should be passed indirectly.  */
2910  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
2911
2912  cum->words += words;
2913  if (cum->nregs > 0)
2914    {
2915      cum->nregs -= 1;
2916      cum->regno += 1;
2917      return 1;
2918    }
2919  return 0;
2920}
2921
2922/* Update the data in CUM to advance over argument ARG.  */
2923
2924static void
2925ix86_function_arg_advance (cumulative_args_t cum_v,
2926			   const function_arg_info &arg)
2927{
2928  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2929  machine_mode mode = arg.mode;
2930  HOST_WIDE_INT bytes, words;
2931  int nregs;
2932
  /* The argument of an interrupt handler is a special case and is
     handled in ix86_function_arg.  */
2935  if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
2936    return;
2937
2938  bytes = arg.promoted_size_in_bytes ();
2939  words = CEIL (bytes, UNITS_PER_WORD);
2940
2941  if (arg.type)
2942    mode = type_natural_mode (arg.type, NULL, false);
2943
2944  if (TARGET_64BIT)
2945    {
2946      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
2947
2948      if (call_abi == MS_ABI)
2949	nregs = function_arg_advance_ms_64 (cum, bytes, words);
2950      else
2951	nregs = function_arg_advance_64 (cum, mode, arg.type, words,
2952					 arg.named);
2953    }
2954  else
2955    nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
2956
2957  if (!nregs)
2958    {
2959      /* Track if there are outgoing arguments on stack.  */
2960      if (cum->caller)
2961	cfun->machine->outgoing_args_on_stack = true;
2962    }
2963}
2964
2965/* Define where to put the arguments to a function.
2966   Value is zero to push the argument on the stack,
2967   or a hard register in which to store the argument.
2968
2969   MODE is the argument's machine mode.
2970   TYPE is the data type of the argument (as a tree).
2971    This is null for libcalls where that information may
2972    not be available.
2973   CUM is a variable of type CUMULATIVE_ARGS which gives info about
2974    the preceding args and about the function being called.
2975   NAMED is nonzero if this argument is a named parameter
2976    (otherwise it is an extra parameter matching an ellipsis).  */
2977
2978static rtx
2979function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2980		 machine_mode orig_mode, const_tree type,
2981		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
2982{
2983  bool error_p = false;
2984
2985  /* Avoid the AL settings for the Unix64 ABI.  */
2986  if (mode == VOIDmode)
2987    return constm1_rtx;
2988
2989  if (TARGET_IAMCU)
2990    {
2991      /* Intel MCU psABI passes scalars and aggregates no larger than 8
2992	 bytes in registers.  */
2993      if (!VECTOR_MODE_P (mode) && bytes <= 8)
2994	goto pass_in_reg;
2995      return NULL_RTX;
2996    }
2997
2998  switch (mode)
2999    {
3000    default:
3001      break;
3002
3003    case E_BLKmode:
3004      if (bytes < 0)
3005	break;
3006      /* FALLTHRU */
3007    case E_DImode:
3008    case E_SImode:
3009    case E_HImode:
3010    case E_QImode:
3011pass_in_reg:
3012      if (words <= cum->nregs)
3013	{
3014	  int regno = cum->regno;
3015
	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if they aren't
	     aggregate types.  */
3019	  if (cum->fastcall)
3020	    {
3021	      if (mode == BLKmode
3022		  || mode == DImode
3023		  || (type && AGGREGATE_TYPE_P (type)))
3024	        break;
3025
	      /* ECX, not EAX, is the first allocated register.  */
3027	      if (regno == AX_REG)
3028		regno = CX_REG;
3029	    }
3030	  return gen_rtx_REG (mode, regno);
3031	}
3032      break;
3033
3034    case E_DFmode:
3035      if (cum->float_in_sse == -1)
3036	error_p = true;
3037      if (cum->float_in_sse < 2)
3038	break;
3039      /* FALLTHRU */
3040    case E_SFmode:
3041      if (cum->float_in_sse == -1)
3042	error_p = true;
3043      if (cum->float_in_sse < 1)
3044	break;
3045      /* FALLTHRU */
3046    case E_TImode:
      /* In 32-bit mode, we pass TImode in XMM registers.  */
3048    case E_V16QImode:
3049    case E_V8HImode:
3050    case E_V4SImode:
3051    case E_V2DImode:
3052    case E_V4SFmode:
3053    case E_V2DFmode:
3054      if (!type || !AGGREGATE_TYPE_P (type))
3055	{
3056	  if (cum->sse_nregs)
3057	    return gen_reg_or_parallel (mode, orig_mode,
3058				        cum->sse_regno + FIRST_SSE_REG);
3059	}
3060      break;
3061
3062    case E_OImode:
3063    case E_XImode:
3064      /* OImode and XImode shouldn't be used directly.  */
3065      gcc_unreachable ();
3066
3067    case E_V64QImode:
3068    case E_V32HImode:
3069    case E_V16SImode:
3070    case E_V8DImode:
3071    case E_V16SFmode:
3072    case E_V8DFmode:
3073    case E_V8SFmode:
3074    case E_V8SImode:
3075    case E_V32QImode:
3076    case E_V16HImode:
3077    case E_V4DFmode:
3078    case E_V4DImode:
3079      if (!type || !AGGREGATE_TYPE_P (type))
3080	{
3081	  if (cum->sse_nregs)
3082	    return gen_reg_or_parallel (mode, orig_mode,
3083				        cum->sse_regno + FIRST_SSE_REG);
3084	}
3085      break;
3086
3087    case E_V8QImode:
3088    case E_V4HImode:
3089    case E_V2SImode:
3090    case E_V2SFmode:
3091    case E_V1TImode:
3092    case E_V1DImode:
3093      if (!type || !AGGREGATE_TYPE_P (type))
3094	{
3095	  if (cum->mmx_nregs)
3096	    return gen_reg_or_parallel (mode, orig_mode,
3097				        cum->mmx_regno + FIRST_MMX_REG);
3098	}
3099      break;
3100    }
3101  if (error_p)
3102    {
3103      cum->float_in_sse = 0;
3104      error ("calling %qD with SSE calling convention without "
3105	     "SSE/SSE2 enabled", cum->decl);
3106      sorry ("this is a GCC bug that can be worked around by adding "
3107	     "attribute used to function called");
3108    }
3109
3110  return NULL_RTX;
3111}
3112
3113static rtx
3114function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3115		 machine_mode orig_mode, const_tree type, bool named)
3116{
  /* Handle the hidden AL argument containing the number of vector
     registers used for varargs x86-64 functions.  */
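  /* (Per the SysV x86-64 ABI, %al gives an upper bound on how many SSE
     registers carry arguments in the call; a varargs call that passes no
     floating-point or vector arguments simply uses 0.)  */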
3119  if (mode == VOIDmode)
3120    return GEN_INT (cum->maybe_vaarg
3121		    ? (cum->sse_nregs < 0
3122		       ? X86_64_SSE_REGPARM_MAX
3123		       : cum->sse_regno)
3124		    : -1);
3125
3126  switch (mode)
3127    {
3128    default:
3129      break;
3130
3131    case E_V8SFmode:
3132    case E_V8SImode:
3133    case E_V32QImode:
3134    case E_V16HImode:
3135    case E_V4DFmode:
3136    case E_V4DImode:
3137    case E_V16SFmode:
3138    case E_V16SImode:
3139    case E_V64QImode:
3140    case E_V32HImode:
3141    case E_V8DFmode:
3142    case E_V8DImode:
      /* Unnamed 256-bit and 512-bit vector mode parameters are passed on
	 the stack.  */
3144      if (!named)
3145	return NULL;
3146      break;
3147    }
3148
3149  return construct_container (mode, orig_mode, type, 0, cum->nregs,
3150			      cum->sse_nregs,
3151			      &x86_64_int_parameter_registers [cum->regno],
3152			      cum->sse_regno);
3153}
3154
3155static rtx
3156function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3157		    machine_mode orig_mode, bool named, const_tree type,
3158		    HOST_WIDE_INT bytes)
3159{
3160  unsigned int regno;
3161
  /* We need to add a clobber for MS_ABI -> SYSV ABI calls in expand_call.
     We use the value -2 to specify that the current function call is MS ABI.  */
3164  if (mode == VOIDmode)
3165    return GEN_INT (-2);
3166
3167  /* If we've run out of registers, it goes on the stack.  */
3168  if (cum->nregs == 0)
3169    return NULL_RTX;
3170
3171  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3172
3173  /* Only floating point modes are passed in anything but integer regs.  */
3174  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3175    {
3176      if (named)
3177	{
3178	  if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
3179	    regno = cum->regno + FIRST_SSE_REG;
3180	}
3181      else
3182	{
3183	  rtx t1, t2;
3184
3185	  /* Unnamed floating parameters are passed in both the
3186	     SSE and integer registers.  */
3187	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3188	  t2 = gen_rtx_REG (mode, regno);
3189	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3190	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3191	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3192	}
3193    }
  /* Handle aggregate types passed in a register.  */
3195  if (orig_mode == BLKmode)
3196    {
3197      if (bytes > 0 && bytes <= 8)
3198        mode = (bytes > 4 ? DImode : SImode);
3199      if (mode == BLKmode)
3200        mode = DImode;
3201    }
3202
3203  return gen_reg_or_parallel (mode, orig_mode, regno);
3204}
3205
3206/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.
3208
3209   ARG describes the argument while CUM gives information about the
3210   preceding args and about the function being called.  */
3211
3212static rtx
3213ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
3214{
3215  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3216  machine_mode mode = arg.mode;
3217  HOST_WIDE_INT bytes, words;
3218  rtx reg;
3219
3220  if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3221    {
3222      gcc_assert (arg.type != NULL_TREE);
3223      if (POINTER_TYPE_P (arg.type))
3224	{
3225	  /* This is the pointer argument.  */
3226	  gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
3227	  /* It is at -WORD(AP) in the current frame in interrupt and
3228	     exception handlers.  */
3229	  reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
3230	}
3231      else
3232	{
3233	  gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
3234		      && TREE_CODE (arg.type) == INTEGER_TYPE
3235		      && TYPE_MODE (arg.type) == word_mode);
3236	  /* The error code is the word-mode integer argument at
3237	     -2 * WORD(AP) in the current frame of the exception
3238	     handler.  */
3239	  reg = gen_rtx_MEM (word_mode,
3240			     plus_constant (Pmode,
3241					    arg_pointer_rtx,
3242					    -2 * UNITS_PER_WORD));
3243	}
3244      return reg;
3245    }
3246
3247  bytes = arg.promoted_size_in_bytes ();
3248  words = CEIL (bytes, UNITS_PER_WORD);
3249
3250  /* To simplify the code below, represent vector types with a vector mode
3251     even if MMX/SSE are not active.  */
3252  if (arg.type && TREE_CODE (arg.type) == VECTOR_TYPE)
3253    mode = type_natural_mode (arg.type, cum, false);
3254
3255  if (TARGET_64BIT)
3256    {
3257      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3258
3259      if (call_abi == MS_ABI)
3260	reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
3261				  arg.type, bytes);
3262      else
3263	reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
3264    }
3265  else
3266    reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
3267
3268  /* Track if there are outgoing arguments on stack.  */
3269  if (reg == NULL_RTX && cum->caller)
3270    cfun->machine->outgoing_args_on_stack = true;
3271
3272  return reg;
3273}
3274
/* Return true when an argument must be passed by reference.  If true
   for an argument, a copy of that argument is made in memory and a
   pointer to the argument is passed instead of the argument itself.
   The pointer is passed in whatever way is appropriate for passing a
   pointer to that type.  */
3280
3281static bool
3282ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
3283{
3284  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3285
3286  if (TARGET_64BIT)
3287    {
3288      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3289
3290      /* See Windows x64 Software Convention.  */
3291      if (call_abi == MS_ABI)
3292	{
3293	  HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
3294
3295	  if (tree type = arg.type)
3296	    {
3297	      /* Arrays are passed by reference.  */
3298	      if (TREE_CODE (type) == ARRAY_TYPE)
3299		return true;
3300
3301	      if (RECORD_OR_UNION_TYPE_P (type))
3302		{
3303		  /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3304		     are passed by reference.  */
3305		  msize = int_size_in_bytes (type);
3306		}
3307	    }
3308
3309	  /* __m128 is passed by reference.  */
3310	  return msize != 1 && msize != 2 && msize != 4 && msize != 8;
3311	}
3312      else if (arg.type && int_size_in_bytes (arg.type) == -1)
3313	return true;
3314    }
3315
3316  return false;
3317}
3318
3319/* Return true when TYPE should be 128bit aligned for 32bit argument
3320   passing ABI.  XXX: This function is obsolete and is only used for
3321   checking psABI compatibility with previous versions of GCC.  */
3322
3323static bool
3324ix86_compat_aligned_value_p (const_tree type)
3325{
3326  machine_mode mode = TYPE_MODE (type);
3327  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
3328       || mode == TDmode
3329       || mode == TFmode
3330       || mode == TCmode)
3331      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3332    return true;
3333  if (TYPE_ALIGN (type) < 128)
3334    return false;
3335
3336  if (AGGREGATE_TYPE_P (type))
3337    {
3338      /* Walk the aggregates recursively.  */
3339      switch (TREE_CODE (type))
3340	{
3341	case RECORD_TYPE:
3342	case UNION_TYPE:
3343	case QUAL_UNION_TYPE:
3344	  {
3345	    tree field;
3346
3347	    /* Walk all the structure fields.  */
3348	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3349	      {
3350		if (TREE_CODE (field) == FIELD_DECL
3351		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
3352		  return true;
3353	      }
3354	    break;
3355	  }
3356
3357	case ARRAY_TYPE:
	  /* Just for use if some languages pass arrays by value.  */
3359	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
3360	    return true;
3361	  break;
3362
3363	default:
3364	  gcc_unreachable ();
3365	}
3366    }
3367  return false;
3368}
3369
3370/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3371   XXX: This function is obsolete and is only used for checking psABI
3372   compatibility with previous versions of GCC.  */
3373
3374static unsigned int
3375ix86_compat_function_arg_boundary (machine_mode mode,
3376				   const_tree type, unsigned int align)
3377{
  /* In 32-bit mode, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
3380  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
3381    {
3382      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
3383	 make an exception for SSE modes since these require 128bit
3384	 alignment.
3385
3386	 The handling here differs from field_alignment.  ICC aligns MMX
3387	 arguments to 4 byte boundaries, while structure fields are aligned
3388	 to 8 byte boundaries.  */
3389      if (!type)
3390	{
3391	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
3392	    align = PARM_BOUNDARY;
3393	}
3394      else
3395	{
3396	  if (!ix86_compat_aligned_value_p (type))
3397	    align = PARM_BOUNDARY;
3398	}
3399    }
3400  if (align > BIGGEST_ALIGNMENT)
3401    align = BIGGEST_ALIGNMENT;
3402  return align;
3403}
3404
3405/* Return true when TYPE should be 128bit aligned for 32bit argument
3406   passing ABI.  */
3407
3408static bool
3409ix86_contains_aligned_value_p (const_tree type)
3410{
3411  machine_mode mode = TYPE_MODE (type);
3412
3413  if (mode == XFmode || mode == XCmode)
3414    return false;
3415
3416  if (TYPE_ALIGN (type) < 128)
3417    return false;
3418
3419  if (AGGREGATE_TYPE_P (type))
3420    {
3421      /* Walk the aggregates recursively.  */
3422      switch (TREE_CODE (type))
3423	{
3424	case RECORD_TYPE:
3425	case UNION_TYPE:
3426	case QUAL_UNION_TYPE:
3427	  {
3428	    tree field;
3429
3430	    /* Walk all the structure fields.  */
3431	    for (field = TYPE_FIELDS (type);
3432		 field;
3433		 field = DECL_CHAIN (field))
3434	      {
3435		if (TREE_CODE (field) == FIELD_DECL
3436		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
3437		  return true;
3438	      }
3439	    break;
3440	  }
3441
3442	case ARRAY_TYPE:
	  /* Just for use if some languages pass arrays by value.  */
3444	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
3445	    return true;
3446	  break;
3447
3448	default:
3449	  gcc_unreachable ();
3450	}
3451    }
3452  else
3453    return TYPE_ALIGN (type) >= 128;
3454
3455  return false;
3456}
3457
3458/* Gives the alignment boundary, in bits, of an argument with the
3459   specified mode and type.  */
3460
3461static unsigned int
3462ix86_function_arg_boundary (machine_mode mode, const_tree type)
3463{
3464  unsigned int align;
3465  if (type)
3466    {
      /* Since the main variant type is used for the call, convert TYPE to
	 its main variant.  */
3469      type = TYPE_MAIN_VARIANT (type);
3470      align = TYPE_ALIGN (type);
3471      if (TYPE_EMPTY_P (type))
3472	return PARM_BOUNDARY;
3473    }
3474  else
3475    align = GET_MODE_ALIGNMENT (mode);
3476  if (align < PARM_BOUNDARY)
3477    align = PARM_BOUNDARY;
3478  else
3479    {
3480      static bool warned;
3481      unsigned int saved_align = align;
3482
3483      if (!TARGET_64BIT)
3484	{
3485	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
3486	  if (!type)
3487	    {
3488	      if (mode == XFmode || mode == XCmode)
3489		align = PARM_BOUNDARY;
3490	    }
3491	  else if (!ix86_contains_aligned_value_p (type))
3492	    align = PARM_BOUNDARY;
3493
3494	  if (align < 128)
3495	    align = PARM_BOUNDARY;
3496	}
3497
3498      if (warn_psabi
3499	  && !warned
3500	  && align != ix86_compat_function_arg_boundary (mode, type,
3501							 saved_align))
3502	{
3503	  warned = true;
3504	  inform (input_location,
3505		  "the ABI for passing parameters with %d-byte"
3506		  " alignment has changed in GCC 4.6",
3507		  align / BITS_PER_UNIT);
3508	}
3509    }
3510
3511  return align;
3512}
3513
/* Return true if REGNO is a possible register number for a function value.  */
3515
3516static bool
3517ix86_function_value_regno_p (const unsigned int regno)
3518{
3519  switch (regno)
3520    {
3521    case AX_REG:
3522      return true;
3523    case DX_REG:
3524      return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3525    case DI_REG:
3526    case SI_REG:
3527      return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
3528
      /* Complex values are returned in the %st(0)/%st(1) pair.  */
    case ST0_REG:
    case ST1_REG:
      /* TODO: This should depend on the current function's ABI, but
	 builtins.c would then need updating.  Therefore we use the
	 default ABI.  */
3535      if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3536	return false;
3537      return TARGET_FLOAT_RETURNS_IN_80387;
3538
      /* Complex values are returned in the %xmm0/%xmm1 pair.  */
3540    case XMM0_REG:
3541    case XMM1_REG:
3542      return TARGET_SSE;
3543
3544    case MM0_REG:
3545      if (TARGET_MACHO || TARGET_64BIT)
3546	return false;
3547      return TARGET_MMX;
3548    }
3549
3550  return false;
3551}
3552
3553/* Define how to find the value returned by a function.
3554   VALTYPE is the data type of the value (as a tree).
3555   If the precise function being called is known, FUNC is its FUNCTION_DECL;
3556   otherwise, FUNC is 0.  */
3557
3558static rtx
3559function_value_32 (machine_mode orig_mode, machine_mode mode,
3560		   const_tree fntype, const_tree fn)
3561{
3562  unsigned int regno;
3563
3564  /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3565     we normally prevent this case when mmx is not available.  However
3566     some ABIs may require the result to be returned like DImode.  */
3567  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3568    regno = FIRST_MMX_REG;
3569
3570  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
3571     we prevent this case when sse is not available.  However some ABIs
3572     may require the result to be returned like integer TImode.  */
3573  else if (mode == TImode
3574	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3575    regno = FIRST_SSE_REG;
3576
3577  /* 32-byte vector modes in %ymm0.   */
3578  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
3579    regno = FIRST_SSE_REG;
3580
3581  /* 64-byte vector modes in %zmm0.   */
3582  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
3583    regno = FIRST_SSE_REG;
3584
3585  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
3586  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
3587    regno = FIRST_FLOAT_REG;
3588  else
3589    /* Most things go in %eax.  */
3590    regno = AX_REG;
3591
3592  /* Override FP return register with %xmm0 for local functions when
3593     SSE math is enabled or for functions with sseregparm attribute.  */
3594  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
3595    {
3596      int sse_level = ix86_function_sseregparm (fntype, fn, false);
3597      if (sse_level == -1)
3598	{
3599	  error ("calling %qD with SSE calling convention without "
3600		 "SSE/SSE2 enabled", fn);
3601	  sorry ("this is a GCC bug that can be worked around by adding "
3602		 "attribute used to function called");
3603	}
3604      else if ((sse_level >= 1 && mode == SFmode)
3605	       || (sse_level == 2 && mode == DFmode))
3606	regno = FIRST_SSE_REG;
3607    }
3608
3609  /* OImode shouldn't be used directly.  */
3610  gcc_assert (mode != OImode);
3611
3612  return gen_rtx_REG (orig_mode, regno);
3613}
3614
3615static rtx
3616function_value_64 (machine_mode orig_mode, machine_mode mode,
3617		   const_tree valtype)
3618{
3619  rtx ret;
3620
3621  /* Handle libcalls, which don't provide a type node.  */
3622  if (valtype == NULL)
3623    {
3624      unsigned int regno;
3625
3626      switch (mode)
3627	{
3628	case E_SFmode:
3629	case E_SCmode:
3630	case E_DFmode:
3631	case E_DCmode:
3632	case E_TFmode:
3633	case E_SDmode:
3634	case E_DDmode:
3635	case E_TDmode:
3636	  regno = FIRST_SSE_REG;
3637	  break;
3638	case E_XFmode:
3639	case E_XCmode:
3640	  regno = FIRST_FLOAT_REG;
3641	  break;
3642	case E_TCmode:
3643	  return NULL;
3644	default:
3645	  regno = AX_REG;
3646	}
3647
3648      return gen_rtx_REG (mode, regno);
3649    }
3650  else if (POINTER_TYPE_P (valtype))
3651    {
3652      /* Pointers are always returned in word_mode.  */
3653      mode = word_mode;
3654    }
3655
3656  ret = construct_container (mode, orig_mode, valtype, 1,
3657			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
3658			     x86_64_int_return_registers, 0);
3659
  /* For zero-sized structures, construct_container returns NULL, but we
     need to keep the rest of the compiler happy by returning a meaningful
     value.  */
3662  if (!ret)
3663    ret = gen_rtx_REG (orig_mode, AX_REG);
3664
3665  return ret;
3666}
3667
3668static rtx
3669function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
3670		      const_tree fntype, const_tree fn, const_tree valtype)
3671{
3672  unsigned int regno;
3673
3674  /* Floating point return values in %st(0)
3675     (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes).  */
  if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
      && (GET_MODE_SIZE (mode) > 8
	  || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
    {
      regno = FIRST_FLOAT_REG;
      return gen_rtx_REG (orig_mode, regno);
    }
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
3685}
3686
3687static rtx
3688function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
3689		      const_tree valtype)
3690{
3691  unsigned int regno = AX_REG;
3692
3693  if (TARGET_SSE)
3694    {
3695      switch (GET_MODE_SIZE (mode))
3696	{
3697	case 16:
3698	  if (valtype != NULL_TREE
3699	      && !VECTOR_INTEGER_TYPE_P (valtype)
3701	      && !INTEGRAL_TYPE_P (valtype)
3702	      && !VECTOR_FLOAT_TYPE_P (valtype))
3703	    break;
3704	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
3705	      && !COMPLEX_MODE_P (mode))
3706	    regno = FIRST_SSE_REG;
3707	  break;
3708	case 8:
3709	case 4:
3710	  if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
3711	    break;
3712	  if (mode == SFmode || mode == DFmode)
3713	    regno = FIRST_SSE_REG;
3714	  break;
3715	default:
3716	  break;
3717        }
3718    }
3719  return gen_rtx_REG (orig_mode, regno);
3720}
3721
3722static rtx
3723ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
3724		       machine_mode orig_mode, machine_mode mode)
3725{
3726  const_tree fn, fntype;
3727
3728  fn = NULL_TREE;
3729  if (fntype_or_decl && DECL_P (fntype_or_decl))
3730    fn = fntype_or_decl;
3731  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3732
3733  if (ix86_function_type_abi (fntype) == MS_ABI)
3734    {
3735      if (TARGET_64BIT)
3736	return function_value_ms_64 (orig_mode, mode, valtype);
3737      else
3738	return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
3739    }
3740  else if (TARGET_64BIT)
3741    return function_value_64 (orig_mode, mode, valtype);
3742  else
3743    return function_value_32 (orig_mode, mode, fntype, fn);
3744}
3745
3746static rtx
3747ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
3748{
3749  machine_mode mode, orig_mode;
3750
3751  orig_mode = TYPE_MODE (valtype);
3752  mode = type_natural_mode (valtype, NULL, true);
3753  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
3754}
3755
3756/* Pointer function arguments and return values are promoted to
3757   word_mode for normal functions.  */
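/* This mainly matters for -mx32, where pointers are 32 bits wide but are
   passed and returned zero-extended in 64-bit registers (word_mode is
   DImode while ptr_mode is SImode).  */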
3758
3759static machine_mode
3760ix86_promote_function_mode (const_tree type, machine_mode mode,
3761			    int *punsignedp, const_tree fntype,
3762			    int for_return)
3763{
3764  if (cfun->machine->func_type == TYPE_NORMAL
3765      && type != NULL_TREE
3766      && POINTER_TYPE_P (type))
3767    {
3768      *punsignedp = POINTERS_EXTEND_UNSIGNED;
3769      return word_mode;
3770    }
3771  return default_promote_function_mode (type, mode, punsignedp, fntype,
3772					for_return);
3773}
3774
3775/* Return true if a structure, union or array with MODE containing FIELD
3776   should be accessed using BLKmode.  */
3777
3778static bool
3779ix86_member_type_forces_blk (const_tree field, machine_mode mode)
3780{
3781  /* Union with XFmode must be in BLKmode.  */
3782  return (mode == XFmode
3783	  && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
3784	      || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
3785}
3786
3787rtx
3788ix86_libcall_value (machine_mode mode)
3789{
3790  return ix86_function_value_1 (NULL, NULL, mode, mode);
3791}
3792
3793/* Return true iff type is returned in memory.  */
3794
3795static bool
3796ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
3797{
3798#ifdef SUBTARGET_RETURN_IN_MEMORY
3799  return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
3800#else
3801  const machine_mode mode = type_natural_mode (type, NULL, true);
3802  HOST_WIDE_INT size;
3803
3804  if (TARGET_64BIT)
3805    {
3806      if (ix86_function_type_abi (fntype) == MS_ABI)
3807	{
3808	  size = int_size_in_bytes (type);
3809
3810	  /* __m128 is returned in xmm0.  */
3811	  if ((!type || VECTOR_INTEGER_TYPE_P (type)
3812	       || INTEGRAL_TYPE_P (type)
3813	       || VECTOR_FLOAT_TYPE_P (type))
3814	      && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
3815	      && !COMPLEX_MODE_P (mode)
3816	      && (GET_MODE_SIZE (mode) == 16 || size == 16))
3817	    return false;
3818
	  /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes.  */
3820	  return size != 1 && size != 2 && size != 4 && size != 8;
3821	}
3822      else
3823	{
3824	  int needed_intregs, needed_sseregs;
3825
3826	  return examine_argument (mode, type, 1,
3827				   &needed_intregs, &needed_sseregs);
3828	}
3829    }
3830  else
3831    {
3832      size = int_size_in_bytes (type);
3833
3834      /* Intel MCU psABI returns scalars and aggregates no larger than 8
3835	 bytes in registers.  */
3836      if (TARGET_IAMCU)
3837	return VECTOR_MODE_P (mode) || size < 0 || size > 8;
3838
3839      if (mode == BLKmode)
3840	return true;
3841
3842      if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3843	return false;
3844
3845      if (VECTOR_MODE_P (mode) || mode == TImode)
3846	{
3847	  /* User-created vectors small enough to fit in EAX.  */
3848	  if (size < 8)
3849	    return false;
3850
	  /* Unless the ABI prescribes otherwise,
	     MMX/3dNow values are returned in MM0 if available.  */
3853
3854	  if (size == 8)
3855	    return TARGET_VECT8_RETURNS || !TARGET_MMX;
3856
3857	  /* SSE values are returned in XMM0 if available.  */
3858	  if (size == 16)
3859	    return !TARGET_SSE;
3860
3861	  /* AVX values are returned in YMM0 if available.  */
3862	  if (size == 32)
3863	    return !TARGET_AVX;
3864
3865	  /* AVX512F values are returned in ZMM0 if available.  */
3866	  if (size == 64)
3867	    return !TARGET_AVX512F;
3868	}
3869
3870      if (mode == XFmode)
3871	return false;
3872
3873      if (size > 12)
3874	return true;
3875
3876      /* OImode shouldn't be used directly.  */
3877      gcc_assert (mode != OImode);
3878
3879      return false;
3880    }
3881#endif
3882}
3883
3884
3885/* Create the va_list data type.  */
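/* As a C-level sketch, the 64-bit SysV va_list built below corresponds to:

     struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     };
     typedef struct __va_list_tag va_list[1];

   i.e. a record wrapped in a one-element array type.  */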
3886
3887static tree
3888ix86_build_builtin_va_list_64 (void)
3889{
3890  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3891
3892  record = lang_hooks.types.make_type (RECORD_TYPE);
3893  type_decl = build_decl (BUILTINS_LOCATION,
3894			  TYPE_DECL, get_identifier ("__va_list_tag"), record);
3895
3896  f_gpr = build_decl (BUILTINS_LOCATION,
3897		      FIELD_DECL, get_identifier ("gp_offset"),
3898		      unsigned_type_node);
3899  f_fpr = build_decl (BUILTINS_LOCATION,
3900		      FIELD_DECL, get_identifier ("fp_offset"),
3901		      unsigned_type_node);
3902  f_ovf = build_decl (BUILTINS_LOCATION,
3903		      FIELD_DECL, get_identifier ("overflow_arg_area"),
3904		      ptr_type_node);
3905  f_sav = build_decl (BUILTINS_LOCATION,
3906		      FIELD_DECL, get_identifier ("reg_save_area"),
3907		      ptr_type_node);
3908
3909  va_list_gpr_counter_field = f_gpr;
3910  va_list_fpr_counter_field = f_fpr;
3911
3912  DECL_FIELD_CONTEXT (f_gpr) = record;
3913  DECL_FIELD_CONTEXT (f_fpr) = record;
3914  DECL_FIELD_CONTEXT (f_ovf) = record;
3915  DECL_FIELD_CONTEXT (f_sav) = record;
3916
3917  TYPE_STUB_DECL (record) = type_decl;
3918  TYPE_NAME (record) = type_decl;
3919  TYPE_FIELDS (record) = f_gpr;
3920  DECL_CHAIN (f_gpr) = f_fpr;
3921  DECL_CHAIN (f_fpr) = f_ovf;
3922  DECL_CHAIN (f_ovf) = f_sav;
3923
3924  layout_type (record);
3925
3926  TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
3927					NULL_TREE, TYPE_ATTRIBUTES (record));
3928
3929  /* The correct type is an array type of one element.  */
3930  return build_array_type (record, build_index_type (size_zero_node));
3931}
3932
/* Set up the builtin va_list data type and, for 64-bit, the additional
   calling-convention-specific va_list data types.  */
3935
3936static tree
3937ix86_build_builtin_va_list (void)
3938{
3939  if (TARGET_64BIT)
3940    {
3941      /* Initialize ABI specific va_list builtin types.
3942
3943	 In lto1, we can encounter two va_list types:
3944	 - one as a result of the type-merge across TUs, and
3945	 - the one constructed here.
3946	 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
3947	 a type identity check in canonical_va_list_type based on
3948	 TYPE_MAIN_VARIANT (which we used to have) will not work.
3949	 Instead, we tag each va_list_type_node with its unique attribute, and
3950	 look for the attribute in the type identity check in
3951	 canonical_va_list_type.
3952
	 Tagging sysv_va_list_type_node directly with the attribute is
	 problematic since it's an array of one record, which will decay into
	 a pointer to the record when used as a parameter (see build_va_arg
	 comments for an example), dropping the attribute in the process.  So
	 we tag the record instead.  */
3958
3959      /* For SYSV_ABI we use an array of one record.  */
3960      sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
3961
3962      /* For MS_ABI we use plain pointer to argument area.  */
3963      tree char_ptr_type = build_pointer_type (char_type_node);
3964      tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
3965			     TYPE_ATTRIBUTES (char_ptr_type));
3966      ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
3967
3968      return ((ix86_abi == MS_ABI)
3969	      ? ms_va_list_type_node
3970	      : sysv_va_list_type_node);
3971    }
3972  else
3973    {
3974      /* For i386 we use plain pointer to argument area.  */
3975      return build_pointer_type (char_type_node);
3976    }
3977}
3978
3979/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
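/* Rough layout of the register save area as addressed via reg_save_area
   (see also ix86_va_start, which computes gp_offset and fp_offset against
   this layout):

     offset 0                      : incoming GPR arguments, 8 bytes each
     offset 8 * X86_64_REGPARM_MAX : incoming SSE arguments, 16 bytes each

   Only the registers that may still hold unnamed arguments are spilled.  */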
3980
3981static void
3982setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
3983{
3984  rtx save_area, mem;
3985  alias_set_type set;
3986  int i, max;
3987
3988  /* GPR size of varargs save area.  */
3989  if (cfun->va_list_gpr_size)
3990    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
3991  else
3992    ix86_varargs_gpr_size = 0;
3993
3994  /* FPR size of varargs save area.  We don't need it if we don't pass
3995     anything in SSE registers.  */
3996  if (TARGET_SSE && cfun->va_list_fpr_size)
3997    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
3998  else
3999    ix86_varargs_fpr_size = 0;
4000
4001  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
4002    return;
4003
4004  save_area = frame_pointer_rtx;
4005  set = get_varargs_alias_set ();
4006
4007  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4008  if (max > X86_64_REGPARM_MAX)
4009    max = X86_64_REGPARM_MAX;
4010
4011  for (i = cum->regno; i < max; i++)
4012    {
4013      mem = gen_rtx_MEM (word_mode,
4014			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
4015      MEM_NOTRAP_P (mem) = 1;
4016      set_mem_alias_set (mem, set);
4017      emit_move_insn (mem,
4018		      gen_rtx_REG (word_mode,
4019				   x86_64_int_parameter_registers[i]));
4020    }
4021
4022  if (ix86_varargs_fpr_size)
4023    {
4024      machine_mode smode;
4025      rtx_code_label *label;
4026      rtx test;
4027
      /* Now emit code to save SSE registers.  The AX parameter contains the
	 number of SSE parameter registers used to call this function, though
	 all we actually check here is the zero/non-zero status.  */
4031
4032      label = gen_label_rtx ();
4033      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
4034      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
4035				      label));
4036
4037      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4038	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
4039	 be if we could determine the real mode of the data, via a hook
4040	 into pass_stdarg.  Ignore all that for now.  */
4041      smode = V4SFmode;
4042      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
4043	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
4044
4045      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
4046      if (max > X86_64_SSE_REGPARM_MAX)
4047	max = X86_64_SSE_REGPARM_MAX;
4048
4049      for (i = cum->sse_regno; i < max; ++i)
4050	{
4051	  mem = plus_constant (Pmode, save_area,
4052			       i * 16 + ix86_varargs_gpr_size);
4053	  mem = gen_rtx_MEM (smode, mem);
4054	  MEM_NOTRAP_P (mem) = 1;
4055	  set_mem_alias_set (mem, set);
4056	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
4057
4058	  emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
4059	}
4060
4061      emit_label (label);
4062    }
4063}
4064
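/* MS ABI worker: spill the up-to-four register parameters into their home
   slots in the caller-allocated shadow space, so that the plain char *
   va_list can walk every argument contiguously on the stack.  */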
4065static void
4066setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4067{
4068  alias_set_type set = get_varargs_alias_set ();
4069  int i;
4070
  /* Reset to zero, as a SysV va_arg may have been used
     before.  */
4073  ix86_varargs_gpr_size = 0;
4074  ix86_varargs_fpr_size = 0;
4075
4076  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4077    {
4078      rtx reg, mem;
4079
4080      mem = gen_rtx_MEM (Pmode,
4081			 plus_constant (Pmode, virtual_incoming_args_rtx,
4082					i * UNITS_PER_WORD));
4083      MEM_NOTRAP_P (mem) = 1;
4084      set_mem_alias_set (mem, set);
4085
4086      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4087      emit_move_insn (mem, reg);
4088    }
4089}
4090
4091static void
4092ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4093			     const function_arg_info &arg,
4094			     int *, int no_rtl)
4095{
4096  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4097  CUMULATIVE_ARGS next_cum;
4098  tree fntype;
4099
4100  /* This argument doesn't appear to be used anymore.  Which is good,
4101     because the old code here didn't suppress rtl generation.  */
4102  gcc_assert (!no_rtl);
4103
4104  if (!TARGET_64BIT)
4105    return;
4106
4107  fntype = TREE_TYPE (current_function_decl);
4108
4109  /* For varargs, we do not want to skip the dummy va_dcl argument.
4110     For stdargs, we do want to skip the last named argument.  */
4111  next_cum = *cum;
4112  if (stdarg_p (fntype))
4113    ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4114
4115  if (cum->call_abi == MS_ABI)
4116    setup_incoming_varargs_ms_64 (&next_cum);
4117  else
4118    setup_incoming_varargs_64 (&next_cum);
4119}
4120
/* Check whether TYPE is a va_list of the plain char * kind.  */
4122
4123static bool
4124is_va_list_char_pointer (tree type)
4125{
4126  tree canonic;
4127
4128  /* For 32-bit it is always true.  */
4129  if (!TARGET_64BIT)
4130    return true;
4131  canonic = ix86_canonical_va_list_type (type);
4132  return (canonic == ms_va_list_type_node
4133          || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4134}
4135
4136/* Implement va_start.  */
4137
4138static void
4139ix86_va_start (tree valist, rtx nextarg)
4140{
4141  HOST_WIDE_INT words, n_gpr, n_fpr;
4142  tree f_gpr, f_fpr, f_ovf, f_sav;
4143  tree gpr, fpr, ovf, sav, t;
4144  tree type;
4145  rtx ovf_rtx;
4146
4147  if (flag_split_stack
4148      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4149    {
4150      unsigned int scratch_regno;
4151
4152      /* When we are splitting the stack, we can't refer to the stack
4153	 arguments using internal_arg_pointer, because they may be on
4154	 the old stack.  The split stack prologue will arrange to
4155	 leave a pointer to the old stack arguments in a scratch
4156	 register, which we here copy to a pseudo-register.  The split
4157	 stack prologue can't set the pseudo-register directly because
4158	 it (the prologue) runs before any registers have been saved.  */
4159
4160      scratch_regno = split_stack_prologue_scratch_regno ();
4161      if (scratch_regno != INVALID_REGNUM)
4162	{
4163	  rtx reg;
4164	  rtx_insn *seq;
4165
4166	  reg = gen_reg_rtx (Pmode);
4167	  cfun->machine->split_stack_varargs_pointer = reg;
4168
4169	  start_sequence ();
4170	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
4171	  seq = get_insns ();
4172	  end_sequence ();
4173
4174	  push_topmost_sequence ();
4175	  emit_insn_after (seq, entry_of_function ());
4176	  pop_topmost_sequence ();
4177	}
4178    }
4179
4180  /* Only 64bit target needs something special.  */
4181  if (is_va_list_char_pointer (TREE_TYPE (valist)))
4182    {
4183      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4184	std_expand_builtin_va_start (valist, nextarg);
4185      else
4186	{
4187	  rtx va_r, next;
4188
4189	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
4190	  next = expand_binop (ptr_mode, add_optab,
4191			       cfun->machine->split_stack_varargs_pointer,
4192			       crtl->args.arg_offset_rtx,
4193			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
4194	  convert_move (va_r, next, 0);
4195	}
4196      return;
4197    }
4198
4199  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4200  f_fpr = DECL_CHAIN (f_gpr);
4201  f_ovf = DECL_CHAIN (f_fpr);
4202  f_sav = DECL_CHAIN (f_ovf);
4203
4204  valist = build_simple_mem_ref (valist);
4205  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
4206  /* The following should be folded into the MEM_REF offset.  */
4207  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
4208		f_gpr, NULL_TREE);
4209  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
4210		f_fpr, NULL_TREE);
4211  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
4212		f_ovf, NULL_TREE);
4213  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
4214		f_sav, NULL_TREE);
4215
4216  /* Count number of gp and fp argument registers used.  */
4217  words = crtl->args.info.words;
4218  n_gpr = crtl->args.info.regno;
4219  n_fpr = crtl->args.info.sse_regno;
4220
4221  if (cfun->va_list_gpr_size)
4222    {
4223      type = TREE_TYPE (gpr);
4224      t = build2 (MODIFY_EXPR, type,
4225		  gpr, build_int_cst (type, n_gpr * 8));
4226      TREE_SIDE_EFFECTS (t) = 1;
4227      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4228    }
4229
4230  if (TARGET_SSE && cfun->va_list_fpr_size)
4231    {
4232      type = TREE_TYPE (fpr);
4233      t = build2 (MODIFY_EXPR, type, fpr,
4234		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
4235      TREE_SIDE_EFFECTS (t) = 1;
4236      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4237    }
4238
4239  /* Find the overflow area.  */
4240  type = TREE_TYPE (ovf);
4241  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4242    ovf_rtx = crtl->args.internal_arg_pointer;
4243  else
4244    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
4245  t = make_tree (type, ovf_rtx);
4246  if (words != 0)
4247    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
4248
4249  t = build2 (MODIFY_EXPR, type, ovf, t);
4250  TREE_SIDE_EFFECTS (t) = 1;
4251  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4252
4253  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
4254    {
      /* Find the register save area.
	 The function prologue saves it right above the stack frame.  */
4257      type = TREE_TYPE (sav);
4258      t = make_tree (type, frame_pointer_rtx);
4259      if (!ix86_varargs_gpr_size)
4260	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
4261
4262      t = build2 (MODIFY_EXPR, type, sav, t);
4263      TREE_SIDE_EFFECTS (t) = 1;
4264      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4265    }
4266}
4267
4268/* Implement va_arg.  */
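/* For a value that can live (partly) in registers, the gimple emitted below
   corresponds roughly to this sketch (simplified; when the value needs a mix
   of register classes, or stricter alignment, it is first copied into a
   temporary):

     if (gp_offset > 8 * (X86_64_REGPARM_MAX - needed_intregs)
	 || fp_offset > 8 * X86_64_REGPARM_MAX
			+ 16 * (X86_64_SSE_REGPARM_MAX - needed_sseregs))
       goto overflow;
     addr = reg_save_area + gp_offset;   (or + fp_offset for SSE classes)
     gp_offset += 8 * needed_intregs;
     fp_offset += 16 * needed_sseregs;
     goto done;
   overflow:
     addr = align (overflow_arg_area, arg_boundary);
     overflow_arg_area = addr + rounded size of the value;
   done:
     result = *(type *) addr;  */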
4269
4270static tree
4271ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4272		      gimple_seq *post_p)
4273{
4274  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4275  tree f_gpr, f_fpr, f_ovf, f_sav;
4276  tree gpr, fpr, ovf, sav, t;
4277  int size, rsize;
4278  tree lab_false, lab_over = NULL_TREE;
4279  tree addr, t2;
4280  rtx container;
4281  int indirect_p = 0;
4282  tree ptrtype;
4283  machine_mode nat_mode;
4284  unsigned int arg_boundary;
4285  unsigned int type_align;
4286
4287  /* Only 64bit target needs something special.  */
4288  if (is_va_list_char_pointer (TREE_TYPE (valist)))
4289    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4290
4291  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4292  f_fpr = DECL_CHAIN (f_gpr);
4293  f_ovf = DECL_CHAIN (f_fpr);
4294  f_sav = DECL_CHAIN (f_ovf);
4295
4296  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
4297		valist, f_gpr, NULL_TREE);
4298
4299  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4300  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4301  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4302
4303  indirect_p = pass_va_arg_by_reference (type);
4304  if (indirect_p)
4305    type = build_pointer_type (type);
4306  size = arg_int_size_in_bytes (type);
4307  rsize = CEIL (size, UNITS_PER_WORD);
4308
4309  nat_mode = type_natural_mode (type, NULL, false);
4310  switch (nat_mode)
4311    {
4312    case E_V8SFmode:
4313    case E_V8SImode:
4314    case E_V32QImode:
4315    case E_V16HImode:
4316    case E_V4DFmode:
4317    case E_V4DImode:
4318    case E_V16SFmode:
4319    case E_V16SImode:
4320    case E_V64QImode:
4321    case E_V32HImode:
4322    case E_V8DFmode:
4323    case E_V8DImode:
      /* Unnamed 256-bit and 512-bit vector mode parameters are passed
	 on the stack.  */
4325      if (!TARGET_64BIT_MS_ABI)
4326	{
4327	  container = NULL;
4328	  break;
4329	}
4330      /* FALLTHRU */
4331
4332    default:
4333      container = construct_container (nat_mode, TYPE_MODE (type),
4334				       type, 0, X86_64_REGPARM_MAX,
4335				       X86_64_SSE_REGPARM_MAX, intreg,
4336				       0);
4337      break;
4338    }
4339
4340  /* Pull the value out of the saved registers.  */
4341
4342  addr = create_tmp_var (ptr_type_node, "addr");
4343  type_align = TYPE_ALIGN (type);
4344
4345  if (container)
4346    {
4347      int needed_intregs, needed_sseregs;
4348      bool need_temp;
4349      tree int_addr, sse_addr;
4350
4351      lab_false = create_artificial_label (UNKNOWN_LOCATION);
4352      lab_over = create_artificial_label (UNKNOWN_LOCATION);
4353
4354      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4355
4356      need_temp = (!REG_P (container)
4357		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
4358		       || TYPE_ALIGN (type) > 128));
4359
      /* If we are passing a structure, verify that it is a consecutive block
	 in the register save area.  If not, we need to do moves.  */
4362      if (!need_temp && !REG_P (container))
4363	{
	  /* Verify that all registers are strictly consecutive.  */
4365	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4366	    {
4367	      int i;
4368
4369	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4370		{
4371		  rtx slot = XVECEXP (container, 0, i);
4372		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4373		      || INTVAL (XEXP (slot, 1)) != i * 16)
4374		    need_temp = true;
4375		}
4376	    }
4377	  else
4378	    {
4379	      int i;
4380
4381	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4382		{
4383		  rtx slot = XVECEXP (container, 0, i);
4384		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4385		      || INTVAL (XEXP (slot, 1)) != i * 8)
4386		    need_temp = true;
4387		}
4388	    }
4389	}
4390      if (!need_temp)
4391	{
4392	  int_addr = addr;
4393	  sse_addr = addr;
4394	}
4395      else
4396	{
4397	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
4398	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4399	}
4400
4401      /* First ensure that we fit completely in registers.  */
4402      if (needed_intregs)
4403	{
4404	  t = build_int_cst (TREE_TYPE (gpr),
4405			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
4406	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4407	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4408	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4409	  gimplify_and_add (t, pre_p);
4410	}
4411      if (needed_sseregs)
4412	{
4413	  t = build_int_cst (TREE_TYPE (fpr),
4414			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4415			     + X86_64_REGPARM_MAX * 8);
4416	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4417	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4418	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4419	  gimplify_and_add (t, pre_p);
4420	}
4421
4422      /* Compute index to start of area used for integer regs.  */
4423      if (needed_intregs)
4424	{
4425	  /* int_addr = gpr + sav; */
4426	  t = fold_build_pointer_plus (sav, gpr);
4427	  gimplify_assign (int_addr, t, pre_p);
4428	}
4429      if (needed_sseregs)
4430	{
4431	  /* sse_addr = fpr + sav; */
4432	  t = fold_build_pointer_plus (sav, fpr);
4433	  gimplify_assign (sse_addr, t, pre_p);
4434	}
4435      if (need_temp)
4436	{
4437	  int i, prev_size = 0;
4438	  tree temp = create_tmp_var (type, "va_arg_tmp");
4439
4440	  /* addr = &temp; */
4441	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4442	  gimplify_assign (addr, t, pre_p);
4443
4444	  for (i = 0; i < XVECLEN (container, 0); i++)
4445	    {
4446	      rtx slot = XVECEXP (container, 0, i);
4447	      rtx reg = XEXP (slot, 0);
4448	      machine_mode mode = GET_MODE (reg);
4449	      tree piece_type;
4450	      tree addr_type;
4451	      tree daddr_type;
4452	      tree src_addr, src;
4453	      int src_offset;
4454	      tree dest_addr, dest;
4455	      int cur_size = GET_MODE_SIZE (mode);
4456
4457	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
4458	      prev_size = INTVAL (XEXP (slot, 1));
4459	      if (prev_size + cur_size > size)
4460		{
4461		  cur_size = size - prev_size;
4462		  unsigned int nbits = cur_size * BITS_PER_UNIT;
4463		  if (!int_mode_for_size (nbits, 1).exists (&mode))
4464		    mode = QImode;
4465		}
4466	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
4467	      if (mode == GET_MODE (reg))
4468		addr_type = build_pointer_type (piece_type);
4469	      else
4470		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4471							 true);
4472	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4473							true);
4474
4475	      if (SSE_REGNO_P (REGNO (reg)))
4476		{
4477		  src_addr = sse_addr;
4478		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4479		}
4480	      else
4481		{
4482		  src_addr = int_addr;
4483		  src_offset = REGNO (reg) * 8;
4484		}
4485	      src_addr = fold_convert (addr_type, src_addr);
4486	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
4487
4488	      dest_addr = fold_convert (daddr_type, addr);
4489	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
4490	      if (cur_size == GET_MODE_SIZE (mode))
4491		{
4492		  src = build_va_arg_indirect_ref (src_addr);
4493		  dest = build_va_arg_indirect_ref (dest_addr);
4494
4495		  gimplify_assign (dest, src, pre_p);
4496		}
4497	      else
4498		{
4499		  tree copy
4500		    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
4501				       3, dest_addr, src_addr,
4502				       size_int (cur_size));
4503		  gimplify_and_add (copy, pre_p);
4504		}
4505	      prev_size += cur_size;
4506	    }
4507	}
4508
4509      if (needed_intregs)
4510	{
4511	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4512		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4513	  gimplify_assign (gpr, t, pre_p);
4514	  /* The GPR save area guarantees only 8-byte alignment.  */
4515	  if (!need_temp)
4516	    type_align = MIN (type_align, 64);
4517	}
4518
4519      if (needed_sseregs)
4520	{
4521	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4522		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4523	  gimplify_assign (unshare_expr (fpr), t, pre_p);
4524	}
4525
4526      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
4527
4528      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
4529    }
4530
4531  /* ... otherwise out of the overflow area.  */
4532
  /* When the caller aligns a parameter on the stack, an alignment beyond
     MAX_SUPPORTED_STACK_ALIGNMENT is clamped to MAX_SUPPORTED_STACK_ALIGNMENT.
     Match the caller here in the callee.  */
4537  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
4538  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
4539    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
4540
4541  /* Care for on-stack alignment if needed.  */
4542  if (arg_boundary <= 64 || size == 0)
4543    t = ovf;
  else
4545    {
4546      HOST_WIDE_INT align = arg_boundary / 8;
4547      t = fold_build_pointer_plus_hwi (ovf, align - 1);
4548      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4549		  build_int_cst (TREE_TYPE (t), -align));
4550    }
4551
4552  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4553  gimplify_assign (addr, t, pre_p);
4554
4555  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
4556  gimplify_assign (unshare_expr (ovf), t, pre_p);
4557
4558  if (container)
4559    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
4560
4561  type = build_aligned_type (type, type_align);
4562  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
4563  addr = fold_convert (ptrtype, addr);
4564
4565  if (indirect_p)
4566    addr = build_va_arg_indirect_ref (addr);
4567  return build_va_arg_indirect_ref (addr);
4568}
4569
4570/* Return true if OPNUM's MEM should be matched
4571   in movabs* patterns.  */
4572
4573bool
4574ix86_check_movabs (rtx insn, int opnum)
4575{
4576  rtx set, mem;
4577
4578  set = PATTERN (insn);
4579  if (GET_CODE (set) == PARALLEL)
4580    set = XVECEXP (set, 0, 0);
4581  gcc_assert (GET_CODE (set) == SET);
4582  mem = XEXP (set, opnum);
4583  while (SUBREG_P (mem))
4584    mem = SUBREG_REG (mem);
4585  gcc_assert (MEM_P (mem));
4586  return volatile_ok || !MEM_VOLATILE_P (mem);
4587}
4588
4589/* Return false if INSN contains a MEM with a non-default address space.  */
4590bool
4591ix86_check_no_addr_space (rtx insn)
4592{
4593  subrtx_var_iterator::array_type array;
4594  FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
4595    {
4596      rtx x = *iter;
4597      if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
4598	return false;
4599    }
4600  return true;
4601}
4602
4603/* Initialize the table of extra 80387 mathematical constants.  */
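/* In order, these are log10(2), ln(2), log2(e), log2(10) and pi, i.e. the
   values loaded by fldlg2, fldln2, fldl2e, fldl2t and fldpi.  */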
4604
4605static void
4606init_ext_80387_constants (void)
4607{
4608  static const char * cst[5] =
4609  {
4610    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
4611    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
4612    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
4613    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
4614    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
4615  };
4616  int i;
4617
4618  for (i = 0; i < 5; i++)
4619    {
4620      real_from_string (&ext_80387_constants_table[i], cst[i]);
4621      /* Ensure each constant is rounded to XFmode precision.  */
4622      real_convert (&ext_80387_constants_table[i],
4623		    XFmode, &ext_80387_constants_table[i]);
4624    }
4625
4626  ext_80387_constants_init = 1;
4627}
4628
4629/* Return non-zero if the constant is something that
4630   can be loaded with a special instruction.  */
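/* The return value encodes the instruction to use (see
   standard_80387_constant_opcode): -1 not an 80387 constant, 0 no special
   instruction, 1 fldz, 2 fld1, 3..7 one of the extended constants above,
   8 fldz;fchs (-0.0), 9 fld1;fchs (-1.0).  */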
4631
4632int
4633standard_80387_constant_p (rtx x)
4634{
4635  machine_mode mode = GET_MODE (x);
4636
4637  const REAL_VALUE_TYPE *r;
4638
4639  if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
4640    return -1;
4641
4642  if (x == CONST0_RTX (mode))
4643    return 1;
4644  if (x == CONST1_RTX (mode))
4645    return 2;
4646
4647  r = CONST_DOUBLE_REAL_VALUE (x);
4648
4649  /* For XFmode constants, try to find a special 80387 instruction when
4650     optimizing for size or on those CPUs that benefit from them.  */
4651  if (mode == XFmode
4652      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
4653      && !flag_rounding_math)
4654    {
4655      int i;
4656
4657      if (! ext_80387_constants_init)
4658	init_ext_80387_constants ();
4659
4660      for (i = 0; i < 5; i++)
4661        if (real_identical (r, &ext_80387_constants_table[i]))
4662	  return i + 3;
4663    }
4664
4665  /* Load of the constant -0.0 or -1.0 will be split as
4666     fldz;fchs or fld1;fchs sequence.  */
4667  if (real_isnegzero (r))
4668    return 8;
4669  if (real_identical (r, &dconstm1))
4670    return 9;
4671
4672  return 0;
4673}
4674
4675/* Return the opcode of the special instruction to be used to load
4676   the constant X.  */
4677
4678const char *
4679standard_80387_constant_opcode (rtx x)
4680{
4681  switch (standard_80387_constant_p (x))
4682    {
4683    case 1:
4684      return "fldz";
4685    case 2:
4686      return "fld1";
4687    case 3:
4688      return "fldlg2";
4689    case 4:
4690      return "fldln2";
4691    case 5:
4692      return "fldl2e";
4693    case 6:
4694      return "fldl2t";
4695    case 7:
4696      return "fldpi";
4697    case 8:
4698    case 9:
4699      return "#";
4700    default:
4701      gcc_unreachable ();
4702    }
4703}
4704
4705/* Return the CONST_DOUBLE representing the 80387 constant that is
4706   loaded by the specified special instruction.  The argument IDX
4707   matches the return value from standard_80387_constant_p.  */
4708
4709rtx
4710standard_80387_constant_rtx (int idx)
4711{
4712  int i;
4713
4714  if (! ext_80387_constants_init)
4715    init_ext_80387_constants ();
4716
4717  switch (idx)
4718    {
4719    case 3:
4720    case 4:
4721    case 5:
4722    case 6:
4723    case 7:
4724      i = idx - 3;
4725      break;
4726
4727    default:
4728      gcc_unreachable ();
4729    }
4730
4731  return const_double_from_real_value (ext_80387_constants_table[i],
4732				       XFmode);
4733}
4734
/* Return 1 if X is all zero bits and 2 if X is all one bits in a
   supported SSE/AVX vector mode, 0 otherwise.  */
4737
4738int
4739standard_sse_constant_p (rtx x, machine_mode pred_mode)
4740{
4741  machine_mode mode;
4742
4743  if (!TARGET_SSE)
4744    return 0;
4745
4746  mode = GET_MODE (x);
4747
4748  if (x == const0_rtx || const0_operand (x, mode))
4749    return 1;
4750
4751  if (x == constm1_rtx || vector_all_ones_operand (x, mode))
4752    {
4753      /* VOIDmode integer constant, get mode from the predicate.  */
4754      if (mode == VOIDmode)
4755	mode = pred_mode;
4756
4757      switch (GET_MODE_SIZE (mode))
4758	{
4759	case 64:
4760	  if (TARGET_AVX512F)
4761	    return 2;
4762	  break;
4763	case 32:
4764	  if (TARGET_AVX2)
4765	    return 2;
4766	  break;
4767	case 16:
4768	  if (TARGET_SSE2)
4769	    return 2;
4770	  break;
4771	case 0:
4772	  /* VOIDmode */
4773	  gcc_unreachable ();
4774	default:
4775	  break;
4776	}
4777    }
4778
4779  return 0;
4780}
4781
4782/* Return the opcode of the special instruction to be used to load
4783   the constant operands[1] into operands[0].  */
4784
4785const char *
4786standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
4787{
4788  machine_mode mode;
4789  rtx x = operands[1];
4790
4791  gcc_assert (TARGET_SSE);
4792
4793  mode = GET_MODE (x);
4794
4795  if (x == const0_rtx || const0_operand (x, mode))
4796    {
4797      switch (get_attr_mode (insn))
4798	{
4799	case MODE_TI:
4800	  if (!EXT_REX_SSE_REG_P (operands[0]))
4801	    return "%vpxor\t%0, %d0";
4802	  /* FALLTHRU */
4803	case MODE_XI:
4804	case MODE_OI:
4805	  if (EXT_REX_SSE_REG_P (operands[0]))
4806	    return (TARGET_AVX512VL
4807		    ? "vpxord\t%x0, %x0, %x0"
4808		    : "vpxord\t%g0, %g0, %g0");
4809	  return "vpxor\t%x0, %x0, %x0";
4810
4811	case MODE_V2DF:
4812	  if (!EXT_REX_SSE_REG_P (operands[0]))
4813	    return "%vxorpd\t%0, %d0";
4814	  /* FALLTHRU */
4815	case MODE_V8DF:
4816	case MODE_V4DF:
4817	  if (!EXT_REX_SSE_REG_P (operands[0]))
4818	    return "vxorpd\t%x0, %x0, %x0";
4819	  else if (TARGET_AVX512DQ)
4820	    return (TARGET_AVX512VL
4821		    ? "vxorpd\t%x0, %x0, %x0"
4822		    : "vxorpd\t%g0, %g0, %g0");
4823	  else
4824	    return (TARGET_AVX512VL
4825		    ? "vpxorq\t%x0, %x0, %x0"
4826		    : "vpxorq\t%g0, %g0, %g0");
4827
4828	case MODE_V4SF:
4829	  if (!EXT_REX_SSE_REG_P (operands[0]))
4830	    return "%vxorps\t%0, %d0";
4831	  /* FALLTHRU */
4832	case MODE_V16SF:
4833	case MODE_V8SF:
4834	  if (!EXT_REX_SSE_REG_P (operands[0]))
4835	    return "vxorps\t%x0, %x0, %x0";
4836	  else if (TARGET_AVX512DQ)
4837	    return (TARGET_AVX512VL
4838		    ? "vxorps\t%x0, %x0, %x0"
4839		    : "vxorps\t%g0, %g0, %g0");
4840	  else
4841	    return (TARGET_AVX512VL
4842		    ? "vpxord\t%x0, %x0, %x0"
4843		    : "vpxord\t%g0, %g0, %g0");
4844
4845	default:
4846	  gcc_unreachable ();
4847	}
4848    }
4849  else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
4850    {
4851      enum attr_mode insn_mode = get_attr_mode (insn);
4852
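      /* An all-ones register is generated either with pcmpeqd (a register
	 compared against itself is always equal) or, when an EVEX encoding
	 is required, with vpternlogd and immediate 0xFF, whose truth table
	 is constant one.  */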
4853      switch (insn_mode)
4854	{
4855	case MODE_XI:
4856	case MODE_V8DF:
4857	case MODE_V16SF:
4858	  gcc_assert (TARGET_AVX512F);
4859	  return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
4860
4861	case MODE_OI:
4862	case MODE_V4DF:
4863	case MODE_V8SF:
4864	  gcc_assert (TARGET_AVX2);
4865	  /* FALLTHRU */
4866	case MODE_TI:
4867	case MODE_V2DF:
4868	case MODE_V4SF:
4869	  gcc_assert (TARGET_SSE2);
4870	  if (!EXT_REX_SSE_REG_P (operands[0]))
4871	    return (TARGET_AVX
4872		    ? "vpcmpeqd\t%0, %0, %0"
4873		    : "pcmpeqd\t%0, %0");
4874	  else if (TARGET_AVX512VL)
4875	    return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
4876	  else
4877	    return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
4878
4879	default:
4880	  gcc_unreachable ();
4881	}
4882   }
4883
4884  gcc_unreachable ();
4885}
4886
4887/* Returns true if INSN can be transformed from a memory load
4888   to a supported FP constant load.  */
4889
4890bool
4891ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
4892{
4893  rtx src = find_constant_src (insn);
4894
4895  gcc_assert (REG_P (dst));
4896
4897  if (src == NULL
4898      || (SSE_REGNO_P (REGNO (dst))
4899	  && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
4900      || (STACK_REGNO_P (REGNO (dst))
4901	   && standard_80387_constant_p (src) < 1))
4902    return false;
4903
4904  return true;
4905}
4906
4907/* Predicate for pre-reload splitters with associated instructions,
4908   which can match any time before the split1 pass (usually combine),
4909   then are unconditionally split in that pass and should not be
4910   matched again afterwards.  */
4911
4912bool
4913ix86_pre_reload_split (void)
4914{
4915  return (can_create_pseudo_p ()
4916	  && !(cfun->curr_properties & PROP_rtl_split_insns));
4917}
4918
/* Return the opcode of the TYPE_SSEMOV instruction.  To move from
   or to xmm16-xmm31/ymm16-ymm31 registers, we either require
   TARGET_AVX512VL or a register-to-register move, which can be done
   with a zmm register move.  */
4923
4924static const char *
4925ix86_get_ssemov (rtx *operands, unsigned size,
4926		 enum attr_mode insn_mode, machine_mode mode)
4927{
4928  char buf[128];
4929  bool misaligned_p = (misaligned_operand (operands[0], mode)
4930		       || misaligned_operand (operands[1], mode));
4931  bool evex_reg_p = (size == 64
4932		     || EXT_REX_SSE_REG_P (operands[0])
4933		     || EXT_REX_SSE_REG_P (operands[1]));
4934  machine_mode scalar_mode;
4935
4936  const char *opcode = NULL;
4937  enum
4938    {
4939      opcode_int,
4940      opcode_float,
4941      opcode_double
4942    } type = opcode_int;
4943
4944  switch (insn_mode)
4945    {
4946    case MODE_V16SF:
4947    case MODE_V8SF:
4948    case MODE_V4SF:
4949      scalar_mode = E_SFmode;
4950      type = opcode_float;
4951      break;
4952    case MODE_V8DF:
4953    case MODE_V4DF:
4954    case MODE_V2DF:
4955      scalar_mode = E_DFmode;
4956      type = opcode_double;
4957      break;
4958    case MODE_XI:
4959    case MODE_OI:
4960    case MODE_TI:
4961      scalar_mode = GET_MODE_INNER (mode);
4962      break;
4963    default:
4964      gcc_unreachable ();
4965    }
4966
4967  /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
4968     we can only use zmm register move without memory operand.  */
4969  if (evex_reg_p
4970      && !TARGET_AVX512VL
4971      && GET_MODE_SIZE (mode) < 64)
4972    {
4973      /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
4974	 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
4975	 AVX512VL is disabled, LRA can still generate reg to
4976	 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
4977	 modes.  */
4978      if (memory_operand (operands[0], mode)
4979	  || memory_operand (operands[1], mode))
4980	gcc_unreachable ();
4981      size = 64;
4982      switch (type)
4983	{
4984	case opcode_int:
4985	  opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
4986	  break;
4987	case opcode_float:
4988	  opcode = misaligned_p ? "vmovups" : "vmovaps";
4989	  break;
4990	case opcode_double:
4991	  opcode = misaligned_p ? "vmovupd" : "vmovapd";
4992	  break;
4993	}
4994    }
4995  else if (SCALAR_FLOAT_MODE_P (scalar_mode))
4996    {
4997      switch (scalar_mode)
4998	{
4999	case E_SFmode:
5000	  opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5001	  break;
5002	case E_DFmode:
5003	  opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
5004	  break;
5005	case E_TFmode:
5006	  if (evex_reg_p)
5007	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5008	  else
5009	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5010	  break;
5011	default:
5012	  gcc_unreachable ();
5013	}
5014    }
5015  else if (SCALAR_INT_MODE_P (scalar_mode))
5016    {
5017      switch (scalar_mode)
5018	{
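	/* NB: vmovdqu8 and vmovdqu16 require AVX512BW; without it,
	   misaligned QImode/HImode element vectors fall back to the
	   element-size-agnostic vmovdqu64 / %vmovdqu forms below.  */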
5019	case E_QImode:
5020	  if (evex_reg_p)
5021	    opcode = (misaligned_p
5022		      ? (TARGET_AVX512BW
5023			 ? "vmovdqu8"
5024			 : "vmovdqu64")
5025		      : "vmovdqa64");
5026	  else
5027	    opcode = (misaligned_p
5028		      ? (TARGET_AVX512BW
5029			 ? "vmovdqu8"
5030			 : "%vmovdqu")
5031		      : "%vmovdqa");
5032	  break;
5033	case E_HImode:
5034	  if (evex_reg_p)
5035	    opcode = (misaligned_p
5036		      ? (TARGET_AVX512BW
5037			 ? "vmovdqu16"
5038			 : "vmovdqu64")
5039		      : "vmovdqa64");
5040	  else
5041	    opcode = (misaligned_p
5042		      ? (TARGET_AVX512BW
5043			 ? "vmovdqu16"
5044			 : "%vmovdqu")
5045		      : "%vmovdqa");
5046	  break;
5047	case E_SImode:
5048	  if (evex_reg_p)
5049	    opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5050	  else
5051	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5052	  break;
5053	case E_DImode:
5054	case E_TImode:
5055	case E_OImode:
5056	  if (evex_reg_p)
5057	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5058	  else
5059	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5060	  break;
5061	case E_XImode:
5062	  opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5063	  break;
5064	default:
5065	  gcc_unreachable ();
5066	}
5067    }
5068  else
5069    gcc_unreachable ();
5070
5071  switch (size)
5072    {
5073    case 64:
5074      snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
5075		opcode);
5076      break;
5077    case 32:
5078      snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
5079		opcode);
5080      break;
5081    case 16:
5082      snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
5083		opcode);
5084      break;
5085    default:
5086      gcc_unreachable ();
5087    }
5088  output_asm_insn (buf, operands);
5089  return "";
5090}
5091
5092/* Return the template of the TYPE_SSEMOV instruction to move
5093   operands[1] into operands[0].  */
5094
5095const char *
5096ix86_output_ssemov (rtx_insn *insn, rtx *operands)
5097{
5098  machine_mode mode = GET_MODE (operands[0]);
5099  if (get_attr_type (insn) != TYPE_SSEMOV
5100      || mode != GET_MODE (operands[1]))
5101    gcc_unreachable ();
5102
5103  enum attr_mode insn_mode = get_attr_mode (insn);
5104
5105  switch (insn_mode)
5106    {
5107    case MODE_XI:
5108    case MODE_V8DF:
5109    case MODE_V16SF:
5110      return ix86_get_ssemov (operands, 64, insn_mode, mode);
5111
5112    case MODE_OI:
5113    case MODE_V4DF:
5114    case MODE_V8SF:
5115      return ix86_get_ssemov (operands, 32, insn_mode, mode);
5116
5117    case MODE_TI:
5118    case MODE_V2DF:
5119    case MODE_V4SF:
5120      return ix86_get_ssemov (operands, 16, insn_mode, mode);
5121
5122    case MODE_DI:
5123      /* Handle broken assemblers that require movd instead of movq. */
5124      if (!HAVE_AS_IX86_INTERUNIT_MOVQ
5125	  && (GENERAL_REG_P (operands[0])
5126	      || GENERAL_REG_P (operands[1])))
5127	return "%vmovd\t{%1, %0|%0, %1}";
5128      else
5129	return "%vmovq\t{%1, %0|%0, %1}";
5130
5131    case MODE_SI:
5132      return "%vmovd\t{%1, %0|%0, %1}";
5133
5134    case MODE_DF:
5135      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5136	return "vmovsd\t{%d1, %0|%0, %d1}";
5137      else
5138	return "%vmovsd\t{%1, %0|%0, %1}";
5139
5140    case MODE_SF:
5141      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5142	return "vmovss\t{%d1, %0|%0, %d1}";
5143      else
5144	return "%vmovss\t{%1, %0|%0, %1}";
5145
5146    case MODE_V1DF:
5147      gcc_assert (!TARGET_AVX);
5148      return "movlpd\t{%1, %0|%0, %1}";
5149
5150    case MODE_V2SF:
5151      if (TARGET_AVX && REG_P (operands[0]))
5152	return "vmovlps\t{%1, %d0|%d0, %1}";
5153      else
5154	return "%vmovlps\t{%1, %0|%0, %1}";
5155
5156    default:
5157      gcc_unreachable ();
5158    }
5159}
5160
/* Returns true if OP contains a symbol reference.  */
5162
5163bool
5164symbolic_reference_mentioned_p (rtx op)
5165{
5166  const char *fmt;
5167  int i;
5168
5169  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5170    return true;
5171
5172  fmt = GET_RTX_FORMAT (GET_CODE (op));
5173  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5174    {
5175      if (fmt[i] == 'E')
5176	{
5177	  int j;
5178
5179	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5180	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5181	      return true;
5182	}
5183
5184      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5185	return true;
5186    }
5187
5188  return false;
5189}
5190
5191/* Return true if it is appropriate to emit `ret' instructions in the
5192   body of a function.  Do this only if the epilogue is simple, needing a
5193   couple of insns.  Prior to reloading, we can't tell how many registers
5194   must be saved, so return false then.  Return false if there is no frame
5195   marker to de-allocate.  */
5196
5197bool
5198ix86_can_use_return_insn_p (void)
5199{
5200  if (ix86_function_naked (current_function_decl))
5201    return false;
5202
  /* Don't use the `ret' instruction in an interrupt handler.  */
  if (! reload_completed
      || frame_pointer_needed
      || cfun->machine->func_type != TYPE_NORMAL)
    return false;

  /* Don't allow more than a 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return false;
5213
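  /* A bare `ret' is enough only when the frame consists solely of the
     return address (stack_pointer_offset == UNITS_PER_WORD) and no
     call-saved registers need restoring.  */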
5214  struct ix86_frame &frame = cfun->machine->frame;
5215  return (frame.stack_pointer_offset == UNITS_PER_WORD
5216	  && (frame.nregs + frame.nsseregs) == 0);
5217}
5218
5219/* Return stack frame size.  get_frame_size () returns used stack slots
5220   during compilation, which may be optimized out later.  If stack frame
5221   is needed, stack_frame_required should be true.  */
5222
5223static HOST_WIDE_INT
5224ix86_get_frame_size (void)
5225{
5226  if (cfun->machine->stack_frame_required)
5227    return get_frame_size ();
5228  else
5229    return 0;
5230}
5231
5232/* Value should be nonzero if functions must have frame pointers.
5233   Zero means the frame pointer need not be set up (and parms may
5234   be accessed via the stack pointer) in functions that seem suitable.  */
5235
5236static bool
5237ix86_frame_pointer_required (void)
5238{
5239  /* If we accessed previous frames, then the generated code expects
5240     to be able to access the saved ebp value in our frame.  */
5241  if (cfun->machine->accesses_prev_frame)
5242    return true;
5243
  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
5246  if (SUBTARGET_FRAME_POINTER_REQUIRED)
5247    return true;
5248
  /* For older 32-bit runtimes, setjmp requires a valid frame pointer.  */
5250  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
5251    return true;
5252
  /* With Win64 SEH, very large frames need a frame pointer, as the maximum
     stack allocation is 4GB.  */
5255  if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
5256    return true;
5257
  /* SSE saves require a frame pointer when the stack is misaligned.  */
5259  if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
5260    return true;
5261
5262  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
5263     turns off the frame pointer by default.  Turn it back on now if
5264     we've not got a leaf function.  */
5265  if (TARGET_OMIT_LEAF_FRAME_POINTER
5266      && (!crtl->is_leaf
5267	  || ix86_current_function_calls_tls_descriptor))
5268    return true;
5269
5270  if (crtl->profile && !flag_fentry)
5271    return true;
5272
5273  return false;
5274}
5275
5276/* Record that the current function accesses previous call frames.  */
5277
5278void
5279ix86_setup_frame_addresses (void)
5280{
5281  cfun->machine->accesses_prev_frame = 1;
5282}
5283
5284#ifndef USE_HIDDEN_LINKONCE
5285# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
5286#  define USE_HIDDEN_LINKONCE 1
5287# else
5288#  define USE_HIDDEN_LINKONCE 0
5289# endif
5290#endif
5291
5292/* Label count for call and return thunks.  It is used to make unique
5293   labels in call and return thunks.  */
5294static int indirectlabelno;
5295
5296/* True if call thunk function is needed.  */
5297static bool indirect_thunk_needed = false;
5298
/* Bit mask of the integer registers that contain the branch target,
   used by call thunk functions.  */
5301static int indirect_thunks_used;
5302
5303/* True if return thunk function is needed.  */
5304static bool indirect_return_needed = false;
5305
5306/* True if return thunk function via CX is needed.  */
5307static bool indirect_return_via_cx;
5308
5309#ifndef INDIRECT_LABEL
5310# define INDIRECT_LABEL "LIND"
5311#endif
5312
5313/* Indicate what prefix is needed for an indirect branch.  */
5314enum indirect_thunk_prefix
5315{
5316  indirect_thunk_prefix_none,
5317  indirect_thunk_prefix_nt
5318};
5319
5320/* Return the prefix needed for an indirect branch INSN.  */
5321
5322enum indirect_thunk_prefix
5323indirect_thunk_need_prefix (rtx_insn *insn)
5324{
5325  enum indirect_thunk_prefix need_prefix;
5326  if ((cfun->machine->indirect_branch_type
5327	    == indirect_branch_thunk_extern)
5328	   && ix86_notrack_prefixed_insn_p (insn))
5329    {
5330      /* NOTRACK prefix is only used with external thunk so that it
5331	 can be properly updated to support CET at run-time.  */
5332      need_prefix = indirect_thunk_prefix_nt;
5333    }
5334  else
5335    need_prefix = indirect_thunk_prefix_none;
5336  return need_prefix;
5337}
5338
5339/* Fills in the label name that should be used for the indirect thunk.  */
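/* With USE_HIDDEN_LINKONCE the generated names look like
   "__x86_indirect_thunk_rax" (branch target in %rax),
   "__x86_indirect_thunk" (branch target on the stack) and
   "__x86_return_thunk"; an "_nt" infix marks the NOTRACK variants.  */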
5340
5341static void
5342indirect_thunk_name (char name[32], unsigned int regno,
5343		     enum indirect_thunk_prefix need_prefix,
5344		     bool ret_p)
5345{
5346  if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
5347    gcc_unreachable ();
5348
5349  if (USE_HIDDEN_LINKONCE)
5350    {
5351      const char *prefix;
5352
5353      if (need_prefix == indirect_thunk_prefix_nt
5354	  && regno != INVALID_REGNUM)
5355	{
5356	  /* NOTRACK prefix is only used with external thunk via
5357	     register so that NOTRACK prefix can be added to indirect
5358	     branch via register to support CET at run-time.  */
5359	  prefix = "_nt";
5360	}
5361      else
5362	prefix = "";
5363
5364      const char *ret = ret_p ? "return" : "indirect";
5365
5366      if (regno != INVALID_REGNUM)
5367	{
5368	  const char *reg_prefix;
5369	  if (LEGACY_INT_REGNO_P (regno))
5370	    reg_prefix = TARGET_64BIT ? "r" : "e";
5371	  else
5372	    reg_prefix = "";
5373	  sprintf (name, "__x86_%s_thunk%s_%s%s",
5374		   ret, prefix, reg_prefix, reg_names[regno]);
5375	}
5376      else
5377	sprintf (name, "__x86_%s_thunk%s", ret, prefix);
5378    }
5379  else
5380    {
5381      if (regno != INVALID_REGNUM)
5382	ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
5383      else
5384	{
5385	  if (ret_p)
5386	    ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
5387	  else
5388	    ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
5389	}
5390    }
5391}
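
/* For instance, with USE_HIDDEN_LINKONCE the sprintf formats above yield
   names such as "__x86_indirect_thunk" (target address on the stack),
   "__x86_indirect_thunk_rax" (target address in %rax on 64-bit targets)
   or "__x86_return_thunk"; the NOTRACK variants get an extra "_nt"
   infix, e.g. "__x86_indirect_thunk_nt_rax".  */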
5392
/* Output a call and return thunk for indirect branch.  If REGNO
   != INVALID_REGNUM, the function address is in REGNO and the call
   and return thunk looks like:
5395
5396	call	L2
5397   L1:
5398	pause
5399	lfence
5400	jmp	L1
5401   L2:
5402	mov	%REG, (%sp)
5403	ret
5404
5405   Otherwise, the function address is on the top of stack and the
5406   call and return thunk looks like:
5407
5408	call L2
5409  L1:
5410	pause
5411	lfence
5412	jmp L1
5413  L2:
5414	lea WORD_SIZE(%sp), %sp
5415	ret
5416 */
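
/* The pause/lfence loop at L1 acts as a speculation barrier: if the
   processor speculates through the captured indirect branch, it spins
   harmlessly in that loop.  This is the retpoline-style mitigation for
   branch target injection (Spectre variant 2).  */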
5417
5418static void
5419output_indirect_thunk (unsigned int regno)
5420{
5421  char indirectlabel1[32];
5422  char indirectlabel2[32];
5423
5424  ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
5425			       indirectlabelno++);
5426  ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
5427			       indirectlabelno++);
5428
5429  /* Call */
5430  fputs ("\tcall\t", asm_out_file);
5431  assemble_name_raw (asm_out_file, indirectlabel2);
5432  fputc ('\n', asm_out_file);
5433
5434  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
5435
  /* AMD and Intel CPUs each prefer a different instruction as the loop
     filler.  Using both pause and lfence is a compromise.  */
5438  fprintf (asm_out_file, "\tpause\n\tlfence\n");
5439
5440  /* Jump.  */
5441  fputs ("\tjmp\t", asm_out_file);
5442  assemble_name_raw (asm_out_file, indirectlabel1);
5443  fputc ('\n', asm_out_file);
5444
5445  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
5446
5447  /* The above call insn pushed a word to stack.  Adjust CFI info.  */
5448  if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
5449    {
5450      if (! dwarf2out_do_cfi_asm ())
5451	{
5452	  dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
5453	  xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
5454	  xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
5455	  vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
5456	}
5457      dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
5458      xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
5459      xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
5460      vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
5461      dwarf2out_emit_cfi (xcfi);
5462    }
5463
5464  if (regno != INVALID_REGNUM)
5465    {
5466      /* MOV.  */
5467      rtx xops[2];
5468      xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
5469      xops[1] = gen_rtx_REG (word_mode, regno);
5470      output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
5471    }
5472  else
5473    {
5474      /* LEA.  */
5475      rtx xops[2];
5476      xops[0] = stack_pointer_rtx;
5477      xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
5478      output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
5479    }
5480
5481  fputs ("\tret\n", asm_out_file);
5482}
5483
/* Output a function with a call and return thunk for indirect branch.
5485   If REGNO != INVALID_REGNUM, the function address is in REGNO.
5486   Otherwise, the function address is on the top of stack.  Thunk is
5487   used for function return if RET_P is true.  */
5488
5489static void
5490output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
5491				unsigned int regno, bool ret_p)
5492{
5493  char name[32];
5494  tree decl;
5495
5496  /* Create __x86_indirect_thunk.  */
5497  indirect_thunk_name (name, regno, need_prefix, ret_p);
5498  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
5499		     get_identifier (name),
5500		     build_function_type_list (void_type_node, NULL_TREE));
5501  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
5502				   NULL_TREE, void_type_node);
5503  TREE_PUBLIC (decl) = 1;
5504  TREE_STATIC (decl) = 1;
5505  DECL_IGNORED_P (decl) = 1;
5506
5507#if TARGET_MACHO
5508  if (TARGET_MACHO)
5509    {
5510      switch_to_section (darwin_sections[picbase_thunk_section]);
5511      fputs ("\t.weak_definition\t", asm_out_file);
5512      assemble_name (asm_out_file, name);
5513      fputs ("\n\t.private_extern\t", asm_out_file);
5514      assemble_name (asm_out_file, name);
5515      putc ('\n', asm_out_file);
5516      ASM_OUTPUT_LABEL (asm_out_file, name);
5517      DECL_WEAK (decl) = 1;
5518    }
5519  else
5520#endif
5521    if (USE_HIDDEN_LINKONCE)
5522      {
5523	cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
5524
5525	targetm.asm_out.unique_section (decl, 0);
5526	switch_to_section (get_named_section (decl, NULL, 0));
5527
5528	targetm.asm_out.globalize_label (asm_out_file, name);
5529	fputs ("\t.hidden\t", asm_out_file);
5530	assemble_name (asm_out_file, name);
5531	putc ('\n', asm_out_file);
5532	ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5533      }
5534    else
5535      {
5536	switch_to_section (text_section);
5537	ASM_OUTPUT_LABEL (asm_out_file, name);
5538      }
5539
5540  DECL_INITIAL (decl) = make_node (BLOCK);
5541  current_function_decl = decl;
5542  allocate_struct_function (decl, false);
5543  init_function_start (decl);
5544  /* We're about to hide the function body from callees of final_* by
5545     emitting it directly; tell them we're a thunk, if they care.  */
5546  cfun->is_thunk = true;
5547  first_function_block_is_cold = false;
5548  /* Make sure unwind info is emitted for the thunk if needed.  */
5549  final_start_function (emit_barrier (), asm_out_file, 1);
5550
5551  output_indirect_thunk (regno);
5552
5553  final_end_function ();
5554  init_insn_lengths ();
5555  free_after_compilation (cfun);
5556  set_cfun (NULL);
5557  current_function_decl = NULL;
5558}
5559
/* Bit mask of the registers for which a 32-bit pc thunk is needed; see
   output_set_got and ix86_code_end.  */
static int pic_labels_used;
5561
5562/* Fills in the label name that should be used for a pc thunk for
5563   the given register.  */
5564
5565static void
5566get_pc_thunk_name (char name[32], unsigned int regno)
5567{
5568  gcc_assert (!TARGET_64BIT);
5569
5570  if (USE_HIDDEN_LINKONCE)
5571    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
5572  else
5573    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5574}
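
/* With USE_HIDDEN_LINKONCE this produces the familiar symbols seen in
   32-bit PIC code, e.g. "__x86.get_pc_thunk.bx" or
   "__x86.get_pc_thunk.ax".  */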
5575
5576
/* This hook emits the deferred thunk functions: for -fpic, the pc thunks
   that load a register (typically %ebx) with the return address of the
   caller and then return, as well as any indirect-branch and return
   thunks that were requested.  */
5579
5580static void
5581ix86_code_end (void)
5582{
5583  rtx xops[2];
5584  unsigned int regno;
5585
5586  if (indirect_return_needed)
5587    output_indirect_thunk_function (indirect_thunk_prefix_none,
5588				    INVALID_REGNUM, true);
5589  if (indirect_return_via_cx)
5590    output_indirect_thunk_function (indirect_thunk_prefix_none,
5591				    CX_REG, true);
5592  if (indirect_thunk_needed)
5593    output_indirect_thunk_function (indirect_thunk_prefix_none,
5594				    INVALID_REGNUM, false);
5595
5596  for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
5597    {
5598      unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1;
5599      if ((indirect_thunks_used & (1 << i)))
5600	output_indirect_thunk_function (indirect_thunk_prefix_none,
5601					regno, false);
5602    }
5603
5604  for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
5605    {
5606      char name[32];
5607      tree decl;
5608
5609      if ((indirect_thunks_used & (1 << regno)))
5610	output_indirect_thunk_function (indirect_thunk_prefix_none,
5611					regno, false);
5612
5613      if (!(pic_labels_used & (1 << regno)))
5614	continue;
5615
5616      get_pc_thunk_name (name, regno);
5617
5618      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
5619			 get_identifier (name),
5620			 build_function_type_list (void_type_node, NULL_TREE));
5621      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
5622				       NULL_TREE, void_type_node);
5623      TREE_PUBLIC (decl) = 1;
5624      TREE_STATIC (decl) = 1;
5625      DECL_IGNORED_P (decl) = 1;
5626
5627#if TARGET_MACHO
5628      if (TARGET_MACHO)
5629	{
5630	  switch_to_section (darwin_sections[picbase_thunk_section]);
5631	  fputs ("\t.weak_definition\t", asm_out_file);
5632	  assemble_name (asm_out_file, name);
5633	  fputs ("\n\t.private_extern\t", asm_out_file);
5634	  assemble_name (asm_out_file, name);
5635	  putc ('\n', asm_out_file);
5636	  ASM_OUTPUT_LABEL (asm_out_file, name);
5637	  DECL_WEAK (decl) = 1;
5638	}
5639      else
5640#endif
5641      if (USE_HIDDEN_LINKONCE)
5642	{
5643	  cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
5644
5645	  targetm.asm_out.unique_section (decl, 0);
5646	  switch_to_section (get_named_section (decl, NULL, 0));
5647
5648	  targetm.asm_out.globalize_label (asm_out_file, name);
5649	  fputs ("\t.hidden\t", asm_out_file);
5650	  assemble_name (asm_out_file, name);
5651	  putc ('\n', asm_out_file);
5652	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5653	}
5654      else
5655	{
5656	  switch_to_section (text_section);
5657	  ASM_OUTPUT_LABEL (asm_out_file, name);
5658	}
5659
5660      DECL_INITIAL (decl) = make_node (BLOCK);
5661      current_function_decl = decl;
5662      allocate_struct_function (decl, false);
5663      init_function_start (decl);
5664      /* We're about to hide the function body from callees of final_* by
5665	 emitting it directly; tell them we're a thunk, if they care.  */
5666      cfun->is_thunk = true;
5667      first_function_block_is_cold = false;
5668      /* Make sure unwind info is emitted for the thunk if needed.  */
5669      final_start_function (emit_barrier (), asm_out_file, 1);
5670
5671      /* Pad stack IP move with 4 instructions (two NOPs count
5672	 as one instruction).  */
5673      if (TARGET_PAD_SHORT_FUNCTION)
5674	{
5675	  int i = 8;
5676
5677	  while (i--)
5678	    fputs ("\tnop\n", asm_out_file);
5679	}
5680
5681      xops[0] = gen_rtx_REG (Pmode, regno);
5682      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
5683      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
5684      output_asm_insn ("%!ret", NULL);
5685      final_end_function ();
5686      init_insn_lengths ();
5687      free_after_compilation (cfun);
5688      set_cfun (NULL);
5689      current_function_decl = NULL;
5690    }
5691
5692  if (flag_split_stack)
5693    file_end_indicate_split_stack ();
5694}
5695
5696/* Emit code for the SET_GOT patterns.  */
5697
5698const char *
5699output_set_got (rtx dest, rtx label)
5700{
5701  rtx xops[3];
5702
5703  xops[0] = dest;
5704
5705  if (TARGET_VXWORKS_RTP && flag_pic)
5706    {
5707      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
5708      xops[2] = gen_rtx_MEM (Pmode,
5709			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5710      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5711
5712      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5713	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5714	 an unadorned address.  */
5715      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5716      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5717      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5718      return "";
5719    }
5720
5721  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5722
5723  if (flag_pic)
5724    {
5725      char name[32];
5726      get_pc_thunk_name (name, REGNO (dest));
5727      pic_labels_used |= 1 << REGNO (dest);
5728
5729      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5730      xops[2] = gen_rtx_MEM (QImode, xops[2]);
5731      output_asm_insn ("%!call\t%X2", xops);
5732
5733#if TARGET_MACHO
5734      /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
5735         This is what will be referenced by the Mach-O PIC subsystem.  */
5736      if (machopic_should_output_picbase_label () || !label)
5737	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
5738
5739      /* When we are restoring the pic base at the site of a nonlocal label,
5740         and we decided to emit the pic base above, we will still output a
5741         local label used for calculating the correction offset (even though
5742         the offset will be 0 in that case).  */
5743      if (label)
5744        targetm.asm_out.internal_label (asm_out_file, "L",
5745					   CODE_LABEL_NUMBER (label));
5746#endif
5747    }
5748  else
5749    {
5750      if (TARGET_MACHO)
5751	/* We don't need a pic base, we're not producing pic.  */
5752	gcc_unreachable ();
5753
5754      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5755      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
5756      targetm.asm_out.internal_label (asm_out_file, "L",
5757				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5758    }
5759
5760  if (!TARGET_MACHO)
5761    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
5762
5763  return "";
5764}
5765
/* Generate a "push" pattern for input ARG.  */
5767
5768rtx
5769gen_push (rtx arg)
5770{
5771  struct machine_function *m = cfun->machine;
5772
5773  if (m->fs.cfa_reg == stack_pointer_rtx)
5774    m->fs.cfa_offset += UNITS_PER_WORD;
5775  m->fs.sp_offset += UNITS_PER_WORD;
5776
5777  if (REG_P (arg) && GET_MODE (arg) != word_mode)
5778    arg = gen_rtx_REG (word_mode, REGNO (arg));
5779
5780  return gen_rtx_SET (gen_rtx_MEM (word_mode,
5781				   gen_rtx_PRE_DEC (Pmode,
5782						    stack_pointer_rtx)),
5783		      arg);
5784}
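
/* On a 64-bit target, for example, gen_push (gen_rtx_REG (DImode, AX_REG))
   produces roughly
     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI ax))
   which is emitted as "pushq %rax".  */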
5785
/* Generate a "pop" pattern for input ARG.  */
5787
5788rtx
5789gen_pop (rtx arg)
5790{
5791  if (REG_P (arg) && GET_MODE (arg) != word_mode)
5792    arg = gen_rtx_REG (word_mode, REGNO (arg));
5793
5794  return gen_rtx_SET (arg,
5795		      gen_rtx_MEM (word_mode,
5796				   gen_rtx_POST_INC (Pmode,
5797						     stack_pointer_rtx)));
5798}
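
/* Correspondingly, gen_pop produces roughly
     (set (reg:DI ax) (mem:DI (post_inc:DI (reg:DI sp))))
   on a 64-bit target, emitted as "popq %rax".  */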
5799
/* Return the number of an unused call-clobbered register that is
   available for the entire function, or INVALID_REGNUM if there is
   none.  */
5802
5803static unsigned int
5804ix86_select_alt_pic_regnum (void)
5805{
5806  if (ix86_use_pseudo_pic_reg ())
5807    return INVALID_REGNUM;
5808
5809  if (crtl->is_leaf
5810      && !crtl->profile
5811      && !ix86_current_function_calls_tls_descriptor)
5812    {
5813      int i, drap;
5814      /* Can't use the same register for both PIC and DRAP.  */
5815      if (crtl->drap_reg)
5816	drap = REGNO (crtl->drap_reg);
5817      else
5818	drap = -1;
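      /* Hard register numbers 0..2 are %eax, %edx and %ecx, the
	 call-clobbered integer registers that can serve as an
	 alternative PIC base here.  */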
5819      for (i = 2; i >= 0; --i)
5820        if (i != drap && !df_regs_ever_live_p (i))
5821	  return i;
5822    }
5823
5824  return INVALID_REGNUM;
5825}
5826
5827/* Return true if REGNO is used by the epilogue.  */
5828
5829bool
5830ix86_epilogue_uses (int regno)
5831{
5832  /* If there are no caller-saved registers, we preserve all registers,
5833     except for MMX and x87 registers which aren't supported when saving
5834     and restoring registers.  Don't explicitly save SP register since
5835     it is always preserved.  */
5836  return (epilogue_completed
5837	  && cfun->machine->no_caller_saved_registers
5838	  && !fixed_regs[regno]
5839	  && !STACK_REGNO_P (regno)
5840	  && !MMX_REGNO_P (regno));
5841}
5842
5843/* Return nonzero if register REGNO can be used as a scratch register
5844   in peephole2.  */
5845
5846static bool
5847ix86_hard_regno_scratch_ok (unsigned int regno)
5848{
5849  /* If there are no caller-saved registers, we can't use any register
5850     as a scratch register after epilogue and use REGNO as scratch
5851     register only if it has been used before to avoid saving and
5852     restoring it.  */
5853  return (!cfun->machine->no_caller_saved_registers
5854	  || (!epilogue_completed
5855	      && df_regs_ever_live_p (regno)));
5856}
5857
5858/* Return TRUE if we need to save REGNO.  */
5859
5860bool
5861ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
5862{
5863  /* If there are no caller-saved registers, we preserve all registers,
5864     except for MMX and x87 registers which aren't supported when saving
5865     and restoring registers.  Don't explicitly save SP register since
5866     it is always preserved.  */
5867  if (cfun->machine->no_caller_saved_registers)
5868    {
5869      /* Don't preserve registers used for function return value.  */
5870      rtx reg = crtl->return_rtx;
5871      if (reg)
5872	{
5873	  unsigned int i = REGNO (reg);
5874	  unsigned int nregs = REG_NREGS (reg);
5875	  while (nregs-- > 0)
5876	    if ((i + nregs) == regno)
5877	      return false;
5878	}
5879
5880      return (df_regs_ever_live_p (regno)
5881	      && !fixed_regs[regno]
5882	      && !STACK_REGNO_P (regno)
5883	      && !MMX_REGNO_P (regno)
5884	      && (regno != HARD_FRAME_POINTER_REGNUM
5885		  || !frame_pointer_needed));
5886    }
5887
5888  if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
5889      && pic_offset_table_rtx)
5890    {
5891      if (ix86_use_pseudo_pic_reg ())
5892	{
5893	  /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
5894	  _mcount in prologue.  */
5895	  if (!TARGET_64BIT && flag_pic && crtl->profile)
5896	    return true;
5897	}
5898      else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5899	       || crtl->profile
5900	       || crtl->calls_eh_return
5901	       || crtl->uses_const_pool
5902	       || cfun->has_nonlocal_label)
5903        return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
5904    }
5905
5906  if (crtl->calls_eh_return && maybe_eh_return)
5907    {
5908      unsigned i;
5909      for (i = 0; ; i++)
5910	{
5911	  unsigned test = EH_RETURN_DATA_REGNO (i);
5912	  if (test == INVALID_REGNUM)
5913	    break;
5914	  if (test == regno)
5915	    return true;
5916	}
5917    }
5918
5919  if (ignore_outlined && cfun->machine->call_ms2sysv)
5920    {
5921      unsigned count = cfun->machine->call_ms2sysv_extra_regs
5922		       + xlogue_layout::MIN_REGS;
5923      if (xlogue_layout::is_stub_managed_reg (regno, count))
5924	return false;
5925    }
5926
5927  if (crtl->drap_reg
5928      && regno == REGNO (crtl->drap_reg)
5929      && !cfun->machine->no_drap_save_restore)
5930    return true;
5931
5932  return (df_regs_ever_live_p (regno)
5933	  && !call_used_or_fixed_reg_p (regno)
5934	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5935}
5936
/* Return the number of saved general purpose registers.  */
5938
5939static int
5940ix86_nsaved_regs (void)
5941{
5942  int nregs = 0;
5943  int regno;
5944
5945  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5946    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
5947      nregs ++;
5948  return nregs;
5949}
5950
5951/* Return number of saved SSE registers.  */
5952
5953static int
5954ix86_nsaved_sseregs (void)
5955{
5956  int nregs = 0;
5957  int regno;
5958
5959  if (!TARGET_64BIT_MS_ABI)
5960    return 0;
5961  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5962    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
5963      nregs ++;
5964  return nregs;
5965}
5966
5967/* Given FROM and TO register numbers, say whether this elimination is
5968   allowed.  If stack alignment is needed, we can only replace argument
5969   pointer with hard frame pointer, or replace frame pointer with stack
5970   pointer.  Otherwise, frame pointer elimination is automatically
5971   handled and all other eliminations are valid.  */
5972
5973static bool
5974ix86_can_eliminate (const int from, const int to)
5975{
5976  if (stack_realign_fp)
5977    return ((from == ARG_POINTER_REGNUM
5978	     && to == HARD_FRAME_POINTER_REGNUM)
5979	    || (from == FRAME_POINTER_REGNUM
5980		&& to == STACK_POINTER_REGNUM));
5981  else
5982    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
5983}
5984
5985/* Return the offset between two registers, one to be eliminated, and the other
5986   its replacement, at the start of a routine.  */
5987
5988HOST_WIDE_INT
5989ix86_initial_elimination_offset (int from, int to)
5990{
5991  struct ix86_frame &frame = cfun->machine->frame;
5992
5993  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5994    return frame.hard_frame_pointer_offset;
5995  else if (from == FRAME_POINTER_REGNUM
5996	   && to == HARD_FRAME_POINTER_REGNUM)
5997    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5998  else
5999    {
6000      gcc_assert (to == STACK_POINTER_REGNUM);
6001
6002      if (from == ARG_POINTER_REGNUM)
6003	return frame.stack_pointer_offset;
6004
6005      gcc_assert (from == FRAME_POINTER_REGNUM);
6006      return frame.stack_pointer_offset - frame.frame_pointer_offset;
6007    }
6008}
6009
6010/* Emits a warning for unsupported msabi to sysv pro/epilogues.  */
6011void warn_once_call_ms2sysv_xlogues (const char *feature)
6012{
6013  static bool warned_once = false;
6014  if (!warned_once)
6015    {
6016      warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6017	       feature);
6018      warned_once = true;
6019    }
6020}
6021
6022/* Return the probing interval for -fstack-clash-protection.  */
6023
6024static HOST_WIDE_INT
6025get_probe_interval (void)
6026{
6027  if (flag_stack_clash_protection)
6028    return (HOST_WIDE_INT_1U
6029	    << param_stack_clash_protection_probe_interval);
6030  else
6031    return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
6032}
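
/* With the default settings both exponents are 12, so probes are
   normally emitted at 4 KiB (page-size) intervals; for
   -fstack-clash-protection this can be tuned with
   --param stack-clash-protection-probe-interval.  */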
6033
6034/* When using -fsplit-stack, the allocation routines set a field in
6035   the TCB to the bottom of the stack plus this much space, measured
6036   in bytes.  */
6037
6038#define SPLIT_STACK_AVAILABLE 256
6039
/* Fill in the ix86_frame structure describing the frame of the function
   currently being compiled.  */
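
/* A rough sketch of the layout computed below, from higher to lower
   addresses:

	return address (plus error code for exception handlers)
	pushed static chain, if any
	saved hard frame pointer, if frame_pointer_needed
	general register save area
	SSE register save area / ms2sysv stub save area
	va_arg register save area
	local variables
	outgoing arguments area

   The various *_offset fields of struct ix86_frame record the CFA-relative
   boundaries between these areas.  This is only an approximation:
   alignment padding, the red zone and the SEH adjustments made at the end
   of this function can move several of the boundaries.  */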
6041
6042static void
6043ix86_compute_frame_layout (void)
6044{
6045  struct ix86_frame *frame = &cfun->machine->frame;
6046  struct machine_function *m = cfun->machine;
6047  unsigned HOST_WIDE_INT stack_alignment_needed;
6048  HOST_WIDE_INT offset;
6049  unsigned HOST_WIDE_INT preferred_alignment;
6050  HOST_WIDE_INT size = ix86_get_frame_size ();
6051  HOST_WIDE_INT to_allocate;
6052
6053  /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
6054   * ms_abi functions that call a sysv function.  We now need to prune away
6055   * cases where it should be disabled.  */
6056  if (TARGET_64BIT && m->call_ms2sysv)
6057    {
6058      gcc_assert (TARGET_64BIT_MS_ABI);
6059      gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
6060      gcc_assert (!TARGET_SEH);
6061      gcc_assert (TARGET_SSE);
6062      gcc_assert (!ix86_using_red_zone ());
6063
6064      if (crtl->calls_eh_return)
6065	{
6066	  gcc_assert (!reload_completed);
6067	  m->call_ms2sysv = false;
6068	  warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
6069	}
6070
6071      else if (ix86_static_chain_on_stack)
6072	{
6073	  gcc_assert (!reload_completed);
6074	  m->call_ms2sysv = false;
6075	  warn_once_call_ms2sysv_xlogues ("static call chains");
6076	}
6077
6078      /* Finally, compute which registers the stub will manage.  */
6079      else
6080	{
6081	  unsigned count = xlogue_layout::count_stub_managed_regs ();
6082	  m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
6083	  m->call_ms2sysv_pad_in = 0;
6084	}
6085    }
6086
6087  frame->nregs = ix86_nsaved_regs ();
6088  frame->nsseregs = ix86_nsaved_sseregs ();
6089
  /* The 64-bit MS ABI seems to require stack alignment to be always 16,
     except for function prologues, leaf functions and when the default
     incoming stack boundary is overridden at the command line or via the
     force_align_arg_pointer attribute.

     Darwin's ABI specifies 128-bit alignment for both the 32-bit and
     64-bit variants at call sites, including profile function calls.  */
6098  if (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
6099        && crtl->preferred_stack_boundary < 128)
6100      && (!crtl->is_leaf || cfun->calls_alloca != 0
6101	  || ix86_current_function_calls_tls_descriptor
6102	  || (TARGET_MACHO && crtl->profile)
6103	  || ix86_incoming_stack_boundary < 128))
6104    {
6105      crtl->preferred_stack_boundary = 128;
6106      crtl->stack_alignment_needed = 128;
6107    }
6108
6109  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6110  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6111
6112  gcc_assert (!size || stack_alignment_needed);
6113  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6114  gcc_assert (preferred_alignment <= stack_alignment_needed);
6115
6116  /* The only ABI saving SSE regs should be 64-bit ms_abi.  */
6117  gcc_assert (TARGET_64BIT || !frame->nsseregs);
6118  if (TARGET_64BIT && m->call_ms2sysv)
6119    {
6120      gcc_assert (stack_alignment_needed >= 16);
6121      gcc_assert (!frame->nsseregs);
6122    }
6123
6124  /* For SEH we have to limit the amount of code movement into the prologue.
6125     At present we do this via a BLOCKAGE, at which point there's very little
6126     scheduling that can be done, which means that there's very little point
6127     in doing anything except PUSHs.  */
6128  if (TARGET_SEH)
6129    m->use_fast_prologue_epilogue = false;
6130  else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
6131    {
6132      int count = frame->nregs;
6133      struct cgraph_node *node = cgraph_node::get (current_function_decl);
6134
6135      /* The fast prologue uses move instead of push to save registers.  This
6136         is significantly longer, but also executes faster as modern hardware
6137         can execute the moves in parallel, but can't do that for push/pop.
6138
	 Be careful about choosing which prologue to emit: when the
	 function takes many instructions to execute, we may use the slow
	 version, and likewise when the function is known to be outside a
	 hot spot (this is known only with profile feedback).  Weight the
	 size of the function by the number of registers to save, as it is
	 cheap to use one or two push instructions but very slow to use
	 many of them.
6145
6146	 Calling this hook multiple times with the same frame requirements
6147	 must produce the same layout, since the RA might otherwise be
6148	 unable to reach a fixed point or might fail its final sanity checks.
6149	 This means that once we've assumed that a function does or doesn't
6150	 have a particular size, we have to stick to that assumption
6151	 regardless of how the function has changed since.  */
6152      if (count)
6153	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6154      if (node->frequency < NODE_FREQUENCY_NORMAL
6155	  || (flag_branch_probabilities
6156	      && node->frequency < NODE_FREQUENCY_HOT))
6157	m->use_fast_prologue_epilogue = false;
6158      else
6159	{
6160	  if (count != frame->expensive_count)
6161	    {
6162	      frame->expensive_count = count;
6163	      frame->expensive_p = expensive_function_p (count);
6164	    }
6165	  m->use_fast_prologue_epilogue = !frame->expensive_p;
6166	}
6167    }
6168
6169  frame->save_regs_using_mov
6170    = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue
6171       /* If static stack checking is enabled and done with probes,
6172	  the registers need to be saved before allocating the frame.  */
6173       && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
6174
6175  /* Skip return address and error code in exception handler.  */
6176  offset = INCOMING_FRAME_SP_OFFSET;
6177
6178  /* Skip pushed static chain.  */
6179  if (ix86_static_chain_on_stack)
6180    offset += UNITS_PER_WORD;
6181
6182  /* Skip saved base pointer.  */
6183  if (frame_pointer_needed)
6184    offset += UNITS_PER_WORD;
6185  frame->hfp_save_offset = offset;
6186
6187  /* The traditional frame pointer location is at the top of the frame.  */
6188  frame->hard_frame_pointer_offset = offset;
6189
6190  /* Register save area */
6191  offset += frame->nregs * UNITS_PER_WORD;
6192  frame->reg_save_offset = offset;
6193
6194  /* Calculate the size of the va-arg area (not including padding, if any).  */
6195  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
6196
6197  /* Also adjust stack_realign_offset for the largest alignment of
6198     stack slot actually used.  */
6199  if (stack_realign_fp
6200      || (cfun->machine->max_used_stack_alignment != 0
6201	  && (offset % cfun->machine->max_used_stack_alignment) != 0))
6202    {
6203      /* We may need a 16-byte aligned stack for the remainder of the
6204	 register save area, but the stack frame for the local function
	 may require a greater alignment if using AVX/AVX2/AVX-512.  In order
6206	 to avoid wasting space, we first calculate the space needed for
6207	 the rest of the register saves, add that to the stack pointer,
6208	 and then realign the stack to the boundary of the start of the
6209	 frame for the local function.  */
6210      HOST_WIDE_INT space_needed = 0;
6211      HOST_WIDE_INT sse_reg_space_needed = 0;
6212
6213      if (TARGET_64BIT)
6214	{
6215	  if (m->call_ms2sysv)
6216	    {
6217	      m->call_ms2sysv_pad_in = 0;
6218	      space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
6219	    }
6220
6221	  else if (frame->nsseregs)
6222	    /* The only ABI that has saved SSE registers (Win64) also has a
6223	       16-byte aligned default stack.  However, many programs violate
6224	       the ABI, and Wine64 forces stack realignment to compensate.  */
6225	    space_needed = frame->nsseregs * 16;
6226
6227	  sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
6228
	  /* 64-bit frame->va_arg_size should always be a multiple of 16,
	     but round anyway to be pedantic.  */
6231	  space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
6232	}
6233      else
6234	space_needed = frame->va_arg_size;
6235
6236      /* Record the allocation size required prior to the realignment AND.  */
6237      frame->stack_realign_allocate = space_needed;
6238
6239      /* The re-aligned stack starts at frame->stack_realign_offset.  Values
6240	 before this point are not directly comparable with values below
6241	 this point.  Use sp_valid_at to determine if the stack pointer is
6242	 valid for a given offset, fp_valid_at for the frame pointer, or
6243	 choose_baseaddr to have a base register chosen for you.
6244
6245	 Note that the result of (frame->stack_realign_offset
6246	 & (stack_alignment_needed - 1)) may not equal zero.  */
6247      offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
6248      frame->stack_realign_offset = offset - space_needed;
6249      frame->sse_reg_save_offset = frame->stack_realign_offset
6250							+ sse_reg_space_needed;
6251    }
6252  else
6253    {
6254      frame->stack_realign_offset = offset;
6255
6256      if (TARGET_64BIT && m->call_ms2sysv)
6257	{
6258	  m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
6259	  offset += xlogue_layout::get_instance ().get_stack_space_used ();
6260	}
6261
6262      /* Align and set SSE register save area.  */
6263      else if (frame->nsseregs)
6264	{
6265	  /* If the incoming stack boundary is at least 16 bytes, or DRAP is
6266	     required and the DRAP re-alignment boundary is at least 16 bytes,
6267	     then we want the SSE register save area properly aligned.  */
6268	  if (ix86_incoming_stack_boundary >= 128
6269		  || (stack_realign_drap && stack_alignment_needed >= 16))
6270	    offset = ROUND_UP (offset, 16);
6271	  offset += frame->nsseregs * 16;
6272	}
6273      frame->sse_reg_save_offset = offset;
6274      offset += frame->va_arg_size;
6275    }
6276
  /* Align the start of the frame for the local function.  When a
     function call is removed, it may become a leaf function.  But if
     arguments may be passed on the stack, we still need to align the
     stack when there is no tail call.  */
6281  if (m->call_ms2sysv
6282      || frame->va_arg_size != 0
6283      || size != 0
6284      || !crtl->is_leaf
6285      || (!crtl->tail_call_emit
6286	  && cfun->machine->outgoing_args_on_stack)
6287      || cfun->calls_alloca
6288      || ix86_current_function_calls_tls_descriptor)
6289    offset = ROUND_UP (offset, stack_alignment_needed);
6290
6291  /* Frame pointer points here.  */
6292  frame->frame_pointer_offset = offset;
6293
6294  offset += size;
6295
  /* Add the outgoing arguments area.  It can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when the function calls alloca: the
     alloca expander assumes that the last crtl->outgoing_args_size bytes
     of the stack frame are unused.  */
6301  if (ACCUMULATE_OUTGOING_ARGS
6302      && (!crtl->is_leaf || cfun->calls_alloca
6303	  || ix86_current_function_calls_tls_descriptor))
6304    {
6305      offset += crtl->outgoing_args_size;
6306      frame->outgoing_arguments_size = crtl->outgoing_args_size;
6307    }
6308  else
6309    frame->outgoing_arguments_size = 0;
6310
6311  /* Align stack boundary.  Only needed if we're calling another function
6312     or using alloca.  */
6313  if (!crtl->is_leaf || cfun->calls_alloca
6314      || ix86_current_function_calls_tls_descriptor)
6315    offset = ROUND_UP (offset, preferred_alignment);
6316
6317  /* We've reached end of stack frame.  */
6318  frame->stack_pointer_offset = offset;
6319
6320  /* Size prologue needs to allocate.  */
6321  to_allocate = offset - frame->sse_reg_save_offset;
6322
6323  if ((!to_allocate && frame->nregs <= 1)
6324      || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
6325      /* If stack clash probing needs a loop, then it needs a
6326	 scratch register.  But the returned register is only guaranteed
6327	 to be safe to use after register saves are complete.  So if
6328	 stack clash protections are enabled and the allocated frame is
6329	 larger than the probe interval, then use pushes to save
6330	 callee saved registers.  */
6331      || (flag_stack_clash_protection && to_allocate > get_probe_interval ()))
6332    frame->save_regs_using_mov = false;
6333
6334  if (ix86_using_red_zone ()
6335      && crtl->sp_is_unchanging
6336      && crtl->is_leaf
6337      && !ix86_pc_thunk_call_expanded
6338      && !ix86_current_function_calls_tls_descriptor)
6339    {
6340      frame->red_zone_size = to_allocate;
6341      if (frame->save_regs_using_mov)
6342	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6343      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6344	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6345    }
6346  else
6347    frame->red_zone_size = 0;
6348  frame->stack_pointer_offset -= frame->red_zone_size;
6349
6350  /* The SEH frame pointer location is near the bottom of the frame.
6351     This is enforced by the fact that the difference between the
6352     stack pointer and the frame pointer is limited to 240 bytes in
6353     the unwind data structure.  */
6354  if (TARGET_SEH)
6355    {
6356      /* Force the frame pointer to point at or below the lowest register save
6357	 area, see the SEH code in config/i386/winnt.c for the rationale.  */
6358      frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
6359
6360      /* If we can leave the frame pointer where it is, do so; however return
6361	 the establisher frame for __builtin_frame_address (0) or else if the
6362	 frame overflows the SEH maximum frame size.
6363
6364	 Note that the value returned by __builtin_frame_address (0) is quite
6365	 constrained, because setjmp is piggybacked on the SEH machinery with
6366	 recent versions of MinGW:
6367
6368	  #    elif defined(__SEH__)
6369	  #     if defined(__aarch64__) || defined(_ARM64_)
6370	  #      define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
6371	  #     elif (__MINGW_GCC_VERSION < 40702)
6372	  #      define setjmp(BUF) _setjmp((BUF), mingw_getsp())
6373	  #     else
6374	  #      define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
6375	  #     endif
6376
6377	 and the second argument passed to _setjmp, if not null, is forwarded
6378	 to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
6379	 built an ExceptionRecord on the fly describing the setjmp buffer).  */
6380      const HOST_WIDE_INT diff
6381	= frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
6382      if (diff <= 255 && !crtl->accesses_prior_frames)
6383	{
6384	  /* The resulting diff will be a multiple of 16 lower than 255,
6385	     i.e. at most 240 as required by the unwind data structure.  */
6386	  frame->hard_frame_pointer_offset += (diff & 15);
6387	}
6388      else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
6389	{
6390	  /* Ideally we'd determine what portion of the local stack frame
6391	     (within the constraint of the lowest 240) is most heavily used.
6392	     But without that complication, simply bias the frame pointer
6393	     by 128 bytes so as to maximize the amount of the local stack
6394	     frame that is addressable with 8-bit offsets.  */
6395	  frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
6396	}
6397      else
6398	frame->hard_frame_pointer_offset = frame->hfp_save_offset;
6399    }
6400}
6401
6402/* This is semi-inlined memory_address_length, but simplified
6403   since we know that we're always dealing with reg+offset, and
6404   to avoid having to create and discard all that rtl.  */
6405
6406static inline int
6407choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
6408{
6409  int len = 4;
6410
6411  if (offset == 0)
6412    {
6413      /* EBP and R13 cannot be encoded without an offset.  */
6414      len = (regno == BP_REG || regno == R13_REG);
6415    }
6416  else if (IN_RANGE (offset, -128, 127))
6417    len = 1;
6418
6419  /* ESP and R12 must be encoded with a SIB byte.  */
6420  if (regno == SP_REG || regno == R12_REG)
6421    len++;
6422
6423  return len;
6424}
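
/* For example: 16(%rax) needs only a disp8, so the length is 1;
   16(%rsp) additionally needs a SIB byte, so 2; (%rbp) cannot be
   encoded without a displacement, so even a zero offset costs 1; and
   an offset outside [-128, 127] always costs a 4-byte displacement
   (plus the SIB byte for %rsp or %r12).  */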
6425
6426/* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
6427   the frame save area.  The register is saved at CFA - CFA_OFFSET.  */
6428
6429static bool
6430sp_valid_at (HOST_WIDE_INT cfa_offset)
6431{
6432  const struct machine_frame_state &fs = cfun->machine->fs;
6433  if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
6434    {
6435      /* Validate that the cfa_offset isn't in a "no-man's land".  */
6436      gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
6437      return false;
6438    }
6439  return fs.sp_valid;
6440}
6441
6442/* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
6443   the frame save area.  The register is saved at CFA - CFA_OFFSET.  */
6444
6445static inline bool
6446fp_valid_at (HOST_WIDE_INT cfa_offset)
6447{
6448  const struct machine_frame_state &fs = cfun->machine->fs;
6449  if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
6450    {
6451      /* Validate that the cfa_offset isn't in a "no-man's land".  */
6452      gcc_assert (cfa_offset >= fs.sp_realigned_offset);
6453      return false;
6454    }
6455  return fs.fp_valid;
6456}
6457
6458/* Choose a base register based upon alignment requested, speed and/or
6459   size.  */
6460
6461static void
6462choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
6463		HOST_WIDE_INT &base_offset,
		unsigned int align_requested, unsigned int *align)
6465{
6466  const struct machine_function *m = cfun->machine;
6467  unsigned int hfp_align;
6468  unsigned int drap_align;
6469  unsigned int sp_align;
6470  bool hfp_ok  = fp_valid_at (cfa_offset);
6471  bool drap_ok = m->fs.drap_valid;
6472  bool sp_ok   = sp_valid_at (cfa_offset);
6473
6474  hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
6475
6476  /* Filter out any registers that don't meet the requested alignment
6477     criteria.  */
  if (align_requested)
6479    {
6480      if (m->fs.realigned)
6481	hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
      /* SEH unwind code does not currently support REG_CFA_EXPRESSION
	 notes (which we would need to use a realigned stack pointer),
	 so disable on SEH targets.  */
6485      else if (m->fs.sp_realigned)
6486	sp_align = crtl->stack_alignment_needed;
6487
      hfp_ok = hfp_ok && hfp_align >= align_requested;
      drap_ok = drap_ok && drap_align >= align_requested;
      sp_ok = sp_ok && sp_align >= align_requested;
6491    }
6492
6493  if (m->use_fast_prologue_epilogue)
6494    {
6495      /* Choose the base register most likely to allow the most scheduling
6496         opportunities.  Generally FP is valid throughout the function,
6497         while DRAP must be reloaded within the epilogue.  But choose either
6498         over the SP due to increased encoding size.  */
6499
6500      if (hfp_ok)
6501	{
6502	  base_reg = hard_frame_pointer_rtx;
6503	  base_offset = m->fs.fp_offset - cfa_offset;
6504	}
6505      else if (drap_ok)
6506	{
6507	  base_reg = crtl->drap_reg;
6508	  base_offset = 0 - cfa_offset;
6509	}
6510      else if (sp_ok)
6511	{
6512	  base_reg = stack_pointer_rtx;
6513	  base_offset = m->fs.sp_offset - cfa_offset;
6514	}
6515    }
6516  else
6517    {
6518      HOST_WIDE_INT toffset;
6519      int len = 16, tlen;
6520
6521      /* Choose the base register with the smallest address encoding.
6522         With a tie, choose FP > DRAP > SP.  */
6523      if (sp_ok)
6524	{
6525	  base_reg = stack_pointer_rtx;
6526	  base_offset = m->fs.sp_offset - cfa_offset;
6527          len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
6528	}
6529      if (drap_ok)
6530	{
6531	  toffset = 0 - cfa_offset;
6532	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
6533	  if (tlen <= len)
6534	    {
6535	      base_reg = crtl->drap_reg;
6536	      base_offset = toffset;
6537	      len = tlen;
6538	    }
6539	}
6540      if (hfp_ok)
6541	{
6542	  toffset = m->fs.fp_offset - cfa_offset;
6543	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
6544	  if (tlen <= len)
6545	    {
6546	      base_reg = hard_frame_pointer_rtx;
6547	      base_offset = toffset;
6548	    }
6549	}
6550    }
6551
6552    /* Set the align return value.  */
6553    if (align)
6554      {
6555	if (base_reg == stack_pointer_rtx)
6556	  *align = sp_align;
6557	else if (base_reg == crtl->drap_reg)
6558	  *align = drap_align;
6559	else if (base_reg == hard_frame_pointer_rtx)
6560	  *align = hfp_align;
6561      }
6562}
6563
6564/* Return an RTX that points to CFA_OFFSET within the stack frame and
6565   the alignment of address.  If ALIGN is non-null, it should point to
6566   an alignment value (in bits) that is preferred or zero and will
   receive the alignment of the base register that was selected,
   irrespective of whether or not CFA_OFFSET is a multiple of that
6569   alignment value.  If it is possible for the base register offset to be
6570   non-immediate then SCRATCH_REGNO should specify a scratch register to
6571   use.
6572
6573   The valid base registers are taken from CFUN->MACHINE->FS.  */
6574
6575static rtx
6576choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
6577		 unsigned int scratch_regno = INVALID_REGNUM)
6578{
6579  rtx base_reg = NULL;
6580  HOST_WIDE_INT base_offset = 0;
6581
6582  /* If a specific alignment is requested, try to get a base register
6583     with that alignment first.  */
6584  if (align && *align)
6585    choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
6586
6587  if (!base_reg)
6588    choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
6589
6590  gcc_assert (base_reg != NULL);
6591
6592  rtx base_offset_rtx = GEN_INT (base_offset);
6593
6594  if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
6595    {
6596      gcc_assert (scratch_regno != INVALID_REGNUM);
6597
6598      rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
6599      emit_move_insn (scratch_reg, base_offset_rtx);
6600
6601      return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
6602    }
6603
6604  return plus_constant (Pmode, base_reg, base_offset);
6605}
6606
6607/* Emit code to save registers in the prologue.  */
6608
6609static void
6610ix86_emit_save_regs (void)
6611{
6612  unsigned int regno;
6613  rtx_insn *insn;
6614
6615  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
6616    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6617      {
6618	insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
6619	RTX_FRAME_RELATED_P (insn) = 1;
6620      }
6621}
6622
6623/* Emit a single register save at CFA - CFA_OFFSET.  */
6624
6625static void
6626ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
6627			      HOST_WIDE_INT cfa_offset)
6628{
6629  struct machine_function *m = cfun->machine;
6630  rtx reg = gen_rtx_REG (mode, regno);
6631  rtx mem, addr, base, insn;
6632  unsigned int align = GET_MODE_ALIGNMENT (mode);
6633
6634  addr = choose_baseaddr (cfa_offset, &align);
6635  mem = gen_frame_mem (mode, addr);
6636
  /* The location alignment depends upon the base register.  */
6638  align = MIN (GET_MODE_ALIGNMENT (mode), align);
6639  gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
6640  set_mem_align (mem, align);
6641
6642  insn = emit_insn (gen_rtx_SET (mem, reg));
6643  RTX_FRAME_RELATED_P (insn) = 1;
6644
6645  base = addr;
6646  if (GET_CODE (base) == PLUS)
6647    base = XEXP (base, 0);
6648  gcc_checking_assert (REG_P (base));
6649
6650  /* When saving registers into a re-aligned local stack frame, avoid
6651     any tricky guessing by dwarf2out.  */
6652  if (m->fs.realigned)
6653    {
6654      gcc_checking_assert (stack_realign_drap);
6655
6656      if (regno == REGNO (crtl->drap_reg))
6657	{
6658	  /* A bit of a hack.  We force the DRAP register to be saved in
6659	     the re-aligned stack frame, which provides us with a copy
6660	     of the CFA that will last past the prologue.  Install it.  */
6661	  gcc_checking_assert (cfun->machine->fs.fp_valid);
6662	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
6663				cfun->machine->fs.fp_offset - cfa_offset);
6664	  mem = gen_rtx_MEM (mode, addr);
6665	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
6666	}
6667      else
6668	{
6669	  /* The frame pointer is a stable reference within the
6670	     aligned frame.  Use it.  */
6671	  gcc_checking_assert (cfun->machine->fs.fp_valid);
6672	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
6673				cfun->machine->fs.fp_offset - cfa_offset);
6674	  mem = gen_rtx_MEM (mode, addr);
6675	  add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
6676	}
6677    }
6678
6679  else if (base == stack_pointer_rtx && m->fs.sp_realigned
6680	   && cfa_offset >= m->fs.sp_realigned_offset)
6681    {
6682      gcc_checking_assert (stack_realign_fp);
6683      add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
6684    }
6685
6686  /* The memory may not be relative to the current CFA register,
6687     which means that we may need to generate a new pattern for
6688     use by the unwind info.  */
6689  else if (base != m->fs.cfa_reg)
6690    {
6691      addr = plus_constant (Pmode, m->fs.cfa_reg,
6692			    m->fs.cfa_offset - cfa_offset);
6693      mem = gen_rtx_MEM (mode, addr);
6694      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
6695    }
6696}
6697
6698/* Emit code to save registers using MOV insns.
6699   First register is stored at CFA - CFA_OFFSET.  */
6700static void
6701ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
6702{
6703  unsigned int regno;
6704
6705  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6706    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6707      {
6708        ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
6709	cfa_offset -= UNITS_PER_WORD;
6710      }
6711}
6712
6713/* Emit code to save SSE registers using MOV insns.
6714   First register is stored at CFA - CFA_OFFSET.  */
6715static void
6716ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
6717{
6718  unsigned int regno;
6719
6720  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6721    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6722      {
6723	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
6724	cfa_offset -= GET_MODE_SIZE (V4SFmode);
6725      }
6726}
6727
6728static GTY(()) rtx queued_cfa_restores;
6729
/* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next
   stack manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within the stack red zone until return, as unwinders can find the same
   value in the register and on the stack.  */
6735
6736static void
6737ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
6738{
6739  if (!crtl->shrink_wrapped
6740      && cfa_offset <= cfun->machine->fs.red_zone_offset)
6741    return;
6742
6743  if (insn)
6744    {
6745      add_reg_note (insn, REG_CFA_RESTORE, reg);
6746      RTX_FRAME_RELATED_P (insn) = 1;
6747    }
6748  else
6749    queued_cfa_restores
6750      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
6751}
6752
6753/* Add queued REG_CFA_RESTORE notes if any to INSN.  */
6754
6755static void
6756ix86_add_queued_cfa_restore_notes (rtx insn)
6757{
6758  rtx last;
6759  if (!queued_cfa_restores)
6760    return;
6761  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
6762    ;
6763  XEXP (last, 1) = REG_NOTES (insn);
6764  REG_NOTES (insn) = queued_cfa_restores;
6765  queued_cfa_restores = NULL_RTX;
6766  RTX_FRAME_RELATED_P (insn) = 1;
6767}
6768
6769/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame
   related, zero if the %r11 register is live and cannot be freely used,
   and positive otherwise.  */
6774
6775static rtx
6776pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
6777			   int style, bool set_cfa)
6778{
6779  struct machine_function *m = cfun->machine;
6780  rtx addend = offset;
6781  rtx insn;
6782  bool add_frame_related_expr = false;
6783
6784  if (!x86_64_immediate_operand (offset, Pmode))
6785    {
6786      /* r11 is used by indirect sibcall return as well, set before the
6787	 epilogue and used after the epilogue.  */
6788      if (style)
6789        addend = gen_rtx_REG (Pmode, R11_REG);
6790      else
6791	{
6792	  gcc_assert (src != hard_frame_pointer_rtx
6793		      && dest != hard_frame_pointer_rtx);
6794	  addend = hard_frame_pointer_rtx;
6795	}
6796      emit_insn (gen_rtx_SET (addend, offset));
6797      if (style < 0)
6798	add_frame_related_expr = true;
6799    }
6800
6801  insn = emit_insn (gen_pro_epilogue_adjust_stack_add
6802		    (Pmode, dest, src, addend));
6803  if (style >= 0)
6804    ix86_add_queued_cfa_restore_notes (insn);
6805
6806  if (set_cfa)
6807    {
6808      rtx r;
6809
6810      gcc_assert (m->fs.cfa_reg == src);
6811      m->fs.cfa_offset += INTVAL (offset);
6812      m->fs.cfa_reg = dest;
6813
6814      r = gen_rtx_PLUS (Pmode, src, offset);
6815      r = gen_rtx_SET (dest, r);
6816      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
6817      RTX_FRAME_RELATED_P (insn) = 1;
6818    }
6819  else if (style < 0)
6820    {
6821      RTX_FRAME_RELATED_P (insn) = 1;
6822      if (add_frame_related_expr)
6823	{
6824	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
6825	  r = gen_rtx_SET (dest, r);
6826	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
6827	}
6828    }
6829
6830  if (dest == stack_pointer_rtx)
6831    {
6832      HOST_WIDE_INT ooffset = m->fs.sp_offset;
6833      bool valid = m->fs.sp_valid;
6834      bool realigned = m->fs.sp_realigned;
6835
6836      if (src == hard_frame_pointer_rtx)
6837	{
6838	  valid = m->fs.fp_valid;
6839	  realigned = false;
6840	  ooffset = m->fs.fp_offset;
6841	}
6842      else if (src == crtl->drap_reg)
6843	{
6844	  valid = m->fs.drap_valid;
6845	  realigned = false;
6846	  ooffset = 0;
6847	}
6848      else
6849	{
6850	  /* Else there are two possibilities: SP itself, which we set
6851	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
	     taken care of by hand along the eh_return path.  */
6853	  gcc_checking_assert (src == stack_pointer_rtx
6854			       || offset == const0_rtx);
6855	}
6856
6857      m->fs.sp_offset = ooffset - INTVAL (offset);
6858      m->fs.sp_valid = valid;
6859      m->fs.sp_realigned = realigned;
6860    }
6861  return insn;
6862}
6863
/* Find an available register to be used as the dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the body, so it must not be
	1. a parameter passing register.
	2. the GOT pointer.
   We reuse the static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has a
   longer encoding.

   Return: the regno of the chosen register.  */
6874
6875static unsigned int
6876find_drap_reg (void)
6877{
6878  tree decl = cfun->decl;
6879
6880  /* Always use callee-saved register if there are no caller-saved
6881     registers.  */
6882  if (TARGET_64BIT)
6883    {
      /* Use R13 for a nested function or a function that needs a static
	 chain.  Since a function with a tail call may use any caller-saved
	 register in the epilogue, DRAP must not use a caller-saved
	 register in such a case.  */
6888      if (DECL_STATIC_CHAIN (decl)
6889	  || cfun->machine->no_caller_saved_registers
6890	  || crtl->tail_call_emit)
6891	return R13_REG;
6892
6893      return R10_REG;
6894    }
6895  else
6896    {
      /* Use DI for a nested function or a function that needs a static
	 chain.  Since a function with a tail call may use any caller-saved
	 register in the epilogue, DRAP must not use a caller-saved
	 register in such a case.  */
6901      if (DECL_STATIC_CHAIN (decl)
6902	  || cfun->machine->no_caller_saved_registers
6903	  || crtl->tail_call_emit)
6904	return DI_REG;
6905
6906      /* Reuse static chain register if it isn't used for parameter
6907         passing.  */
6908      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
6909	{
6910	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
6911	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
6912	    return CX_REG;
6913	}
6914      return DI_REG;
6915    }
6916}
6917
6918/* Return minimum incoming stack alignment.  */
6919
6920static unsigned int
6921ix86_minimum_incoming_stack_boundary (bool sibcall)
6922{
6923  unsigned int incoming_stack_boundary;
6924
  /* The stack of an interrupt handler is aligned to 128 bits in 64-bit
     mode.  */
6926  if (cfun->machine->func_type != TYPE_NORMAL)
6927    incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
6928  /* Prefer the one specified at command line. */
6929  else if (ix86_user_incoming_stack_boundary)
6930    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack
     boundary if -mstackrealign is used, this is not the sibcall check,
     and the estimated stack alignment is 128 bits.  */
6934  else if (!sibcall
6935	   && ix86_force_align_arg_pointer
6936	   && crtl->stack_alignment_estimated == 128)
6937    incoming_stack_boundary = MIN_STACK_BOUNDARY;
6938  else
6939    incoming_stack_boundary = ix86_default_incoming_stack_boundary;
6940
6941  /* Incoming stack alignment can be changed on individual functions
6942     via force_align_arg_pointer attribute.  We use the smallest
6943     incoming stack boundary.  */
6944  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
6945      && lookup_attribute ("force_align_arg_pointer",
6946			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
6947    incoming_stack_boundary = MIN_STACK_BOUNDARY;
6948
6949  /* The incoming stack frame has to be aligned at least at
6950     parm_stack_boundary.  */
6951  if (incoming_stack_boundary < crtl->parm_stack_boundary)
6952    incoming_stack_boundary = crtl->parm_stack_boundary;
6953
  /* The stack at the entry of main is aligned by the runtime.  We use
     the smallest incoming stack boundary.  */
6956  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
6957      && DECL_NAME (current_function_decl)
6958      && MAIN_NAME_P (DECL_NAME (current_function_decl))
6959      && DECL_FILE_SCOPE_P (current_function_decl))
6960    incoming_stack_boundary = MAIN_STACK_BOUNDARY;
6961
6962  return incoming_stack_boundary;
6963}
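
/* For illustration (a user-level example, not part of the
   implementation): the force_align_arg_pointer attribute handled above
   lets a single function assume only MIN_STACK_BOUNDARY from its
   callers, e.g.

     void callback (void) __attribute__ ((force_align_arg_pointer));

   so its prologue realigns the stack itself instead of relying on the
   default incoming stack boundary.  */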
6964
6965/* Update incoming stack boundary and estimated stack alignment.  */
6966
6967static void
6968ix86_update_stack_boundary (void)
6969{
6970  ix86_incoming_stack_boundary
6971    = ix86_minimum_incoming_stack_boundary (false);
6972
  /* x86_64 varargs functions need 16-byte stack alignment for the
     register save area.  */
6974  if (TARGET_64BIT
6975      && cfun->stdarg
6976      && crtl->stack_alignment_estimated < 128)
6977    crtl->stack_alignment_estimated = 128;
6978
6979  /* __tls_get_addr needs to be called with 16-byte aligned stack.  */
6980  if (ix86_tls_descriptor_calls_expanded_in_cfun
6981      && crtl->preferred_stack_boundary < 128)
6982    crtl->preferred_stack_boundary = 128;
6983}
6984
6985/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
6986   needed or an rtx for DRAP otherwise.  */
6987
6988static rtx
6989ix86_get_drap_rtx (void)
6990{
  /* We must use DRAP if there are outgoing arguments on the stack or
     the stack pointer register is clobbered by an asm statement, and
     ACCUMULATE_OUTGOING_ARGS is false.  */
6994  if (ix86_force_drap
6995      || ((cfun->machine->outgoing_args_on_stack
6996	   || crtl->sp_is_clobbered_by_asm)
6997	  && !ACCUMULATE_OUTGOING_ARGS))
6998    crtl->need_drap = true;
6999
7000  if (stack_realign_drap)
7001    {
      /* Assign DRAP to vDRAP and return vDRAP.  */
7003      unsigned int regno = find_drap_reg ();
7004      rtx drap_vreg;
7005      rtx arg_ptr;
7006      rtx_insn *seq, *insn;
7007
7008      arg_ptr = gen_rtx_REG (Pmode, regno);
7009      crtl->drap_reg = arg_ptr;
7010
7011      start_sequence ();
7012      drap_vreg = copy_to_reg (arg_ptr);
7013      seq = get_insns ();
7014      end_sequence ();
7015
7016      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7017      if (!optimize)
7018	{
7019	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
7020	  RTX_FRAME_RELATED_P (insn) = 1;
7021	}
7022      return drap_vreg;
7023    }
7024  else
7025    return NULL;
7026}
7027
7028/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
7029
7030static rtx
7031ix86_internal_arg_pointer (void)
7032{
7033  return virtual_incoming_args_rtx;
7034}
7035
7036struct scratch_reg {
7037  rtx reg;
7038  bool saved;
7039};
7040
7041/* Return a short-lived scratch register for use on function entry.
7042   In 32-bit mode, it is valid only after the registers are saved
7043   in the prologue.  This register must be released by means of
7044   release_scratch_register_on_entry once it is dead.  */
7045
7046static void
7047get_scratch_register_on_entry (struct scratch_reg *sr)
7048{
7049  int regno;
7050
7051  sr->saved = false;
7052
7053  if (TARGET_64BIT)
7054    {
7055      /* We always use R11 in 64-bit mode.  */
7056      regno = R11_REG;
7057    }
7058  else
7059    {
7060      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
7061      bool fastcall_p
7062	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7063      bool thiscall_p
7064	= lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7065      bool static_chain_p = DECL_STATIC_CHAIN (decl);
7066      int regparm = ix86_function_regparm (fntype, decl);
7067      int drap_regno
7068	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
7069
7070      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
7071	  for the static chain register.  */
7072      if ((regparm < 1 || (fastcall_p && !static_chain_p))
7073	  && drap_regno != AX_REG)
7074	regno = AX_REG;
7075      /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
7076	  for the static chain register.  */
7077      else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
7078        regno = AX_REG;
7079      else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
7080	regno = DX_REG;
7081      /* ecx is the static chain register.  */
7082      else if (regparm < 3 && !fastcall_p && !thiscall_p
7083	       && !static_chain_p
7084	       && drap_regno != CX_REG)
7085	regno = CX_REG;
7086      else if (ix86_save_reg (BX_REG, true, false))
7087	regno = BX_REG;
7088      /* esi is the static chain register.  */
7089      else if (!(regparm == 3 && static_chain_p)
7090	       && ix86_save_reg (SI_REG, true, false))
7091	regno = SI_REG;
7092      else if (ix86_save_reg (DI_REG, true, false))
7093	regno = DI_REG;
7094      else
7095	{
7096	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
7097	  sr->saved = true;
7098	}
7099    }
7100
7101  sr->reg = gen_rtx_REG (Pmode, regno);
7102  if (sr->saved)
7103    {
7104      rtx_insn *insn = emit_insn (gen_push (sr->reg));
7105      RTX_FRAME_RELATED_P (insn) = 1;
7106    }
7107}
7108
7109/* Release a scratch register obtained from the preceding function.
7110
7111   If RELEASE_VIA_POP is true, we just pop the register off the stack
7112   to release it.  This is what non-Linux systems use with -fstack-check.
7113
7114   Otherwise we use OFFSET to locate the saved register and the
7115   allocated stack space becomes part of the local frame and is
7116   deallocated by the epilogue.  */
7117
7118static void
7119release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
7120				   bool release_via_pop)
7121{
7122  if (sr->saved)
7123    {
7124      if (release_via_pop)
7125	{
7126	  struct machine_function *m = cfun->machine;
7127	  rtx x, insn = emit_insn (gen_pop (sr->reg));
7128
	  /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
7130	  RTX_FRAME_RELATED_P (insn) = 1;
7131	  x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
7132	  x = gen_rtx_SET (stack_pointer_rtx, x);
7133	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
7134	  m->fs.sp_offset -= UNITS_PER_WORD;
7135	}
7136      else
7137	{
7138	  rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
7139	  x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
7140	  emit_insn (x);
7141	}
7142    }
7143}
7144
7145/* Emit code to adjust the stack pointer by SIZE bytes while probing it.
7146
7147   This differs from the next routine in that it tries hard to prevent
7148   attacks that jump the stack guard.  Thus it is never allowed to allocate
7149   more than PROBE_INTERVAL bytes of stack space without a suitable
7150   probe.
7151
7152   INT_REGISTERS_SAVED is true if integer registers have already been
7153   pushed on the stack.  */
7154
7155static void
7156ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size,
7157					 const bool int_registers_saved)
7158{
7159  struct machine_function *m = cfun->machine;
7160
7161  /* If this function does not statically allocate stack space, then
7162     no probes are needed.  */
7163  if (!size)
7164    {
7165      /* However, the allocation of space via pushes for register
7166	 saves could be viewed as allocating space, but without the
7167	 need to probe.  */
7168      if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
7169        dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7170      else
7171	dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
7172      return;
7173    }
7174
7175  /* If we are a noreturn function, then we have to consider the
7176     possibility that we're called via a jump rather than a call.
7177
7178     Thus we don't have the implicit probe generated by saving the
7179     return address into the stack at the call.  Thus, the stack
7180     pointer could be anywhere in the guard page.  The safe thing
7181     to do is emit a probe now.
7182
7183     The probe can be avoided if we have already emitted any callee
7184     register saves into the stack or have a frame pointer (which will
7185     have been saved as well).  Those saves will function as implicit
7186     probes.
7187
7188     ?!? This should be revamped to work like aarch64 and s390 where
7189     we track the offset from the most recent probe.  Normally that
7190     offset would be zero.  For a noreturn function we would reset
7191     it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT).   Then
7192     we just probe when we cross PROBE_INTERVAL.  */
7193  if (TREE_THIS_VOLATILE (cfun->decl)
7194      && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
7195    {
7196      /* We can safely use any register here since we're just going to push
7197	 its value and immediately pop it back.  But we do try and avoid
7198	 argument passing registers so as not to introduce dependencies in
7199	 the pipeline.  For 32 bit we use %esi and for 64 bit we use %rax.  */
7200      rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
7201      rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
7202      rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
7203      m->fs.sp_offset -= UNITS_PER_WORD;
7204      if (m->fs.cfa_reg == stack_pointer_rtx)
7205	{
7206	  m->fs.cfa_offset -= UNITS_PER_WORD;
7207	  rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
7208	  x = gen_rtx_SET (stack_pointer_rtx, x);
7209	  add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
7210	  RTX_FRAME_RELATED_P (insn_push) = 1;
7211	  x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
7212	  x = gen_rtx_SET (stack_pointer_rtx, x);
7213	  add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
7214	  RTX_FRAME_RELATED_P (insn_pop) = 1;
7215	}
7216      emit_insn (gen_blockage ());
7217    }
7218
7219  /* If we allocate less than the size of the guard statically,
7220     then no probing is necessary, but we do need to allocate
7221     the stack.  */
7222  if (size < (1 << param_stack_clash_protection_guard_size))
7223    {
7224      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7225			         GEN_INT (-size), -1,
7226			         m->fs.cfa_reg == stack_pointer_rtx);
7227      dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7228      return;
7229    }
7230
7231  /* We're allocating a large enough stack frame that we need to
7232     emit probes.  Either emit them inline or in a loop depending
7233     on the size.  */
7234  HOST_WIDE_INT probe_interval = get_probe_interval ();
7235  if (size <= 4 * probe_interval)
7236    {
7237      HOST_WIDE_INT i;
7238      for (i = probe_interval; i <= size; i += probe_interval)
7239	{
7240	  /* Allocate PROBE_INTERVAL bytes.  */
7241	  rtx insn
7242	    = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7243					 GEN_INT (-probe_interval), -1,
7244					 m->fs.cfa_reg == stack_pointer_rtx);
7245	  add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
7246
7247	  /* And probe at *sp.  */
7248	  emit_stack_probe (stack_pointer_rtx);
7249	  emit_insn (gen_blockage ());
7250	}
7251
7252      /* We need to allocate space for the residual, but we do not need
7253	 to probe the residual.  */
7254      HOST_WIDE_INT residual = (i - probe_interval - size);
7255      if (residual)
7256	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7257				   GEN_INT (residual), -1,
7258				   m->fs.cfa_reg == stack_pointer_rtx);
7259      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
7260    }
7261  else
7262    {
7263      /* We expect the GP registers to be saved when probes are used
7264	 as the probing sequences might need a scratch register and
7265	 the routine to allocate one assumes the integer registers
7266	 have already been saved.  */
7267      gcc_assert (int_registers_saved);
7268
7269      struct scratch_reg sr;
7270      get_scratch_register_on_entry (&sr);
7271
7272      /* If we needed to save a register, then account for any space
7273	 that was pushed (we are not going to pop the register when
7274	 we do the restore).  */
7275      if (sr.saved)
7276	size -= UNITS_PER_WORD;
7277
7278      /* Step 1: round SIZE down to a multiple of the interval.  */
7279      HOST_WIDE_INT rounded_size = size & -probe_interval;
7280
7281      /* Step 2: compute final value of the loop counter.  Use lea if
7282	 possible.  */
7283      rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
7284      rtx insn;
7285      if (address_no_seg_operand (addr, Pmode))
7286	insn = emit_insn (gen_rtx_SET (sr.reg, addr));
7287      else
7288	{
7289	  emit_move_insn (sr.reg, GEN_INT (-rounded_size));
7290	  insn = emit_insn (gen_rtx_SET (sr.reg,
7291					 gen_rtx_PLUS (Pmode, sr.reg,
7292						       stack_pointer_rtx)));
7293	}
7294      if (m->fs.cfa_reg == stack_pointer_rtx)
7295	{
7296	  add_reg_note (insn, REG_CFA_DEF_CFA,
7297			plus_constant (Pmode, sr.reg,
7298				       m->fs.cfa_offset + rounded_size));
7299	  RTX_FRAME_RELATED_P (insn) = 1;
7300	}
7301
7302      /* Step 3: the loop.  */
7303      rtx size_rtx = GEN_INT (rounded_size);
7304      insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
7305						    size_rtx));
7306      if (m->fs.cfa_reg == stack_pointer_rtx)
7307	{
7308	  m->fs.cfa_offset += rounded_size;
7309	  add_reg_note (insn, REG_CFA_DEF_CFA,
7310			plus_constant (Pmode, stack_pointer_rtx,
7311				       m->fs.cfa_offset));
7312	  RTX_FRAME_RELATED_P (insn) = 1;
7313	}
7314      m->fs.sp_offset += rounded_size;
7315      emit_insn (gen_blockage ());
7316
7317      /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
7318	 is equal to ROUNDED_SIZE.  */
7319
7320      if (size != rounded_size)
7321	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7322				   GEN_INT (rounded_size - size), -1,
7323				   m->fs.cfa_reg == stack_pointer_rtx);
7324      dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
7325
7326      /* This does not deallocate the space reserved for the scratch
7327	 register.  That will be deallocated in the epilogue.  */
7328      release_scratch_register_on_entry (&sr, size, false);
7329    }
7330
7331  /* Make sure nothing is scheduled before we are done.  */
7332  emit_insn (gen_blockage ());
7333}
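
/* Worked example for the routine above, assuming a 4 KiB probe
   interval (the usual default): an allocation smaller than the
   configured guard size is done with a single stack adjustment and no
   probe; a 16384-byte allocation (4 * PROBE_INTERVAL) is probed
   inline, touching *sp after each 4 KiB adjustment; anything larger
   uses the probing loop, with any residual smaller than one interval
   allocated afterwards without a probe.  */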
7334
7335/* Emit code to adjust the stack pointer by SIZE bytes while probing it.
7336
7337   INT_REGISTERS_SAVED is true if integer registers have already been
7338   pushed on the stack.  */
7339
7340static void
7341ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
7342			     const bool int_registers_saved)
7343{
  /* We skip the probe for the first interval + a small dope of 4 words
     and probe that many bytes past the specified size to maintain a
     protection area at the bottom of the stack.  */
7347  const int dope = 4 * UNITS_PER_WORD;
7348  rtx size_rtx = GEN_INT (size), last;
7349
7350  /* See if we have a constant small number of probes to generate.  If so,
7351     that's the easy case.  The run-time loop is made up of 9 insns in the
7352     generic case while the compile-time loop is made up of 3+2*(n-1) insns
7353     for n # of intervals.  */
7354  if (size <= 4 * get_probe_interval ())
7355    {
7356      HOST_WIDE_INT i, adjust;
7357      bool first_probe = true;
7358
7359      /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
7360	 values of N from 1 until it exceeds SIZE.  If only one probe is
7361	 needed, this will not generate any code.  Then adjust and probe
7362	 to PROBE_INTERVAL + SIZE.  */
7363      for (i = get_probe_interval (); i < size; i += get_probe_interval ())
7364	{
7365	  if (first_probe)
7366	    {
7367	      adjust = 2 * get_probe_interval () + dope;
7368	      first_probe = false;
7369	    }
7370	  else
7371	    adjust = get_probe_interval ();
7372
7373	  emit_insn (gen_rtx_SET (stack_pointer_rtx,
7374				  plus_constant (Pmode, stack_pointer_rtx,
7375						 -adjust)));
7376	  emit_stack_probe (stack_pointer_rtx);
7377	}
7378
7379      if (first_probe)
7380	adjust = size + get_probe_interval () + dope;
7381      else
7382        adjust = size + get_probe_interval () - i;
7383
7384      emit_insn (gen_rtx_SET (stack_pointer_rtx,
7385			      plus_constant (Pmode, stack_pointer_rtx,
7386					     -adjust)));
7387      emit_stack_probe (stack_pointer_rtx);
7388
7389      /* Adjust back to account for the additional first interval.  */
7390      last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
7391				     plus_constant (Pmode, stack_pointer_rtx,
7392						    (get_probe_interval ()
7393						     + dope))));
7394    }
7395
7396  /* Otherwise, do the same as above, but in a loop.  Note that we must be
7397     extra careful with variables wrapping around because we might be at
7398     the very top (or the very bottom) of the address space and we have
7399     to be able to handle this case properly; in particular, we use an
7400     equality test for the loop condition.  */
7401  else
7402    {
7403      /* We expect the GP registers to be saved when probes are used
7404	 as the probing sequences might need a scratch register and
7405	 the routine to allocate one assumes the integer registers
7406	 have already been saved.  */
7407      gcc_assert (int_registers_saved);
7408
7409      HOST_WIDE_INT rounded_size;
7410      struct scratch_reg sr;
7411
7412      get_scratch_register_on_entry (&sr);
7413
7414      /* If we needed to save a register, then account for any space
7415	 that was pushed (we are not going to pop the register when
7416	 we do the restore).  */
7417      if (sr.saved)
7418	size -= UNITS_PER_WORD;
7419
7420      /* Step 1: round SIZE to the previous multiple of the interval.  */
7421
7422      rounded_size = ROUND_DOWN (size, get_probe_interval ());
7423
7424
7425      /* Step 2: compute initial and final value of the loop counter.  */
7426
7427      /* SP = SP_0 + PROBE_INTERVAL.  */
7428      emit_insn (gen_rtx_SET (stack_pointer_rtx,
7429			      plus_constant (Pmode, stack_pointer_rtx,
7430					     - (get_probe_interval () + dope))));
7431
7432      /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
7433      if (rounded_size <= (HOST_WIDE_INT_1 << 31))
7434	emit_insn (gen_rtx_SET (sr.reg,
7435				plus_constant (Pmode, stack_pointer_rtx,
7436					       -rounded_size)));
7437      else
7438	{
7439	  emit_move_insn (sr.reg, GEN_INT (-rounded_size));
7440	  emit_insn (gen_rtx_SET (sr.reg,
7441				  gen_rtx_PLUS (Pmode, sr.reg,
7442						stack_pointer_rtx)));
7443	}
7444
7445
7446      /* Step 3: the loop
7447
7448	 do
7449	   {
7450	     SP = SP + PROBE_INTERVAL
7451	     probe at SP
7452	   }
7453	 while (SP != LAST_ADDR)
7454
7455	 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
7456	 values of N from 1 until it is equal to ROUNDED_SIZE.  */
7457
7458      emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg, size_rtx));
7459
7460
7461      /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
7462	 assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */
7463
7464      if (size != rounded_size)
7465	{
7466	  emit_insn (gen_rtx_SET (stack_pointer_rtx,
7467			          plus_constant (Pmode, stack_pointer_rtx,
7468						 rounded_size - size)));
7469	  emit_stack_probe (stack_pointer_rtx);
7470	}
7471
7472      /* Adjust back to account for the additional first interval.  */
7473      last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
7474				     plus_constant (Pmode, stack_pointer_rtx,
7475						    (get_probe_interval ()
7476						     + dope))));
7477
7478      /* This does not deallocate the space reserved for the scratch
7479	 register.  That will be deallocated in the epilogue.  */
7480      release_scratch_register_on_entry (&sr, size, false);
7481    }
7482
7483  /* Even if the stack pointer isn't the CFA register, we need to correctly
7484     describe the adjustments made to it, in particular differentiate the
7485     frame-related ones from the frame-unrelated ones.  */
7486  if (size > 0)
7487    {
7488      rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
7489      XVECEXP (expr, 0, 0)
7490	= gen_rtx_SET (stack_pointer_rtx,
7491		       plus_constant (Pmode, stack_pointer_rtx, -size));
7492      XVECEXP (expr, 0, 1)
7493	= gen_rtx_SET (stack_pointer_rtx,
7494		       plus_constant (Pmode, stack_pointer_rtx,
7495				      get_probe_interval () + dope + size));
7496      add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
7497      RTX_FRAME_RELATED_P (last) = 1;
7498
7499      cfun->machine->fs.sp_offset += size;
7500    }
7501
7502  /* Make sure nothing is scheduled before we are done.  */
7503  emit_insn (gen_blockage ());
7504}
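
/* Worked example for the routine above, again assuming a 4 KiB probe
   interval: for a 6000-byte frame the first adjustment moves SP down
   by 2 * 4096 + dope bytes (the skipped first interval plus the
   protection area) and probes, the second moves it down by the
   remaining 6000 + 4096 - 8192 = 1904 bytes and probes, and the final
   adjustment adds back 4096 + dope bytes, leaving SP exactly 6000
   bytes below its starting point.  */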
7505
7506/* Adjust the stack pointer up to REG while probing it.  */
7507
7508const char *
7509output_adjust_stack_and_probe (rtx reg)
7510{
7511  static int labelno = 0;
7512  char loop_lab[32];
7513  rtx xops[2];
7514
7515  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7516
7517  /* Loop.  */
7518  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
7519
7520  /* SP = SP + PROBE_INTERVAL.  */
7521  xops[0] = stack_pointer_rtx;
7522  xops[1] = GEN_INT (get_probe_interval ());
7523  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
7524
7525  /* Probe at SP.  */
7526  xops[1] = const0_rtx;
7527  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
7528
7529  /* Test if SP == LAST_ADDR.  */
7530  xops[0] = stack_pointer_rtx;
7531  xops[1] = reg;
7532  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
7533
7534  /* Branch.  */
7535  fputs ("\tjne\t", asm_out_file);
7536  assemble_name_raw (asm_out_file, loop_lab);
7537  fputc ('\n', asm_out_file);
7538
7539  return "";
7540}
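
/* For illustration, with a 4 KiB probe interval the loop emitted by
   the routine above looks roughly like this in 32-bit AT&T syntax,
   where %eax stands for whichever register holds LAST_ADDR:

	.LPSRL0:
		subl	$4096, %esp
		orl	$0, (%esp)
		cmpl	%eax, %esp
		jne	.LPSRL0

   64-bit code uses the q-suffixed forms and %rsp instead.  */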
7541
7542/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
7543   inclusive.  These are offsets from the current stack pointer.
7544
7545   INT_REGISTERS_SAVED is true if integer registers have already been
7546   pushed on the stack.  */
7547
7548static void
7549ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
7550			     const bool int_registers_saved)
7551{
7552  /* See if we have a constant small number of probes to generate.  If so,
7553     that's the easy case.  The run-time loop is made up of 6 insns in the
7554     generic case while the compile-time loop is made up of n insns for n #
7555     of intervals.  */
7556  if (size <= 6 * get_probe_interval ())
7557    {
7558      HOST_WIDE_INT i;
7559
7560      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
7561	 it exceeds SIZE.  If only one probe is needed, this will not
7562	 generate any code.  Then probe at FIRST + SIZE.  */
7563      for (i = get_probe_interval (); i < size; i += get_probe_interval ())
7564	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7565					 -(first + i)));
7566
7567      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7568				       -(first + size)));
7569    }
7570
7571  /* Otherwise, do the same as above, but in a loop.  Note that we must be
7572     extra careful with variables wrapping around because we might be at
7573     the very top (or the very bottom) of the address space and we have
7574     to be able to handle this case properly; in particular, we use an
7575     equality test for the loop condition.  */
7576  else
7577    {
7578      /* We expect the GP registers to be saved when probes are used
7579	 as the probing sequences might need a scratch register and
7580	 the routine to allocate one assumes the integer registers
7581	 have already been saved.  */
7582      gcc_assert (int_registers_saved);
7583
7584      HOST_WIDE_INT rounded_size, last;
7585      struct scratch_reg sr;
7586
7587      get_scratch_register_on_entry (&sr);
7588
7589
7590      /* Step 1: round SIZE to the previous multiple of the interval.  */
7591
7592      rounded_size = ROUND_DOWN (size, get_probe_interval ());
7593
7594
7595      /* Step 2: compute initial and final value of the loop counter.  */
7596
7597      /* TEST_OFFSET = FIRST.  */
7598      emit_move_insn (sr.reg, GEN_INT (-first));
7599
7600      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
7601      last = first + rounded_size;
7602
7603
7604      /* Step 3: the loop
7605
7606	 do
7607	   {
7608	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
7609	     probe at TEST_ADDR
7610	   }
7611	 while (TEST_ADDR != LAST_ADDR)
7612
7613         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
7614         until it is equal to ROUNDED_SIZE.  */
7615
7616      emit_insn
7617	(gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));
7618
7619
7620      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
7621	 that SIZE is equal to ROUNDED_SIZE.  */
7622
7623      if (size != rounded_size)
7624	emit_stack_probe (plus_constant (Pmode,
7625					 gen_rtx_PLUS (Pmode,
7626						       stack_pointer_rtx,
7627						       sr.reg),
7628					 rounded_size - size));
7629
7630      release_scratch_register_on_entry (&sr, size, true);
7631    }
7632
7633  /* Make sure nothing is scheduled before we are done.  */
7634  emit_insn (gen_blockage ());
7635}
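
/* For illustration (assuming a 4 KiB probe interval): probing a 12 KiB
   range starting at FIRST with the routine above emits three inline
   probes, at FIRST + 4096, FIRST + 8192 and FIRST + 12288 below the
   stack pointer; a range larger than 6 * PROBE_INTERVAL uses the
   probe_stack_range loop instead, with one extra probe at FIRST + SIZE
   when SIZE is not a multiple of the interval.  */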
7636
7637/* Probe a range of stack addresses from REG to END, inclusive.  These are
7638   offsets from the current stack pointer.  */
7639
7640const char *
7641output_probe_stack_range (rtx reg, rtx end)
7642{
7643  static int labelno = 0;
7644  char loop_lab[32];
7645  rtx xops[3];
7646
7647  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7648
7649  /* Loop.  */
7650  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
7651
7652  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
7653  xops[0] = reg;
7654  xops[1] = GEN_INT (get_probe_interval ());
7655  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
7656
7657  /* Probe at TEST_ADDR.  */
7658  xops[0] = stack_pointer_rtx;
7659  xops[1] = reg;
7660  xops[2] = const0_rtx;
7661  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
7662
7663  /* Test if TEST_ADDR == LAST_ADDR.  */
7664  xops[0] = reg;
7665  xops[1] = end;
7666  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
7667
7668  /* Branch.  */
7669  fputs ("\tjne\t", asm_out_file);
7670  assemble_name_raw (asm_out_file, loop_lab);
7671  fputc ('\n', asm_out_file);
7672
7673  return "";
7674}
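
/* For illustration, the loop emitted by the routine above looks
   roughly like this in 32-bit AT&T syntax, where %ecx stands for the
   scratch register holding the negated test offset and END is the
   final operand passed in (typically an immediate):

	.LPSRL1:
		subl	$4096, %ecx
		orl	$0, (%esp,%ecx)
		cmpl	END, %ecx
		jne	.LPSRL1

   (with q-suffixed forms and %rsp in 64-bit code).  */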
7675
/* Set cfun->machine->stack_frame_required to false if a stack frame
   isn't required.  Update STACK_ALIGNMENT to the largest alignment,
   in bits, of any stack slot used, if a stack frame is required and
   CHECK_STACK_SLOT is true.  */
7679
7680static void
7681ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
7682				    bool check_stack_slot)
7683{
7684  HARD_REG_SET set_up_by_prologue, prologue_used;
7685  basic_block bb;
7686
7687  CLEAR_HARD_REG_SET (prologue_used);
7688  CLEAR_HARD_REG_SET (set_up_by_prologue);
7689  add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
7690  add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
7691  add_to_hard_reg_set (&set_up_by_prologue, Pmode,
7692		       HARD_FRAME_POINTER_REGNUM);
7693
7694  /* The preferred stack alignment is the minimum stack alignment.  */
7695  if (stack_alignment > crtl->preferred_stack_boundary)
7696    stack_alignment = crtl->preferred_stack_boundary;
7697
7698  bool require_stack_frame = false;
7699
7700  FOR_EACH_BB_FN (bb, cfun)
7701    {
7702      rtx_insn *insn;
7703      FOR_BB_INSNS (bb, insn)
7704	if (NONDEBUG_INSN_P (insn)
7705	    && requires_stack_frame_p (insn, prologue_used,
7706				       set_up_by_prologue))
7707	  {
7708	    require_stack_frame = true;
7709
7710	    if (check_stack_slot)
7711	      {
7712		/* Find the maximum stack alignment.  */
7713		subrtx_iterator::array_type array;
7714		FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
7715		  if (MEM_P (*iter)
7716		      && (reg_mentioned_p (stack_pointer_rtx,
7717					   *iter)
7718			  || reg_mentioned_p (frame_pointer_rtx,
7719					      *iter)))
7720		    {
7721		      unsigned int alignment = MEM_ALIGN (*iter);
7722		      if (alignment > stack_alignment)
7723			stack_alignment = alignment;
7724		    }
7725	      }
7726	  }
7727    }
7728
7729  cfun->machine->stack_frame_required = require_stack_frame;
7730}
7731
7732/* Finalize stack_realign_needed and frame_pointer_needed flags, which
7733   will guide prologue/epilogue to be generated in correct form.  */
7734
7735static void
7736ix86_finalize_stack_frame_flags (void)
7737{
  /* Check if stack realignment is really needed after reload, and
     store the result in cfun.  */
7740  unsigned int incoming_stack_boundary
7741    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
7742       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
7743  unsigned int stack_alignment
7744    = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
7745       ? crtl->max_used_stack_slot_alignment
7746       : crtl->stack_alignment_needed);
7747  unsigned int stack_realign
7748    = (incoming_stack_boundary < stack_alignment);
7749  bool recompute_frame_layout_p = false;
7750
7751  if (crtl->stack_realign_finalized)
7752    {
      /* After stack_realign_needed is finalized, we can no longer
	 change it.  */
7755      gcc_assert (crtl->stack_realign_needed == stack_realign);
7756      return;
7757    }
7758
  /* It is always safe to compute max_used_stack_alignment.  We
     compute it only if a 128-bit aligned load/store may be generated
     on a misaligned stack slot, which would lead to a segfault.  */
7762  bool check_stack_slot
7763    = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
7764  ix86_find_max_used_stack_alignment (stack_alignment,
7765				      check_stack_slot);
7766
  /* If the only reason for frame_pointer_needed is that we
     conservatively assumed stack realignment might be needed, or
     -fno-omit-frame-pointer is used, but in the end nothing that
     needed the stack alignment was spilled and there is no stack
     access, clear frame_pointer_needed and say we don't need stack
     realignment.  */
7772  if ((stack_realign || (!flag_omit_frame_pointer && optimize))
7773      && frame_pointer_needed
7774      && crtl->is_leaf
7775      && crtl->sp_is_unchanging
7776      && !ix86_current_function_calls_tls_descriptor
7777      && !crtl->accesses_prior_frames
7778      && !cfun->calls_alloca
7779      && !crtl->calls_eh_return
7780      /* See ira_setup_eliminable_regset for the rationale.  */
7781      && !(STACK_CHECK_MOVING_SP
7782	   && flag_stack_check
7783	   && flag_exceptions
7784	   && cfun->can_throw_non_call_exceptions)
7785      && !ix86_frame_pointer_required ()
7786      && ix86_get_frame_size () == 0
7787      && ix86_nsaved_sseregs () == 0
7788      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
7789    {
7790      if (cfun->machine->stack_frame_required)
7791	{
	  /* A stack frame is required.  If the needed stack alignment
	     does not exceed the incoming stack boundary, don't realign
	     the stack.  */
7794	  stack_realign = incoming_stack_boundary < stack_alignment;
7795	  if (!stack_realign)
7796	    {
7797	      crtl->max_used_stack_slot_alignment
7798		= incoming_stack_boundary;
7799	      crtl->stack_alignment_needed
7800		= incoming_stack_boundary;
7801	      /* Also update preferred_stack_boundary for leaf
7802	         functions.  */
7803	      crtl->preferred_stack_boundary
7804		= incoming_stack_boundary;
7805	    }
7806	}
7807      else
7808	{
7809	  /* If drap has been set, but it actually isn't live at the
7810	     start of the function, there is no reason to set it up.  */
7811	  if (crtl->drap_reg)
7812	    {
7813	      basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
7814	      if (! REGNO_REG_SET_P (DF_LR_IN (bb),
7815				     REGNO (crtl->drap_reg)))
7816		{
7817		  crtl->drap_reg = NULL_RTX;
7818		  crtl->need_drap = false;
7819		}
7820	    }
7821	  else
7822	    cfun->machine->no_drap_save_restore = true;
7823
7824	  frame_pointer_needed = false;
7825	  stack_realign = false;
7826	  crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
7827	  crtl->stack_alignment_needed = incoming_stack_boundary;
7828	  crtl->stack_alignment_estimated = incoming_stack_boundary;
7829	  if (crtl->preferred_stack_boundary > incoming_stack_boundary)
7830	    crtl->preferred_stack_boundary = incoming_stack_boundary;
7831	  df_finish_pass (true);
7832	  df_scan_alloc (NULL);
7833	  df_scan_blocks ();
7834	  df_compute_regs_ever_live (true);
7835	  df_analyze ();
7836
7837	  if (flag_var_tracking)
7838	    {
7839	      /* Since frame pointer is no longer available, replace it with
7840		 stack pointer - UNITS_PER_WORD in debug insns.  */
7841	      df_ref ref, next;
7842	      for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
7843		   ref; ref = next)
7844		{
7845		  next = DF_REF_NEXT_REG (ref);
7846		  if (!DF_REF_INSN_INFO (ref))
7847		    continue;
7848
7849		  /* Make sure the next ref is for a different instruction,
7850		     so that we're not affected by the rescan.  */
7851		  rtx_insn *insn = DF_REF_INSN (ref);
7852		  while (next && DF_REF_INSN (next) == insn)
7853		    next = DF_REF_NEXT_REG (next);
7854
7855		  if (DEBUG_INSN_P (insn))
7856		    {
7857		      bool changed = false;
7858		      for (; ref != next; ref = DF_REF_NEXT_REG (ref))
7859			{
7860			  rtx *loc = DF_REF_LOC (ref);
7861			  if (*loc == hard_frame_pointer_rtx)
7862			    {
7863			      *loc = plus_constant (Pmode,
7864						    stack_pointer_rtx,
7865						    -UNITS_PER_WORD);
7866			      changed = true;
7867			    }
7868			}
7869		      if (changed)
7870			df_insn_rescan (insn);
7871		    }
7872		}
7873	    }
7874
7875	  recompute_frame_layout_p = true;
7876	}
7877    }
7878  else if (crtl->max_used_stack_slot_alignment >= 128
7879	   && cfun->machine->stack_frame_required)
7880    {
      /* We don't need to realign the stack.  max_used_stack_alignment
	 is used to decide how the stack frame should be aligned.  This
	 is independent of any psABI and of 32-bit vs. 64-bit.  */
7884      cfun->machine->max_used_stack_alignment
7885	= stack_alignment / BITS_PER_UNIT;
7886    }
7887
7888  if (crtl->stack_realign_needed != stack_realign)
7889    recompute_frame_layout_p = true;
7890  crtl->stack_realign_needed = stack_realign;
7891  crtl->stack_realign_finalized = true;
7892  if (recompute_frame_layout_p)
7893    ix86_compute_frame_layout ();
7894}
7895
/* Delete the SET_GOT insn right after the entry block if it is
   allocated to REG.  */
7897
7898static void
7899ix86_elim_entry_set_got (rtx reg)
7900{
7901  basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
7902  rtx_insn *c_insn = BB_HEAD (bb);
7903  if (!NONDEBUG_INSN_P (c_insn))
7904    c_insn = next_nonnote_nondebug_insn (c_insn);
7905  if (c_insn && NONJUMP_INSN_P (c_insn))
7906    {
7907      rtx pat = PATTERN (c_insn);
7908      if (GET_CODE (pat) == PARALLEL)
7909	{
7910	  rtx vec = XVECEXP (pat, 0, 0);
7911	  if (GET_CODE (vec) == SET
7912	      && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
7913	      && REGNO (XEXP (vec, 0)) == REGNO (reg))
7914	    delete_insn (c_insn);
7915	}
7916    }
7917}
7918
7919static rtx
7920gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
7921{
7922  rtx addr, mem;
7923
7924  if (offset)
7925    addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
7926  mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
7927  return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
7928}
7929
7930static inline rtx
7931gen_frame_load (rtx reg, rtx frame_reg, int offset)
7932{
7933  return gen_frame_set (reg, frame_reg, offset, false);
7934}
7935
7936static inline rtx
7937gen_frame_store (rtx reg, rtx frame_reg, int offset)
7938{
7939  return gen_frame_set (reg, frame_reg, offset, true);
7940}
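
/* For example, gen_frame_store (reg, rax, -16) builds RTL along the
   lines of (set (mem (plus rax (const_int -16))) reg), while
   gen_frame_load produces the mirror image with the memory as the
   source.  Illustration only; the exact mode and MEM flags come from
   the register and from gen_frame_mem.  */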
7941
7942static void
7943ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
7944{
7945  struct machine_function *m = cfun->machine;
7946  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
7947			  + m->call_ms2sysv_extra_regs;
7948  rtvec v = rtvec_alloc (ncregs + 1);
7949  unsigned int align, i, vi = 0;
7950  rtx_insn *insn;
7951  rtx sym, addr;
7952  rtx rax = gen_rtx_REG (word_mode, AX_REG);
7953  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
7954
7955  /* AL should only be live with sysv_abi.  */
7956  gcc_assert (!ix86_eax_live_at_start_p ());
7957  gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
7958
  /* Set up RAX as the stub's base pointer.  We use stack_realign_offset
     regardless of whether we've actually realigned the stack or not.  */
7961  align = GET_MODE_ALIGNMENT (V4SFmode);
7962  addr = choose_baseaddr (frame.stack_realign_offset
7963			  + xlogue.get_stub_ptr_offset (), &align, AX_REG);
7964  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
7965
7966  emit_insn (gen_rtx_SET (rax, addr));
7967
7968  /* Get the stub symbol.  */
7969  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
7970						  : XLOGUE_STUB_SAVE);
7971  RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
7972
7973  for (i = 0; i < ncregs; ++i)
7974    {
7975      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
7976      rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
7977			     r.regno);
7978      RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
7979    }
7980
7981  gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
7982
7983  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
7984  RTX_FRAME_RELATED_P (insn) = true;
7985}
7986
7987/* Generate and return an insn body to AND X with Y.  */
7988
7989static rtx_insn *
7990gen_and2_insn (rtx x, rtx y)
7991{
7992  enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
7993
7994  gcc_assert (insn_operand_matches (icode, 0, x));
7995  gcc_assert (insn_operand_matches (icode, 1, x));
7996  gcc_assert (insn_operand_matches (icode, 2, y));
7997
7998  return GEN_FCN (icode) (x, x, y);
7999}
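
/* For example, gen_and2_insn (stack_pointer_rtx, GEN_INT (-16))
   yields the insn for "and $-16, %esp" (or %rsp in 64-bit mode),
   which is how the prologue below realigns the stack.  */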
8000
8001/* Expand the prologue into a bunch of separate insns.  */
8002
8003void
8004ix86_expand_prologue (void)
8005{
8006  struct machine_function *m = cfun->machine;
8007  rtx insn, t;
8008  HOST_WIDE_INT allocate;
8009  bool int_registers_saved;
8010  bool sse_registers_saved;
8011  bool save_stub_call_needed;
8012  rtx static_chain = NULL_RTX;
8013
8014  if (ix86_function_naked (current_function_decl))
8015    return;
8016
8017  ix86_finalize_stack_frame_flags ();
8018
  /* DRAP should not coexist with stack_realign_fp.  */
8020  gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8021
8022  memset (&m->fs, 0, sizeof (m->fs));
8023
8024  /* Initialize CFA state for before the prologue.  */
8025  m->fs.cfa_reg = stack_pointer_rtx;
8026  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
8027
  /* Track the SP offset to the CFA.  We continue tracking this after
     we've swapped the CFA register away from SP.  In the case of
     re-alignment this is fudged; we're interested in offsets within
     the local frame.  */
8031  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8032  m->fs.sp_valid = true;
8033  m->fs.sp_realigned = false;
8034
8035  const struct ix86_frame &frame = cfun->machine->frame;
8036
8037  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
8038    {
8039      /* We should have already generated an error for any use of
8040         ms_hook on a nested function.  */
8041      gcc_checking_assert (!ix86_static_chain_on_stack);
8042
      /* Check if profiling is active and we shall use the
         profiling-before-prologue variant.  If so, sorry.  */
8045      if (crtl->profile && flag_fentry != 0)
8046	sorry ("%<ms_hook_prologue%> attribute is not compatible "
8047	       "with %<-mfentry%> for 32-bit");
8048
8049      /* In ix86_asm_output_function_label we emitted:
8050	 8b ff     movl.s %edi,%edi
8051	 55        push   %ebp
8052	 8b ec     movl.s %esp,%ebp
8053
8054	 This matches the hookable function prologue in Win32 API
8055	 functions in Microsoft Windows XP Service Pack 2 and newer.
8056	 Wine uses this to enable Windows apps to hook the Win32 API
8057	 functions provided by Wine.
8058
8059	 What that means is that we've already set up the frame pointer.  */
8060
8061      if (frame_pointer_needed
8062	  && !(crtl->drap_reg && crtl->stack_realign_needed))
8063	{
8064	  rtx push, mov;
8065
8066	  /* We've decided to use the frame pointer already set up.
8067	     Describe this to the unwinder by pretending that both
8068	     push and mov insns happen right here.
8069
8070	     Putting the unwind info here at the end of the ms_hook
8071	     is done so that we can make absolutely certain we get
8072	     the required byte sequence at the start of the function,
8073	     rather than relying on an assembler that can produce
8074	     the exact encoding required.
8075
8076	     However it does mean (in the unpatched case) that we have
8077	     a 1 insn window where the asynchronous unwind info is
8078	     incorrect.  However, if we placed the unwind info at
8079	     its correct location we would have incorrect unwind info
8080	     in the patched case.  Which is probably all moot since
8081	     I don't expect Wine generates dwarf2 unwind info for the
8082	     system libraries that use this feature.  */
8083
8084	  insn = emit_insn (gen_blockage ());
8085
8086	  push = gen_push (hard_frame_pointer_rtx);
8087	  mov = gen_rtx_SET (hard_frame_pointer_rtx,
8088			     stack_pointer_rtx);
8089	  RTX_FRAME_RELATED_P (push) = 1;
8090	  RTX_FRAME_RELATED_P (mov) = 1;
8091
8092	  RTX_FRAME_RELATED_P (insn) = 1;
8093	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8094			gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
8095
8096	  /* Note that gen_push incremented m->fs.cfa_offset, even
8097	     though we didn't emit the push insn here.  */
8098	  m->fs.cfa_reg = hard_frame_pointer_rtx;
8099	  m->fs.fp_offset = m->fs.cfa_offset;
8100	  m->fs.fp_valid = true;
8101	}
8102      else
8103	{
8104	  /* The frame pointer is not needed so pop %ebp again.
8105	     This leaves us with a pristine state.  */
8106	  emit_insn (gen_pop (hard_frame_pointer_rtx));
8107	}
8108    }
8109
8110  /* The first insn of a function that accepts its static chain on the
8111     stack is to push the register that would be filled in by a direct
8112     call.  This insn will be skipped by the trampoline.  */
8113  else if (ix86_static_chain_on_stack)
8114    {
8115      static_chain = ix86_static_chain (cfun->decl, false);
8116      insn = emit_insn (gen_push (static_chain));
8117      emit_insn (gen_blockage ());
8118
8119      /* We don't want to interpret this push insn as a register save,
8120	 only as a stack adjustment.  The real copy of the register as
8121	 a save will be done later, if needed.  */
8122      t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
8123      t = gen_rtx_SET (stack_pointer_rtx, t);
8124      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8125      RTX_FRAME_RELATED_P (insn) = 1;
8126    }
8127
  /* Emit prologue code to adjust stack alignment and set up DRAP, in
     case DRAP is needed and stack realignment is really needed after
     reload.  */
8130  if (stack_realign_drap)
8131    {
8132      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8133
8134      /* Can't use DRAP in interrupt function.  */
8135      if (cfun->machine->func_type != TYPE_NORMAL)
8136	sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
8137	       "in interrupt service routine.  This may be worked "
8138	       "around by avoiding functions with aggregate return.");
8139
8140      /* Only need to push parameter pointer reg if it is caller saved.  */
8141      if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8142	{
8143	  /* Push arg pointer reg */
8144	  insn = emit_insn (gen_push (crtl->drap_reg));
8145	  RTX_FRAME_RELATED_P (insn) = 1;
8146	}
8147
8148      /* Grab the argument pointer.  */
8149      t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
8150      insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8151      RTX_FRAME_RELATED_P (insn) = 1;
8152      m->fs.cfa_reg = crtl->drap_reg;
8153      m->fs.cfa_offset = 0;
8154
8155      /* Align the stack.  */
8156      insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
8157				       GEN_INT (-align_bytes)));
8158      RTX_FRAME_RELATED_P (insn) = 1;
8159
8160      /* Replicate the return address on the stack so that return
8161	 address can be reached via (argp - 1) slot.  This is needed
8162	 to implement macro RETURN_ADDR_RTX and intrinsic function
8163	 expand_builtin_return_addr etc.  */
8164      t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
8165      t = gen_frame_mem (word_mode, t);
8166      insn = emit_insn (gen_push (t));
8167      RTX_FRAME_RELATED_P (insn) = 1;
8168
8169      /* For the purposes of frame and register save area addressing,
8170	 we've started over with a new frame.  */
8171      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8172      m->fs.realigned = true;
8173
8174      if (static_chain)
8175	{
8176	  /* Replicate static chain on the stack so that static chain
8177	     can be reached via (argp - 2) slot.  This is needed for
8178	     nested function with stack realignment.  */
8179	  insn = emit_insn (gen_push (static_chain));
8180	  RTX_FRAME_RELATED_P (insn) = 1;
8181	}
8182    }
8183
8184  int_registers_saved = (frame.nregs == 0);
8185  sse_registers_saved = (frame.nsseregs == 0);
8186  save_stub_call_needed = (m->call_ms2sysv);
8187  gcc_assert (sse_registers_saved || !save_stub_call_needed);
8188
8189  if (frame_pointer_needed && !m->fs.fp_valid)
8190    {
8191      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
8192         slower on all targets.  Also sdb didn't like it.  */
8193      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8194      RTX_FRAME_RELATED_P (insn) = 1;
8195
8196      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
8197	{
8198	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8199	  RTX_FRAME_RELATED_P (insn) = 1;
8200
8201	  if (m->fs.cfa_reg == stack_pointer_rtx)
8202	    m->fs.cfa_reg = hard_frame_pointer_rtx;
8203	  m->fs.fp_offset = m->fs.sp_offset;
8204	  m->fs.fp_valid = true;
8205	}
8206    }
8207
8208  if (!int_registers_saved)
8209    {
8210      /* If saving registers via PUSH, do so now.  */
8211      if (!frame.save_regs_using_mov)
8212	{
8213	  ix86_emit_save_regs ();
8214	  int_registers_saved = true;
8215	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
8216	}
8217
      /* When using the red zone we may start register saving before
	 allocating the stack frame, saving one cycle of the prologue.
	 However, avoid doing this if we have to probe the stack; at
	 least on x86_64 the stack probe can turn into a call that
	 clobbers a red zone location.  */
8222      else if (ix86_using_red_zone ()
8223	       && (! TARGET_STACK_PROBE
8224		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
8225	{
8226	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8227	  int_registers_saved = true;
8228	}
8229    }
8230
8231  if (stack_realign_fp)
8232    {
8233      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8234      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8235
8236      /* Record last valid frame pointer offset.  */
8237      m->fs.sp_realigned_fp_last = frame.reg_save_offset;
8238
8239      /* The computation of the size of the re-aligned stack frame means
8240	 that we must allocate the size of the register save area before
8241	 performing the actual alignment.  Otherwise we cannot guarantee
8242	 that there's enough storage above the realignment point.  */
8243      allocate = frame.reg_save_offset - m->fs.sp_offset
8244		 + frame.stack_realign_allocate;
8245      if (allocate)
8246        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8247				   GEN_INT (-allocate), -1, false);
8248
8249      /* Align the stack.  */
8250      emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
8251      m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
8252      m->fs.sp_realigned_offset = m->fs.sp_offset
8253					      - frame.stack_realign_allocate;
8254      /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
8255	 Beyond this point, stack access should be done via choose_baseaddr or
8256	 by using sp_valid_at and fp_valid_at to determine the correct base
8257	 register.  Henceforth, any CFA offset should be thought of as logical
8258	 and not physical.  */
8259      gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
8260      gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
8261      m->fs.sp_realigned = true;
8262
8263      /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
8264	 is needed to describe where a register is saved using a realigned
8265	 stack pointer, so we need to invalidate the stack pointer for that
8266	 target.  */
8267      if (TARGET_SEH)
8268	m->fs.sp_valid = false;
8269
8270      /* If SP offset is non-immediate after allocation of the stack frame,
8271	 then emit SSE saves or stub call prior to allocating the rest of the
8272	 stack frame.  This is less efficient for the out-of-line stub because
8273	 we can't combine allocations across the call barrier, but it's better
8274	 than using a scratch register.  */
8275      else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
8276						   - m->fs.sp_realigned_offset),
8277					  Pmode))
8278	{
8279	  if (!sse_registers_saved)
8280	    {
8281	      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8282	      sse_registers_saved = true;
8283	    }
8284	  else if (save_stub_call_needed)
8285	    {
8286	      ix86_emit_outlined_ms2sysv_save (frame);
8287	      save_stub_call_needed = false;
8288	    }
8289	}
8290    }
8291
8292  allocate = frame.stack_pointer_offset - m->fs.sp_offset;
8293
8294  if (flag_stack_usage_info)
8295    {
8296      /* We start to count from ARG_POINTER.  */
8297      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
8298
8299      /* If it was realigned, take into account the fake frame.  */
8300      if (stack_realign_drap)
8301	{
8302	  if (ix86_static_chain_on_stack)
8303	    stack_size += UNITS_PER_WORD;
8304
8305	  if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8306	    stack_size += UNITS_PER_WORD;
8307
8308	  /* This over-estimates by 1 minimal-stack-alignment-unit but
8309	     mitigates that by counting in the new return address slot.  */
8310	  current_function_dynamic_stack_size
8311	    += crtl->stack_alignment_needed / BITS_PER_UNIT;
8312	}
8313
8314      current_function_static_stack_size = stack_size;
8315    }
8316
8317  /* On SEH target with very large frame size, allocate an area to save
8318     SSE registers (as the very large allocation won't be described).  */
8319  if (TARGET_SEH
8320      && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
8321      && !sse_registers_saved)
8322    {
8323      HOST_WIDE_INT sse_size
8324	= frame.sse_reg_save_offset - frame.reg_save_offset;
8325
8326      gcc_assert (int_registers_saved);
8327
8328      /* No need to do stack checking as the area will be immediately
8329	 written.  */
8330      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8331			         GEN_INT (-sse_size), -1,
8332				 m->fs.cfa_reg == stack_pointer_rtx);
8333      allocate -= sse_size;
8334      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8335      sse_registers_saved = true;
8336    }
8337
8338  /* The stack has already been decremented by the instruction calling us
8339     so probe if the size is non-negative to preserve the protection area.  */
8340  if (allocate >= 0
8341      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
8342	  || flag_stack_clash_protection))
8343    {
8344      if (flag_stack_clash_protection)
8345	{
8346	  ix86_adjust_stack_and_probe_stack_clash (allocate,
8347						   int_registers_saved);
8348	  allocate = 0;
8349	}
8350      else if (STACK_CHECK_MOVING_SP)
8351	{
8352	  if (!(crtl->is_leaf && !cfun->calls_alloca
8353		&& allocate <= get_probe_interval ()))
8354	    {
8355	      ix86_adjust_stack_and_probe (allocate, int_registers_saved);
8356	      allocate = 0;
8357	    }
8358	}
8359      else
8360	{
8361	  HOST_WIDE_INT size = allocate;
8362
8363	  if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
8364	    size = 0x80000000 - get_stack_check_protect () - 1;
8365
8366	  if (TARGET_STACK_PROBE)
8367	    {
8368	      if (crtl->is_leaf && !cfun->calls_alloca)
8369		{
8370		  if (size > get_probe_interval ())
8371		    ix86_emit_probe_stack_range (0, size, int_registers_saved);
8372		}
8373	      else
8374		ix86_emit_probe_stack_range (0,
8375					     size + get_stack_check_protect (),
8376					     int_registers_saved);
8377	    }
8378	  else
8379	    {
8380	      if (crtl->is_leaf && !cfun->calls_alloca)
8381		{
8382		  if (size > get_probe_interval ()
8383		      && size > get_stack_check_protect ())
8384		    ix86_emit_probe_stack_range (get_stack_check_protect (),
8385						 (size
8386						  - get_stack_check_protect ()),
8387						 int_registers_saved);
8388		}
8389	      else
8390		ix86_emit_probe_stack_range (get_stack_check_protect (), size,
8391					     int_registers_saved);
8392	    }
8393	}
8394    }
8395
8396  if (allocate == 0)
8397    ;
8398  else if (!ix86_target_stack_probe ()
8399	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
8400    {
8401      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8402			         GEN_INT (-allocate), -1,
8403			         m->fs.cfa_reg == stack_pointer_rtx);
8404    }
8405  else
8406    {
8407      rtx eax = gen_rtx_REG (Pmode, AX_REG);
8408      rtx r10 = NULL;
8409      const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
8410      bool eax_live = ix86_eax_live_at_start_p ();
8411      bool r10_live = false;
8412
8413      if (TARGET_64BIT)
8414        r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
8415
8416      if (eax_live)
8417	{
8418	  insn = emit_insn (gen_push (eax));
8419	  allocate -= UNITS_PER_WORD;
8420	  /* Note that SEH directives need to continue tracking the stack
8421	     pointer even after the frame pointer has been set up.  */
8422	  if (sp_is_cfa_reg || TARGET_SEH)
8423	    {
8424	      if (sp_is_cfa_reg)
8425		m->fs.cfa_offset += UNITS_PER_WORD;
8426	      RTX_FRAME_RELATED_P (insn) = 1;
8427	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8428			    gen_rtx_SET (stack_pointer_rtx,
8429					 plus_constant (Pmode,
8430							stack_pointer_rtx,
8431							-UNITS_PER_WORD)));
8432	    }
8433	}
8434
8435      if (r10_live)
8436	{
8437	  r10 = gen_rtx_REG (Pmode, R10_REG);
8438	  insn = emit_insn (gen_push (r10));
8439	  allocate -= UNITS_PER_WORD;
8440	  if (sp_is_cfa_reg || TARGET_SEH)
8441	    {
8442	      if (sp_is_cfa_reg)
8443		m->fs.cfa_offset += UNITS_PER_WORD;
8444	      RTX_FRAME_RELATED_P (insn) = 1;
8445	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8446			    gen_rtx_SET (stack_pointer_rtx,
8447					 plus_constant (Pmode,
8448							stack_pointer_rtx,
8449							-UNITS_PER_WORD)));
8450	    }
8451	}
8452
8453      emit_move_insn (eax, GEN_INT (allocate));
8454      emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
8455
8456      /* Use the fact that AX still contains ALLOCATE.  */
8457      insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
8458			(Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
8459
8460      if (sp_is_cfa_reg || TARGET_SEH)
8461	{
8462	  if (sp_is_cfa_reg)
8463	    m->fs.cfa_offset += allocate;
8464	  RTX_FRAME_RELATED_P (insn) = 1;
8465	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8466			gen_rtx_SET (stack_pointer_rtx,
8467				     plus_constant (Pmode, stack_pointer_rtx,
8468						    -allocate)));
8469	}
8470      m->fs.sp_offset += allocate;
8471
8472      /* Use stack_pointer_rtx for relative addressing so that code works for
8473	 realigned stack.  But this means that we need a blockage to prevent
8474	 stores based on the frame pointer from being scheduled before.  */
8475      if (r10_live && eax_live)
8476        {
8477	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8478	  emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
8479			  gen_frame_mem (word_mode, t));
8480	  t = plus_constant (Pmode, t, UNITS_PER_WORD);
8481	  emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
8482			  gen_frame_mem (word_mode, t));
8483	  emit_insn (gen_memory_blockage ());
8484	}
8485      else if (eax_live || r10_live)
8486	{
8487	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8488	  emit_move_insn (gen_rtx_REG (word_mode,
8489				       (eax_live ? AX_REG : R10_REG)),
8490			  gen_frame_mem (word_mode, t));
8491	  emit_insn (gen_memory_blockage ());
8492	}
8493    }
8494  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
8495
8496  /* If we haven't already set up the frame pointer, do so now.  */
8497  if (frame_pointer_needed && !m->fs.fp_valid)
8498    {
8499      insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
8500			    GEN_INT (frame.stack_pointer_offset
8501				     - frame.hard_frame_pointer_offset));
8502      insn = emit_insn (insn);
8503      RTX_FRAME_RELATED_P (insn) = 1;
8504      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
8505
8506      if (m->fs.cfa_reg == stack_pointer_rtx)
8507	m->fs.cfa_reg = hard_frame_pointer_rtx;
8508      m->fs.fp_offset = frame.hard_frame_pointer_offset;
8509      m->fs.fp_valid = true;
8510    }
8511
8512  if (!int_registers_saved)
8513    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8514  if (!sse_registers_saved)
8515    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8516  else if (save_stub_call_needed)
8517    ix86_emit_outlined_ms2sysv_save (frame);
8518
8519  /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
8520     in PROLOGUE.  */
8521  if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
8522    {
8523      rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
8524      insn = emit_insn (gen_set_got (pic));
8525      RTX_FRAME_RELATED_P (insn) = 1;
8526      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
8527      emit_insn (gen_prologue_use (pic));
8528      /* Delete the already emitted SET_GOT, if it exists and is allocated
8529	 to REAL_PIC_OFFSET_TABLE_REGNUM.  */
8530      ix86_elim_entry_set_got (pic);
8531    }
8532
8533  if (crtl->drap_reg && !crtl->stack_realign_needed)
8534    {
8535      /* vDRAP is set up, but after reload it turns out stack realignment
8536         isn't necessary; here we emit the prologue to set up DRAP
8537         without the stack realignment adjustment.  */
8538      t = choose_baseaddr (0, NULL);
8539      emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8540    }
8541
8542  /* Prevent instructions from being scheduled into register save push
8543     sequence when access to the redzone area is done through frame pointer.
8544     The offset between the frame pointer and the stack pointer is calculated
8545     relative to the value of the stack pointer at the end of the function
8546     prologue, and moving instructions that access redzone area via frame
8547     pointer inside push sequence violates this assumption.  */
8548  if (frame_pointer_needed && frame.red_zone_size)
8549    emit_insn (gen_memory_blockage ());
8550
8551  /* SEH requires that the prologue end within 256 bytes of the start of
8552     the function.  Prevent instruction schedules that would extend that.
8553     Further, prevent alloca modifications to the stack pointer from being
8554     combined with prologue modifications.  */
8555  if (TARGET_SEH)
8556    emit_insn (gen_prologue_use (stack_pointer_rtx));
8557}
8558
8559/* Emit code to restore REG using a POP insn.  */
8560
8561static void
8562ix86_emit_restore_reg_using_pop (rtx reg)
8563{
8564  struct machine_function *m = cfun->machine;
8565  rtx_insn *insn = emit_insn (gen_pop (reg));
8566
8567  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
8568  m->fs.sp_offset -= UNITS_PER_WORD;
8569
8570  if (m->fs.cfa_reg == crtl->drap_reg
8571      && REGNO (reg) == REGNO (crtl->drap_reg))
8572    {
8573      /* Previously we'd represented the CFA as an expression
8574	 like *(%ebp - 8).  We've just popped that value from
8575	 the stack, which means we need to reset the CFA to
8576	 the drap register.  This will remain until we restore
8577	 the stack pointer.  */
8578      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8579      RTX_FRAME_RELATED_P (insn) = 1;
8580
8581      /* This means that the DRAP register is valid for addressing too.  */
8582      m->fs.drap_valid = true;
8583      return;
8584    }
8585
8586  if (m->fs.cfa_reg == stack_pointer_rtx)
8587    {
8588      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8589      x = gen_rtx_SET (stack_pointer_rtx, x);
8590      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
8591      RTX_FRAME_RELATED_P (insn) = 1;
8592
8593      m->fs.cfa_offset -= UNITS_PER_WORD;
8594    }
8595
8596  /* When the frame pointer is the CFA, and we pop it, we are
8597     swapping back to the stack pointer as the CFA.  This happens
8598     for stack frames that don't allocate other data, so we assume
8599     the stack pointer is now pointing at the return address, i.e.
8600     the function entry state, which makes the offset be 1 word.  */
8601  if (reg == hard_frame_pointer_rtx)
8602    {
8603      m->fs.fp_valid = false;
8604      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
8605	{
8606	  m->fs.cfa_reg = stack_pointer_rtx;
8607	  m->fs.cfa_offset -= UNITS_PER_WORD;
8608
8609	  add_reg_note (insn, REG_CFA_DEF_CFA,
8610			gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8611				      GEN_INT (m->fs.cfa_offset)));
8612	  RTX_FRAME_RELATED_P (insn) = 1;
8613	}
8614    }
8615}
8616
8617/* Emit code to restore saved registers using POP insns.  */
8618
8619static void
8620ix86_emit_restore_regs_using_pop (void)
8621{
8622  unsigned int regno;
8623
8624  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8625    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
8626      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
8627}
8628
8629/* Emit code and notes for the LEAVE instruction.  If INSN is non-null,
8630   the emit is skipped and only the notes are attached.  */
8631
8632static void
8633ix86_emit_leave (rtx_insn *insn)
8634{
8635  struct machine_function *m = cfun->machine;
8636
8637  if (!insn)
8638    insn = emit_insn (gen_leave (word_mode));
8639
8640  ix86_add_queued_cfa_restore_notes (insn);
8641
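  /* The leave insn is equivalent to "mov %ebp, %esp; pop %ebp" (or the
     64-bit equivalent), so afterwards the stack pointer is valid again and
     points one word above the slot that held the saved frame pointer.  */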
8642  gcc_assert (m->fs.fp_valid);
8643  m->fs.sp_valid = true;
8644  m->fs.sp_realigned = false;
8645  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
8646  m->fs.fp_valid = false;
8647
8648  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
8649    {
8650      m->fs.cfa_reg = stack_pointer_rtx;
8651      m->fs.cfa_offset = m->fs.sp_offset;
8652
8653      add_reg_note (insn, REG_CFA_DEF_CFA,
8654		    plus_constant (Pmode, stack_pointer_rtx,
8655				   m->fs.sp_offset));
8656      RTX_FRAME_RELATED_P (insn) = 1;
8657    }
8658  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
8659			     m->fs.fp_offset);
8660}
8661
8662/* Emit code to restore saved registers using MOV insns.
8663   First register is restored from CFA - CFA_OFFSET.  */
8664static void
8665ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
8666				  bool maybe_eh_return)
8667{
8668  struct machine_function *m = cfun->machine;
8669  unsigned int regno;
8670
8671  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8672    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
8673      {
8674	rtx reg = gen_rtx_REG (word_mode, regno);
8675	rtx mem;
8676	rtx_insn *insn;
8677
8678	mem = choose_baseaddr (cfa_offset, NULL);
8679	mem = gen_frame_mem (word_mode, mem);
8680	insn = emit_move_insn (reg, mem);
8681
8682        if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8683	  {
8684	    /* Previously we'd represented the CFA as an expression
8685	       like *(%ebp - 8).  We've just popped that value from
8686	       the stack, which means we need to reset the CFA to
8687	       the drap register.  This will remain until we restore
8688	       the stack pointer.  */
8689	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8690	    RTX_FRAME_RELATED_P (insn) = 1;
8691
8692	    /* This means that the DRAP register is valid for addressing.  */
8693	    m->fs.drap_valid = true;
8694	  }
8695	else
8696	  ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
8697
8698	cfa_offset -= UNITS_PER_WORD;
8699      }
8700}
8701
8702/* Emit code to restore saved SSE registers using MOV insns.
8703   First register is restored from CFA - CFA_OFFSET.  */
8704static void
8705ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
8706				      bool maybe_eh_return)
8707{
8708  unsigned int regno;
8709
8710  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8711    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
8712      {
8713	rtx reg = gen_rtx_REG (V4SFmode, regno);
8714	rtx mem;
8715	unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
8716
8717	mem = choose_baseaddr (cfa_offset, &align);
8718	mem = gen_rtx_MEM (V4SFmode, mem);
8719
8720	/* The location alignment depends upon the base register.  */
8721	align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
8722	gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
8723	set_mem_align (mem, align);
8724	emit_insn (gen_rtx_SET (reg, mem));
8725
8726	ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
8727
8728	cfa_offset -= GET_MODE_SIZE (V4SFmode);
8729      }
8730}
8731
8732static void
8733ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
8734				  bool use_call, int style)
8735{
8736  struct machine_function *m = cfun->machine;
8737  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
8738			  + m->call_ms2sysv_extra_regs;
8739  rtvec v;
8740  unsigned int elems_needed, align, i, vi = 0;
8741  rtx_insn *insn;
8742  rtx sym, tmp;
8743  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
8744  rtx r10 = NULL_RTX;
8745  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
8746  HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
8747  HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
8748  rtx rsi_frame_load = NULL_RTX;
8749  HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
8750  enum xlogue_stub stub;
8751
8752  gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
8753
8754  /* If using a realigned stack, we should never start with padding.  */
8755  gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
8756
8757  /* Set up RSI as the stub's base pointer.  */
8758  align = GET_MODE_ALIGNMENT (V4SFmode);
8759  tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
8760  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
8761
8762  emit_insn (gen_rtx_SET (rsi, tmp));
8763
8764  /* Get a symbol for the stub.  */
8765  if (frame_pointer_needed)
8766    stub = use_call ? XLOGUE_STUB_RESTORE_HFP
8767		    : XLOGUE_STUB_RESTORE_HFP_TAIL;
8768  else
8769    stub = use_call ? XLOGUE_STUB_RESTORE
8770		    : XLOGUE_STUB_RESTORE_TAIL;
8771  sym = xlogue.get_stub_rtx (stub);
8772
8773  elems_needed = ncregs;
8774  if (use_call)
8775    elems_needed += 1;
8776  else
8777    elems_needed += frame_pointer_needed ? 5 : 3;
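  /* Besides one element per restored register, the PARALLEL gets a slot for
     the stub USE in the call case, or for the return, the stub USE and the
     SP/frame-pointer restore elements built below in the tail-call case.  */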
8778  v = rtvec_alloc (elems_needed);
8779
8780  /* We call the epilogue stub when we need to pop incoming args or we are
8781     doing a sibling call as the tail.  Otherwise, we emit a jmp to the
8782     epilogue stub and that jmp is the tail-call.  */
8783  if (use_call)
8784      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8785  else
8786    {
8787      RTVEC_ELT (v, vi++) = ret_rtx;
8788      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8789      if (frame_pointer_needed)
8790	{
8791	  rtx rbp = gen_rtx_REG (DImode, BP_REG);
8792	  gcc_assert (m->fs.fp_valid);
8793	  gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
8794
8795	  tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8));
8796	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
8797	  RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
8798	  tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
8799	  RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
8800	}
8801      else
8802	{
8803	  /* If no hard frame pointer, we set R10 to the SP restore value.  */
8804	  gcc_assert (!m->fs.fp_valid);
8805	  gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
8806	  gcc_assert (m->fs.sp_valid);
8807
8808	  r10 = gen_rtx_REG (DImode, R10_REG);
8809	  tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
8810	  emit_insn (gen_rtx_SET (r10, tmp));
8811
8812	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
8813	}
8814    }
8815
8816  /* Generate frame load insns and restore notes.  */
8817  for (i = 0; i < ncregs; ++i)
8818    {
8819      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
8820      machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
8821      rtx reg, frame_load;
8822
8823      reg = gen_rtx_REG (mode, r.regno);
8824      frame_load = gen_frame_load (reg, rsi, r.offset);
8825
8826      /* Save RSI frame load insn & note to add last.  */
8827      if (r.regno == SI_REG)
8828	{
8829	  gcc_assert (!rsi_frame_load);
8830	  rsi_frame_load = frame_load;
8831	  rsi_restore_offset = r.offset;
8832	}
8833      else
8834	{
8835	  RTVEC_ELT (v, vi++) = frame_load;
8836	  ix86_add_cfa_restore_note (NULL, reg, r.offset);
8837	}
8838    }
8839
8840  /* Add RSI frame load & restore note at the end.  */
8841  gcc_assert (rsi_frame_load);
8842  gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
8843  RTVEC_ELT (v, vi++) = rsi_frame_load;
8844  ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
8845			     rsi_restore_offset);
8846
8847  /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
8848  if (!use_call && !frame_pointer_needed)
8849    {
8850      gcc_assert (m->fs.sp_valid);
8851      gcc_assert (!m->fs.sp_realigned);
8852
8853      /* At this point, R10 should point to frame.stack_realign_offset.  */
8854      if (m->fs.cfa_reg == stack_pointer_rtx)
8855	m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
8856      m->fs.sp_offset = frame.stack_realign_offset;
8857    }
8858
8859  gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
8860  tmp = gen_rtx_PARALLEL (VOIDmode, v);
8861  if (use_call)
8862      insn = emit_insn (tmp);
8863  else
8864    {
8865      insn = emit_jump_insn (tmp);
8866      JUMP_LABEL (insn) = ret_rtx;
8867
8868      if (frame_pointer_needed)
8869	ix86_emit_leave (insn);
8870      else
8871	{
8872	  /* Need CFA adjust note.  */
8873	  tmp = gen_rtx_SET (stack_pointer_rtx, r10);
8874	  add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
8875	}
8876    }
8877
8878  RTX_FRAME_RELATED_P (insn) = true;
8879  ix86_add_queued_cfa_restore_notes (insn);
8880
8881  /* If we're not doing a tail-call, we need to adjust the stack.  */
8882  if (use_call && m->fs.sp_valid)
8883    {
8884      HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
8885      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8886				GEN_INT (dealloc), style,
8887				m->fs.cfa_reg == stack_pointer_rtx);
8888    }
8889}
8890
8891/* Restore function stack, frame, and registers.  */
8892
8893void
8894ix86_expand_epilogue (int style)
8895{
8896  struct machine_function *m = cfun->machine;
8897  struct machine_frame_state frame_state_save = m->fs;
8898  bool restore_regs_via_mov;
8899  bool using_drap;
8900  bool restore_stub_is_tail = false;
8901
8902  if (ix86_function_naked (current_function_decl))
8903    {
8904      /* The program should not reach this point.  */
8905      emit_insn (gen_ud2 ());
8906      return;
8907    }
8908
8909  ix86_finalize_stack_frame_flags ();
8910  const struct ix86_frame &frame = cfun->machine->frame;
8911
8912  m->fs.sp_realigned = stack_realign_fp;
8913  m->fs.sp_valid = stack_realign_fp
8914		   || !frame_pointer_needed
8915		   || crtl->sp_is_unchanging;
8916  gcc_assert (!m->fs.sp_valid
8917	      || m->fs.sp_offset == frame.stack_pointer_offset);
8918
8919  /* The FP must be valid if the frame pointer is present.  */
8920  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
8921  gcc_assert (!m->fs.fp_valid
8922	      || m->fs.fp_offset == frame.hard_frame_pointer_offset);
8923
8924  /* We must have *some* valid pointer to the stack frame.  */
8925  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
8926
8927  /* The DRAP is never valid at this point.  */
8928  gcc_assert (!m->fs.drap_valid);
8929
8930  /* See the comment about red zone and frame
8931     pointer usage in ix86_expand_prologue.  */
8932  if (frame_pointer_needed && frame.red_zone_size)
8933    emit_insn (gen_memory_blockage ());
8934
8935  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
8936  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
8937
8938  /* Determine the CFA offset of the end of the red-zone.  */
8939  m->fs.red_zone_offset = 0;
8940  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
8941    {
8942      /* The red-zone begins below the return address and, in an exception
8943	 handler, below the error code.  */
8944      m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
8945
8946      /* When the register save area is in the aligned portion of
8947         the stack, determine the maximum runtime displacement that
8948	 matches up with the aligned frame.  */
8949      if (stack_realign_drap)
8950	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
8951				  + UNITS_PER_WORD);
8952    }
8953
8954  HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
8955
8956  /* Special care must be taken for the normal return case of a function
8957     using eh_return: the eax and edx registers are marked as saved, but
8958     not restored along this path.  Adjust the save location to match.  */
8959  if (crtl->calls_eh_return && style != 2)
8960    reg_save_offset -= 2 * UNITS_PER_WORD;
8961
8962  /* EH_RETURN requires the use of moves to function properly.  */
8963  if (crtl->calls_eh_return)
8964    restore_regs_via_mov = true;
8965  /* SEH requires the use of pops to identify the epilogue.  */
8966  else if (TARGET_SEH)
8967    restore_regs_via_mov = false;
8968  /* If we're only restoring one register and sp cannot be used then
8969     use a move instruction to restore the register since it's
8970     less work than reloading sp and popping the register.  */
8971  else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
8972    restore_regs_via_mov = true;
8973  else if (TARGET_EPILOGUE_USING_MOVE
8974	   && cfun->machine->use_fast_prologue_epilogue
8975	   && (frame.nregs > 1
8976	       || m->fs.sp_offset != reg_save_offset))
8977    restore_regs_via_mov = true;
8978  else if (frame_pointer_needed
8979	   && !frame.nregs
8980	   && m->fs.sp_offset != reg_save_offset)
8981    restore_regs_via_mov = true;
8982  else if (frame_pointer_needed
8983	   && TARGET_USE_LEAVE
8984	   && cfun->machine->use_fast_prologue_epilogue
8985	   && frame.nregs == 1)
8986    restore_regs_via_mov = true;
8987  else
8988    restore_regs_via_mov = false;
8989
8990  if (restore_regs_via_mov || frame.nsseregs)
8991    {
8992      /* Ensure that the entire register save area is addressable via
8993	 the stack pointer, if we will restore SSE regs via sp.  */
8994      if (TARGET_64BIT
8995	  && m->fs.sp_offset > 0x7fffffff
8996	  && sp_valid_at (frame.stack_realign_offset + 1)
8997	  && (frame.nsseregs + frame.nregs) != 0)
8998	{
8999	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9000				     GEN_INT (m->fs.sp_offset
9001					      - frame.sse_reg_save_offset),
9002				     style,
9003				     m->fs.cfa_reg == stack_pointer_rtx);
9004	}
9005    }
9006
9007  /* If there are any SSE registers to restore, then we have to do it
9008     via moves, since there's obviously no pop for SSE regs.  */
9009  if (frame.nsseregs)
9010    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
9011					  style == 2);
9012
9013  if (m->call_ms2sysv)
9014    {
9015      int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
9016
9017      /* We cannot use a tail-call for the stub if:
9018	 1. We have to pop incoming args,
9019	 2. We have additional int regs to restore, or
9020	 3. A sibling call will be the tail-call, or
9021	 4. We are emitting an eh_return_internal epilogue.
9022
9023	 TODO: Item 4 has not yet been tested!
9024
9025	 If any of the above are true, we will call the stub rather than
9026	 jump to it.  */
9027      restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
9028      ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
9029    }
9030
9031  /* If using an out-of-line stub that is a tail-call, check the state.  */
9032  if (m->call_ms2sysv && restore_stub_is_tail)
9033    {
9034      /* TODO: paranoid tests. (remove eventually)  */
9035      gcc_assert (m->fs.sp_valid);
9036      gcc_assert (!m->fs.sp_realigned);
9037      gcc_assert (!m->fs.fp_valid);
9038      gcc_assert (!m->fs.realigned);
9039      gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
9040      gcc_assert (!crtl->drap_reg);
9041      gcc_assert (!frame.nregs);
9042    }
9043  else if (restore_regs_via_mov)
9044    {
9045      rtx t;
9046
9047      if (frame.nregs)
9048	ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
9049
9050      /* eh_return epilogues need %ecx added to the stack pointer.  */
9051      if (style == 2)
9052	{
9053	  rtx sa = EH_RETURN_STACKADJ_RTX;
9054	  rtx_insn *insn;
9055
9056	  /* %ecx can't be used for both DRAP register and eh_return.  */
9057	  if (crtl->drap_reg)
9058	    gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
9059
9060	  /* regparm nested functions don't work with eh_return.  */
9061	  gcc_assert (!ix86_static_chain_on_stack);
9062
9063	  if (frame_pointer_needed)
9064	    {
9065	      t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9066	      t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
9067	      emit_insn (gen_rtx_SET (sa, t));
9068
9069	      /* NB: eh_return epilogues must restore the frame pointer
9070		 in word_mode since the upper 32 bits of RBP register
9071		 can have any values.  */
9072	      t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
9073	      rtx frame_reg = gen_rtx_REG (word_mode,
9074					   HARD_FRAME_POINTER_REGNUM);
9075	      insn = emit_move_insn (frame_reg, t);
9076
9077	      /* Note that we use SA as a temporary CFA, as the return
9078		 address is at the proper place relative to it.  We
9079		 pretend this happens at the FP restore insn because
9080		 prior to this insn the FP would be stored at the wrong
9081		 offset relative to SA, and after this insn we have no
9082		 other reasonable register to use for the CFA.  We don't
9083		 bother resetting the CFA to the SP for the duration of
9084		 the return insn, unless the control flow instrumentation
9085		 is done.  In this case the SP is used later and we have
9086		 to reset CFA to SP.  */
9087	      add_reg_note (insn, REG_CFA_DEF_CFA,
9088			    plus_constant (Pmode, sa, UNITS_PER_WORD));
9089	      ix86_add_queued_cfa_restore_notes (insn);
9090	      add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
9091	      RTX_FRAME_RELATED_P (insn) = 1;
9092
9093	      m->fs.cfa_reg = sa;
9094	      m->fs.cfa_offset = UNITS_PER_WORD;
9095	      m->fs.fp_valid = false;
9096
9097	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
9098					 const0_rtx, style,
9099					 flag_cf_protection);
9100	    }
9101	  else
9102	    {
9103	      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
9104	      t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
9105	      insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
9106	      ix86_add_queued_cfa_restore_notes (insn);
9107
9108	      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
9109	      if (m->fs.cfa_offset != UNITS_PER_WORD)
9110		{
9111		  m->fs.cfa_offset = UNITS_PER_WORD;
9112		  add_reg_note (insn, REG_CFA_DEF_CFA,
9113				plus_constant (Pmode, stack_pointer_rtx,
9114					       UNITS_PER_WORD));
9115		  RTX_FRAME_RELATED_P (insn) = 1;
9116		}
9117	    }
9118	  m->fs.sp_offset = UNITS_PER_WORD;
9119	  m->fs.sp_valid = true;
9120	  m->fs.sp_realigned = false;
9121	}
9122    }
9123  else
9124    {
9125      /* SEH requires that the function end with (1) a stack adjustment
9126	 if necessary, (2) a sequence of pops, and (3) a return or
9127	 jump instruction.  Prevent insns from the function body from
9128	 being scheduled into this sequence.  */
9129      if (TARGET_SEH)
9130	{
9131	  /* Prevent a catch region from being adjacent to the standard
9132	     epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
9133	     nor several other flags that would be interesting to test are
9134	     set up yet.  */
9135	  if (flag_non_call_exceptions)
9136	    emit_insn (gen_nops (const1_rtx));
9137	  else
9138	    emit_insn (gen_blockage ());
9139	}
9140
9141      /* First step is to deallocate the stack frame so that we can
9142	 pop the registers.  If the stack pointer was realigned, it needs
9143	 to be restored now.  Also do it on SEH target for very large
9144	 frame as the emitted instructions aren't allowed by the ABI
9145	 in epilogues.  */
9146      if (!m->fs.sp_valid || m->fs.sp_realigned
9147 	  || (TARGET_SEH
9148	      && (m->fs.sp_offset - reg_save_offset
9149		  >= SEH_MAX_FRAME_SIZE)))
9150	{
9151	  pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
9152				     GEN_INT (m->fs.fp_offset
9153					      - reg_save_offset),
9154				     style, false);
9155	}
9156      else if (m->fs.sp_offset != reg_save_offset)
9157	{
9158	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9159				     GEN_INT (m->fs.sp_offset
9160					      - reg_save_offset),
9161				     style,
9162				     m->fs.cfa_reg == stack_pointer_rtx);
9163	}
9164
9165      ix86_emit_restore_regs_using_pop ();
9166    }
9167
9168  /* If we used a frame pointer and haven't already got rid of it,
9169     then do so now.  */
9170  if (m->fs.fp_valid)
9171    {
9172      /* If the stack pointer is valid and pointing at the frame
9173	 pointer store address, then we only need a pop.  */
9174      if (sp_valid_at (frame.hfp_save_offset)
9175	  && m->fs.sp_offset == frame.hfp_save_offset)
9176	ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9177      /* Leave results in shorter dependency chains on CPUs that are
9178	 able to grok it fast.  */
9179      else if (TARGET_USE_LEAVE
9180	       || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
9181	       || !cfun->machine->use_fast_prologue_epilogue)
9182	ix86_emit_leave (NULL);
9183      else
9184        {
9185	  pro_epilogue_adjust_stack (stack_pointer_rtx,
9186				     hard_frame_pointer_rtx,
9187				     const0_rtx, style, !using_drap);
9188	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9189        }
9190    }
9191
9192  if (using_drap)
9193    {
9194      int param_ptr_offset = UNITS_PER_WORD;
9195      rtx_insn *insn;
9196
9197      gcc_assert (stack_realign_drap);
9198
9199      if (ix86_static_chain_on_stack)
9200	param_ptr_offset += UNITS_PER_WORD;
9201      if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9202	param_ptr_offset += UNITS_PER_WORD;
9203
9204      insn = emit_insn (gen_rtx_SET
9205			(stack_pointer_rtx,
9206			 gen_rtx_PLUS (Pmode,
9207				       crtl->drap_reg,
9208				       GEN_INT (-param_ptr_offset))));
9209      m->fs.cfa_reg = stack_pointer_rtx;
9210      m->fs.cfa_offset = param_ptr_offset;
9211      m->fs.sp_offset = param_ptr_offset;
9212      m->fs.realigned = false;
9213
9214      add_reg_note (insn, REG_CFA_DEF_CFA,
9215		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9216				  GEN_INT (param_ptr_offset)));
9217      RTX_FRAME_RELATED_P (insn) = 1;
9218
9219      if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9220	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
9221    }
9222
9223  /* At this point the stack pointer must be valid, and we must have
9224     restored all of the registers.  We may not have deallocated the
9225     entire stack frame.  We've delayed this until now because it may
9226     be possible to merge the local stack deallocation with the
9227     deallocation forced by ix86_static_chain_on_stack.   */
9228  gcc_assert (m->fs.sp_valid);
9229  gcc_assert (!m->fs.sp_realigned);
9230  gcc_assert (!m->fs.fp_valid);
9231  gcc_assert (!m->fs.realigned);
9232  if (m->fs.sp_offset != UNITS_PER_WORD)
9233    {
9234      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9235				 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
9236				 style, true);
9237    }
9238  else
9239    ix86_add_queued_cfa_restore_notes (get_last_insn ());
9240
9241  /* Sibcall epilogues don't want a return instruction.  */
9242  if (style == 0)
9243    {
9244      m->fs = frame_state_save;
9245      return;
9246    }
9247
9248  if (cfun->machine->func_type != TYPE_NORMAL)
9249    emit_jump_insn (gen_interrupt_return ());
9250  else if (crtl->args.pops_args && crtl->args.size)
9251    {
9252      rtx popc = GEN_INT (crtl->args.pops_args);
9253
9254      /* i386 can only pop 64K bytes.  If asked to pop more, pop return
9255	 address, do explicit add, and jump indirectly to the caller.  */
9256
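      /* Roughly:  pop %ecx; add $N, %esp; jmp *%ecx  -- where N is the
	 (>= 64K) amount of incoming argument space to pop.  */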
9257      if (crtl->args.pops_args >= 65536)
9258	{
9259	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
9260	  rtx_insn *insn;
9261
9262	  /* There is no "pascal" calling convention in any 64bit ABI.  */
9263	  gcc_assert (!TARGET_64BIT);
9264
9265	  insn = emit_insn (gen_pop (ecx));
9266	  m->fs.cfa_offset -= UNITS_PER_WORD;
9267	  m->fs.sp_offset -= UNITS_PER_WORD;
9268
9269	  rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9270	  x = gen_rtx_SET (stack_pointer_rtx, x);
9271	  add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9272	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9273	  RTX_FRAME_RELATED_P (insn) = 1;
9274
9275	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9276				     popc, -1, true);
9277	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9278	}
9279      else
9280	emit_jump_insn (gen_simple_return_pop_internal (popc));
9281    }
9282  else if (!m->call_ms2sysv || !restore_stub_is_tail)
9283    {
9284      /* In case of return from EH a simple return cannot be used
9285	 as a return address will be compared with a shadow stack
9286	 return address.  Use indirect jump instead.  */
9287      if (style == 2 && flag_cf_protection)
9288	{
9289	  /* Register used in indirect jump must be in word_mode.  But
9290	     Pmode may not be the same as word_mode for x32.  */
9291	  rtx ecx = gen_rtx_REG (word_mode, CX_REG);
9292	  rtx_insn *insn;
9293
9294	  insn = emit_insn (gen_pop (ecx));
9295	  m->fs.cfa_offset -= UNITS_PER_WORD;
9296	  m->fs.sp_offset -= UNITS_PER_WORD;
9297
9298	  rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9299	  x = gen_rtx_SET (stack_pointer_rtx, x);
9300	  add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9301	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9302	  RTX_FRAME_RELATED_P (insn) = 1;
9303
9304	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9305	}
9306      else
9307	emit_jump_insn (gen_simple_return_internal ());
9308    }
9309
9310  /* Restore the state back to the state from the prologue,
9311     so that it's correct for the next epilogue.  */
9312  m->fs = frame_state_save;
9313}
9314
9315/* Reset the PIC register from the function's potential modifications.  */
9316
9317static void
9318ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
9319{
9320  if (pic_offset_table_rtx
9321      && !ix86_use_pseudo_pic_reg ())
9322    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9323
9324  if (TARGET_MACHO)
9325    {
9326      rtx_insn *insn = get_last_insn ();
9327      rtx_insn *deleted_debug_label = NULL;
9328
9329      /* Mach-O doesn't support labels at the end of objects, so if
9330         it looks like we might want one, take special action.
9331        First, collect any sequence of deleted debug labels.  */
9332      while (insn
9333	     && NOTE_P (insn)
9334	     && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9335	{
9336	  /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
9337	     notes only, instead set their CODE_LABEL_NUMBER to -1,
9338	     otherwise there would be code generation differences
9339	     in between -g and -g0.  */
9340	  if (NOTE_P (insn) && NOTE_KIND (insn)
9341	      == NOTE_INSN_DELETED_DEBUG_LABEL)
9342	    deleted_debug_label = insn;
9343	  insn = PREV_INSN (insn);
9344	}
9345
9346      /* If we have:
9347	 label:
9348	    barrier
9349	  then this needs to be detected, so skip past the barrier.  */
9350
9351      if (insn && BARRIER_P (insn))
9352	insn = PREV_INSN (insn);
9353
9354      /* Up to now we've only seen notes or barriers.  */
9355      if (insn)
9356	{
9357	  if (LABEL_P (insn)
9358	      || (NOTE_P (insn)
9359		  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
9360	    /* Trailing label.  */
9361	    fputs ("\tnop\n", file);
9362	  else if (cfun && ! cfun->is_thunk)
9363	    {
9364	      /* See if we have a completely empty function body, skipping
9365	         the special case of the picbase thunk emitted as asm.  */
9366	      while (insn && ! INSN_P (insn))
9367		insn = PREV_INSN (insn);
9368	      /* If we don't find any insns, we've got an empty function body;
9369		 I.e. completely empty - without a return or branch.  This is
9370		 taken as the case where a function body has been removed
9371		 because it contains an inline __builtin_unreachable().  GCC
9372		 declares that reaching __builtin_unreachable() means UB so
9373		 we're not obliged to do anything special; however, we want
9374		 non-zero-sized function bodies.  To meet this, and help the
9375		 user out, let's trap the case.  */
9376	      if (insn == NULL)
9377		fputs ("\tud2\n", file);
9378	    }
9379	}
9380      else if (deleted_debug_label)
9381	for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
9382	  if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
9383	    CODE_LABEL_NUMBER (insn) = -1;
9384    }
9385}
9386
9387/* Return a scratch register to use in the split stack prologue.  The
9388   split stack prologue is used for -fsplit-stack.  It consists of the
9389   first instructions in the function, even before the regular prologue.
9390   The scratch register can be any caller-saved register which is not
9391   used for parameters or for the static chain.  */
9392
9393static unsigned int
9394split_stack_prologue_scratch_regno (void)
9395{
9396  if (TARGET_64BIT)
9397    return R11_REG;
9398  else
9399    {
9400      bool is_fastcall, is_thiscall;
9401      int regparm;
9402
9403      is_fastcall = (lookup_attribute ("fastcall",
9404				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9405		     != NULL);
9406      is_thiscall = (lookup_attribute ("thiscall",
9407				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9408		     != NULL);
9409      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
9410
9411      if (is_fastcall)
9412	{
9413	  if (DECL_STATIC_CHAIN (cfun->decl))
9414	    {
9415	      sorry ("%<-fsplit-stack%> does not support fastcall with "
9416		     "nested function");
9417	      return INVALID_REGNUM;
9418	    }
9419	  return AX_REG;
9420	}
9421      else if (is_thiscall)
9422        {
9423	  if (!DECL_STATIC_CHAIN (cfun->decl))
9424	    return DX_REG;
9425	  return AX_REG;
9426	}
9427      else if (regparm < 3)
9428	{
9429	  if (!DECL_STATIC_CHAIN (cfun->decl))
9430	    return CX_REG;
9431	  else
9432	    {
9433	      if (regparm >= 2)
9434		{
9435		  sorry ("%<-fsplit-stack%> does not support 2 register "
9436			 "parameters for a nested function");
9437		  return INVALID_REGNUM;
9438		}
9439	      return DX_REG;
9440	    }
9441	}
9442      else
9443	{
9444	  /* FIXME: We could make this work by pushing a register
9445	     around the addition and comparison.  */
9446	  sorry ("%<-fsplit-stack%> does not support 3 register parameters");
9447	  return INVALID_REGNUM;
9448	}
9449    }
9450}
9451
9452/* A SYMBOL_REF for the function which allocates new stack space for
9453   -fsplit-stack.  */
9454
9455static GTY(()) rtx split_stack_fn;
9456
9457/* A SYMBOL_REF for the variant of __morestack used when compiling with
9458   the large code model.  */
9459
9460static GTY(()) rtx split_stack_fn_large;
9461
9462/* Return location of the stack guard value in the TLS block.  */
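/* (The guard sits at a fixed, target-defined offset from the thread pointer,
   TARGET_THREAD_SPLIT_STACK_OFFSET, and is addressed through the TLS segment
   register -- %fs in 64-bit mode, %gs in 32-bit mode on Linux targets.)  */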
9463
9464rtx
9465ix86_split_stack_guard (void)
9466{
9467  int offset;
9468  addr_space_t as = DEFAULT_TLS_SEG_REG;
9469  rtx r;
9470
9471  gcc_assert (flag_split_stack);
9472
9473#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
9474  offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
9475#else
9476  gcc_unreachable ();
9477#endif
9478
9479  r = GEN_INT (offset);
9480  r = gen_const_mem (Pmode, r);
9481  set_mem_addr_space (r, as);
9482
9483  return r;
9484}
9485
9486/* Handle -fsplit-stack.  These are the first instructions in the
9487   function, even before the regular prologue.  */
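/* A rough sketch of what this expands to on x86-64 with the small code model
   (register choices and the guard slot below are illustrative only):

	cmp	%fs:<guard>, %rsp	# or a scratch reg = %rsp - frame size
	jae	.Lenough
	mov	$<frame size>, %r10
	mov	$<argument size>, %r11
	call	__morestack
	ret				# consumed by __morestack, see morestack.S
   .Lenough:
	...regular prologue...

   The 32-bit variant pushes the two arguments instead of using %r10/%r11.  */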
9488
9489void
9490ix86_expand_split_stack_prologue (void)
9491{
9492  HOST_WIDE_INT allocate;
9493  unsigned HOST_WIDE_INT args_size;
9494  rtx_code_label *label;
9495  rtx limit, current, allocate_rtx, call_fusage;
9496  rtx_insn *call_insn;
9497  rtx scratch_reg = NULL_RTX;
9498  rtx_code_label *varargs_label = NULL;
9499  rtx fn;
9500
9501  gcc_assert (flag_split_stack && reload_completed);
9502
9503  ix86_finalize_stack_frame_flags ();
9504  struct ix86_frame &frame = cfun->machine->frame;
9505  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
9506
9507  /* This is the label we will branch to if we have enough stack
9508     space.  We expect the basic block reordering pass to reverse this
9509     branch if optimizing, so that we branch in the unlikely case.  */
9510  label = gen_label_rtx ();
9511
9512  /* We need to compare the stack pointer minus the frame size with
9513     the stack boundary in the TCB.  The stack boundary always gives
9514     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
9515     can compare directly.  Otherwise we need to do an addition.  */
9516
9517  limit = ix86_split_stack_guard ();
9518
9519  if (allocate < SPLIT_STACK_AVAILABLE)
9520    current = stack_pointer_rtx;
9521  else
9522    {
9523      unsigned int scratch_regno;
9524      rtx offset;
9525
9526      /* We need a scratch register to hold the stack pointer minus
9527	 the required frame size.  Since this is the very start of the
9528	 function, the scratch register can be any caller-saved
9529	 register which is not used for parameters.  */
9530      offset = GEN_INT (- allocate);
9531      scratch_regno = split_stack_prologue_scratch_regno ();
9532      if (scratch_regno == INVALID_REGNUM)
9533	return;
9534      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
9535      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
9536	{
9537	  /* We don't use gen_add in this case because it will
9538	     want to split to lea, but when not optimizing the insn
9539	     will not be split after this point.  */
9540	  emit_insn (gen_rtx_SET (scratch_reg,
9541				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9542						offset)));
9543	}
9544      else
9545	{
9546	  emit_move_insn (scratch_reg, offset);
9547	  emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
9548	}
9549      current = scratch_reg;
9550    }
9551
9552  ix86_expand_branch (GEU, current, limit, label);
9553  rtx_insn *jump_insn = get_last_insn ();
9554  JUMP_LABEL (jump_insn) = label;
9555
9556  /* Mark the jump as very likely to be taken.  */
9557  add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
9558
9559  if (split_stack_fn == NULL_RTX)
9560    {
9561      split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
9562      SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
9563    }
9564  fn = split_stack_fn;
9565
9566  /* Get more stack space.  We pass in the desired stack space and the
9567     size of the arguments to copy to the new stack.  In 32-bit mode
9568     we push the parameters; __morestack will return on a new stack
9569     anyhow.  In 64-bit mode we pass the parameters in r10 and
9570     r11.  */
9571  allocate_rtx = GEN_INT (allocate);
9572  args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
9573  call_fusage = NULL_RTX;
9574  rtx pop = NULL_RTX;
9575  if (TARGET_64BIT)
9576    {
9577      rtx reg10, reg11;
9578
9579      reg10 = gen_rtx_REG (Pmode, R10_REG);
9580      reg11 = gen_rtx_REG (Pmode, R11_REG);
9581
9582      /* If this function uses a static chain, it will be in %r10.
9583	 Preserve it across the call to __morestack.  */
9584      if (DECL_STATIC_CHAIN (cfun->decl))
9585	{
9586	  rtx rax;
9587
9588	  rax = gen_rtx_REG (word_mode, AX_REG);
9589	  emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
9590	  use_reg (&call_fusage, rax);
9591	}
9592
9593      if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
9594          && !TARGET_PECOFF)
9595	{
9596	  HOST_WIDE_INT argval;
9597
9598	  gcc_assert (Pmode == DImode);
9599	  /* When using the large model we need to load the address
9600	     into a register, and we've run out of registers.  So we
9601	     switch to a different calling convention, and we call a
9602	     different function: __morestack_large.  We pass the
9603	     argument size in the upper 32 bits of r10 and pass the
9604	     frame size in the lower 32 bits.  */
9605	  gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
9606	  gcc_assert ((args_size & 0xffffffff) == args_size);
9607
9608	  if (split_stack_fn_large == NULL_RTX)
9609	    {
9610	      split_stack_fn_large
9611		= gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
9612	      SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
9613	    }
9614	  if (ix86_cmodel == CM_LARGE_PIC)
9615	    {
9616	      rtx_code_label *label;
9617	      rtx x;
9618
9619	      label = gen_label_rtx ();
9620	      emit_label (label);
9621	      LABEL_PRESERVE_P (label) = 1;
9622	      emit_insn (gen_set_rip_rex64 (reg10, label));
9623	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
9624	      emit_insn (gen_add2_insn (reg10, reg11));
9625	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
9626				  UNSPEC_GOT);
9627	      x = gen_rtx_CONST (Pmode, x);
9628	      emit_move_insn (reg11, x);
9629	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
9630	      x = gen_const_mem (Pmode, x);
9631	      emit_move_insn (reg11, x);
9632	    }
9633	  else
9634	    emit_move_insn (reg11, split_stack_fn_large);
9635
9636	  fn = reg11;
9637
9638	  argval = ((args_size << 16) << 16) + allocate;
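	  /* I.e. argval = (args_size << 32) + allocate; presumably split
	     into two 16-bit shifts so the expression stays well defined
	     even for a 32-bit wide type.  */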
9639	  emit_move_insn (reg10, GEN_INT (argval));
9640	}
9641      else
9642	{
9643	  emit_move_insn (reg10, allocate_rtx);
9644	  emit_move_insn (reg11, GEN_INT (args_size));
9645	  use_reg (&call_fusage, reg11);
9646	}
9647
9648      use_reg (&call_fusage, reg10);
9649    }
9650  else
9651    {
9652      rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
9653      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
9654      insn = emit_insn (gen_push (allocate_rtx));
9655      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
9656      pop = GEN_INT (2 * UNITS_PER_WORD);
9657    }
9658  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
9659				GEN_INT (UNITS_PER_WORD), constm1_rtx,
9660				pop, false);
9661  add_function_usage_to (call_insn, call_fusage);
9662  if (!TARGET_64BIT)
9663    add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
9664  /* Indicate that this function can't jump to non-local gotos.  */
9665  make_reg_eh_region_note_nothrow_nononlocal (call_insn);
9666
9667  /* In order to make call/return prediction work right, we now need
9668     to execute a return instruction.  See
9669     libgcc/config/i386/morestack.S for the details on how this works.
9670
9671     For flow purposes gcc must not see this as a return
9672     instruction--we need control flow to continue at the subsequent
9673     label.  Therefore, we use an unspec.  */
9674  gcc_assert (crtl->args.pops_args < 65536);
9675  rtx_insn *ret_insn
9676    = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
9677
9678  if ((flag_cf_protection & CF_BRANCH))
9679    {
9680      /* Insert ENDBR since __morestack will jump back here via indirect
9681	 call.  */
9682      rtx cet_eb = gen_nop_endbr ();
9683      emit_insn_after (cet_eb, ret_insn);
9684    }
9685
9686  /* If we are in 64-bit mode and this function uses a static chain,
9687     we saved %r10 in %rax before calling __morestack.  */
9688  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
9689    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
9690		    gen_rtx_REG (word_mode, AX_REG));
9691
9692  /* If this function calls va_start, we need to store a pointer to
9693     the arguments on the old stack, because they may not have been
9694     all copied to the new stack.  At this point the old stack can be
9695     found at the frame pointer value used by __morestack, because
9696     __morestack has set that up before calling back to us.  Here we
9697     store that pointer in a scratch register, and in
9698     ix86_expand_prologue we store the scratch register in a stack
9699     slot.  */
9700  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9701    {
9702      unsigned int scratch_regno;
9703      rtx frame_reg;
9704      int words;
9705
9706      scratch_regno = split_stack_prologue_scratch_regno ();
9707      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
9708      frame_reg = gen_rtx_REG (Pmode, BP_REG);
9709
9710      /* 64-bit:
9711	 fp -> old fp value
9712	       return address within this function
9713	       return address of caller of this function
9714	       stack arguments
9715	 So we add three words to get to the stack arguments.
9716
9717	 32-bit:
9718	 fp -> old fp value
9719	       return address within this function
9720               first argument to __morestack
9721               second argument to __morestack
9722               return address of caller of this function
9723               stack arguments
9724         So we add five words to get to the stack arguments.
9725      */
9726      words = TARGET_64BIT ? 3 : 5;
9727      emit_insn (gen_rtx_SET (scratch_reg,
9728			      gen_rtx_PLUS (Pmode, frame_reg,
9729					    GEN_INT (words * UNITS_PER_WORD))));
9730
9731      varargs_label = gen_label_rtx ();
9732      emit_jump_insn (gen_jump (varargs_label));
9733      JUMP_LABEL (get_last_insn ()) = varargs_label;
9734
9735      emit_barrier ();
9736    }
9737
9738  emit_label (label);
9739  LABEL_NUSES (label) = 1;
9740
9741  /* If this function calls va_start, we now have to set the scratch
9742     register for the case where we do not call __morestack.  In this
9743     case we need to set it based on the stack pointer.  */
9744  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9745    {
9746      emit_insn (gen_rtx_SET (scratch_reg,
9747			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9748					    GEN_INT (UNITS_PER_WORD))));
9749
9750      emit_label (varargs_label);
9751      LABEL_NUSES (varargs_label) = 1;
9752    }
9753}
9754
9755/* We may have to tell the dataflow pass that the split stack prologue
9756   is initializing a scratch register.  */
9757
9758static void
9759ix86_live_on_entry (bitmap regs)
9760{
9761  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9762    {
9763      gcc_assert (flag_split_stack);
9764      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
9765    }
9766}
9767
9768/* Extract the parts of an RTL expression that is a valid memory address
9769   for an instruction.  Return 0 if the structure of the address is
9770   grossly off.  Return -1 if the address contains ASHIFT, so it is not
9771   strictly valid, but still used to compute the length of an lea insn.  */
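/* A minimal usage sketch (ADDR stands for a hypothetical address rtx):

     struct ix86_address parts;
     if (ix86_decompose_address (addr, &parts))
       ... use parts.base, parts.index, parts.scale, parts.disp, parts.seg ...

   For example, (plus (plus (mult (reg B) (const_int 4)) (reg A))
   (const_int 16)) decomposes into base = A, index = B, scale = 4 and
   disp = 16, i.e. the x86 "base + index*scale + disp" addressing form.  */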
9772
9773int
9774ix86_decompose_address (rtx addr, struct ix86_address *out)
9775{
9776  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
9777  rtx base_reg, index_reg;
9778  HOST_WIDE_INT scale = 1;
9779  rtx scale_rtx = NULL_RTX;
9780  rtx tmp;
9781  int retval = 1;
9782  addr_space_t seg = ADDR_SPACE_GENERIC;
9783
9784  /* Allow zero-extended SImode addresses,
9785     they will be emitted with addr32 prefix.  */
9786  if (TARGET_64BIT && GET_MODE (addr) == DImode)
9787    {
9788      if (GET_CODE (addr) == ZERO_EXTEND
9789	  && GET_MODE (XEXP (addr, 0)) == SImode)
9790	{
9791	  addr = XEXP (addr, 0);
9792	  if (CONST_INT_P (addr))
9793	    return 0;
9794	}
9795      else if (GET_CODE (addr) == AND
9796	       && const_32bit_mask (XEXP (addr, 1), DImode))
9797	{
9798	  addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
9799	  if (addr == NULL_RTX)
9800	    return 0;
9801
9802	  if (CONST_INT_P (addr))
9803	    return 0;
9804	}
9805    }
9806
9807  /* Allow SImode subregs of DImode addresses,
9808     they will be emitted with addr32 prefix.  */
9809  if (TARGET_64BIT && GET_MODE (addr) == SImode)
9810    {
9811      if (SUBREG_P (addr)
9812	  && GET_MODE (SUBREG_REG (addr)) == DImode)
9813	{
9814	  addr = SUBREG_REG (addr);
9815	  if (CONST_INT_P (addr))
9816	    return 0;
9817	}
9818    }
9819
9820  if (REG_P (addr))
9821    base = addr;
9822  else if (SUBREG_P (addr))
9823    {
9824      if (REG_P (SUBREG_REG (addr)))
9825	base = addr;
9826      else
9827	return 0;
9828    }
9829  else if (GET_CODE (addr) == PLUS)
9830    {
9831      rtx addends[4], op;
9832      int n = 0, i;
9833
9834      op = addr;
9835      do
9836	{
9837	  if (n >= 4)
9838	    return 0;
9839	  addends[n++] = XEXP (op, 1);
9840	  op = XEXP (op, 0);
9841	}
9842      while (GET_CODE (op) == PLUS);
9843      if (n >= 4)
9844	return 0;
9845      addends[n] = op;
9846
9847      for (i = n; i >= 0; --i)
9848	{
9849	  op = addends[i];
9850	  switch (GET_CODE (op))
9851	    {
9852	    case MULT:
9853	      if (index)
9854		return 0;
9855	      index = XEXP (op, 0);
9856	      scale_rtx = XEXP (op, 1);
9857	      break;
9858
9859	    case ASHIFT:
9860	      if (index)
9861		return 0;
9862	      index = XEXP (op, 0);
9863	      tmp = XEXP (op, 1);
9864	      if (!CONST_INT_P (tmp))
9865		return 0;
9866	      scale = INTVAL (tmp);
9867	      if ((unsigned HOST_WIDE_INT) scale > 3)
9868		return 0;
9869	      scale = 1 << scale;
9870	      break;
9871
9872	    case ZERO_EXTEND:
9873	      op = XEXP (op, 0);
9874	      if (GET_CODE (op) != UNSPEC)
9875		return 0;
9876	      /* FALLTHRU */
9877
9878	    case UNSPEC:
9879	      if (XINT (op, 1) == UNSPEC_TP
9880	          && TARGET_TLS_DIRECT_SEG_REFS
9881	          && seg == ADDR_SPACE_GENERIC)
9882		seg = DEFAULT_TLS_SEG_REG;
9883	      else
9884		return 0;
9885	      break;
9886
9887	    case SUBREG:
9888	      if (!REG_P (SUBREG_REG (op)))
9889		return 0;
9890	      /* FALLTHRU */
9891
9892	    case REG:
9893	      if (!base)
9894		base = op;
9895	      else if (!index)
9896		index = op;
9897	      else
9898		return 0;
9899	      break;
9900
9901	    case CONST:
9902	    case CONST_INT:
9903	    case SYMBOL_REF:
9904	    case LABEL_REF:
9905	      if (disp)
9906		return 0;
9907	      disp = op;
9908	      break;
9909
9910	    default:
9911	      return 0;
9912	    }
9913	}
9914    }
9915  else if (GET_CODE (addr) == MULT)
9916    {
9917      index = XEXP (addr, 0);		/* index*scale */
9918      scale_rtx = XEXP (addr, 1);
9919    }
9920  else if (GET_CODE (addr) == ASHIFT)
9921    {
9922      /* We're called for lea too, which implements ashift on occasion.  */
9923      index = XEXP (addr, 0);
9924      tmp = XEXP (addr, 1);
9925      if (!CONST_INT_P (tmp))
9926	return 0;
9927      scale = INTVAL (tmp);
9928      if ((unsigned HOST_WIDE_INT) scale > 3)
9929	return 0;
9930      scale = 1 << scale;
9931      retval = -1;
9932    }
9933  else
9934    disp = addr;			/* displacement */
9935
9936  if (index)
9937    {
9938      if (REG_P (index))
9939	;
9940      else if (SUBREG_P (index)
9941	       && REG_P (SUBREG_REG (index)))
9942	;
9943      else
9944	return 0;
9945    }
9946
9947  /* Extract the integral value of scale.  */
9948  if (scale_rtx)
9949    {
9950      if (!CONST_INT_P (scale_rtx))
9951	return 0;
9952      scale = INTVAL (scale_rtx);
9953    }
9954
9955  base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
9956  index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
9957
9958  /* Avoid useless 0 displacement.  */
9959  if (disp == const0_rtx && (base || index))
9960    disp = NULL_RTX;
9961
9962  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
9963  if (base_reg && index_reg && scale == 1
9964      && (REGNO (index_reg) == ARG_POINTER_REGNUM
9965	  || REGNO (index_reg) == FRAME_POINTER_REGNUM
9966	  || REGNO (index_reg) == SP_REG))
9967    {
9968      std::swap (base, index);
9969      std::swap (base_reg, index_reg);
9970    }
9971
9972  /* Special case: %ebp cannot be encoded as a base without a displacement.
9973     Similarly %r13.  */
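  /* (In the ModR/M and SIB encodings, mod == 00 with a base field of 101b
     means "disp32, no base", so a zero displacement byte must be emitted
     explicitly for those registers.)  */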
9974  if (!disp && base_reg
9975      && (REGNO (base_reg) == ARG_POINTER_REGNUM
9976	  || REGNO (base_reg) == FRAME_POINTER_REGNUM
9977	  || REGNO (base_reg) == BP_REG
9978	  || REGNO (base_reg) == R13_REG))
9979    disp = const0_rtx;
9980
9981  /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
9982     Avoid this by transforming to [%esi+0].
9983     Reload calls address legitimization without cfun defined, so we need
9984     to test cfun for being non-NULL. */
9985  if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
9986      && base_reg && !index_reg && !disp
9987      && REGNO (base_reg) == SI_REG)
9988    disp = const0_rtx;
9989
9990  /* Special case: encode reg+reg instead of reg*2.  */
9991  if (!base && index && scale == 2)
9992    base = index, base_reg = index_reg, scale = 1;
9993
9994  /* Special case: scaling cannot be encoded without base or displacement.  */
9995  if (!base && !disp && index && scale != 1)
9996    disp = const0_rtx;
9997
9998  out->base = base;
9999  out->index = index;
10000  out->disp = disp;
10001  out->scale = scale;
10002  out->seg = seg;
10003
10004  return retval;
10005}
10006
10007/* Return cost of the memory address x.
10008   For i386, it is better to use a complex address than let gcc copy
10009   the address into a reg and make a new pseudo.  But not if the address
10010   requires two regs - that would mean more pseudos with longer
10011   lifetimes.  */
10012static int
10013ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
10014{
10015  struct ix86_address parts;
10016  int cost = 1;
10017  int ok = ix86_decompose_address (x, &parts);
10018
10019  gcc_assert (ok);
10020
10021  if (parts.base && SUBREG_P (parts.base))
10022    parts.base = SUBREG_REG (parts.base);
10023  if (parts.index && SUBREG_P (parts.index))
10024    parts.index = SUBREG_REG (parts.index);
10025
10026  /* Attempt to minimize number of registers in the address by increasing
10027     address cost for each used register.  We don't increase address cost
10028     for "pic_offset_table_rtx".  When a memory operand that uses it is
10029     not invariant itself, that most likely means the base or index is
10030     not invariant.  Therefore only "pic_offset_table_rtx" could be
10031     hoisted out, which is not profitable for x86.  */
10032  if (parts.base
10033      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10034      && (current_pass->type == GIMPLE_PASS
10035	  || !pic_offset_table_rtx
10036	  || !REG_P (parts.base)
10037	  || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
10038    cost++;
10039
10040  if (parts.index
10041      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10042      && (current_pass->type == GIMPLE_PASS
10043	  || !pic_offset_table_rtx
10044	  || !REG_P (parts.index)
10045	  || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
10046    cost++;
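  /* Thus, for example, an address like [pseudo1 + pseudo2*4] ends up with
     cost 3 while [pseudo1 + 16] costs 2, nudging callers toward addresses
     that tie up fewer registers.  */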
10047
10048  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10049     since its predecode logic can't detect the length of instructions
10050     and it degenerates to vector decoding.  Increase cost of such
10051     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
10052     to split such addresses or even refuse such addresses at all.
10053
10054     Following addressing modes are affected:
10055      [base+scale*index]
10056      [scale*index+disp]
10057      [base+index]
10058
10059     The first and last case may be avoidable by explicitly coding the
10060     zero in the memory address, but I don't have an AMD-K6 machine handy
10061     to check this theory.  */
10062
10063  if (TARGET_K6
10064      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10065	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10066	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10067    cost += 10;
10068
10069  return cost;
10070}
10071
10072/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10073   this is used to form addresses to local data when -fPIC is in
10074   use.  */
10075
10076static bool
10077darwin_local_data_pic (rtx disp)
10078{
10079  return (GET_CODE (disp) == UNSPEC
10080	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10081}
10082
/* True if operand X should be loaded from the GOT.  */
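
/* For example, with -fno-plt (or the "noplt" attribute) and without -fPIC,
   the address of an external function is loaded from its GOT slot so that
   the call does not go through the PLT.  */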
10084
10085bool
10086ix86_force_load_from_GOT_p (rtx x)
10087{
10088  return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
10089	  && !TARGET_PECOFF && !TARGET_MACHO
10090	  && !flag_pic
10091	  && ix86_cmodel != CM_LARGE
10092	  && GET_CODE (x) == SYMBOL_REF
10093	  && SYMBOL_REF_FUNCTION_P (x)
10094	  && (!flag_plt
10095	      || (SYMBOL_REF_DECL (x)
10096		  && lookup_attribute ("noplt",
10097				       DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))
10098	  && !SYMBOL_REF_LOCAL_P (x));
10099}
10100
10101/* Determine if a given RTX is a valid constant.  We already know this
10102   satisfies CONSTANT_P.  */
10103
10104static bool
10105ix86_legitimate_constant_p (machine_mode mode, rtx x)
10106{
10107  switch (GET_CODE (x))
10108    {
10109    case CONST:
10110      x = XEXP (x, 0);
10111
10112      if (GET_CODE (x) == PLUS)
10113	{
10114	  if (!CONST_INT_P (XEXP (x, 1)))
10115	    return false;
10116	  x = XEXP (x, 0);
10117	}
10118
10119      if (TARGET_MACHO && darwin_local_data_pic (x))
10120	return true;
10121
10122      /* Only some unspecs are valid as "constants".  */
10123      if (GET_CODE (x) == UNSPEC)
10124	switch (XINT (x, 1))
10125	  {
10126	  case UNSPEC_GOT:
10127	  case UNSPEC_GOTOFF:
10128	  case UNSPEC_PLTOFF:
10129	    return TARGET_64BIT;
10130	  case UNSPEC_TPOFF:
10131	  case UNSPEC_NTPOFF:
10132	    x = XVECEXP (x, 0, 0);
10133	    return (GET_CODE (x) == SYMBOL_REF
10134		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10135	  case UNSPEC_DTPOFF:
10136	    x = XVECEXP (x, 0, 0);
10137	    return (GET_CODE (x) == SYMBOL_REF
10138		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10139	  default:
10140	    return false;
10141	  }
10142
10143      /* We must have drilled down to a symbol.  */
10144      if (GET_CODE (x) == LABEL_REF)
10145	return true;
10146      if (GET_CODE (x) != SYMBOL_REF)
10147	return false;
10148      /* FALLTHRU */
10149
10150    case SYMBOL_REF:
10151      /* TLS symbols are never valid.  */
10152      if (SYMBOL_REF_TLS_MODEL (x))
10153	return false;
10154
10155      /* DLLIMPORT symbols are never valid.  */
10156      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10157	  && SYMBOL_REF_DLLIMPORT_P (x))
10158	return false;
10159
10160#if TARGET_MACHO
10161      /* mdynamic-no-pic */
10162      if (MACHO_DYNAMIC_NO_PIC_P)
10163	return machopic_symbol_defined_p (x);
10164#endif
10165
10166      /* External function address should be loaded
10167	 via the GOT slot to avoid PLT.  */
10168      if (ix86_force_load_from_GOT_p (x))
10169	return false;
10170
10171      break;
10172
10173    CASE_CONST_SCALAR_INT:
10174      switch (mode)
10175	{
10176	case E_TImode:
10177	  if (TARGET_64BIT)
10178	    return true;
10179	  /* FALLTHRU */
10180	case E_OImode:
10181	case E_XImode:
10182	  if (!standard_sse_constant_p (x, mode))
10183	    return false;
10184	default:
10185	  break;
10186	}
10187      break;
10188
10189    case CONST_VECTOR:
10190      if (!standard_sse_constant_p (x, mode))
10191	return false;
10192
10193    default:
10194      break;
10195    }
10196
10197  /* Otherwise we handle everything else in the move patterns.  */
10198  return true;
10199}
10200
10201/* Determine if it's legal to put X into the constant pool.  This
10202   is not possible for the address of thread-local symbols, which
10203   is checked above.  */
10204
10205static bool
10206ix86_cannot_force_const_mem (machine_mode mode, rtx x)
10207{
10208  /* We can put any immediate constant in memory.  */
10209  switch (GET_CODE (x))
10210    {
10211    CASE_CONST_ANY:
10212      return false;
10213
10214    default:
10215      break;
10216    }
10217
10218  return !ix86_legitimate_constant_p (mode, x);
10219}
10220
/* Return true if the symbol is marked as dllimport, or as a stub variable;
   otherwise return false.  */
10223
10224static bool
10225is_imported_p (rtx x)
10226{
10227  if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
10228      || GET_CODE (x) != SYMBOL_REF)
10229    return false;
10230
10231  return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
10232}
10233
10234
10235/* Nonzero if the constant value X is a legitimate general operand
10236   when generating PIC code.  It is given that flag_pic is on and
10237   that X satisfies CONSTANT_P.  */
10238
10239bool
10240legitimate_pic_operand_p (rtx x)
10241{
10242  rtx inner;
10243
10244  switch (GET_CODE (x))
10245    {
10246    case CONST:
10247      inner = XEXP (x, 0);
10248      if (GET_CODE (inner) == PLUS
10249	  && CONST_INT_P (XEXP (inner, 1)))
10250	inner = XEXP (inner, 0);
10251
10252      /* Only some unspecs are valid as "constants".  */
10253      if (GET_CODE (inner) == UNSPEC)
10254	switch (XINT (inner, 1))
10255	  {
10256	  case UNSPEC_GOT:
10257	  case UNSPEC_GOTOFF:
10258	  case UNSPEC_PLTOFF:
10259	    return TARGET_64BIT;
10260	  case UNSPEC_TPOFF:
10261	    x = XVECEXP (inner, 0, 0);
10262	    return (GET_CODE (x) == SYMBOL_REF
10263		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10264	  case UNSPEC_MACHOPIC_OFFSET:
10265	    return legitimate_pic_address_disp_p (x);
10266	  default:
10267	    return false;
10268	  }
10269      /* FALLTHRU */
10270
10271    case SYMBOL_REF:
10272    case LABEL_REF:
10273      return legitimate_pic_address_disp_p (x);
10274
10275    default:
10276      return true;
10277    }
10278}
10279
10280/* Determine if a given CONST RTX is a valid memory displacement
10281   in PIC mode.  */
10282
10283bool
10284legitimate_pic_address_disp_p (rtx disp)
10285{
10286  bool saw_plus;
10287
10288  /* In 64bit mode we can allow direct addresses of symbols and labels
10289     when they are not dynamic symbols.  */
10290  if (TARGET_64BIT)
10291    {
10292      rtx op0 = disp, op1;
10293
10294      switch (GET_CODE (disp))
10295	{
10296	case LABEL_REF:
10297	  return true;
10298
10299	case CONST:
10300	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
10301	    break;
10302	  op0 = XEXP (XEXP (disp, 0), 0);
10303	  op1 = XEXP (XEXP (disp, 0), 1);
10304	  if (!CONST_INT_P (op1))
10305	    break;
10306	  if (GET_CODE (op0) == UNSPEC
10307	      && (XINT (op0, 1) == UNSPEC_DTPOFF
10308		  || XINT (op0, 1) == UNSPEC_NTPOFF)
10309	      && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
10310	    return true;
10311	  if (INTVAL (op1) >= 16*1024*1024
10312	      || INTVAL (op1) < -16*1024*1024)
10313	    break;
10314	  if (GET_CODE (op0) == LABEL_REF)
10315	    return true;
10316	  if (GET_CODE (op0) == CONST
10317	      && GET_CODE (XEXP (op0, 0)) == UNSPEC
10318	      && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
10319	    return true;
10320	  if (GET_CODE (op0) == UNSPEC
10321	      && XINT (op0, 1) == UNSPEC_PCREL)
10322	    return true;
10323	  if (GET_CODE (op0) != SYMBOL_REF)
10324	    break;
10325	  /* FALLTHRU */
10326
10327	case SYMBOL_REF:
	  /* TLS references should always be enclosed in an UNSPEC.
	     A dllimported symbol always needs to be resolved.  */
10330	  if (SYMBOL_REF_TLS_MODEL (op0)
10331	      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
10332	    return false;
10333
10334	  if (TARGET_PECOFF)
10335	    {
10336	      if (is_imported_p (op0))
10337		return true;
10338
10339	      if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
10340		break;
10341
10342	      /* Non-external-weak function symbols need to be resolved only
10343		 for the large model.  Non-external symbols don't need to be
10344		 resolved for large and medium models.  For the small model,
10345		 we don't need to resolve anything here.  */
10346	      if ((ix86_cmodel != CM_LARGE_PIC
10347		   && SYMBOL_REF_FUNCTION_P (op0)
10348		   && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
10349		  || !SYMBOL_REF_EXTERNAL_P (op0)
10350		  || ix86_cmodel == CM_SMALL_PIC)
10351		return true;
10352	    }
10353	  else if (!SYMBOL_REF_FAR_ADDR_P (op0)
10354		   && (SYMBOL_REF_LOCAL_P (op0)
10355		       || (HAVE_LD_PIE_COPYRELOC
10356			   && flag_pie
10357			   && !SYMBOL_REF_WEAK (op0)
10358			   && !SYMBOL_REF_FUNCTION_P (op0)))
10359		   && ix86_cmodel != CM_LARGE_PIC)
10360	    return true;
10361	  break;
10362
10363	default:
10364	  break;
10365	}
10366    }
10367  if (GET_CODE (disp) != CONST)
10368    return false;
10369  disp = XEXP (disp, 0);
10370
10371  if (TARGET_64BIT)
10372    {
      /* It is not safe to allow PLUS expressions.  This limits the allowed
         distance of GOT table entries.  We should not need these anyway.  */
10375      if (GET_CODE (disp) != UNSPEC
10376	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
10377	      && XINT (disp, 1) != UNSPEC_GOTOFF
10378	      && XINT (disp, 1) != UNSPEC_PCREL
10379	      && XINT (disp, 1) != UNSPEC_PLTOFF))
10380	return false;
10381
10382      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10383	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10384	return false;
10385      return true;
10386    }
10387
10388  saw_plus = false;
10389  if (GET_CODE (disp) == PLUS)
10390    {
10391      if (!CONST_INT_P (XEXP (disp, 1)))
10392	return false;
10393      disp = XEXP (disp, 0);
10394      saw_plus = true;
10395    }
10396
10397  if (TARGET_MACHO && darwin_local_data_pic (disp))
10398    return true;
10399
10400  if (GET_CODE (disp) != UNSPEC)
10401    return false;
10402
10403  switch (XINT (disp, 1))
10404    {
10405    case UNSPEC_GOT:
10406      if (saw_plus)
10407	return false;
10408      /* We need to check for both symbols and labels because VxWorks loads
10409	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
10410	 details.  */
10411      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10412	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10413    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI also specifies a 32bit relocation, we don't produce
	 it in the small PIC model at all.  */
10417      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10418	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10419	  && !TARGET_64BIT)
10420        return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10421      return false;
10422    case UNSPEC_GOTTPOFF:
10423    case UNSPEC_GOTNTPOFF:
10424    case UNSPEC_INDNTPOFF:
10425      if (saw_plus)
10426	return false;
10427      disp = XVECEXP (disp, 0, 0);
10428      return (GET_CODE (disp) == SYMBOL_REF
10429	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10430    case UNSPEC_NTPOFF:
10431      disp = XVECEXP (disp, 0, 0);
10432      return (GET_CODE (disp) == SYMBOL_REF
10433	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10434    case UNSPEC_DTPOFF:
10435      disp = XVECEXP (disp, 0, 0);
10436      return (GET_CODE (disp) == SYMBOL_REF
10437	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
10438    }
10439
10440  return false;
10441}
10442
/* Determine if OP is a suitable RTX for an address register.
   Return the naked register if a register or a register subreg is
   found; otherwise return NULL_RTX.  */
10446
10447static rtx
10448ix86_validate_address_register (rtx op)
10449{
10450  machine_mode mode = GET_MODE (op);
10451
10452  /* Only SImode or DImode registers can form the address.  */
10453  if (mode != SImode && mode != DImode)
10454    return NULL_RTX;
10455
10456  if (REG_P (op))
10457    return op;
10458  else if (SUBREG_P (op))
10459    {
10460      rtx reg = SUBREG_REG (op);
10461
10462      if (!REG_P (reg))
10463	return NULL_RTX;
10464
10465      mode = GET_MODE (reg);
10466
10467      /* Don't allow SUBREGs that span more than a word.  It can
10468	 lead to spill failures when the register is one word out
10469	 of a two word structure.  */
10470      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
10471	return NULL_RTX;
10472
10473      /* Allow only SUBREGs of non-eliminable hard registers.  */
10474      if (register_no_elim_operand (reg, mode))
10475	return reg;
10476    }
10477
10478  /* Op is not a register.  */
10479  return NULL_RTX;
10480}
10481
10482/* Recognizes RTL expressions that are valid memory addresses for an
10483   instruction.  The MODE argument is the machine mode for the MEM
10484   expression that wants to use this address.
10485
   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
10487   convert common non-canonical forms to canonical form so that they will
10488   be recognized.  */
10489
10490static bool
10491ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
10492{
10493  struct ix86_address parts;
10494  rtx base, index, disp;
10495  HOST_WIDE_INT scale;
10496  addr_space_t seg;
10497
10498  if (ix86_decompose_address (addr, &parts) <= 0)
10499    /* Decomposition failed.  */
10500    return false;
10501
10502  base = parts.base;
10503  index = parts.index;
10504  disp = parts.disp;
10505  scale = parts.scale;
10506  seg = parts.seg;
10507
10508  /* Validate base register.  */
10509  if (base)
10510    {
10511      rtx reg = ix86_validate_address_register (base);
10512
10513      if (reg == NULL_RTX)
10514	return false;
10515
10516      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10517	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10518	/* Base is not valid.  */
10519	return false;
10520    }
10521
10522  /* Validate index register.  */
10523  if (index)
10524    {
10525      rtx reg = ix86_validate_address_register (index);
10526
10527      if (reg == NULL_RTX)
10528	return false;
10529
10530      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
10531	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
10532	/* Index is not valid.  */
10533	return false;
10534    }
10535
10536  /* Index and base should have the same mode.  */
10537  if (base && index
10538      && GET_MODE (base) != GET_MODE (index))
10539    return false;
10540
10541  /* Address override works only on the (%reg) part of %fs:(%reg).  */
10542  if (seg != ADDR_SPACE_GENERIC
10543      && ((base && GET_MODE (base) != word_mode)
10544	  || (index && GET_MODE (index) != word_mode)))
10545    return false;
10546
10547  /* Validate scale factor.  */
10548  if (scale != 1)
10549    {
10550      if (!index)
10551	/* Scale without index.  */
10552	return false;
10553
10554      if (scale != 2 && scale != 4 && scale != 8)
10555	/* Scale is not a valid multiplier.  */
10556	return false;
10557    }
10558
10559  /* Validate displacement.  */
10560  if (disp)
10561    {
10562      if (GET_CODE (disp) == CONST
10563	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
10564	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
10565	switch (XINT (XEXP (disp, 0), 1))
10566	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	     when used.  While the ABI also specifies 32bit relocations, we
	     don't produce them at all and use IP-relative addressing instead.
	     Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
	     should be loaded via the GOT.  */
10572	  case UNSPEC_GOT:
10573	    if (!TARGET_64BIT
10574		&& ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
10575	      goto is_legitimate_pic;
10576	    /* FALLTHRU */
10577	  case UNSPEC_GOTOFF:
10578	    gcc_assert (flag_pic);
10579	    if (!TARGET_64BIT)
10580	      goto is_legitimate_pic;
10581
10582	    /* 64bit address unspec.  */
10583	    return false;
10584
10585	  case UNSPEC_GOTPCREL:
10586	    if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
10587	      goto is_legitimate_pic;
10588	    /* FALLTHRU */
10589	  case UNSPEC_PCREL:
10590	    gcc_assert (flag_pic);
10591	    goto is_legitimate_pic;
10592
10593	  case UNSPEC_GOTTPOFF:
10594	  case UNSPEC_GOTNTPOFF:
10595	  case UNSPEC_INDNTPOFF:
10596	  case UNSPEC_NTPOFF:
10597	  case UNSPEC_DTPOFF:
10598	    break;
10599
10600	  default:
10601	    /* Invalid address unspec.  */
10602	    return false;
10603	  }
10604
10605      else if (SYMBOLIC_CONST (disp)
10606	       && (flag_pic
10607		   || (TARGET_MACHO
10608#if TARGET_MACHO
10609		       && MACHOPIC_INDIRECT
10610		       && !machopic_operand_p (disp)
10611#endif
10612	       )))
10613	{
10614
10615	is_legitimate_pic:
10616	  if (TARGET_64BIT && (index || base))
10617	    {
10618	      /* foo@dtpoff(%rX) is ok.  */
10619	      if (GET_CODE (disp) != CONST
10620		  || GET_CODE (XEXP (disp, 0)) != PLUS
10621		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10622		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10623		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10624		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10625		/* Non-constant pic memory reference.  */
10626		return false;
10627	    }
10628	  else if ((!TARGET_MACHO || flag_pic)
10629		    && ! legitimate_pic_address_disp_p (disp))
10630	    /* Displacement is an invalid pic construct.  */
10631	    return false;
10632#if TARGET_MACHO
10633	  else if (MACHO_DYNAMIC_NO_PIC_P
10634		   && !ix86_legitimate_constant_p (Pmode, disp))
	    /* Displacement must be referenced via non_lazy_pointer.  */
10636	    return false;
10637#endif
10638
          /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is a good idea, unfortunately these constructs may
	     be created by the "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing the
	     GOT table with a pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by the
	     "addsi3" pattern, which later gets split to lea when the
	     output register differs from the input.  While this
	     could be handled by a separate addsi pattern for this case
	     that never results in lea, disabling this test seems to be
	     the easier and correct fix for the crash.  */
10660	}
10661      else if (GET_CODE (disp) != LABEL_REF
10662	       && !CONST_INT_P (disp)
10663	       && (GET_CODE (disp) != CONST
10664		   || !ix86_legitimate_constant_p (Pmode, disp))
10665	       && (GET_CODE (disp) != SYMBOL_REF
10666		   || !ix86_legitimate_constant_p (Pmode, disp)))
10667	/* Displacement is not constant.  */
10668	return false;
10669      else if (TARGET_64BIT
10670	       && !x86_64_immediate_operand (disp, VOIDmode))
10671	/* Displacement is out of range.  */
10672	return false;
10673      /* In x32 mode, constant addresses are sign extended to 64bit, so
10674	 we have to prevent addresses from 0x80000000 to 0xffffffff.  */
10675      else if (TARGET_X32 && !(index || base)
10676	       && CONST_INT_P (disp)
10677	       && val_signbit_known_set_p (SImode, INTVAL (disp)))
10678	return false;
10679    }
10680
10681  /* Everything looks valid.  */
10682  return true;
10683}
10684
10685/* Determine if a given RTX is a valid constant address.  */
10686
10687bool
10688constant_address_p (rtx x)
10689{
10690  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
10691}
10692
10693/* Return a unique alias set for the GOT.  */
10694
10695alias_set_type
10696ix86_GOT_alias_set (void)
10697{
10698  static alias_set_type set = -1;
10699  if (set == -1)
10700    set = new_alias_set ();
10701  return set;
10702}
10703
10704/* Return a legitimate reference for ORIG (an address) using the
10705   register REG.  If REG is 0, a new pseudo is generated.
10706
10707   There are two types of references that must be handled:
10708
10709   1. Global data references must load the address from the GOT, via
10710      the PIC reg.  An insn is emitted to do this load, and the reg is
10711      returned.
10712
10713   2. Static data references, constant pool addresses, and code labels
10714      compute the address as an offset from the GOT, whose base is in
10715      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
10716      differentiate them from global data objects.  The returned
10717      address is the PIC reg + an unspec constant.
10718
10719   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
10720   reg also appears in the address.  */
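
/* As an illustrative sketch of the 32-bit case: a global symbol SYM is
   expanded into
     (mem (plus pic_reg (const (unspec [SYM] UNSPEC_GOT))))
   while a local symbol or label becomes
     (plus pic_reg (const (unspec [SYM] UNSPEC_GOTOFF))).  */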
10721
10722rtx
10723legitimize_pic_address (rtx orig, rtx reg)
10724{
10725  rtx addr = orig;
10726  rtx new_rtx = orig;
10727
10728#if TARGET_MACHO
10729  if (TARGET_MACHO && !TARGET_64BIT)
10730    {
10731      if (reg == 0)
10732	reg = gen_reg_rtx (Pmode);
10733      /* Use the generic Mach-O PIC machinery.  */
10734      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
10735    }
10736#endif
10737
10738  if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10739    {
10740      rtx tmp = legitimize_pe_coff_symbol (addr, true);
10741      if (tmp)
10742        return tmp;
10743    }
10744
10745  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
10746    new_rtx = addr;
10747  else if ((!TARGET_64BIT
10748	    || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
10749	   && !TARGET_PECOFF
10750	   && gotoff_operand (addr, Pmode))
10751    {
10752      /* This symbol may be referenced via a displacement
10753	 from the PIC base address (@GOTOFF).  */
10754      if (GET_CODE (addr) == CONST)
10755	addr = XEXP (addr, 0);
10756
10757      if (GET_CODE (addr) == PLUS)
10758	  {
10759            new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10760				      UNSPEC_GOTOFF);
10761	    new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10762	  }
10763	else
10764          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10765
10766      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10767
10768      if (TARGET_64BIT)
10769	new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
10770
10771      if (reg != 0)
10772	{
10773 	  gcc_assert (REG_P (reg));
10774	  new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
10775					 new_rtx, reg, 1, OPTAB_DIRECT);
10776 	}
10777      else
10778	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10779    }
10780  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
10781	   /* We can't use @GOTOFF for text labels
10782	      on VxWorks, see gotoff_operand.  */
10783	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
10784    {
10785      rtx tmp = legitimize_pe_coff_symbol (addr, true);
10786      if (tmp)
10787        return tmp;
10788
      /* For x64 PE-COFF there is no GOT table,
	 so we use the address directly.  */
10791      if (TARGET_64BIT && TARGET_PECOFF)
10792	{
10793	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
10794	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10795	}
10796      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
10797	{
10798	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
10799				    UNSPEC_GOTPCREL);
10800	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10801	  new_rtx = gen_const_mem (Pmode, new_rtx);
10802	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10803	}
10804      else
10805	{
10806	  /* This symbol must be referenced via a load
10807	     from the Global Offset Table (@GOT).  */
10808	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
10809	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10810	  if (TARGET_64BIT)
10811	    new_rtx = force_reg (Pmode, new_rtx);
10812	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10813	  new_rtx = gen_const_mem (Pmode, new_rtx);
10814	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10815	}
10816
10817      new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
10818    }
10819  else
10820    {
10821      if (CONST_INT_P (addr)
10822	  && !x86_64_immediate_operand (addr, VOIDmode))
10823	new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
10824      else if (GET_CODE (addr) == CONST)
10825	{
10826	  addr = XEXP (addr, 0);
10827
10828	  /* We must match stuff we generate before.  Assume the only
10829	     unspecs that can get here are ours.  Not that we could do
10830	     anything with them anyway....  */
10831	  if (GET_CODE (addr) == UNSPEC
10832	      || (GET_CODE (addr) == PLUS
10833		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
10834	    return orig;
10835	  gcc_assert (GET_CODE (addr) == PLUS);
10836	}
10837
10838      if (GET_CODE (addr) == PLUS)
10839	{
10840	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
10841
10842	  /* Check first to see if this is a constant
10843	     offset from a @GOTOFF symbol reference.  */
10844	  if (!TARGET_PECOFF
10845	      && gotoff_operand (op0, Pmode)
10846	      && CONST_INT_P (op1))
10847	    {
10848	      if (!TARGET_64BIT)
10849		{
10850		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
10851					    UNSPEC_GOTOFF);
10852		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
10853		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10854
10855		  if (reg != 0)
10856		    {
10857		      gcc_assert (REG_P (reg));
10858		      new_rtx = expand_simple_binop (Pmode, PLUS,
10859						     pic_offset_table_rtx,
10860						     new_rtx, reg, 1,
10861						     OPTAB_DIRECT);
10862		    }
10863		  else
10864		    new_rtx
10865		      = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10866		}
10867	      else
10868		{
10869		  if (INTVAL (op1) < -16*1024*1024
10870		      || INTVAL (op1) >= 16*1024*1024)
10871		    {
10872		      if (!x86_64_immediate_operand (op1, Pmode))
10873			op1 = force_reg (Pmode, op1);
10874
10875		      new_rtx
10876			= gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
10877		    }
10878		}
10879	    }
10880	  else
10881	    {
10882	      rtx base = legitimize_pic_address (op0, reg);
10883	      machine_mode mode = GET_MODE (base);
10884	      new_rtx
10885	        = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
10886
10887	      if (CONST_INT_P (new_rtx))
10888		{
10889		  if (INTVAL (new_rtx) < -16*1024*1024
10890		      || INTVAL (new_rtx) >= 16*1024*1024)
10891		    {
10892		      if (!x86_64_immediate_operand (new_rtx, mode))
10893			new_rtx = force_reg (mode, new_rtx);
10894
10895		      new_rtx
10896		        = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
10897		    }
10898		  else
10899		    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
10900		}
10901	      else
10902		{
10903		  /* For %rip addressing, we have to use
10904		     just disp32, not base nor index.  */
10905		  if (TARGET_64BIT
10906		      && (GET_CODE (base) == SYMBOL_REF
10907			  || GET_CODE (base) == LABEL_REF))
10908		    base = force_reg (mode, base);
10909		  if (GET_CODE (new_rtx) == PLUS
10910		      && CONSTANT_P (XEXP (new_rtx, 1)))
10911		    {
10912		      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
10913		      new_rtx = XEXP (new_rtx, 1);
10914		    }
10915		  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
10916		}
10917	    }
10918	}
10919    }
10920  return new_rtx;
10921}
10922
10923/* Load the thread pointer.  If TO_REG is true, force it into a register.  */
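
/* The thread pointer is represented here as (unspec [const0] UNSPEC_TP);
   it is only later materialized as an access through the thread-pointer
   segment register (typically %fs in 64-bit mode and %gs in 32-bit mode).  */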
10924
10925static rtx
10926get_thread_pointer (machine_mode tp_mode, bool to_reg)
10927{
10928  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
10929
10930  if (GET_MODE (tp) != tp_mode)
10931    {
10932      gcc_assert (GET_MODE (tp) == SImode);
10933      gcc_assert (tp_mode == DImode);
10934
10935      tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
10936    }
10937
10938  if (to_reg)
10939    tp = copy_to_mode_reg (tp_mode, tp);
10940
10941  return tp;
10942}
10943
10944/* Construct the SYMBOL_REF for the tls_get_addr function.  */
10945
10946static GTY(()) rtx ix86_tls_symbol;
10947
10948static rtx
10949ix86_tls_get_addr (void)
10950{
10951  if (!ix86_tls_symbol)
10952    {
10953      const char *sym
10954	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
10955	   ? "___tls_get_addr" : "__tls_get_addr");
10956
10957      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
10958    }
10959
10960  if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
10961    {
10962      rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
10963				   UNSPEC_PLTOFF);
10964      return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
10965			   gen_rtx_CONST (Pmode, unspec));
10966    }
10967
10968  return ix86_tls_symbol;
10969}
10970
10971/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
10972
10973static GTY(()) rtx ix86_tls_module_base_symbol;
10974
10975rtx
10976ix86_tls_module_base (void)
10977{
10978  if (!ix86_tls_module_base_symbol)
10979    {
10980      ix86_tls_module_base_symbol
10981	= gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
10982
10983      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
10984	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
10985    }
10986
10987  return ix86_tls_module_base_symbol;
10988}
10989
10990/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
10991   false if we expect this to be used for a memory address and true if
10992   we expect to load the address into a register.  */
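
/* For instance (a sketch of the local-exec model on GNU targets), the
   result for a symbol X is
     (plus <thread pointer> (const (unspec [X] UNSPEC_NTPOFF)))
   which is printed as X@ntpoff (X@tpoff in 64-bit mode).  */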
10993
10994rtx
10995legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
10996{
10997  rtx dest, base, off;
10998  rtx pic = NULL_RTX, tp = NULL_RTX;
10999  machine_mode tp_mode = Pmode;
11000  int type;
11001
  /* Fall back to the global dynamic model if the toolchain cannot support
     local dynamic.  */
11004  if (TARGET_SUN_TLS && !TARGET_64BIT
11005      && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
11006      && model == TLS_MODEL_LOCAL_DYNAMIC)
11007    model = TLS_MODEL_GLOBAL_DYNAMIC;
11008
11009  switch (model)
11010    {
11011    case TLS_MODEL_GLOBAL_DYNAMIC:
11012      if (!TARGET_64BIT)
11013	{
11014	  if (flag_pic && !TARGET_PECOFF)
11015	    pic = pic_offset_table_rtx;
11016	  else
11017	    {
11018	      pic = gen_reg_rtx (Pmode);
11019	      emit_insn (gen_set_got (pic));
11020	    }
11021	}
11022
11023      if (TARGET_GNU2_TLS)
11024	{
11025	  dest = gen_reg_rtx (ptr_mode);
11026	  if (TARGET_64BIT)
11027	    emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
11028	  else
11029	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
11030
11031	  tp = get_thread_pointer (ptr_mode, true);
11032	  dest = gen_rtx_PLUS (ptr_mode, tp, dest);
11033	  if (GET_MODE (dest) != Pmode)
11034	     dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
11035	  dest = force_reg (Pmode, dest);
11036
11037	  if (GET_MODE (x) != Pmode)
11038	    x = gen_rtx_ZERO_EXTEND (Pmode, x);
11039
11040	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
11041	}
11042      else
11043	{
11044	  rtx caddr = ix86_tls_get_addr ();
11045
11046	  dest = gen_reg_rtx (Pmode);
11047	  if (TARGET_64BIT)
11048	    {
11049	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
11050	      rtx_insn *insns;
11051
11052	      start_sequence ();
11053	      emit_call_insn
11054		(gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
11055	      insns = get_insns ();
11056	      end_sequence ();
11057
11058	      if (GET_MODE (x) != Pmode)
11059		x = gen_rtx_ZERO_EXTEND (Pmode, x);
11060
11061	      RTL_CONST_CALL_P (insns) = 1;
11062	      emit_libcall_block (insns, dest, rax, x);
11063	    }
11064	  else
11065	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
11066	}
11067      break;
11068
11069    case TLS_MODEL_LOCAL_DYNAMIC:
11070      if (!TARGET_64BIT)
11071	{
11072	  if (flag_pic)
11073	    pic = pic_offset_table_rtx;
11074	  else
11075	    {
11076	      pic = gen_reg_rtx (Pmode);
11077	      emit_insn (gen_set_got (pic));
11078	    }
11079	}
11080
11081      if (TARGET_GNU2_TLS)
11082	{
11083	  rtx tmp = ix86_tls_module_base ();
11084
11085	  base = gen_reg_rtx (ptr_mode);
11086	  if (TARGET_64BIT)
11087	    emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
11088	  else
11089	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
11090
11091	  tp = get_thread_pointer (ptr_mode, true);
11092	  if (GET_MODE (base) != Pmode)
11093	    base = gen_rtx_ZERO_EXTEND (Pmode, base);
11094	  base = force_reg (Pmode, base);
11095	}
11096      else
11097	{
11098	  rtx caddr = ix86_tls_get_addr ();
11099
11100	  base = gen_reg_rtx (Pmode);
11101	  if (TARGET_64BIT)
11102	    {
11103	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
11104	      rtx_insn *insns;
11105	      rtx eqv;
11106
11107	      start_sequence ();
11108	      emit_call_insn
11109		(gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
11110	      insns = get_insns ();
11111	      end_sequence ();
11112
11113	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
11114		 share the LD_BASE result with other LD model accesses.  */
11115	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11116				    UNSPEC_TLS_LD_BASE);
11117
11118	      RTL_CONST_CALL_P (insns) = 1;
11119	      emit_libcall_block (insns, base, rax, eqv);
11120	    }
11121	  else
11122	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
11123	}
11124
11125      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11126      off = gen_rtx_CONST (Pmode, off);
11127
11128      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11129
11130      if (TARGET_GNU2_TLS)
11131	{
11132	  if (GET_MODE (tp) != Pmode)
11133	    {
11134	      dest = lowpart_subreg (ptr_mode, dest, Pmode);
11135	      dest = gen_rtx_PLUS (ptr_mode, tp, dest);
11136	      dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
11137	    }
11138	  else
11139	    dest = gen_rtx_PLUS (Pmode, tp, dest);
11140	  dest = force_reg (Pmode, dest);
11141
11142	  if (GET_MODE (x) != Pmode)
11143	    x = gen_rtx_ZERO_EXTEND (Pmode, x);
11144
11145	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
11146	}
11147      break;
11148
11149    case TLS_MODEL_INITIAL_EXEC:
11150      if (TARGET_64BIT)
11151	{
11152	  if (TARGET_SUN_TLS && !TARGET_X32)
11153	    {
11154	      /* The Sun linker took the AMD64 TLS spec literally
11155		 and can only handle %rax as destination of the
11156		 initial executable code sequence.  */
11157
11158	      dest = gen_reg_rtx (DImode);
11159	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
11160	      return dest;
11161	    }
11162
	  /* Generate DImode references to avoid %fs:(%reg32)
	     problems and the linker IE->LE relaxation bug.  */
11165	  tp_mode = DImode;
11166	  pic = NULL;
11167	  type = UNSPEC_GOTNTPOFF;
11168	}
11169      else if (flag_pic)
11170	{
11171	  pic = pic_offset_table_rtx;
11172	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11173	}
11174      else if (!TARGET_ANY_GNU_TLS)
11175	{
11176	  pic = gen_reg_rtx (Pmode);
11177	  emit_insn (gen_set_got (pic));
11178	  type = UNSPEC_GOTTPOFF;
11179	}
11180      else
11181	{
11182	  pic = NULL;
11183	  type = UNSPEC_INDNTPOFF;
11184	}
11185
11186      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
11187      off = gen_rtx_CONST (tp_mode, off);
11188      if (pic)
11189	off = gen_rtx_PLUS (tp_mode, pic, off);
11190      off = gen_const_mem (tp_mode, off);
11191      set_mem_alias_set (off, ix86_GOT_alias_set ());
11192
11193      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11194	{
11195	  base = get_thread_pointer (tp_mode,
11196				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11197	  off = force_reg (tp_mode, off);
11198	  dest = gen_rtx_PLUS (tp_mode, base, off);
11199	  if (tp_mode != Pmode)
11200	    dest = convert_to_mode (Pmode, dest, 1);
11201	}
11202      else
11203	{
11204	  base = get_thread_pointer (Pmode, true);
11205	  dest = gen_reg_rtx (Pmode);
11206	  emit_insn (gen_sub3_insn (dest, base, off));
11207	}
11208      break;
11209
11210    case TLS_MODEL_LOCAL_EXEC:
11211      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11212			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11213			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11214      off = gen_rtx_CONST (Pmode, off);
11215
11216      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11217	{
11218	  base = get_thread_pointer (Pmode,
11219				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11220	  return gen_rtx_PLUS (Pmode, base, off);
11221	}
11222      else
11223	{
11224	  base = get_thread_pointer (Pmode, true);
11225	  dest = gen_reg_rtx (Pmode);
11226	  emit_insn (gen_sub3_insn (dest, base, off));
11227	}
11228      break;
11229
11230    default:
11231      gcc_unreachable ();
11232    }
11233
11234  return dest;
11235}
11236
11237/* Return true if OP refers to a TLS address.  */
11238bool
11239ix86_tls_address_pattern_p (rtx op)
11240{
11241  subrtx_var_iterator::array_type array;
11242  FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
11243    {
11244      rtx op = *iter;
11245      if (MEM_P (op))
11246	{
11247	  rtx *x = &XEXP (op, 0);
11248	  while (GET_CODE (*x) == PLUS)
11249	    {
11250	      int i;
11251	      for (i = 0; i < 2; i++)
11252		{
11253		  rtx u = XEXP (*x, i);
11254		  if (GET_CODE (u) == ZERO_EXTEND)
11255		    u = XEXP (u, 0);
11256		  if (GET_CODE (u) == UNSPEC
11257		      && XINT (u, 1) == UNSPEC_TP)
11258		    return true;
11259		}
11260	      x = &XEXP (*x, 0);
11261	    }
11262
11263	  iter.skip_subrtxes ();
11264	}
11265    }
11266
11267  return false;
11268}
11269
11270/* Rewrite *LOC so that it refers to a default TLS address space.  */
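/* A sketch of the rewrite: an address of the form
     (plus (unspec [const0] UNSPEC_TP) X)
   inside a MEM is replaced by plain X, and the MEM's address space is set
   to DEFAULT_TLS_SEG_REG so that it is emitted with a segment prefix.  */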
11271void
11272ix86_rewrite_tls_address_1 (rtx *loc)
11273{
11274  subrtx_ptr_iterator::array_type array;
11275  FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
11276    {
11277      rtx *loc = *iter;
11278      if (MEM_P (*loc))
11279	{
11280	  rtx addr = XEXP (*loc, 0);
11281	  rtx *x = &addr;
11282	  while (GET_CODE (*x) == PLUS)
11283	    {
11284	      int i;
11285	      for (i = 0; i < 2; i++)
11286		{
11287		  rtx u = XEXP (*x, i);
11288		  if (GET_CODE (u) == ZERO_EXTEND)
11289		    u = XEXP (u, 0);
11290		  if (GET_CODE (u) == UNSPEC
11291		      && XINT (u, 1) == UNSPEC_TP)
11292		    {
11293		      addr_space_t as = DEFAULT_TLS_SEG_REG;
11294
11295		      *x = XEXP (*x, 1 - i);
11296
11297		      *loc = replace_equiv_address_nv (*loc, addr, true);
11298		      set_mem_addr_space (*loc, as);
11299		      return;
11300		    }
11301		}
11302	      x = &XEXP (*x, 0);
11303	    }
11304
11305	  iter.skip_subrtxes ();
11306	}
11307    }
11308}
11309
/* Rewrite an instruction pattern involving a TLS address
   so that it refers to the default TLS address space.  */
11312rtx
11313ix86_rewrite_tls_address (rtx pattern)
11314{
11315  pattern = copy_insn (pattern);
11316  ix86_rewrite_tls_address_1 (&pattern);
11317  return pattern;
11318}
11319
11320/* Create or return the unique __imp_DECL dllimport symbol corresponding
11321   to symbol DECL if BEIMPORT is true.  Otherwise create or return the
11322   unique refptr-DECL symbol corresponding to symbol DECL.  */
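
/* For instance, on a target with no user label prefix (a sketch of the
   x86_64 mingw case), a decl "foo" maps to the import pointer "__imp_foo"
   when BEIMPORT is true, and to the reference pointer ".refptr.foo"
   otherwise.  */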
11323
11324struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
11325{
11326  static inline hashval_t hash (tree_map *m) { return m->hash; }
11327  static inline bool
11328  equal (tree_map *a, tree_map *b)
11329  {
11330    return a->base.from == b->base.from;
11331  }
11332
11333  static int
11334  keep_cache_entry (tree_map *&m)
11335  {
11336    return ggc_marked_p (m->base.from);
11337  }
11338};
11339
11340static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
11341
11342static tree
11343get_dllimport_decl (tree decl, bool beimport)
11344{
11345  struct tree_map *h, in;
11346  const char *name;
11347  const char *prefix;
11348  size_t namelen, prefixlen;
11349  char *imp_name;
11350  tree to;
11351  rtx rtl;
11352
11353  if (!dllimport_map)
11354    dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
11355
11356  in.hash = htab_hash_pointer (decl);
11357  in.base.from = decl;
11358  tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
11359  h = *loc;
11360  if (h)
11361    return h->to;
11362
11363  *loc = h = ggc_alloc<tree_map> ();
11364  h->hash = in.hash;
11365  h->base.from = decl;
11366  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11367			   VAR_DECL, NULL, ptr_type_node);
11368  DECL_ARTIFICIAL (to) = 1;
11369  DECL_IGNORED_P (to) = 1;
11370  DECL_EXTERNAL (to) = 1;
11371  TREE_READONLY (to) = 1;
11372
11373  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11374  name = targetm.strip_name_encoding (name);
11375  if (beimport)
11376    prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11377      ? "*__imp_" : "*__imp__";
11378  else
11379    prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
11380  namelen = strlen (name);
11381  prefixlen = strlen (prefix);
11382  imp_name = (char *) alloca (namelen + prefixlen + 1);
11383  memcpy (imp_name, prefix, prefixlen);
11384  memcpy (imp_name + prefixlen, name, namelen + 1);
11385
11386  name = ggc_alloc_string (imp_name, namelen + prefixlen);
11387  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11388  SET_SYMBOL_REF_DECL (rtl, to);
11389  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
11390  if (!beimport)
11391    {
11392      SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
11393#ifdef SUB_TARGET_RECORD_STUB
11394      SUB_TARGET_RECORD_STUB (name);
11395#endif
11396    }
11397
11398  rtl = gen_const_mem (Pmode, rtl);
11399  set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11400
11401  SET_DECL_RTL (to, rtl);
11402  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11403
11404  return to;
11405}
11406
11407/* Expand SYMBOL into its corresponding far-address symbol.
11408   WANT_REG is true if we require the result be a register.  */
11409
11410static rtx
11411legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
11412{
11413  tree imp_decl;
11414  rtx x;
11415
11416  gcc_assert (SYMBOL_REF_DECL (symbol));
11417  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
11418
11419  x = DECL_RTL (imp_decl);
11420  if (want_reg)
11421    x = force_reg (Pmode, x);
11422  return x;
11423}
11424
11425/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
11426   true if we require the result be a register.  */
11427
11428static rtx
11429legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11430{
11431  tree imp_decl;
11432  rtx x;
11433
11434  gcc_assert (SYMBOL_REF_DECL (symbol));
11435  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
11436
11437  x = DECL_RTL (imp_decl);
11438  if (want_reg)
11439    x = force_reg (Pmode, x);
11440  return x;
11441}
11442
11443/* Expand SYMBOL into its corresponding dllimport or refptr symbol.  WANT_REG
11444   is true if we require the result be a register.  */
11445
11446rtx
11447legitimize_pe_coff_symbol (rtx addr, bool inreg)
11448{
11449  if (!TARGET_PECOFF)
11450    return NULL_RTX;
11451
11452  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11453    {
11454      if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11455	return legitimize_dllimport_symbol (addr, inreg);
11456      if (GET_CODE (addr) == CONST
11457	  && GET_CODE (XEXP (addr, 0)) == PLUS
11458	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11459	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11460	{
11461	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
11462	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11463	}
11464    }
11465
11466  if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
11467    return NULL_RTX;
11468  if (GET_CODE (addr) == SYMBOL_REF
11469      && !is_imported_p (addr)
11470      && SYMBOL_REF_EXTERNAL_P (addr)
11471      && SYMBOL_REF_DECL (addr))
11472    return legitimize_pe_coff_extern_decl (addr, inreg);
11473
11474  if (GET_CODE (addr) == CONST
11475      && GET_CODE (XEXP (addr, 0)) == PLUS
11476      && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11477      && !is_imported_p (XEXP (XEXP (addr, 0), 0))
11478      && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
11479      && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
11480    {
11481      rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
11482      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11483    }
11484  return NULL_RTX;
11485}
11486
11487/* Try machine-dependent ways of modifying an illegitimate address
11488   to be legitimate.  If we find one, return the new, valid address.
11489   This macro is used in only one place: `memory_address' in explow.c.
11490
11491   OLDX is the address as it was before break_out_memory_refs was called.
11492   In some cases it is useful to look at this to decide what needs to be done.
11493
11494   It is always safe for this macro to do nothing.  It exists to recognize
11495   opportunities to optimize the output.
11496
11497   For the 80386, we handle X+REG by loading X into a register R and
11498   using R+REG.  R will go in a general reg and indexing will be used.
11499   However, if REG is a broken-out memory address or multiplication,
11500   nothing needs to be done because REG can certainly go in a general reg.
11501
11502   When -fpic is used, special handling is needed for symbolic references.
11503   See comments by legitimize_pic_address in i386.c for details.  */
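
/* For example, the canonicalizations below turn an address such as
     (plus (ashift (reg) (const_int 2)) (reg))
   into
     (plus (mult (reg) (const_int 4)) (reg))
   so that it matches the scaled-index addressing mode.  */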
11504
11505static rtx
11506ix86_legitimize_address (rtx x, rtx, machine_mode mode)
11507{
11508  bool changed = false;
11509  unsigned log;
11510
11511  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11512  if (log)
11513    return legitimize_tls_address (x, (enum tls_model) log, false);
11514  if (GET_CODE (x) == CONST
11515      && GET_CODE (XEXP (x, 0)) == PLUS
11516      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11517      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11518    {
11519      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11520				      (enum tls_model) log, false);
11521      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11522    }
11523
11524  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11525    {
11526      rtx tmp = legitimize_pe_coff_symbol (x, true);
11527      if (tmp)
11528        return tmp;
11529    }
11530
11531  if (flag_pic && SYMBOLIC_CONST (x))
11532    return legitimize_pic_address (x, 0);
11533
11534#if TARGET_MACHO
11535  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
11536    return machopic_indirect_data_reference (x, 0);
11537#endif
11538
  /* Canonicalize shifts by 0, 1, 2, 3 into a multiply.  */
11540  if (GET_CODE (x) == ASHIFT
11541      && CONST_INT_P (XEXP (x, 1))
11542      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11543    {
11544      changed = true;
11545      log = INTVAL (XEXP (x, 1));
11546      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11547			GEN_INT (1 << log));
11548    }
11549
11550  if (GET_CODE (x) == PLUS)
11551    {
11552      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
11553
11554      if (GET_CODE (XEXP (x, 0)) == ASHIFT
11555	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11556	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11557	{
11558	  changed = true;
11559	  log = INTVAL (XEXP (XEXP (x, 0), 1));
11560	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
11561				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11562				      GEN_INT (1 << log));
11563	}
11564
11565      if (GET_CODE (XEXP (x, 1)) == ASHIFT
11566	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11567	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11568	{
11569	  changed = true;
11570	  log = INTVAL (XEXP (XEXP (x, 1), 1));
11571	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
11572				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11573				      GEN_INT (1 << log));
11574	}
11575
11576      /* Put multiply first if it isn't already.  */
11577      if (GET_CODE (XEXP (x, 1)) == MULT)
11578	{
11579	  std::swap (XEXP (x, 0), XEXP (x, 1));
11580	  changed = true;
11581	}
11582
11583      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11584	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
11585	 created by virtual register instantiation, register elimination, and
11586	 similar optimizations.  */
11587      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11588	{
11589	  changed = true;
11590	  x = gen_rtx_PLUS (Pmode,
11591			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
11592					  XEXP (XEXP (x, 1), 0)),
11593			    XEXP (XEXP (x, 1), 1));
11594	}
11595
11596      /* Canonicalize
11597	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11598	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
11599      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
11600	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11601	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
11602	       && CONSTANT_P (XEXP (x, 1)))
11603	{
11604	  rtx constant;
11605	  rtx other = NULL_RTX;
11606
11607	  if (CONST_INT_P (XEXP (x, 1)))
11608	    {
11609	      constant = XEXP (x, 1);
11610	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
11611	    }
11612	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
11613	    {
11614	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
11615	      other = XEXP (x, 1);
11616	    }
11617	  else
11618	    constant = 0;
11619
11620	  if (constant)
11621	    {
11622	      changed = true;
11623	      x = gen_rtx_PLUS (Pmode,
11624				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
11625					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
11626				plus_constant (Pmode, other,
11627					       INTVAL (constant)));
11628	    }
11629	}
11630
11631      if (changed && ix86_legitimate_address_p (mode, x, false))
11632	return x;
11633
11634      if (GET_CODE (XEXP (x, 0)) == MULT)
11635	{
11636	  changed = true;
11637	  XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
11638	}
11639
11640      if (GET_CODE (XEXP (x, 1)) == MULT)
11641	{
11642	  changed = true;
11643	  XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
11644	}
11645
11646      if (changed
11647	  && REG_P (XEXP (x, 1))
11648	  && REG_P (XEXP (x, 0)))
11649	return x;
11650
11651      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
11652	{
11653	  changed = true;
11654	  x = legitimize_pic_address (x, 0);
11655	}
11656
11657      if (changed && ix86_legitimate_address_p (mode, x, false))
11658	return x;
11659
11660      if (REG_P (XEXP (x, 0)))
11661	{
11662	  rtx temp = gen_reg_rtx (Pmode);
11663	  rtx val  = force_operand (XEXP (x, 1), temp);
11664	  if (val != temp)
11665	    {
11666	      val = convert_to_mode (Pmode, val, 1);
11667	      emit_move_insn (temp, val);
11668	    }
11669
11670	  XEXP (x, 1) = temp;
11671	  return x;
11672	}
11673
11674      else if (REG_P (XEXP (x, 1)))
11675	{
11676	  rtx temp = gen_reg_rtx (Pmode);
11677	  rtx val  = force_operand (XEXP (x, 0), temp);
11678	  if (val != temp)
11679	    {
11680	      val = convert_to_mode (Pmode, val, 1);
11681	      emit_move_insn (temp, val);
11682	    }
11683
11684	  XEXP (x, 0) = temp;
11685	  return x;
11686	}
11687    }
11688
11689  return x;
11690}
11691
11692/* Print an integer constant expression in assembler syntax.  Addition
11693   and subtraction are the only arithmetic that may appear in these
11694   expressions.  FILE is the stdio stream to write to, X is the rtx, and
11695   CODE is the operand print code from the output string.  */
11696
11697static void
11698output_pic_addr_const (FILE *file, rtx x, int code)
11699{
11700  char buf[256];
11701
11702  switch (GET_CODE (x))
11703    {
11704    case PC:
11705      gcc_assert (flag_pic);
11706      putc ('.', file);
11707      break;
11708
11709    case SYMBOL_REF:
11710      if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
11711	output_addr_const (file, x);
11712      else
11713	{
11714	  const char *name = XSTR (x, 0);
11715
11716	  /* Mark the decl as referenced so that cgraph will
11717	     output the function.  */
11718	  if (SYMBOL_REF_DECL (x))
11719	    mark_decl_referenced (SYMBOL_REF_DECL (x));
11720
11721#if TARGET_MACHO
11722	  if (MACHOPIC_INDIRECT
11723	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
11724	    name = machopic_indirection_name (x, /*stub_p=*/true);
11725#endif
11726	  assemble_name (file, name);
11727	}
11728      if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
11729	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
11730	fputs ("@PLT", file);
11731      break;
11732
11733    case LABEL_REF:
11734      x = XEXP (x, 0);
11735      /* FALLTHRU */
11736    case CODE_LABEL:
11737      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
11738      assemble_name (asm_out_file, buf);
11739      break;
11740
11741    case CONST_INT:
11742      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11743      break;
11744
11745    case CONST:
11746      /* This used to output parentheses around the expression,
11747	 but that does not work on the 386 (either ATT or BSD assembler).  */
11748      output_pic_addr_const (file, XEXP (x, 0), code);
11749      break;
11750
11751    case CONST_DOUBLE:
11752      /* We can't handle floating point constants;
11753	 TARGET_PRINT_OPERAND must handle them.  */
11754      output_operand_lossage ("floating constant misused");
11755      break;
11756
11757    case PLUS:
11758      /* Some assemblers need integer constants to appear first.  */
11759      if (CONST_INT_P (XEXP (x, 0)))
11760	{
11761	  output_pic_addr_const (file, XEXP (x, 0), code);
11762	  putc ('+', file);
11763	  output_pic_addr_const (file, XEXP (x, 1), code);
11764	}
11765      else
11766	{
11767	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
11768	  output_pic_addr_const (file, XEXP (x, 1), code);
11769	  putc ('+', file);
11770	  output_pic_addr_const (file, XEXP (x, 0), code);
11771	}
11772      break;
11773
11774    case MINUS:
11775      if (!TARGET_MACHO)
11776	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
11777      output_pic_addr_const (file, XEXP (x, 0), code);
11778      putc ('-', file);
11779      output_pic_addr_const (file, XEXP (x, 1), code);
11780      if (!TARGET_MACHO)
11781	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
11782      break;
11783
11784    case UNSPEC:
11785      gcc_assert (XVECLEN (x, 0) == 1);
11786      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
11787      switch (XINT (x, 1))
11788	{
11789	case UNSPEC_GOT:
11790	  fputs ("@GOT", file);
11791	  break;
11792	case UNSPEC_GOTOFF:
11793	  fputs ("@GOTOFF", file);
11794	  break;
11795	case UNSPEC_PLTOFF:
11796	  fputs ("@PLTOFF", file);
11797	  break;
11798	case UNSPEC_PCREL:
11799	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11800		 "(%rip)" : "[rip]", file);
11801	  break;
11802	case UNSPEC_GOTPCREL:
11803	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11804		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
11805	  break;
11806	case UNSPEC_GOTTPOFF:
11807	  /* FIXME: This might be @TPOFF in Sun ld too.  */
11808	  fputs ("@gottpoff", file);
11809	  break;
11810	case UNSPEC_TPOFF:
11811	  fputs ("@tpoff", file);
11812	  break;
11813	case UNSPEC_NTPOFF:
11814	  if (TARGET_64BIT)
11815	    fputs ("@tpoff", file);
11816	  else
11817	    fputs ("@ntpoff", file);
11818	  break;
11819	case UNSPEC_DTPOFF:
11820	  fputs ("@dtpoff", file);
11821	  break;
11822	case UNSPEC_GOTNTPOFF:
11823	  if (TARGET_64BIT)
11824	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11825		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
11826	  else
11827	    fputs ("@gotntpoff", file);
11828	  break;
11829	case UNSPEC_INDNTPOFF:
11830	  fputs ("@indntpoff", file);
11831	  break;
11832#if TARGET_MACHO
11833	case UNSPEC_MACHOPIC_OFFSET:
11834	  putc ('-', file);
11835	  machopic_output_function_base_name (file);
11836	  break;
11837#endif
11838	default:
11839	  output_operand_lossage ("invalid UNSPEC as operand");
11840	  break;
11841	}
11842       break;
11843
11844    default:
11845      output_operand_lossage ("invalid expression as operand");
11846    }
11847}
11848
11849/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11850   We need to emit DTP-relative relocations.  */
11851
11852static void ATTRIBUTE_UNUSED
11853i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
11854{
11855  fputs (ASM_LONG, file);
11856  output_addr_const (file, x);
11857  fputs ("@dtpoff", file);
11858  switch (size)
11859    {
11860    case 4:
11861      break;
11862    case 8:
11863      fputs (", 0", file);
11864      break;
11865    default:
11866      gcc_unreachable ();
11867   }
11868}
11869
11870/* Return true if X is a representation of the PIC register.  This copes
11871   with calls from ix86_find_base_term, where the register might have
11872   been replaced by a cselib value.  */
11873
11874static bool
11875ix86_pic_register_p (rtx x)
11876{
11877  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
11878    return (pic_offset_table_rtx
11879	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
11880  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
11881    return true;
11882  else if (!REG_P (x))
11883    return false;
11884  else if (pic_offset_table_rtx)
11885    {
11886      if (REGNO (x) == REGNO (pic_offset_table_rtx))
11887	return true;
11888      if (HARD_REGISTER_P (x)
11889	  && !HARD_REGISTER_P (pic_offset_table_rtx)
11890	  && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
11891	return true;
11892      return false;
11893    }
11894  else
11895    return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
11896}
11897
11898/* Helper function for ix86_delegitimize_address.
11899   Attempt to delegitimize TLS local-exec accesses.  */
11900
11901static rtx
11902ix86_delegitimize_tls_address (rtx orig_x)
11903{
11904  rtx x = orig_x, unspec;
11905  struct ix86_address addr;
11906
11907  if (!TARGET_TLS_DIRECT_SEG_REFS)
11908    return orig_x;
11909  if (MEM_P (x))
11910    x = XEXP (x, 0);
11911  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
11912    return orig_x;
11913  if (ix86_decompose_address (x, &addr) == 0
11914      || addr.seg != DEFAULT_TLS_SEG_REG
11915      || addr.disp == NULL_RTX
11916      || GET_CODE (addr.disp) != CONST)
11917    return orig_x;
11918  unspec = XEXP (addr.disp, 0);
11919  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
11920    unspec = XEXP (unspec, 0);
11921  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
11922    return orig_x;
11923  x = XVECEXP (unspec, 0, 0);
11924  gcc_assert (GET_CODE (x) == SYMBOL_REF);
11925  if (unspec != XEXP (addr.disp, 0))
11926    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
11927  if (addr.index)
11928    {
11929      rtx idx = addr.index;
11930      if (addr.scale != 1)
11931	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
11932      x = gen_rtx_PLUS (Pmode, idx, x);
11933    }
11934  if (addr.base)
11935    x = gen_rtx_PLUS (Pmode, addr.base, x);
11936  if (MEM_P (orig_x))
11937    x = replace_equiv_address_nv (orig_x, x);
11938  return x;
11939}
11940
11941/* In the name of slightly smaller debug output, and to cater to
11942   general assembler lossage, recognize PIC+GOTOFF and turn it back
11943   into a direct symbol reference.
11944
11945   On Darwin, this is necessary to avoid a crash, because Darwin
11946   has a different PIC label for each routine but the DWARF debugging
11947   information is not associated with any particular routine, so it's
11948   necessary to remove references to the PIC label from RTL stored by
11949   the DWARF output code.
11950
11951   This helper is used in the normal ix86_delegitimize_address
11952   entrypoint (e.g. used in the target delegitimization hook) and
   in ix86_find_base_term.  As a compile-time memory optimization, we
   avoid allocating rtxes that would not change the outcome for the
   callers (find_base_value and find_base_term).  */
11956
11957static inline rtx
11958ix86_delegitimize_address_1 (rtx x, bool base_term_p)
11959{
11960  rtx orig_x = delegitimize_mem_from_attrs (x);
11961  /* addend is NULL or some rtx if x is something+GOTOFF where
11962     something doesn't include the PIC register.  */
11963  rtx addend = NULL_RTX;
11964  /* reg_addend is NULL or a multiple of some register.  */
11965  rtx reg_addend = NULL_RTX;
11966  /* const_addend is NULL or a const_int.  */
11967  rtx const_addend = NULL_RTX;
11968  /* This is the result, or NULL.  */
11969  rtx result = NULL_RTX;
11970
11971  x = orig_x;
11972
11973  if (MEM_P (x))
11974    x = XEXP (x, 0);
11975
11976  if (TARGET_64BIT)
11977    {
11978      if (GET_CODE (x) == CONST
11979          && GET_CODE (XEXP (x, 0)) == PLUS
11980          && GET_MODE (XEXP (x, 0)) == Pmode
11981          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11982          && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
11983          && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
11984        {
11985	  /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
11986	     base.  A CONST can't be arg_pointer_rtx based.  */
11987	  if (base_term_p && MEM_P (orig_x))
11988	    return orig_x;
11989	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
11990	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
11991	  if (MEM_P (orig_x))
11992	    x = replace_equiv_address_nv (orig_x, x);
11993	  return x;
11994	}
11995
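      /* Delegitimize a GOT load (foo@GOTPCREL, only when the original
	 was a MEM) or a plain PC-relative reference back to the
	 underlying symbol.  */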
11996      if (GET_CODE (x) == CONST
11997	  && GET_CODE (XEXP (x, 0)) == UNSPEC
11998	  && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
11999	      || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
12000	  && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
12001	{
12002	  x = XVECEXP (XEXP (x, 0), 0, 0);
12003	  if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
12004	    {
12005	      x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
12006	      if (x == NULL_RTX)
12007		return orig_x;
12008	    }
12009	  return x;
12010	}
12011
12012      if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
12013	return ix86_delegitimize_tls_address (orig_x);
12014
12015      /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
12016	 and -mcmodel=medium -fpic.  */
12017    }
12018
12019  if (GET_CODE (x) != PLUS
12020      || GET_CODE (XEXP (x, 1)) != CONST)
12021    return ix86_delegitimize_tls_address (orig_x);
12022
12023  if (ix86_pic_register_p (XEXP (x, 0)))
12024    /* %ebx + GOT/GOTOFF */
12025    ;
12026  else if (GET_CODE (XEXP (x, 0)) == PLUS)
12027    {
12028      /* %ebx + %reg * scale + GOT/GOTOFF */
12029      reg_addend = XEXP (x, 0);
12030      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12031	reg_addend = XEXP (reg_addend, 1);
12032      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12033	reg_addend = XEXP (reg_addend, 0);
12034      else
12035	{
12036	  reg_addend = NULL_RTX;
12037	  addend = XEXP (x, 0);
12038	}
12039    }
12040  else
12041    addend = XEXP (x, 0);
12042
12043  x = XEXP (XEXP (x, 1), 0);
12044  if (GET_CODE (x) == PLUS
12045      && CONST_INT_P (XEXP (x, 1)))
12046    {
12047      const_addend = XEXP (x, 1);
12048      x = XEXP (x, 0);
12049    }
12050
12051  if (GET_CODE (x) == UNSPEC
12052      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12053	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
12054	  || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
12055	      && !MEM_P (orig_x) && !addend)))
12056    result = XVECEXP (x, 0, 0);
12057
12058  if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
12059      && !MEM_P (orig_x))
12060    result = XVECEXP (x, 0, 0);
12061
12062  if (! result)
12063    return ix86_delegitimize_tls_address (orig_x);
12064
12065  /* For (PLUS something CONST_INT) both find_base_{value,term} just
12066     recurse on the first operand.  */
12067  if (const_addend && !base_term_p)
12068    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12069  if (reg_addend)
12070    result = gen_rtx_PLUS (Pmode, reg_addend, result);
12071  if (addend)
12072    {
12073      /* If the rest of original X doesn't involve the PIC register, add
12074	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
12075	 for code like:
12076	 leal (%ebx, %ecx, 4), %ecx
12077	 ...
12078	 movl foo@GOTOFF(%ecx), %edx
12079	 in which case we return (%ecx - %ebx) + foo
12080	 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
12081	 and reload has completed.  Don't do the latter for debug,
12082	 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly.  */
12083      if (pic_offset_table_rtx
12084	  && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
12085        result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12086						     pic_offset_table_rtx),
12087			       result);
12088      else if (base_term_p
12089	       && pic_offset_table_rtx
12090	       && !TARGET_MACHO
12091	       && !TARGET_VXWORKS_RTP)
12092	{
12093	  rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
12094	  tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
12095	  result = gen_rtx_PLUS (Pmode, tmp, result);
12096	}
12097      else
12098	return orig_x;
12099    }
12100  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12101    {
12102      result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
12103      if (result == NULL_RTX)
12104	return orig_x;
12105    }
12106  return result;
12107}
12108
12109/* The normal instantiation of the above template.  */
12110
12111static rtx
12112ix86_delegitimize_address (rtx x)
12113{
12114  return ix86_delegitimize_address_1 (x, false);
12115}
12116
12117/* If X is a machine specific address (i.e. a symbol or label being
12118   referenced as a displacement from the GOT implemented using an
12119   UNSPEC), then return the base term.  Otherwise return X.  */
12120
12121rtx
12122ix86_find_base_term (rtx x)
12123{
12124  rtx term;
12125
12126  if (TARGET_64BIT)
12127    {
12128      if (GET_CODE (x) != CONST)
12129	return x;
12130      term = XEXP (x, 0);
12131      if (GET_CODE (term) == PLUS
12132	  && CONST_INT_P (XEXP (term, 1)))
12133	term = XEXP (term, 0);
12134      if (GET_CODE (term) != UNSPEC
12135	  || (XINT (term, 1) != UNSPEC_GOTPCREL
12136	      && XINT (term, 1) != UNSPEC_PCREL))
12137	return x;
12138
12139      return XVECEXP (term, 0, 0);
12140    }
12141
12142  return ix86_delegitimize_address_1 (x, true);
12143}
12144
12145/* Return true if X shouldn't be emitted into the debug info.
   Disallow UNSPECs other than @gotoff - we can't easily emit the
   _GLOBAL_OFFSET_TABLE_ symbol into the .debug_info section, so we do
   not delegitimize, but instead assemble it as @gotoff.
   Disallow a _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
   assembles that as a _GLOBAL_OFFSET_TABLE_-. expression.  */
12151
12152static bool
12153ix86_const_not_ok_for_debug_p (rtx x)
12154{
12155  if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
12156    return true;
12157
12158  if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
12159    return true;
12160
12161  return false;
12162}
12163
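/* Print to FILE the condition suffix that encodes comparison CODE in
   condition-code mode MODE.  If REVERSE, the condition is reversed
   first.  FP selects the suffix spelling used by the floating-point
   conditional moves (e.g. "nbe" instead of "a" for GTU).  */
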
12164static void
12165put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
12166		    bool fp, FILE *file)
12167{
12168  const char *suffix;
12169
12170  if (mode == CCFPmode)
12171    {
12172      code = ix86_fp_compare_code_to_integer (code);
12173      mode = CCmode;
12174    }
12175  if (reverse)
12176    code = reverse_condition (code);
12177
12178  switch (code)
12179    {
12180    case EQ:
12181      gcc_assert (mode != CCGZmode);
12182      switch (mode)
12183	{
12184	case E_CCAmode:
12185	  suffix = "a";
12186	  break;
12187	case E_CCCmode:
12188	  suffix = "c";
12189	  break;
12190	case E_CCOmode:
12191	  suffix = "o";
12192	  break;
12193	case E_CCPmode:
12194	  suffix = "p";
12195	  break;
12196	case E_CCSmode:
12197	  suffix = "s";
12198	  break;
12199	default:
12200	  suffix = "e";
12201	  break;
12202	}
12203      break;
12204    case NE:
12205      gcc_assert (mode != CCGZmode);
12206      switch (mode)
12207	{
12208	case E_CCAmode:
12209	  suffix = "na";
12210	  break;
12211	case E_CCCmode:
12212	  suffix = "nc";
12213	  break;
12214	case E_CCOmode:
12215	  suffix = "no";
12216	  break;
12217	case E_CCPmode:
12218	  suffix = "np";
12219	  break;
12220	case E_CCSmode:
12221	  suffix = "ns";
12222	  break;
12223	default:
12224	  suffix = "ne";
12225	  break;
12226	}
12227      break;
12228    case GT:
12229      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12230      suffix = "g";
12231      break;
12232    case GTU:
12233      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12234	 Those same assemblers have the same but opposite lossage on cmov.  */
12235      if (mode == CCmode)
12236	suffix = fp ? "nbe" : "a";
12237      else
12238	gcc_unreachable ();
12239      break;
12240    case LT:
12241      switch (mode)
12242	{
12243	case E_CCNOmode:
12244	case E_CCGOCmode:
12245	  suffix = "s";
12246	  break;
12247
12248	case E_CCmode:
12249	case E_CCGCmode:
12250	case E_CCGZmode:
12251	  suffix = "l";
12252	  break;
12253
12254	default:
12255	  gcc_unreachable ();
12256	}
12257      break;
12258    case LTU:
12259      if (mode == CCmode || mode == CCGZmode)
12260	suffix = "b";
12261      else if (mode == CCCmode)
12262	suffix = fp ? "b" : "c";
12263      else
12264	gcc_unreachable ();
12265      break;
12266    case GE:
12267      switch (mode)
12268	{
12269	case E_CCNOmode:
12270	case E_CCGOCmode:
12271	  suffix = "ns";
12272	  break;
12273
12274	case E_CCmode:
12275	case E_CCGCmode:
12276	case E_CCGZmode:
12277	  suffix = "ge";
12278	  break;
12279
12280	default:
12281	  gcc_unreachable ();
12282	}
12283      break;
12284    case GEU:
12285      if (mode == CCmode || mode == CCGZmode)
12286	suffix = "nb";
12287      else if (mode == CCCmode)
12288	suffix = fp ? "nb" : "nc";
12289      else
12290	gcc_unreachable ();
12291      break;
12292    case LE:
12293      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12294      suffix = "le";
12295      break;
12296    case LEU:
12297      if (mode == CCmode)
12298	suffix = "be";
12299      else
12300	gcc_unreachable ();
12301      break;
12302    case UNORDERED:
12303      suffix = fp ? "u" : "p";
12304      break;
12305    case ORDERED:
12306      suffix = fp ? "nu" : "np";
12307      break;
12308    default:
12309      gcc_unreachable ();
12310    }
12311  fputs (suffix, file);
12312}
12313
12314/* Print the name of register X to FILE based on its machine mode and number.
12315   If CODE is 'w', pretend the mode is HImode.
12316   If CODE is 'b', pretend the mode is QImode.
12317   If CODE is 'k', pretend the mode is SImode.
12318   If CODE is 'q', pretend the mode is DImode.
12319   If CODE is 'x', pretend the mode is V4SFmode.
12320   If CODE is 't', pretend the mode is V8SFmode.
12321   If CODE is 'g', pretend the mode is V16SFmode.
12322   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack operand.
12324   If CODE is 'd', duplicate the operand for AVX instruction.
12325   If CODE is 'V', print naked full integer register name without %.
12326 */
12327
12328void
12329print_reg (rtx x, int code, FILE *file)
12330{
12331  const char *reg;
12332  int msize;
12333  unsigned int regno;
12334  bool duplicated;
12335
12336  if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
12337    putc ('%', file);
12338
12339  if (x == pc_rtx)
12340    {
12341      gcc_assert (TARGET_64BIT);
12342      fputs ("rip", file);
12343      return;
12344    }
12345
12346  if (code == 'y' && STACK_TOP_P (x))
12347    {
12348      fputs ("st(0)", file);
12349      return;
12350    }
12351
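  /* Translate a size-override code into an operand size in bytes;
     0 selects the "high" byte register (ah, bh, ch, dh).  */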
12352  if (code == 'w')
12353    msize = 2;
12354  else if (code == 'b')
12355    msize = 1;
12356  else if (code == 'k')
12357    msize = 4;
12358  else if (code == 'q')
12359    msize = 8;
12360  else if (code == 'h')
12361    msize = 0;
12362  else if (code == 'x')
12363    msize = 16;
12364  else if (code == 't')
12365    msize = 32;
12366  else if (code == 'g')
12367    msize = 64;
12368  else
12369    msize = GET_MODE_SIZE (GET_MODE (x));
12370
12371  regno = REGNO (x);
12372
12373  if (regno == ARG_POINTER_REGNUM
12374      || regno == FRAME_POINTER_REGNUM
12375      || regno == FPSR_REG)
12376    {
12377      output_operand_lossage
12378	("invalid use of register '%s'", reg_names[regno]);
12379      return;
12380    }
12381  else if (regno == FLAGS_REG)
12382    {
12383      output_operand_lossage ("invalid use of asm flag output");
12384      return;
12385    }
12386
12387  if (code == 'V')
12388    {
12389      if (GENERAL_REGNO_P (regno))
12390	msize = GET_MODE_SIZE (word_mode);
12391      else
12392	error ("%<V%> modifier on non-integer register");
12393    }
12394
12395  duplicated = code == 'd' && TARGET_AVX;
12396
12397  switch (msize)
12398    {
12399    case 16:
12400    case 12:
12401    case 8:
12402      if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
12403	warning (0, "unsupported size for integer register");
12404      /* FALLTHRU */
12405    case 4:
12406      if (LEGACY_INT_REGNO_P (regno))
12407	putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
12408      /* FALLTHRU */
12409    case 2:
12410    normal:
12411      reg = hi_reg_name[regno];
12412      break;
12413    case 1:
12414      if (regno >= ARRAY_SIZE (qi_reg_name))
12415	goto normal;
12416      if (!ANY_QI_REGNO_P (regno))
12417	error ("unsupported size for integer register");
12418      reg = qi_reg_name[regno];
12419      break;
12420    case 0:
12421      if (regno >= ARRAY_SIZE (qi_high_reg_name))
12422	goto normal;
12423      reg = qi_high_reg_name[regno];
12424      break;
12425    case 32:
12426    case 64:
12427      if (SSE_REGNO_P (regno))
12428	{
12429	  gcc_assert (!duplicated);
12430	  putc (msize == 32 ? 'y' : 'z', file);
12431	  reg = hi_reg_name[regno] + 1;
12432	  break;
12433	}
12434      goto normal;
12435    default:
12436      gcc_unreachable ();
12437    }
12438
12439  fputs (reg, file);
12440
  /* Irritatingly, the AMD extended registers use a
     different naming convention: "r%d[bwd]".  */
12443  if (REX_INT_REGNO_P (regno))
12444    {
12445      gcc_assert (TARGET_64BIT);
12446      switch (msize)
12447	{
12448	  case 0:
12449	    error ("extended registers have no high halves");
12450	    break;
12451	  case 1:
12452	    putc ('b', file);
12453	    break;
12454	  case 2:
12455	    putc ('w', file);
12456	    break;
12457	  case 4:
12458	    putc ('d', file);
12459	    break;
12460	  case 8:
12461	    /* no suffix */
12462	    break;
12463	  default:
12464	    error ("unsupported operand size for extended register");
12465	    break;
12466	}
12467      return;
12468    }
12469
12470  if (duplicated)
12471    {
12472      if (ASSEMBLER_DIALECT == ASM_ATT)
12473	fprintf (file, ", %%%s", reg);
12474      else
12475	fprintf (file, ", %s", reg);
12476    }
12477}
12478
12479/* Meaning of CODE:
12480   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12481   C -- print opcode suffix for set/cmov insn.
12482   c -- like C, but print reversed condition
12483   F,f -- likewise, but for floating-point.
12484   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12485	otherwise nothing
12486   R -- print embedded rounding and sae.
12487   r -- print only sae.
12488   z -- print the opcode suffix for the size of the current operand.
12489   Z -- likewise, with special suffixes for x87 instructions.
12490   * -- print a star (in certain assembler syntax)
12491   A -- print an absolute memory reference.
12492   E -- print address with DImode register names if TARGET_64BIT.
12493   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
12495	delimiter.
12496   b -- print the QImode name of the register for the indicated operand.
12497	%b0 would print %al if operands[0] is reg 0.
12498   w --  likewise, print the HImode name of the register.
12499   k --  likewise, print the SImode name of the register.
12500   q --  likewise, print the DImode name of the register.
12501   x --  likewise, print the V4SFmode name of the register.
12502   t --  likewise, print the V8SFmode name of the register.
12503   g --  likewise, print the V16SFmode name of the register.
12504   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12505   y -- print "st(0)" instead of "st" as a register.
12506   d -- print duplicated register operand for AVX instruction.
12507   D -- print condition for SSE cmp instruction.
12508   P -- if PIC, print an @PLT suffix.
12509   p -- print raw symbol name.
12510   X -- don't print any sort of PIC '@' suffix for a symbol.
12511   & -- print some in-use local-dynamic symbol name.
12512   H -- print a memory address offset by 8; used for sse high-parts
12513   Y -- print condition for XOP pcom* instruction.
12514   V -- print naked full integer register name without %.
12515   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes, due to a bug in older gas).
12517   ~ -- print "i" if TARGET_AVX2, "f" otherwise.
12518   ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
12519   M -- print addr32 prefix for TARGET_X32 with VSIB address.
12520   ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
12521 */
12522
12523void
12524ix86_print_operand (FILE *file, rtx x, int code)
12525{
12526  if (code)
12527    {
12528      switch (code)
12529	{
12530	case 'A':
12531	  switch (ASSEMBLER_DIALECT)
12532	    {
12533	    case ASM_ATT:
12534	      putc ('*', file);
12535	      break;
12536
12537	    case ASM_INTEL:
	      /* Intel syntax.  For absolute addresses, registers should not
		 be surrounded by brackets.  */
12540	      if (!REG_P (x))
12541		{
12542		  putc ('[', file);
12543		  ix86_print_operand (file, x, 0);
12544		  putc (']', file);
12545		  return;
12546		}
12547	      break;
12548
12549	    default:
12550	      gcc_unreachable ();
12551	    }
12552
12553	  ix86_print_operand (file, x, 0);
12554	  return;
12555
12556	case 'E':
12557	  /* Wrap address in an UNSPEC to declare special handling.  */
12558	  if (TARGET_64BIT)
12559	    x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
12560
12561	  output_address (VOIDmode, x);
12562	  return;
12563
12564	case 'L':
12565	  if (ASSEMBLER_DIALECT == ASM_ATT)
12566	    putc ('l', file);
12567	  return;
12568
12569	case 'W':
12570	  if (ASSEMBLER_DIALECT == ASM_ATT)
12571	    putc ('w', file);
12572	  return;
12573
12574	case 'B':
12575	  if (ASSEMBLER_DIALECT == ASM_ATT)
12576	    putc ('b', file);
12577	  return;
12578
12579	case 'Q':
12580	  if (ASSEMBLER_DIALECT == ASM_ATT)
12581	    putc ('l', file);
12582	  return;
12583
12584	case 'S':
12585	  if (ASSEMBLER_DIALECT == ASM_ATT)
12586	    putc ('s', file);
12587	  return;
12588
12589	case 'T':
12590	  if (ASSEMBLER_DIALECT == ASM_ATT)
12591	    putc ('t', file);
12592	  return;
12593
12594	case 'O':
12595#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12596	  if (ASSEMBLER_DIALECT != ASM_ATT)
12597	    return;
12598
12599	  switch (GET_MODE_SIZE (GET_MODE (x)))
12600	    {
12601	    case 2:
12602	      putc ('w', file);
12603	      break;
12604
12605	    case 4:
12606	      putc ('l', file);
12607	      break;
12608
12609	    case 8:
12610	      putc ('q', file);
12611	      break;
12612
12613	    default:
12614	      output_operand_lossage ("invalid operand size for operand "
12615				      "code 'O'");
12616	      return;
12617	    }
12618
12619	  putc ('.', file);
12620#endif
12621	  return;
12622
12623	case 'z':
12624	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12625	    {
	      /* Opcodes don't get size suffixes when using Intel syntax.  */
12627	      if (ASSEMBLER_DIALECT == ASM_INTEL)
12628		return;
12629
12630	      switch (GET_MODE_SIZE (GET_MODE (x)))
12631		{
12632		case 1:
12633		  putc ('b', file);
12634		  return;
12635
12636		case 2:
12637		  putc ('w', file);
12638		  return;
12639
12640		case 4:
12641		  putc ('l', file);
12642		  return;
12643
12644		case 8:
12645		  putc ('q', file);
12646		  return;
12647
12648		default:
12649		  output_operand_lossage ("invalid operand size for operand "
12650					  "code 'z'");
12651		  return;
12652		}
12653	    }
12654
12655	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12656	    warning (0, "non-integer operand used with operand code %<z%>");
12657	  /* FALLTHRU */
12658
12659	case 'Z':
	  /* 387 opcodes don't get size suffixes when using Intel syntax.  */
12661	  if (ASSEMBLER_DIALECT == ASM_INTEL)
12662	    return;
12663
12664	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12665	    {
12666	      switch (GET_MODE_SIZE (GET_MODE (x)))
12667		{
12668		case 2:
12669#ifdef HAVE_AS_IX86_FILDS
12670		  putc ('s', file);
12671#endif
12672		  return;
12673
12674		case 4:
12675		  putc ('l', file);
12676		  return;
12677
12678		case 8:
12679#ifdef HAVE_AS_IX86_FILDQ
12680		  putc ('q', file);
12681#else
12682		  fputs ("ll", file);
12683#endif
12684		  return;
12685
12686		default:
12687		  break;
12688		}
12689	    }
12690	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12691	    {
12692	      /* 387 opcodes don't get size suffixes
12693		 if the operands are registers.  */
12694	      if (STACK_REG_P (x))
12695		return;
12696
12697	      switch (GET_MODE_SIZE (GET_MODE (x)))
12698		{
12699		case 4:
12700		  putc ('s', file);
12701		  return;
12702
12703		case 8:
12704		  putc ('l', file);
12705		  return;
12706
12707		case 12:
12708		case 16:
12709		  putc ('t', file);
12710		  return;
12711
12712		default:
12713		  break;
12714		}
12715	    }
12716	  else
12717	    {
12718	      output_operand_lossage ("invalid operand type used with "
12719				      "operand code 'Z'");
12720	      return;
12721	    }
12722
12723	  output_operand_lossage ("invalid operand size for operand code 'Z'");
12724	  return;
12725
12726	case 'd':
12727	case 'b':
12728	case 'w':
12729	case 'k':
12730	case 'q':
12731	case 'h':
12732	case 't':
12733	case 'g':
12734	case 'y':
12735	case 'x':
12736	case 'X':
12737	case 'P':
12738	case 'p':
12739	case 'V':
12740	  break;
12741
12742	case 's':
12743	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12744	    {
12745	      ix86_print_operand (file, x, 0);
12746	      fputs (", ", file);
12747	    }
12748	  return;
12749
12750	case 'Y':
12751	  switch (GET_CODE (x))
12752	    {
12753	    case NE:
12754	      fputs ("neq", file);
12755	      break;
12756	    case EQ:
12757	      fputs ("eq", file);
12758	      break;
12759	    case GE:
12760	    case GEU:
12761	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
12762	      break;
12763	    case GT:
12764	    case GTU:
12765	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
12766	      break;
12767	    case LE:
12768	    case LEU:
12769	      fputs ("le", file);
12770	      break;
12771	    case LT:
12772	    case LTU:
12773	      fputs ("lt", file);
12774	      break;
12775	    case UNORDERED:
12776	      fputs ("unord", file);
12777	      break;
12778	    case ORDERED:
12779	      fputs ("ord", file);
12780	      break;
12781	    case UNEQ:
12782	      fputs ("ueq", file);
12783	      break;
12784	    case UNGE:
12785	      fputs ("nlt", file);
12786	      break;
12787	    case UNGT:
12788	      fputs ("nle", file);
12789	      break;
12790	    case UNLE:
12791	      fputs ("ule", file);
12792	      break;
12793	    case UNLT:
12794	      fputs ("ult", file);
12795	      break;
12796	    case LTGT:
12797	      fputs ("une", file);
12798	      break;
12799	    default:
12800	      output_operand_lossage ("operand is not a condition code, "
12801				      "invalid operand code 'Y'");
12802	      return;
12803	    }
12804	  return;
12805
12806	case 'D':
	  /* A little bit of brain damage here.  The SSE compare instructions
	     use completely different names for the comparisons than the
	     fp conditional moves do.  */
12810	  switch (GET_CODE (x))
12811	    {
12812	    case UNEQ:
12813	      if (TARGET_AVX)
12814		{
12815		  fputs ("eq_us", file);
12816		  break;
12817		}
12818	     /* FALLTHRU */
12819	    case EQ:
12820	      fputs ("eq", file);
12821	      break;
12822	    case UNLT:
12823	      if (TARGET_AVX)
12824		{
12825		  fputs ("nge", file);
12826		  break;
12827		}
12828	     /* FALLTHRU */
12829	    case LT:
12830	      fputs ("lt", file);
12831	      break;
12832	    case UNLE:
12833	      if (TARGET_AVX)
12834		{
12835		  fputs ("ngt", file);
12836		  break;
12837		}
12838	     /* FALLTHRU */
12839	    case LE:
12840	      fputs ("le", file);
12841	      break;
12842	    case UNORDERED:
12843	      fputs ("unord", file);
12844	      break;
12845	    case LTGT:
12846	      if (TARGET_AVX)
12847		{
12848		  fputs ("neq_oq", file);
12849		  break;
12850		}
12851	     /* FALLTHRU */
12852	    case NE:
12853	      fputs ("neq", file);
12854	      break;
12855	    case GE:
12856	      if (TARGET_AVX)
12857		{
12858		  fputs ("ge", file);
12859		  break;
12860		}
12861	     /* FALLTHRU */
12862	    case UNGE:
12863	      fputs ("nlt", file);
12864	      break;
12865	    case GT:
12866	      if (TARGET_AVX)
12867		{
12868		  fputs ("gt", file);
12869		  break;
12870		}
12871	     /* FALLTHRU */
12872	    case UNGT:
12873	      fputs ("nle", file);
12874	      break;
12875	    case ORDERED:
12876	      fputs ("ord", file);
12877	      break;
12878	    default:
12879	      output_operand_lossage ("operand is not a condition code, "
12880				      "invalid operand code 'D'");
12881	      return;
12882	    }
12883	  return;
12884
12885	case 'F':
12886	case 'f':
12887#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12888	  if (ASSEMBLER_DIALECT == ASM_ATT)
12889	    putc ('.', file);
12890	  gcc_fallthrough ();
12891#endif
12892
12893	case 'C':
12894	case 'c':
12895	  if (!COMPARISON_P (x))
12896	    {
12897	      output_operand_lossage ("operand is not a condition code, "
12898				      "invalid operand code '%c'", code);
12899	      return;
12900	    }
12901	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
12902			      code == 'c' || code == 'f',
12903			      code == 'F' || code == 'f',
12904			      file);
12905	  return;
12906
12907	case 'H':
12908	  if (!offsettable_memref_p (x))
12909	    {
12910	      output_operand_lossage ("operand is not an offsettable memory "
12911				      "reference, invalid operand code 'H'");
12912	      return;
12913	    }
12914	  /* It doesn't actually matter what mode we use here, as we're
12915	     only going to use this for printing.  */
12916	  x = adjust_address_nv (x, DImode, 8);
12917	  /* Output 'qword ptr' for intel assembler dialect.  */
12918	  if (ASSEMBLER_DIALECT == ASM_INTEL)
12919	    code = 'q';
12920	  break;
12921
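	/* 'K': print an HLE "xacquire "/"xrelease " prefix (or the raw
	   prefix byte when the assembler lacks HLE support), selected by
	   the IX86_HLE_ACQUIRE/IX86_HLE_RELEASE bits of the operand.  */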
12922	case 'K':
12923	  if (!CONST_INT_P (x))
12924	    {
12925	      output_operand_lossage ("operand is not an integer, invalid "
12926				      "operand code 'K'");
12927	      return;
12928	    }
12929
12930	  if (INTVAL (x) & IX86_HLE_ACQUIRE)
12931#ifdef HAVE_AS_IX86_HLE
12932	    fputs ("xacquire ", file);
12933#else
12934	    fputs ("\n" ASM_BYTE "0xf2\n\t", file);
12935#endif
12936	  else if (INTVAL (x) & IX86_HLE_RELEASE)
12937#ifdef HAVE_AS_IX86_HLE
12938	    fputs ("xrelease ", file);
12939#else
12940	    fputs ("\n" ASM_BYTE "0xf3\n\t", file);
12941#endif
	  /* We do not want to print the value of the operand.  */
12943	  return;
12944
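	/* 'N': print the "{z}" zero-masking operator when the operand is
	   a zero constant.  */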
12945	case 'N':
12946	  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
12947	    fputs ("{z}", file);
12948	  return;
12949
12950	case 'r':
12951	  if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
12952	    {
12953	      output_operand_lossage ("operand is not a specific integer, "
12954				      "invalid operand code 'r'");
12955	      return;
12956	    }
12957
12958	  if (ASSEMBLER_DIALECT == ASM_INTEL)
12959	    fputs (", ", file);
12960
12961	  fputs ("{sae}", file);
12962
12963	  if (ASSEMBLER_DIALECT == ASM_ATT)
12964	    fputs (", ", file);
12965
12966	  return;
12967
12968	case 'R':
12969	  if (!CONST_INT_P (x))
12970	    {
12971	      output_operand_lossage ("operand is not an integer, invalid "
12972				      "operand code 'R'");
12973	      return;
12974	    }
12975
12976	  if (ASSEMBLER_DIALECT == ASM_INTEL)
12977	    fputs (", ", file);
12978
12979	  switch (INTVAL (x))
12980	    {
12981	    case ROUND_NEAREST_INT | ROUND_SAE:
12982	      fputs ("{rn-sae}", file);
12983	      break;
12984	    case ROUND_NEG_INF | ROUND_SAE:
12985	      fputs ("{rd-sae}", file);
12986	      break;
12987	    case ROUND_POS_INF | ROUND_SAE:
12988	      fputs ("{ru-sae}", file);
12989	      break;
12990	    case ROUND_ZERO | ROUND_SAE:
12991	      fputs ("{rz-sae}", file);
12992	      break;
12993	    default:
12994	      output_operand_lossage ("operand is not a specific integer, "
12995				      "invalid operand code 'R'");
12996	    }
12997
12998	  if (ASSEMBLER_DIALECT == ASM_ATT)
12999	    fputs (", ", file);
13000
13001	  return;
13002
13003	case '*':
13004	  if (ASSEMBLER_DIALECT == ASM_ATT)
13005	    putc ('*', file);
13006	  return;
13007
13008	case '&':
13009	  {
13010	    const char *name = get_some_local_dynamic_name ();
13011	    if (name == NULL)
13012	      output_operand_lossage ("'%%&' used without any "
13013				      "local dynamic TLS references");
13014	    else
13015	      assemble_name (file, name);
13016	    return;
13017	  }
13018
13019	case '+':
13020	  {
13021	    rtx x;
13022
13023	    if (!optimize
13024	        || optimize_function_for_size_p (cfun)
13025		|| !TARGET_BRANCH_PREDICTION_HINTS)
13026	      return;
13027
13028	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13029	    if (x)
13030	      {
13031		int pred_val = profile_probability::from_reg_br_prob_note
13032				 (XINT (x, 0)).to_reg_br_prob_base ();
13033
13034		if (pred_val < REG_BR_PROB_BASE * 45 / 100
13035		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
13036		  {
13037		    bool taken = pred_val > REG_BR_PROB_BASE / 2;
13038		    bool cputaken
13039		      = final_forward_branch_p (current_output_insn) == 0;
13040
		    /* Emit hints only when the default branch prediction
		       heuristics would fail.  */
13043		    if (taken != cputaken)
13044		      {
13045			/* We use 3e (DS) prefix for taken branches and
13046			   2e (CS) prefix for not taken branches.  */
13047			if (taken)
13048			  fputs ("ds ; ", file);
13049			else
13050			  fputs ("cs ; ", file);
13051		      }
13052		  }
13053	      }
13054	    return;
13055	  }
13056
13057	case ';':
13058#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13059	  putc (';', file);
13060#endif
13061	  return;
13062
13063	case '~':
13064	  putc (TARGET_AVX2 ? 'i' : 'f', file);
13065	  return;
13066
13067	case 'M':
13068	  if (TARGET_X32)
13069	    {
	      /* NB: 32-bit indices in a VSIB address are sign-extended
		 to 64 bits.  In x32, a 32-bit address like 0xf7fa3010
		 is sign-extended to 0xfffffffff7fa3010, which is an
		 invalid address.  Add the addr32 prefix if there is
		 neither a base register nor a symbol.  */
13075	      bool ok;
13076	      struct ix86_address parts;
13077	      ok = ix86_decompose_address (x, &parts);
13078	      gcc_assert (ok && parts.index == NULL_RTX);
13079	      if (parts.base == NULL_RTX
13080		  && (parts.disp == NULL_RTX
13081		      || !symbolic_operand (parts.disp,
13082					    GET_MODE (parts.disp))))
13083		fputs ("addr32 ", file);
13084	    }
13085	  return;
13086
13087	case '^':
13088	  if (TARGET_64BIT && Pmode != word_mode)
13089	    fputs ("addr32 ", file);
13090	  return;
13091
13092	case '!':
13093	  if (ix86_notrack_prefixed_insn_p (current_output_insn))
13094	    fputs ("notrack ", file);
13095	  return;
13096
13097	default:
13098	  output_operand_lossage ("invalid operand code '%c'", code);
13099	}
13100    }
13101
13102  if (REG_P (x))
13103    print_reg (x, code, file);
13104
13105  else if (MEM_P (x))
13106    {
13107      rtx addr = XEXP (x, 0);
13108
13109      /* No `byte ptr' prefix for call instructions ... */
13110      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
13111	{
13112	  machine_mode mode = GET_MODE (x);
13113	  const char *size;
13114
13115	  /* Check for explicit size override codes.  */
13116	  if (code == 'b')
13117	    size = "BYTE";
13118	  else if (code == 'w')
13119	    size = "WORD";
13120	  else if (code == 'k')
13121	    size = "DWORD";
13122	  else if (code == 'q')
13123	    size = "QWORD";
13124	  else if (code == 'x')
13125	    size = "XMMWORD";
13126	  else if (code == 't')
13127	    size = "YMMWORD";
13128	  else if (code == 'g')
13129	    size = "ZMMWORD";
13130	  else if (mode == BLKmode)
13131	    /* ... or BLKmode operands, when not overridden.  */
13132	    size = NULL;
13133	  else
13134	    switch (GET_MODE_SIZE (mode))
13135	      {
13136	      case 1: size = "BYTE"; break;
13137	      case 2: size = "WORD"; break;
13138	      case 4: size = "DWORD"; break;
13139	      case 8: size = "QWORD"; break;
13140	      case 12: size = "TBYTE"; break;
13141	      case 16:
13142		if (mode == XFmode)
13143		  size = "TBYTE";
13144		else
13145		  size = "XMMWORD";
13146		break;
13147	      case 32: size = "YMMWORD"; break;
13148	      case 64: size = "ZMMWORD"; break;
13149	      default:
13150		gcc_unreachable ();
13151	      }
13152	  if (size)
13153	    {
13154	      fputs (size, file);
13155	      fputs (" PTR ", file);
13156	    }
13157	}
13158
13159      if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
13160	output_operand_lossage ("invalid constraints for operand");
13161      else
13162	ix86_print_operand_address_as
13163	  (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
13164    }
13165
13166  else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
13167    {
13168      long l;
13169
13170      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
13171
13172      if (ASSEMBLER_DIALECT == ASM_ATT)
13173	putc ('$', file);
13174      /* Sign extend 32bit SFmode immediate to 8 bytes.  */
13175      if (code == 'q')
13176	fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
13177		 (unsigned long long) (int) l);
13178      else
13179	fprintf (file, "0x%08x", (unsigned int) l);
13180    }
13181
13182  else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
13183    {
13184      long l[2];
13185
13186      REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
13187
13188      if (ASSEMBLER_DIALECT == ASM_ATT)
13189	putc ('$', file);
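      /* Print the 64-bit image of the constant as hex, high word first.  */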
13190      fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
13191    }
13192
13193  /* These float cases don't actually occur as immediate operands.  */
13194  else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
13195    {
13196      char dstr[30];
13197
13198      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13199      fputs (dstr, file);
13200    }
13201
13202  else
13203    {
13204      /* We have patterns that allow zero sets of memory, for instance.
13205	 In 64-bit mode, we should probably support all 8-byte vectors,
13206	 since we can in fact encode that into an immediate.  */
13207      if (GET_CODE (x) == CONST_VECTOR)
13208	{
13209	  if (x != CONST0_RTX (GET_MODE (x)))
13210	    output_operand_lossage ("invalid vector immediate");
13211	  x = const0_rtx;
13212	}
13213
13214      if (code != 'P' && code != 'p')
13215	{
13216	  if (CONST_INT_P (x))
13217	    {
13218	      if (ASSEMBLER_DIALECT == ASM_ATT)
13219		putc ('$', file);
13220	    }
13221	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13222		   || GET_CODE (x) == LABEL_REF)
13223	    {
13224	      if (ASSEMBLER_DIALECT == ASM_ATT)
13225		putc ('$', file);
13226	      else
13227		fputs ("OFFSET FLAT:", file);
13228	    }
13229	}
13230      if (CONST_INT_P (x))
13231	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13232      else if (flag_pic || MACHOPIC_INDIRECT)
13233	output_pic_addr_const (file, x, code);
13234      else
13235	output_addr_const (file, x);
13236    }
13237}
13238
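/* Return true if CODE is one of the punctuation characters that
   ix86_print_operand handles without an accompanying operand.  */
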
13239static bool
13240ix86_print_operand_punct_valid_p (unsigned char code)
13241{
13242  return (code == '*' || code == '+' || code == '&' || code == ';'
13243	  || code == '~' || code == '^' || code == '!');
13244}
13245
/* Print to FILE the address ADDR of a memory operand in address space AS.
   If NO_RIP, do not use RIP-relative addressing.  */
13247
13248static void
13249ix86_print_operand_address_as (FILE *file, rtx addr,
13250			       addr_space_t as, bool no_rip)
13251{
13252  struct ix86_address parts;
13253  rtx base, index, disp;
13254  int scale;
13255  int ok;
13256  bool vsib = false;
13257  int code = 0;
13258
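  /* A VSIB address arrives wrapped in an UNSPEC that carries the vector
     index and scale; unpack them before decomposing the scalar part of
     the address.  */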
13259  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
13260    {
13261      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13262      gcc_assert (parts.index == NULL_RTX);
13263      parts.index = XVECEXP (addr, 0, 1);
13264      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
13265      addr = XVECEXP (addr, 0, 0);
13266      vsib = true;
13267    }
13268  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
13269    {
13270      gcc_assert (TARGET_64BIT);
13271      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13272      code = 'q';
13273    }
13274  else
13275    ok = ix86_decompose_address (addr, &parts);
13276
13277  gcc_assert (ok);
13278
13279  base = parts.base;
13280  index = parts.index;
13281  disp = parts.disp;
13282  scale = parts.scale;
13283
13284  if (ADDR_SPACE_GENERIC_P (as))
13285    as = parts.seg;
13286  else
13287    gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
13288
13289  if (!ADDR_SPACE_GENERIC_P (as))
13290    {
13291      if (ASSEMBLER_DIALECT == ASM_ATT)
13292	putc ('%', file);
13293
13294      switch (as)
13295	{
13296	case ADDR_SPACE_SEG_FS:
13297	  fputs ("fs:", file);
13298	  break;
13299	case ADDR_SPACE_SEG_GS:
13300	  fputs ("gs:", file);
13301	  break;
13302	default:
13303	  gcc_unreachable ();
13304	}
13305    }
13306
  /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode.  */
13308  if (TARGET_64BIT && !base && !index && !no_rip)
13309    {
13310      rtx symbol = disp;
13311
13312      if (GET_CODE (disp) == CONST
13313	  && GET_CODE (XEXP (disp, 0)) == PLUS
13314	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13315	symbol = XEXP (XEXP (disp, 0), 0);
13316
13317      if (GET_CODE (symbol) == LABEL_REF
13318	  || (GET_CODE (symbol) == SYMBOL_REF
13319	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13320	base = pc_rtx;
13321    }
13322
13323  if (!base && !index)
13324    {
      /* A displacement-only address requires special attention.  */
13326      if (CONST_INT_P (disp))
13327	{
13328	  if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
13329	    fputs ("ds:", file);
13330	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13331	}
13332      /* Load the external function address via the GOT slot to avoid PLT.  */
13333      else if (GET_CODE (disp) == CONST
13334	       && GET_CODE (XEXP (disp, 0)) == UNSPEC
13335	       && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
13336		   || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
13337	       && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
13338	output_pic_addr_const (file, disp, 0);
13339      else if (flag_pic)
13340	output_pic_addr_const (file, disp, 0);
13341      else
13342	output_addr_const (file, disp);
13343    }
13344  else
13345    {
13346      /* Print SImode register names to force addr32 prefix.  */
13347      if (SImode_address_operand (addr, VOIDmode))
13348	{
13349	  if (flag_checking)
13350	    {
13351	      gcc_assert (TARGET_64BIT);
13352	      switch (GET_CODE (addr))
13353		{
13354		case SUBREG:
13355		  gcc_assert (GET_MODE (addr) == SImode);
13356		  gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
13357		  break;
13358		case ZERO_EXTEND:
13359		case AND:
13360		  gcc_assert (GET_MODE (addr) == DImode);
13361		  break;
13362		default:
13363		  gcc_unreachable ();
13364		}
13365	    }
13366	  gcc_assert (!code);
13367	  code = 'k';
13368	}
13369      else if (code == 0
13370	       && TARGET_X32
13371	       && disp
13372	       && CONST_INT_P (disp)
13373	       && INTVAL (disp) < -16*1024*1024)
13374	{
	  /* X32 runs in 64-bit mode, where a displacement, DISP, in an
	     address DISP(%r64) is encoded as a 32-bit immediate sign-
	     extended to 64 bits.  For -0x40000300(%r64), the address is
	     %r64 + 0xffffffffbffffd00.  When %r64 < 0x40000300, like
	     0x37ffe064, the address is 0xfffffffff7ffdd64, which is
	     invalid for x32.  The correct address is %r64 - 0x40000300
	     == 0xf7ffdd64.  To properly encode -0x40000300(%r64) for
	     x32, we zero-extend the negative displacement by forcing
	     the addr32 prefix, which truncates 0xfffffffff7ffdd64 to
	     0xf7ffdd64.  In theory, we should zero-extend all negative
	     displacements, including -1(%rsp).  However, for small
	     negative displacements, sign-extension won't cause an
	     overflow.  We only zero-extend negative displacements if
	     they are < -16*1024*1024, which is also the limit used to
	     check legitimate address displacements for PIC.  */
13390	  code = 'k';
13391	}
13392
      /* Since the upper 32 bits of RSP are always zero for x32,
	 we can encode %esp as %rsp to avoid the 0x67 prefix if
	 there is no index register.  */
13396      if (TARGET_X32 && Pmode == SImode
13397	  && !index && base && REG_P (base) && REGNO (base) == SP_REG)
13398	code = 'q';
13399
13400      if (ASSEMBLER_DIALECT == ASM_ATT)
13401	{
13402	  if (disp)
13403	    {
13404	      if (flag_pic)
13405		output_pic_addr_const (file, disp, 0);
13406	      else if (GET_CODE (disp) == LABEL_REF)
13407		output_asm_label (disp);
13408	      else
13409		output_addr_const (file, disp);
13410	    }
13411
13412	  putc ('(', file);
13413	  if (base)
13414	    print_reg (base, code, file);
13415	  if (index)
13416	    {
13417	      putc (',', file);
13418	      print_reg (index, vsib ? 0 : code, file);
13419	      if (scale != 1 || vsib)
13420		fprintf (file, ",%d", scale);
13421	    }
13422	  putc (')', file);
13423	}
13424      else
13425	{
13426	  rtx offset = NULL_RTX;
13427
13428	  if (disp)
13429	    {
13430	      /* Pull out the offset of a symbol; print any symbol itself.  */
13431	      if (GET_CODE (disp) == CONST
13432		  && GET_CODE (XEXP (disp, 0)) == PLUS
13433		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13434		{
13435		  offset = XEXP (XEXP (disp, 0), 1);
13436		  disp = gen_rtx_CONST (VOIDmode,
13437					XEXP (XEXP (disp, 0), 0));
13438		}
13439
13440	      if (flag_pic)
13441		output_pic_addr_const (file, disp, 0);
13442	      else if (GET_CODE (disp) == LABEL_REF)
13443		output_asm_label (disp);
13444	      else if (CONST_INT_P (disp))
13445		offset = disp;
13446	      else
13447		output_addr_const (file, disp);
13448	    }
13449
13450	  putc ('[', file);
13451	  if (base)
13452	    {
13453	      print_reg (base, code, file);
13454	      if (offset)
13455		{
13456		  if (INTVAL (offset) >= 0)
13457		    putc ('+', file);
13458		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13459		}
13460	    }
13461	  else if (offset)
13462	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13463	  else
13464	    putc ('0', file);
13465
13466	  if (index)
13467	    {
13468	      putc ('+', file);
13469	      print_reg (index, vsib ? 0 : code, file);
13470	      if (scale != 1 || vsib)
13471		fprintf (file, "*%d", scale);
13472	    }
13473	  putc (']', file);
13474	}
13475    }
13476}
13477
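/* Output the memory operand address ADDR to FILE, using the generic
   address space and allowing RIP-relative addressing.  */
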
13478static void
13479ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
13480{
13481  if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
13482    output_operand_lossage ("invalid constraints for operand");
13483  else
13484    ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
13485}
13486
13487/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
13488
13489static bool
13490i386_asm_output_addr_const_extra (FILE *file, rtx x)
13491{
13492  rtx op;
13493
13494  if (GET_CODE (x) != UNSPEC)
13495    return false;
13496
13497  op = XVECEXP (x, 0, 0);
13498  switch (XINT (x, 1))
13499    {
13500    case UNSPEC_GOTOFF:
13501      output_addr_const (file, op);
13502      fputs ("@gotoff", file);
13503      break;
13504    case UNSPEC_GOTTPOFF:
13505      output_addr_const (file, op);
13506      /* FIXME: This might be @TPOFF in Sun ld.  */
13507      fputs ("@gottpoff", file);
13508      break;
13509    case UNSPEC_TPOFF:
13510      output_addr_const (file, op);
13511      fputs ("@tpoff", file);
13512      break;
13513    case UNSPEC_NTPOFF:
13514      output_addr_const (file, op);
13515      if (TARGET_64BIT)
13516	fputs ("@tpoff", file);
13517      else
13518	fputs ("@ntpoff", file);
13519      break;
13520    case UNSPEC_DTPOFF:
13521      output_addr_const (file, op);
13522      fputs ("@dtpoff", file);
13523      break;
13524    case UNSPEC_GOTNTPOFF:
13525      output_addr_const (file, op);
13526      if (TARGET_64BIT)
13527	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13528	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13529      else
13530	fputs ("@gotntpoff", file);
13531      break;
13532    case UNSPEC_INDNTPOFF:
13533      output_addr_const (file, op);
13534      fputs ("@indntpoff", file);
13535      break;
13536#if TARGET_MACHO
13537    case UNSPEC_MACHOPIC_OFFSET:
13538      output_addr_const (file, op);
13539      putc ('-', file);
13540      machopic_output_function_base_name (file);
13541      break;
13542#endif
13543
13544    default:
13545      return false;
13546    }
13547
13548  return true;
13549}
13550
13551
13552/* Output code to perform a 387 binary operation in INSN, one of PLUS,
13553   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
13554   is the expression of the binary operation.  The output may either be
13555   emitted here, or returned to the caller, like all output_* functions.
13556
13557   There is no guarantee that the operands are the same mode, as they
13558   might be within FLOAT or FLOAT_EXTEND expressions.  */
13559
13560#ifndef SYSV386_COMPAT
13561/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
13562   wants to fix the assemblers because that causes incompatibility
13563   with gcc.  No-one wants to fix gcc because that causes
13564   incompatibility with assemblers...  You can use the option of
13565   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
13566#define SYSV386_COMPAT 1
13567#endif
13568
13569const char *
13570output_387_binary_op (rtx_insn *insn, rtx *operands)
13571{
13572  static char buf[40];
13573  const char *p;
13574  bool is_sse
13575    = (SSE_REG_P (operands[0])
13576       || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
13577
13578  if (is_sse)
13579    p = "%v";
13580  else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13581	   || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13582    p = "fi";
13583  else
13584    p = "f";
13585
13586  strcpy (buf, p);
13587
13588  switch (GET_CODE (operands[3]))
13589    {
13590    case PLUS:
13591      p = "add"; break;
13592    case MINUS:
13593      p = "sub"; break;
13594    case MULT:
13595      p = "mul"; break;
13596    case DIV:
13597      p = "div"; break;
13598    default:
13599      gcc_unreachable ();
13600    }
13601
13602  strcat (buf, p);
13603
13604  if (is_sse)
13605   {
13606     p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd";
13607     strcat (buf, p);
13608
13609     if (TARGET_AVX)
13610       p = "\t{%2, %1, %0|%0, %1, %2}";
13611     else
13612       p = "\t{%2, %0|%0, %2}";
13613
13614     strcat (buf, p);
13615     return buf;
13616   }
13617
  /* Even if we do not want to check the inputs, this documents the input
     constraints, which helps in understanding the following code.  */
13620  if (flag_checking)
13621    {
13622      if (STACK_REG_P (operands[0])
13623	  && ((REG_P (operands[1])
13624	       && REGNO (operands[0]) == REGNO (operands[1])
13625	       && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13626	      || (REG_P (operands[2])
13627		  && REGNO (operands[0]) == REGNO (operands[2])
13628		  && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13629	  && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13630	; /* ok */
13631      else
13632	gcc_unreachable ();
13633    }
13634
13635  switch (GET_CODE (operands[3]))
13636    {
13637    case MULT:
13638    case PLUS:
13639      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13640	std::swap (operands[1], operands[2]);
13641
      /* We know operands[0] == operands[1].  */
13643
13644      if (MEM_P (operands[2]))
13645	{
13646	  p = "%Z2\t%2";
13647	  break;
13648	}
13649
13650      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13651	{
13652	  if (STACK_TOP_P (operands[0]))
13653	    /* How is it that we are storing to a dead operand[2]?
13654	       Well, presumably operands[1] is dead too.  We can't
13655	       store the result to st(0) as st(0) gets popped on this
13656	       instruction.  Instead store to operands[2] (which I
13657	       think has to be st(1)).  st(1) will be popped later.
13658	       gcc <= 2.8.1 didn't have this check and generated
13659	       assembly code that the Unixware assembler rejected.  */
13660	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
13661	  else
13662	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
13663	  break;
13664	}
13665
13666      if (STACK_TOP_P (operands[0]))
13667	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
13668      else
13669	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
13670      break;
13671
13672    case MINUS:
13673    case DIV:
13674      if (MEM_P (operands[1]))
13675	{
13676	  p = "r%Z1\t%1";
13677	  break;
13678	}
13679
13680      if (MEM_P (operands[2]))
13681	{
13682	  p = "%Z2\t%2";
13683	  break;
13684	}
13685
13686      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13687	{
13688#if SYSV386_COMPAT
13689	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13690	     derived assemblers, confusingly reverse the direction of
13691	     the operation for fsub{r} and fdiv{r} when the
13692	     destination register is not st(0).  The Intel assembler
13693	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
13694	     figure out what the hardware really does.  */
13695	  if (STACK_TOP_P (operands[0]))
13696	    p = "{p\t%0, %2|rp\t%2, %0}";
13697	  else
13698	    p = "{rp\t%2, %0|p\t%0, %2}";
13699#else
13700	  if (STACK_TOP_P (operands[0]))
13701	    /* As above for fmul/fadd, we can't store to st(0).  */
13702	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
13703	  else
13704	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
13705#endif
13706	  break;
13707	}
13708
13709      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13710	{
13711#if SYSV386_COMPAT
13712	  if (STACK_TOP_P (operands[0]))
13713	    p = "{rp\t%0, %1|p\t%1, %0}";
13714	  else
13715	    p = "{p\t%1, %0|rp\t%0, %1}";
13716#else
13717	  if (STACK_TOP_P (operands[0]))
13718	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
13719	  else
13720	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
13721#endif
13722	  break;
13723	}
13724
13725      if (STACK_TOP_P (operands[0]))
13726	{
13727	  if (STACK_TOP_P (operands[1]))
13728	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
13729	  else
13730	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
13731	  break;
13732	}
13733      else if (STACK_TOP_P (operands[1]))
13734	{
13735#if SYSV386_COMPAT
13736	  p = "{\t%1, %0|r\t%0, %1}";
13737#else
13738	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
13739#endif
13740	}
13741      else
13742	{
13743#if SYSV386_COMPAT
13744	  p = "{r\t%2, %0|\t%0, %2}";
13745#else
13746	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
13747#endif
13748	}
13749      break;
13750
13751    default:
13752      gcc_unreachable ();
13753    }
13754
13755  strcat (buf, p);
13756  return buf;
13757}
13758
13759/* Return needed mode for entity in optimize_mode_switching pass.  */
13760
13761static int
13762ix86_dirflag_mode_needed (rtx_insn *insn)
13763{
13764  if (CALL_P (insn))
13765    {
13766      if (cfun->machine->func_type == TYPE_NORMAL)
13767	return X86_DIRFLAG_ANY;
13768      else
13769	/* No need to emit CLD in interrupt handler for TARGET_CLD.  */
13770	return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
13771    }
13772
13773  if (recog_memoized (insn) < 0)
13774    return X86_DIRFLAG_ANY;
13775
13776  if (get_attr_type (insn) == TYPE_STR)
13777    {
13778      /* Emit cld instruction if stringops are used in the function.  */
13779      if (cfun->machine->func_type == TYPE_NORMAL)
13780	return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
13781      else
13782	return X86_DIRFLAG_RESET;
13783    }
13784
13785  return X86_DIRFLAG_ANY;
13786}
13787
/* Check if a 256bit or 512bit AVX register is referenced inside EXP.  */
13789
13790static bool
13791ix86_check_avx_upper_register (const_rtx exp)
13792{
13793  return SSE_REG_P (exp) && GET_MODE_BITSIZE (GET_MODE (exp)) > 128;
13794}
13795
13796/* Return needed mode for entity in optimize_mode_switching pass.  */
13797
13798static int
13799ix86_avx_u128_mode_needed (rtx_insn *insn)
13800{
13801  if (CALL_P (insn))
13802    {
13803      rtx link;
13804
      /* The needed mode is set to AVX_U128_CLEAN if no 256bit or
	 512bit modes are used in the function arguments.  */
13807      for (link = CALL_INSN_FUNCTION_USAGE (insn);
13808	   link;
13809	   link = XEXP (link, 1))
13810	{
13811	  if (GET_CODE (XEXP (link, 0)) == USE)
13812	    {
13813	      rtx arg = XEXP (XEXP (link, 0), 0);
13814
13815	      if (ix86_check_avx_upper_register (arg))
13816		return AVX_U128_DIRTY;
13817	    }
13818	}
13819
13820      /* If the function is known to preserve some SSE registers,
13821	 RA and previous passes can legitimately rely on that for
13822	 modes wider than 256 bits.  It's only safe to issue a
13823	 vzeroupper if all SSE registers are clobbered.  */
13824      const function_abi &abi = insn_callee_abi (insn);
13825      if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
13826				  abi.mode_clobbers (V4DImode)))
13827	return AVX_U128_ANY;
13828
13829      return AVX_U128_CLEAN;
13830    }
13831
13832  /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
13833     Hardware changes state only when a 256bit register is written to,
13834     but we need to prevent the compiler from moving the optimal insertion
13835     point above an eventual read from a 256bit or 512bit register.  */
13836  subrtx_iterator::array_type array;
13837  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13838    if (ix86_check_avx_upper_register (*iter))
13839      return AVX_U128_DIRTY;
13840
13841  return AVX_U128_ANY;
13842}
13843
13844/* Return mode that i387 must be switched into
13845   prior to the execution of insn.  */
13846
13847static int
13848ix86_i387_mode_needed (int entity, rtx_insn *insn)
13849{
13850  enum attr_i387_cw mode;
13851
13852  /* The mode UNINITIALIZED is used to store the control word after a
13853     function call or ASM pattern.  The mode ANY specifies that the
13854     function has no requirements on the control word and makes no
13855     changes to the bits we are interested in.  */
13856
13857  if (CALL_P (insn)
13858      || (NONJUMP_INSN_P (insn)
13859	  && (asm_noperands (PATTERN (insn)) >= 0
13860	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13861    return I387_CW_UNINITIALIZED;
13862
13863  if (recog_memoized (insn) < 0)
13864    return I387_CW_ANY;
13865
13866  mode = get_attr_i387_cw (insn);
13867
13868  switch (entity)
13869    {
13870    case I387_ROUNDEVEN:
13871      if (mode == I387_CW_ROUNDEVEN)
13872	return mode;
13873      break;
13874
13875    case I387_TRUNC:
13876      if (mode == I387_CW_TRUNC)
13877	return mode;
13878      break;
13879
13880    case I387_FLOOR:
13881      if (mode == I387_CW_FLOOR)
13882	return mode;
13883      break;
13884
13885    case I387_CEIL:
13886      if (mode == I387_CW_CEIL)
13887	return mode;
13888      break;
13889
13890    default:
13891      gcc_unreachable ();
13892    }
13893
13894  return I387_CW_ANY;
13895}
13896
13897/* Return mode that entity must be switched into
13898   prior to the execution of insn.  */
13899
13900static int
13901ix86_mode_needed (int entity, rtx_insn *insn)
13902{
13903  switch (entity)
13904    {
13905    case X86_DIRFLAG:
13906      return ix86_dirflag_mode_needed (insn);
13907    case AVX_U128:
13908      return ix86_avx_u128_mode_needed (insn);
13909    case I387_ROUNDEVEN:
13910    case I387_TRUNC:
13911    case I387_FLOOR:
13912    case I387_CEIL:
13913      return ix86_i387_mode_needed (entity, insn);
13914    default:
13915      gcc_unreachable ();
13916    }
13917  return 0;
13918}
13919
13920/* Check if a 256bit or 512bit AVX register is referenced in stores.   */
13921
13922static void
13923ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
13924{
13925  if (ix86_check_avx_upper_register (dest))
13926    {
13927      bool *used = (bool *) data;
13928      *used = true;
13929    }
13930}
13931
13932/* Calculate mode of upper 128bit AVX registers after the insn.  */
13933
13934static int
13935ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
13936{
13937  rtx pat = PATTERN (insn);
13938
13939  if (vzeroupper_pattern (pat, VOIDmode)
13940      || vzeroall_pattern (pat, VOIDmode))
13941    return AVX_U128_CLEAN;
13942
13943  /* We know that the state is clean after a CALL insn if no 256bit
13944     or 512bit register is used for the function return value. */
13945  if (CALL_P (insn))
13946    {
13947      bool avx_upper_reg_found = false;
13948      note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
13949
13950      return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
13951    }
13952
13953  /* Otherwise, return current mode.  Remember that if insn
13954     references AVX 256bit or 512bit registers, the mode was already
13955     changed to DIRTY from MODE_NEEDED.  */
13956  return mode;
13957}
13958
13959/* Return the mode that an insn results in.  */
13960
13961static int
13962ix86_mode_after (int entity, int mode, rtx_insn *insn)
13963{
13964  switch (entity)
13965    {
13966    case X86_DIRFLAG:
13967      return mode;
13968    case AVX_U128:
13969      return ix86_avx_u128_mode_after (mode, insn);
13970    case I387_ROUNDEVEN:
13971    case I387_TRUNC:
13972    case I387_FLOOR:
13973    case I387_CEIL:
13974      return mode;
13975    default:
13976      gcc_unreachable ();
13977    }
13978}
13979
13980static int
13981ix86_dirflag_mode_entry (void)
13982{
13983  /* For TARGET_CLD or in the interrupt handler we can't assume
13984     direction flag state at function entry.  */
13985  if (TARGET_CLD
13986      || cfun->machine->func_type != TYPE_NORMAL)
13987    return X86_DIRFLAG_ANY;
13988
13989  return X86_DIRFLAG_RESET;
13990}
13991
13992static int
13993ix86_avx_u128_mode_entry (void)
13994{
13995  tree arg;
13996
13997  /* Entry mode is set to AVX_U128_DIRTY if there are
13998     256bit or 512bit modes used in function arguments.  */
13999  for (arg = DECL_ARGUMENTS (current_function_decl); arg;
14000       arg = TREE_CHAIN (arg))
14001    {
14002      rtx incoming = DECL_INCOMING_RTL (arg);
14003
14004      if (incoming && ix86_check_avx_upper_register (incoming))
14005	return AVX_U128_DIRTY;
14006    }
14007
14008  return AVX_U128_CLEAN;
14009}
14010
14011/* Return a mode that ENTITY is assumed to be
14012   switched to at function entry.  */
14013
14014static int
14015ix86_mode_entry (int entity)
14016{
14017  switch (entity)
14018    {
14019    case X86_DIRFLAG:
14020      return ix86_dirflag_mode_entry ();
14021    case AVX_U128:
14022      return ix86_avx_u128_mode_entry ();
14023    case I387_ROUNDEVEN:
14024    case I387_TRUNC:
14025    case I387_FLOOR:
14026    case I387_CEIL:
14027      return I387_CW_ANY;
14028    default:
14029      gcc_unreachable ();
14030    }
14031}
14032
14033static int
14034ix86_avx_u128_mode_exit (void)
14035{
14036  rtx reg = crtl->return_rtx;
14037
14038  /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
14039     or 512bit modes used for the function return value. */
14040  if (reg && ix86_check_avx_upper_register (reg))
14041    return AVX_U128_DIRTY;
14042
14043  /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
14044     modes used in function arguments; otherwise return AVX_U128_CLEAN.  */
14046  return ix86_avx_u128_mode_entry ();
14047}
14048
14049/* Return a mode that ENTITY is assumed to be
14050   switched to at function exit.  */
14051
14052static int
14053ix86_mode_exit (int entity)
14054{
14055  switch (entity)
14056    {
14057    case X86_DIRFLAG:
14058      return X86_DIRFLAG_ANY;
14059    case AVX_U128:
14060      return ix86_avx_u128_mode_exit ();
14061    case I387_ROUNDEVEN:
14062    case I387_TRUNC:
14063    case I387_FLOOR:
14064    case I387_CEIL:
14065      return I387_CW_ANY;
14066    default:
14067      gcc_unreachable ();
14068    }
14069}
14070
14071static int
14072ix86_mode_priority (int, int n)
14073{
14074  return n;
14075}
14076
14077/* Output code to initialize control word copies used by trunc?f?i and
14078   rounding patterns.  MODE is the new rounding mode; the initialized
14079   control word copy is stored in the corresponding stack slot.  */
14080
14081static void
14082emit_i387_cw_initialization (int mode)
14083{
14084  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14085  rtx new_mode;
14086
14087  enum ix86_stack_slot slot;
14088
14089  rtx reg = gen_reg_rtx (HImode);
14090
14091  emit_insn (gen_x86_fnstcw_1 (stored_mode));
14092  emit_move_insn (reg, copy_rtx (stored_mode));
14093
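  /* Bits 10 and 11 of the x87 control word (mask 0x0c00) form the
     rounding-control field; the cases below clear and/or set those
     bits to select the requested rounding mode.  */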
14094  switch (mode)
14095    {
14096    case I387_CW_ROUNDEVEN:
14097      /* round to nearest */
14098      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14099      slot = SLOT_CW_ROUNDEVEN;
14100      break;
14101
14102    case I387_CW_TRUNC:
14103      /* round toward zero (truncate) */
14104      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14105      slot = SLOT_CW_TRUNC;
14106      break;
14107
14108    case I387_CW_FLOOR:
14109      /* round down toward -oo */
14110      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14111      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14112      slot = SLOT_CW_FLOOR;
14113      break;
14114
14115    case I387_CW_CEIL:
14116      /* round up toward +oo */
14117      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14118      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14119      slot = SLOT_CW_CEIL;
14120      break;
14121
14122    default:
14123      gcc_unreachable ();
14124    }
14125
14126  gcc_assert (slot < MAX_386_STACK_LOCALS);
14127
14128  new_mode = assign_386_stack_local (HImode, slot);
14129  emit_move_insn (new_mode, reg);
14130}
14131
14132/* Generate one or more insns to set ENTITY to MODE.  */
14133
14134static void
14135ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
14136		    HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
14137{
14138  switch (entity)
14139    {
14140    case X86_DIRFLAG:
14141      if (mode == X86_DIRFLAG_RESET)
14142	emit_insn (gen_cld ());
14143      break;
14144    case AVX_U128:
14145      if (mode == AVX_U128_CLEAN)
14146	emit_insn (gen_avx_vzeroupper ());
14147      break;
14148    case I387_ROUNDEVEN:
14149    case I387_TRUNC:
14150    case I387_FLOOR:
14151    case I387_CEIL:
14152      if (mode != I387_CW_ANY
14153	  && mode != I387_CW_UNINITIALIZED)
14154	emit_i387_cw_initialization (mode);
14155      break;
14156    default:
14157      gcc_unreachable ();
14158    }
14159}
14160
14161/* Output code for INSN to convert a float to a signed int.  OPERANDS
14162   are the insn operands.  The output may be [HSD]Imode and the input
14163   operand may be [SDX]Fmode.  */
14164
14165const char *
14166output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
14167{
14168  bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
14169  bool dimode_p = GET_MODE (operands[0]) == DImode;
14170  int round_mode = get_attr_i387_cw (insn);
14171
14172  static char buf[40];
14173  const char *p;
14174
14175  /* Jump through a hoop or two for DImode, since the hardware has no
14176     non-popping instruction.  We used to do this a different way, but
14177     that was somewhat fragile and broke with post-reload splitters.  */
14178  if ((dimode_p || fisttp) && !stack_top_dies)
14179    output_asm_insn ("fld\t%y1", operands);
14180
14181  gcc_assert (STACK_TOP_P (operands[1]));
14182  gcc_assert (MEM_P (operands[0]));
14183  gcc_assert (GET_MODE (operands[1]) != TFmode);
14184
14185  if (fisttp)
14186    return "fisttp%Z0\t%0";
14187
14188  strcpy (buf, "fist");
14189
14190  if (round_mode != I387_CW_ANY)
14191    output_asm_insn ("fldcw\t%3", operands);
14192
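  /* Use the popping form ("fistp") when the stack top dies or this is
     a DImode store (which has no non-popping form); otherwise skip the
     leading 'p' suffix.  */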
14193  p = "p%Z0\t%0";
14194  strcat (buf, p + !(stack_top_dies || dimode_p));
14195
14196  output_asm_insn (buf, operands);
14197
14198  if (round_mode != I387_CW_ANY)
14199    output_asm_insn ("fldcw\t%2", operands);
14200
14201  return "";
14202}
14203
14204/* Output code for x87 ffreep insn.  The OPNO argument, which may only
14205   have the values zero or one, indicates the ffreep insn's operand
14206   from the OPERANDS array.  */
14207
14208static const char *
14209output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14210{
14211  if (TARGET_USE_FFREEP)
14212#ifdef HAVE_AS_IX86_FFREEP
14213    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14214#else
14215    {
14216      static char retval[32];
14217      int regno = REGNO (operands[opno]);
14218
14219      gcc_assert (STACK_REGNO_P (regno));
14220
14221      regno -= FIRST_STACK_REG;
14222
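      /* ffreep %st(N) is encoded as the bytes 0xdf 0xc0+N; emit the raw
	 encoding for assemblers that lack the mnemonic (this assumes
	 ASM_SHORT emits a little-endian 16-bit word).  */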
14223      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14224      return retval;
14225    }
14226#endif
14227
14228  return opno ? "fstp\t%y1" : "fstp\t%y0";
14229}
14230
14231
14232/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
14233   should be used.  UNORDERED_P is true when fucom should be used.  */
14234
14235const char *
14236output_fp_compare (rtx_insn *insn, rtx *operands,
14237		   bool eflags_p, bool unordered_p)
14238{
14239  rtx *xops = eflags_p ? &operands[0] : &operands[1];
14240  bool stack_top_dies;
14241
14242  static char buf[40];
14243  const char *p;
14244
14245  gcc_assert (STACK_TOP_P (xops[0]));
14246
14247  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
14248
14249  if (eflags_p)
14250    {
14251      p = unordered_p ? "fucomi" : "fcomi";
14252      strcpy (buf, p);
14253
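      /* Keep the trailing 'p' (pop) suffix only when the stack top dies.  */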
14254      p = "p\t{%y1, %0|%0, %y1}";
14255      strcat (buf, p + !stack_top_dies);
14256
14257      return buf;
14258    }
14259
14260  if (STACK_REG_P (xops[1])
14261      && stack_top_dies
14262      && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
14263    {
14264      gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
14265
14266      /* If both the top of the 387 stack die, and the other operand
14267	 is also a stack register that dies, then this must be a
14268	 `fcompp' float compare.  */
14269      p = unordered_p ? "fucompp" : "fcompp";
14270      strcpy (buf, p);
14271    }
14272  else if (const0_operand (xops[1], VOIDmode))
14273    {
14274      gcc_assert (!unordered_p);
14275      strcpy (buf, "ftst");
14276    }
14277  else
14278    {
14279      if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
14280	{
14281	  gcc_assert (!unordered_p);
14282	  p = "ficom";
14283	}
14284      else
14285	p = unordered_p ? "fucom" : "fcom";
14286
14287      strcpy (buf, p);
14288
14289      p = "p%Z2\t%y2";
14290      strcat (buf, p + !stack_top_dies);
14291    }
14292
14293  output_asm_insn (buf, operands);
14294  return "fnstsw\t%0";
14295}
14296
14297void
14298ix86_output_addr_vec_elt (FILE *file, int value)
14299{
14300  const char *directive = ASM_LONG;
14301
14302#ifdef ASM_QUAD
14303  if (TARGET_LP64)
14304    directive = ASM_QUAD;
14305#else
14306  gcc_assert (!TARGET_64BIT);
14307#endif
14308
14309  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14310}
14311
14312void
14313ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14314{
14315  const char *directive = ASM_LONG;
14316
14317#ifdef ASM_QUAD
14318  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14319    directive = ASM_QUAD;
14320#else
14321  gcc_assert (!TARGET_64BIT);
14322#endif
14323  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
14324  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14325    fprintf (file, "%s%s%d-%s%d\n",
14326	     directive, LPREFIX, value, LPREFIX, rel);
14327#if TARGET_MACHO
14328  else if (TARGET_MACHO)
14329    {
14330      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14331      machopic_output_function_base_name (file);
14332      putc ('\n', file);
14333    }
14334#endif
14335  else if (HAVE_AS_GOTOFF_IN_DATA)
14336    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14337  else
14338    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14339		 GOT_SYMBOL_NAME, LPREFIX, value);
14340}
14341
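/* LEA_MAX_STALL is the longest distance, in cycles, at which an AGU
   stall is still assumed to occur; LEA_SEARCH_THRESHOLD is the
   corresponding insn search window, in half-cycles.  */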
14342#define LEA_MAX_STALL (3)
14343#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
14344
14345/* Increase given DISTANCE in half-cycles according to
14346   dependencies between PREV and NEXT instructions.
14347   Add 1 half-cycle if there is no dependency and
14348   go to the next cycle if there is some dependency.  */
14349
14350static unsigned int
14351increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
14352{
14353  df_ref def, use;
14354
14355  if (!prev || !next)
14356    return distance + (distance & 1) + 2;
14357
14358  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
14359    return distance + 1;
14360
14361  FOR_EACH_INSN_USE (use, next)
14362    FOR_EACH_INSN_DEF (def, prev)
14363      if (!DF_REF_IS_ARTIFICIAL (def)
14364	  && DF_REF_REGNO (use) == DF_REF_REGNO (def))
14365	return distance + (distance & 1) + 2;
14366
14367  return distance + 1;
14368}
14369
14370/* Function checks if instruction INSN defines register number
14371   REGNO1 or REGNO2.  */
14372
14373bool
14374insn_defines_reg (unsigned int regno1, unsigned int regno2,
14375		  rtx_insn *insn)
14376{
14377  df_ref def;
14378
14379  FOR_EACH_INSN_DEF (def, insn)
14380    if (DF_REF_REG_DEF_P (def)
14381	&& !DF_REF_IS_ARTIFICIAL (def)
14382	&& (regno1 == DF_REF_REGNO (def)
14383	    || regno2 == DF_REF_REGNO (def)))
14384      return true;
14385
14386  return false;
14387}
14388
14389/* Function checks if instruction INSN uses register number
14390   REGNO as a part of address expression.  */
14391
14392static bool
14393insn_uses_reg_mem (unsigned int regno, rtx insn)
14394{
14395  df_ref use;
14396
14397  FOR_EACH_INSN_USE (use, insn)
14398    if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
14399      return true;
14400
14401  return false;
14402}
14403
14404/* Search backward for a non-agu definition of register number REGNO1
14405   or register number REGNO2 in the basic block, starting from
14406   instruction START, up to the head of the basic block or to INSN.
14407
14408   Set *FOUND to true if a definition was found and to false
14409   otherwise.
14410
14411   The distance in half-cycles between START and the found instruction
14412   or the head of the BB is added to DISTANCE and returned.  */
14413
14414static int
14415distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
14416			       rtx_insn *insn, int distance,
14417			       rtx_insn *start, bool *found)
14418{
14419  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
14420  rtx_insn *prev = start;
14421  rtx_insn *next = NULL;
14422
14423  *found = false;
14424
14425  while (prev
14426	 && prev != insn
14427	 && distance < LEA_SEARCH_THRESHOLD)
14428    {
14429      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
14430	{
14431	  distance = increase_distance (prev, next, distance);
14432	  if (insn_defines_reg (regno1, regno2, prev))
14433	    {
14434	      if (recog_memoized (prev) < 0
14435		  || get_attr_type (prev) != TYPE_LEA)
14436		{
14437		  *found = true;
14438		  return distance;
14439		}
14440	    }
14441
14442	  next = prev;
14443	}
14444      if (prev == BB_HEAD (bb))
14445	break;
14446
14447      prev = PREV_INSN (prev);
14448    }
14449
14450  return distance;
14451}
14452
14453/* Search backward for non-agu definition of register number REGNO1
14454   or register number REGNO2 in INSN's basic block until
14455   1. Pass LEA_SEARCH_THRESHOLD instructions, or
14456   2. Reach a neighbor BB boundary, or
14457   3. Reach an agu definition.
14458   Returns the distance between the non-agu definition point and INSN.
14459   If there is no definition point, returns -1.  */
14460
14461static int
14462distance_non_agu_define (unsigned int regno1, unsigned int regno2,
14463			 rtx_insn *insn)
14464{
14465  basic_block bb = BLOCK_FOR_INSN (insn);
14466  int distance = 0;
14467  bool found = false;
14468
14469  if (insn != BB_HEAD (bb))
14470    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
14471					      distance, PREV_INSN (insn),
14472					      &found);
14473
14474  if (!found && distance < LEA_SEARCH_THRESHOLD)
14475    {
14476      edge e;
14477      edge_iterator ei;
14478      bool simple_loop = false;
14479
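      /* If the block loops back to itself, continue the backward scan
	 from its end; otherwise scan each predecessor block and keep
	 the shortest distance found.  */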
14480      FOR_EACH_EDGE (e, ei, bb->preds)
14481	if (e->src == bb)
14482	  {
14483	    simple_loop = true;
14484	    break;
14485	  }
14486
14487      if (simple_loop)
14488	distance = distance_non_agu_define_in_bb (regno1, regno2,
14489						  insn, distance,
14490						  BB_END (bb), &found);
14491      else
14492	{
14493	  int shortest_dist = -1;
14494	  bool found_in_bb = false;
14495
14496	  FOR_EACH_EDGE (e, ei, bb->preds)
14497	    {
14498	      int bb_dist
14499		= distance_non_agu_define_in_bb (regno1, regno2,
14500						 insn, distance,
14501						 BB_END (e->src),
14502						 &found_in_bb);
14503	      if (found_in_bb)
14504		{
14505		  if (shortest_dist < 0)
14506		    shortest_dist = bb_dist;
14507		  else if (bb_dist > 0)
14508		    shortest_dist = MIN (bb_dist, shortest_dist);
14509
14510		  found = true;
14511		}
14512	    }
14513
14514	  distance = shortest_dist;
14515	}
14516    }
14517
14518  /* get_attr_type may modify recog data.  We want to make sure
14519     that recog data is valid for instruction INSN, on which
14520     distance_non_agu_define is called.  INSN is unchanged here.  */
14521  extract_insn_cached (insn);
14522
14523  if (!found)
14524    return -1;
14525
14526  return distance >> 1;
14527}
14528
14529/* Return the distance in half-cycles between INSN and the next
14530   insn that uses register number REGNO in a memory address, added
14531   to DISTANCE.  Return -1 if REGNO is set.
14532
14533   Set *FOUND to true if a register use was found and to false
14534   otherwise.
14535   Set *REDEFINED to true if a register redefinition was found and
14536   to false otherwise.  */
14537
14538static int
14539distance_agu_use_in_bb (unsigned int regno,
14540			rtx_insn *insn, int distance, rtx_insn *start,
14541			bool *found, bool *redefined)
14542{
14543  basic_block bb = NULL;
14544  rtx_insn *next = start;
14545  rtx_insn *prev = NULL;
14546
14547  *found = false;
14548  *redefined = false;
14549
14550  if (start != NULL_RTX)
14551    {
14552      bb = BLOCK_FOR_INSN (start);
14553      if (start != BB_HEAD (bb))
14554	/* If insn and start belong to the same bb, set prev to insn,
14555	   so the call to increase_distance will increase the distance
14556	   between insns by 1.  */
14557	prev = insn;
14558    }
14559
14560  while (next
14561	 && next != insn
14562	 && distance < LEA_SEARCH_THRESHOLD)
14563    {
14564      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
14565	{
14566	  distance = increase_distance (prev, next, distance);
14567	  if (insn_uses_reg_mem (regno, next))
14568	    {
14569	      /* Return DISTANCE if OP0 is used in memory
14570		 address in NEXT.  */
14571	      *found = true;
14572	      return distance;
14573	    }
14574
14575	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
14576	    {
14577	      /* Return -1 if OP0 is set in NEXT.  */
14578	      *redefined = true;
14579	      return -1;
14580	    }
14581
14582	  prev = next;
14583	}
14584
14585      if (next == BB_END (bb))
14586	break;
14587
14588      next = NEXT_INSN (next);
14589    }
14590
14591  return distance;
14592}
14593
14594/* Return the distance between INSN and the next insn that uses
14595   register number REGNO0 in a memory address.  Return -1 if no such
14596   use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
14597
14598static int
14599distance_agu_use (unsigned int regno0, rtx_insn *insn)
14600{
14601  basic_block bb = BLOCK_FOR_INSN (insn);
14602  int distance = 0;
14603  bool found = false;
14604  bool redefined = false;
14605
14606  if (insn != BB_END (bb))
14607    distance = distance_agu_use_in_bb (regno0, insn, distance,
14608				       NEXT_INSN (insn),
14609				       &found, &redefined);
14610
14611  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
14612    {
14613      edge e;
14614      edge_iterator ei;
14615      bool simple_loop = false;
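      /* Likewise scan forward: follow a self-loop from the block head,
	 or scan each successor block and keep the shortest distance.  */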
14616
14617      FOR_EACH_EDGE (e, ei, bb->succs)
14618	if (e->dest == bb)
14619	  {
14620	    simple_loop = true;
14621	    break;
14622	  }
14623
14624      if (simple_loop)
14625	distance = distance_agu_use_in_bb (regno0, insn,
14626					   distance, BB_HEAD (bb),
14627					   &found, &redefined);
14628      else
14629	{
14630	  int shortest_dist = -1;
14631	  bool found_in_bb = false;
14632	  bool redefined_in_bb = false;
14633
14634	  FOR_EACH_EDGE (e, ei, bb->succs)
14635	    {
14636	      int bb_dist
14637		= distance_agu_use_in_bb (regno0, insn,
14638					  distance, BB_HEAD (e->dest),
14639					  &found_in_bb, &redefined_in_bb);
14640	      if (found_in_bb)
14641		{
14642		  if (shortest_dist < 0)
14643		    shortest_dist = bb_dist;
14644		  else if (bb_dist > 0)
14645		    shortest_dist = MIN (bb_dist, shortest_dist);
14646
14647		  found = true;
14648		}
14649	    }
14650
14651	  distance = shortest_dist;
14652	}
14653    }
14654
14655  if (!found || redefined)
14656    return -1;
14657
14658  return distance >> 1;
14659}
14660
14661/* Define this macro to tune LEA priority vs ADD; it takes effect when
14662   there is a choice between LEA and ADD:
14663   Negative value: ADD is preferred over LEA
14664   Zero: Neutral
14665   Positive value: LEA is preferred over ADD.  */
14666#define IX86_LEA_PRIORITY 0
14667
14668/* Return true if using the lea INSN has a performance advantage
14669   over a sequence of instructions.  The instruction sequence has
14670   SPLIT_COST cycles higher latency than the lea latency.  */
14671
14672static bool
14673ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
14674		      unsigned int regno2, int split_cost, bool has_scale)
14675{
14676  int dist_define, dist_use;
14677
14678  /* For Atom processors newer than Bonnell, if using a 2-source or
14679     3-source LEA for non-destructive destination purposes, or due to
14680     wanting the ability to use SCALE, the use of LEA is justified.  */
14681  if (!TARGET_BONNELL)
14682    {
14683      if (has_scale)
14684	return true;
14685      if (split_cost < 1)
14686	return false;
14687      if (regno0 == regno1 || regno0 == regno2)
14688	return false;
14689      return true;
14690    }
14691
14692  rtx_insn *rinsn = recog_data.insn;
14693
14694  dist_define = distance_non_agu_define (regno1, regno2, insn);
14695  dist_use = distance_agu_use (regno0, insn);
14696
14697  /* distance_non_agu_define can call extract_insn_cached.  If this function
14698     is called from define_split conditions, that can break insn splitting,
14699     because split_insns works by clearing recog_data.insn and then modifying
14700     the recog_data.operand array and matching the various split conditions.  */
14701  if (recog_data.insn != rinsn)
14702    recog_data.insn = NULL;
14703
14704  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
14705    {
14706      /* If there is no non-AGU operand definition, no AGU
14707	 operand use, and the split cost is 0, then both the lea
14708	 and non-lea variants have the same priority.  Currently
14709	 we prefer lea for 64-bit code and the non-lea variant
14710	 for 32-bit code.  */
14711      if (dist_use < 0 && split_cost == 0)
14712	return TARGET_64BIT || IX86_LEA_PRIORITY;
14713      else
14714	return true;
14715    }
14716
14717  /* With a longer definition distance, lea is preferable.
14718     Here we adjust it to take into account the splitting cost
14719     and the lea priority.  */
14720  dist_define += split_cost + IX86_LEA_PRIORITY;
14721
14722  /* If there is no use in a memory address then we just check
14723     that the split cost exceeds the AGU stall.  */
14724  if (dist_use < 0)
14725    return dist_define > LEA_MAX_STALL;
14726
14727  /* If this insn has both backward non-agu dependence and forward
14728     agu dependence, the one with the shorter distance takes effect.  */
14729  return dist_define >= dist_use;
14730}
14731
14732/* Return true if it is legal to clobber flags by INSN and
14733   false otherwise.  */
14734
14735static bool
14736ix86_ok_to_clobber_flags (rtx_insn *insn)
14737{
14738  basic_block bb = BLOCK_FOR_INSN (insn);
14739  df_ref use;
14740  bitmap live;
14741
14742  while (insn)
14743    {
14744      if (NONDEBUG_INSN_P (insn))
14745	{
14746	  FOR_EACH_INSN_USE (use, insn)
14747	    if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
14748	      return false;
14749
14750	  if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
14751	    return true;
14752	}
14753
14754      if (insn == BB_END (bb))
14755	break;
14756
14757      insn = NEXT_INSN (insn);
14758    }
14759
14760  live = df_get_live_out (bb);
14761  return !REGNO_REG_SET_P (live, FLAGS_REG);
14762}
14763
14764/* Return true if we need to split op0 = op1 + op2 into a sequence of
14765   move and add to avoid AGU stalls.  */
14766
14767bool
14768ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
14769{
14770  unsigned int regno0, regno1, regno2;
14771
14772  /* Check if we need to optimize.  */
14773  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14774    return false;
14775
14776  /* Check it is correct to split here.  */
14777  if (!ix86_ok_to_clobber_flags (insn))
14778    return false;
14779
14780  regno0 = true_regnum (operands[0]);
14781  regno1 = true_regnum (operands[1]);
14782  regno2 = true_regnum (operands[2]);
14783
14784  /* We only need to split adds with a non-destructive
14785     destination operand.  */
14786  if (regno0 == regno1 || regno0 == regno2)
14787    return false;
14788  else
14789    return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
14790}
14791
14792/* Return true if we should emit lea instruction instead of mov
14793   instruction.  */
14794
14795bool
14796ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
14797{
14798  unsigned int regno0, regno1;
14799
14800  /* Check if we need to optimize.  */
14801  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14802    return false;
14803
14804  /* Use lea for reg to reg moves only.  */
14805  if (!REG_P (operands[0]) || !REG_P (operands[1]))
14806    return false;
14807
14808  regno0 = true_regnum (operands[0]);
14809  regno1 = true_regnum (operands[1]);
14810
14811  return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
14812}
14813
14814/* Return true if we need to split lea into a sequence of
14815   instructions to avoid AGU stalls. */
14816
14817bool
14818ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
14819{
14820  unsigned int regno0, regno1, regno2;
14821  int split_cost;
14822  struct ix86_address parts;
14823  int ok;
14824
14825  /* The "at least two components" test below might not catch simple
14826     move or zero extension insns if parts.base is non-NULL and parts.disp
14827     is const0_rtx as the only components in the address, e.g. if the
14828     register is %rbp or %r13.  As this test is much cheaper and moves or
14829     zero extensions are the common case, do this check first.  */
14830  if (REG_P (operands[1])
14831      || (SImode_address_operand (operands[1], VOIDmode)
14832	  && REG_P (XEXP (operands[1], 0))))
14833    return false;
14834
14835  /* Check if it is OK to split here.  */
14836  if (!ix86_ok_to_clobber_flags (insn))
14837    return false;
14838
14839  ok = ix86_decompose_address (operands[1], &parts);
14840  gcc_assert (ok);
14841
14842  /* There should be at least two components in the address.  */
14843  if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
14844      + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
14845    return false;
14846
14847  /* We should not split into add if a non-legitimate pic
14848     operand is used as the displacement. */
14849  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
14850    return false;
14851
14852  regno0 = true_regnum (operands[0]);
14853  regno1 = INVALID_REGNUM;
14854  regno2 = INVALID_REGNUM;
14855
14856  if (parts.base)
14857    regno1 = true_regnum (parts.base);
14858  if (parts.index)
14859    regno2 = true_regnum (parts.index);
14860
14861  /* Use add for a = a + b and a = b + a since it is faster and shorter
14862     than lea for most processors.  For processors like BONNELL, if
14863     the destination register of LEA holds an actual address which will
14864     be used soon, LEA is better; otherwise ADD is better.  */
14865  if (!TARGET_BONNELL
14866      && parts.scale == 1
14867      && (!parts.disp || parts.disp == const0_rtx)
14868      && (regno0 == regno1 || regno0 == regno2))
14869    return true;
14870
14871  /* Check we need to optimize.  */
14872  if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
14873    return false;
14874
14875  split_cost = 0;
14876
14877  /* Compute how many cycles we will add to execution time
14878     if split lea into a sequence of instructions.  */
14879  if (parts.base || parts.index)
14880    {
14881      /* Have to use a mov instruction if the non-destructive
14882	 destination form is used.  */
14883      if (regno1 != regno0 && regno2 != regno0)
14884	split_cost += 1;
14885
14886      /* Have to add index to base if both exist.  */
14887      if (parts.base && parts.index)
14888	split_cost += 1;
14889
14890      /* Have to use shift and adds if scale is 2 or greater.  */
14891      if (parts.scale > 1)
14892	{
14893	  if (regno0 != regno1)
14894	    split_cost += 1;
14895	  else if (regno2 == regno0)
14896	    split_cost += 4;
14897	  else
14898	    split_cost += parts.scale;
14899	}
14900
14901      /* Have to use an add instruction with an immediate if
14902	 disp is non-zero.  */
14903      if (parts.disp && parts.disp != const0_rtx)
14904	split_cost += 1;
14905
14906      /* Subtract the price of lea.  */
14907      split_cost -= 1;
14908    }
14909
14910  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
14911				parts.scale > 1);
14912}
14913
14914/* Return true if it is ok to optimize an ADD operation to an LEA
14915   operation to avoid flag register consumption.  For most processors,
14916   ADD is faster than LEA.  For processors like BONNELL, if the
14917   destination register of LEA holds an actual address which will be
14918   used soon, LEA is better; otherwise ADD is better.  */
14919
14920bool
14921ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
14922{
14923  unsigned int regno0 = true_regnum (operands[0]);
14924  unsigned int regno1 = true_regnum (operands[1]);
14925  unsigned int regno2 = true_regnum (operands[2]);
14926
14927  /* If a = b + c (a != b && a != c), we must use the lea form. */
14928  if (regno0 != regno1 && regno0 != regno2)
14929    return true;
14930
14931  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14932    return false;
14933
14934  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
14935}
14936
14937/* Return true if destination reg of SET_BODY is shift count of
14938   USE_BODY.  */
14939
14940static bool
14941ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
14942{
14943  rtx set_dest;
14944  rtx shift_rtx;
14945  int i;
14946
14947  /* Retrieve destination of SET_BODY.  */
14948  switch (GET_CODE (set_body))
14949    {
14950    case SET:
14951      set_dest = SET_DEST (set_body);
14952      if (!set_dest || !REG_P (set_dest))
14953	return false;
14954      break;
14955    case PARALLEL:
14956      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
14957	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
14958					  use_body))
14959	  return true;
14960      /* FALLTHROUGH */
14961    default:
14962      return false;
14963    }
14964
14965  /* Retrieve shift count of USE_BODY.  */
14966  switch (GET_CODE (use_body))
14967    {
14968    case SET:
14969      shift_rtx = XEXP (use_body, 1);
14970      break;
14971    case PARALLEL:
14972      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
14973	if (ix86_dep_by_shift_count_body (set_body,
14974					  XVECEXP (use_body, 0, i)))
14975	  return true;
14976      /* FALLTHROUGH */
14977    default:
14978      return false;
14979    }
14980
14981  if (shift_rtx
14982      && (GET_CODE (shift_rtx) == ASHIFT
14983	  || GET_CODE (shift_rtx) == LSHIFTRT
14984	  || GET_CODE (shift_rtx) == ASHIFTRT
14985	  || GET_CODE (shift_rtx) == ROTATE
14986	  || GET_CODE (shift_rtx) == ROTATERT))
14987    {
14988      rtx shift_count = XEXP (shift_rtx, 1);
14989
14990      /* Return true if shift count is dest of SET_BODY.  */
14991      if (REG_P (shift_count))
14992	{
14993	  /* Add this check since this function can be invoked before
14994	     register allocation by the pre-reload scheduler.  */
14995	  if (reload_completed
14996	      && true_regnum (set_dest) == true_regnum (shift_count))
14997	    return true;
14998	  else if (REGNO (set_dest) == REGNO (shift_count))
14999	    return true;
15000	}
15001    }
15002
15003  return false;
15004}
15005
15006/* Return true if destination reg of SET_INSN is shift count of
15007   USE_INSN.  */
15008
15009bool
15010ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
15011{
15012  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
15013				       PATTERN (use_insn));
15014}
15015
15016/* Return TRUE or FALSE depending on whether the unary operator meets the
15017   appropriate constraints.  */
15018
15019bool
15020ix86_unary_operator_ok (enum rtx_code,
15021			machine_mode,
15022			rtx operands[2])
15023{
15024  /* If one of the operands is memory, source and destination must match.  */
15025  if ((MEM_P (operands[0])
15026       || MEM_P (operands[1]))
15027      && ! rtx_equal_p (operands[0], operands[1]))
15028    return false;
15029  return true;
15030}
15031
15032/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15033   are ok, keeping in mind the possible movddup alternative.  */
15034
15035bool
15036ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
15037{
15038  if (MEM_P (operands[0]))
15039    return rtx_equal_p (operands[0], operands[1 + high]);
15040  if (MEM_P (operands[1]) && MEM_P (operands[2]))
15041    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
15042  return true;
15043}
15044
15045/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
15046   then replicate the value for all elements of the vector
15047   register.  */
15048
15049rtx
15050ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
15051{
15052  int i, n_elt;
15053  rtvec v;
15054  machine_mode scalar_mode;
15055
15056  switch (mode)
15057    {
15058    case E_V64QImode:
15059    case E_V32QImode:
15060    case E_V16QImode:
15061    case E_V32HImode:
15062    case E_V16HImode:
15063    case E_V8HImode:
15064    case E_V16SImode:
15065    case E_V8SImode:
15066    case E_V4SImode:
15067    case E_V8DImode:
15068    case E_V4DImode:
15069    case E_V2DImode:
15070      gcc_assert (vect);
15071      /* FALLTHRU */
15072    case E_V16SFmode:
15073    case E_V8SFmode:
15074    case E_V4SFmode:
15075    case E_V8DFmode:
15076    case E_V4DFmode:
15077    case E_V2DFmode:
15078      n_elt = GET_MODE_NUNITS (mode);
15079      v = rtvec_alloc (n_elt);
15080      scalar_mode = GET_MODE_INNER (mode);
15081
15082      RTVEC_ELT (v, 0) = value;
15083
15084      for (i = 1; i < n_elt; ++i)
15085	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
15086
15087      return gen_rtx_CONST_VECTOR (mode, v);
15088
15089    default:
15090      gcc_unreachable ();
15091    }
15092}
15093
15094/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15095   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
15096   for an SSE register.  If VECT is true, then replicate the mask for
15097   all elements of the vector register.  If INVERT is true, then create
15098   a mask excluding the sign bit.  */
15099
15100rtx
15101ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
15102{
15103  machine_mode vec_mode, imode;
15104  wide_int w;
15105  rtx mask, v;
15106
15107  switch (mode)
15108    {
15109    case E_V16SImode:
15110    case E_V16SFmode:
15111    case E_V8SImode:
15112    case E_V4SImode:
15113    case E_V8SFmode:
15114    case E_V4SFmode:
15115      vec_mode = mode;
15116      imode = SImode;
15117      break;
15118
15119    case E_V8DImode:
15120    case E_V4DImode:
15121    case E_V2DImode:
15122    case E_V8DFmode:
15123    case E_V4DFmode:
15124    case E_V2DFmode:
15125      vec_mode = mode;
15126      imode = DImode;
15127      break;
15128
15129    case E_TImode:
15130    case E_TFmode:
15131      vec_mode = VOIDmode;
15132      imode = TImode;
15133      break;
15134
15135    default:
15136      gcc_unreachable ();
15137    }
15138
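  /* The mask is a single 1 in the sign (top) bit of each element, or
     its complement when INVERT is true.  */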
15139  machine_mode inner_mode = GET_MODE_INNER (mode);
15140  w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
15141			   GET_MODE_BITSIZE (inner_mode));
15142  if (invert)
15143    w = wi::bit_not (w);
15144
15145  /* Force this value into the low part of a fp vector constant.  */
15146  mask = immed_wide_int_const (w, imode);
15147  mask = gen_lowpart (inner_mode, mask);
15148
15149  if (vec_mode == VOIDmode)
15150    return force_reg (inner_mode, mask);
15151
15152  v = ix86_build_const_vector (vec_mode, vect, mask);
15153  return force_reg (vec_mode, v);
15154}
15155
15156/* Return TRUE or FALSE depending on whether the first SET in INSN
15157   has source and destination with matching CC modes, and that the
15158   CC mode is at least as constrained as REQ_MODE.  */
15159
15160bool
15161ix86_match_ccmode (rtx insn, machine_mode req_mode)
15162{
15163  rtx set;
15164  machine_mode set_mode;
15165
15166  set = PATTERN (insn);
15167  if (GET_CODE (set) == PARALLEL)
15168    set = XVECEXP (set, 0, 0);
15169  gcc_assert (GET_CODE (set) == SET);
15170  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15171
15172  set_mode = GET_MODE (SET_DEST (set));
15173  switch (set_mode)
15174    {
15175    case E_CCNOmode:
15176      if (req_mode != CCNOmode
15177	  && (req_mode != CCmode
15178	      || XEXP (SET_SRC (set), 1) != const0_rtx))
15179	return false;
15180      break;
15181    case E_CCmode:
15182      if (req_mode == CCGCmode)
15183	return false;
15184      /* FALLTHRU */
15185    case E_CCGCmode:
15186      if (req_mode == CCGOCmode || req_mode == CCNOmode)
15187	return false;
15188      /* FALLTHRU */
15189    case E_CCGOCmode:
15190      if (req_mode == CCZmode)
15191	return false;
15192      /* FALLTHRU */
15193    case E_CCZmode:
15194      break;
15195
15196    case E_CCGZmode:
15197
15198    case E_CCAmode:
15199    case E_CCCmode:
15200    case E_CCOmode:
15201    case E_CCPmode:
15202    case E_CCSmode:
15203      if (set_mode != req_mode)
15204	return false;
15205      break;
15206
15207    default:
15208      gcc_unreachable ();
15209    }
15210
15211  return GET_MODE (SET_SRC (set)) == set_mode;
15212}
15213
15214machine_mode
15215ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15216{
15217  machine_mode mode = GET_MODE (op0);
15218
15219  if (SCALAR_FLOAT_MODE_P (mode))
15220    {
15221      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15222      return CCFPmode;
15223    }
15224
15225  switch (code)
15226    {
15227      /* Only zero flag is needed.  */
15228    case EQ:			/* ZF=0 */
15229    case NE:			/* ZF!=0 */
15230      return CCZmode;
15231      /* Codes needing carry flag.  */
15232    case GEU:			/* CF=0 */
15233    case LTU:			/* CF=1 */
15234      /* Detect overflow checks.  They need just the carry flag.  */
15235      if (GET_CODE (op0) == PLUS
15236	  && (rtx_equal_p (op1, XEXP (op0, 0))
15237	      || rtx_equal_p (op1, XEXP (op0, 1))))
15238	return CCCmode;
15239      else
15240	return CCmode;
15241    case GTU:			/* CF=0 & ZF=0 */
15242    case LEU:			/* CF=1 | ZF=1 */
15243      return CCmode;
15244      /* Codes possibly doable only with sign flag when
15245         comparing against zero.  */
15246    case GE:			/* SF=OF   or   SF=0 */
15247    case LT:			/* SF<>OF  or   SF=1 */
15248      if (op1 == const0_rtx)
15249	return CCGOCmode;
15250      else
15251	/* For other cases Carry flag is not required.  */
15252	return CCGCmode;
15253      /* Codes doable only with the sign flag when comparing
15254         against zero, but we miss the jump instruction for it,
15255         so we need to use relational tests against the overflow
15256         flag, which thus needs to be zero.  */
15257    case GT:			/* ZF=0 & SF=OF */
15258    case LE:			/* ZF=1 | SF<>OF */
15259      if (op1 == const0_rtx)
15260	return CCNOmode;
15261      else
15262	return CCGCmode;
15263      /* The strcmp pattern does (use flags), and combine may ask us
15264	 for a proper mode.  */
15265    case USE:
15266      return CCmode;
15267    default:
15268      gcc_unreachable ();
15269    }
15270}
15271
15272/* Return the fixed registers used for condition codes.  */
15273
15274static bool
15275ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
15276{
15277  *p1 = FLAGS_REG;
15278  *p2 = INVALID_REGNUM;
15279  return true;
15280}
15281
15282/* If two condition code modes are compatible, return a condition code
15283   mode which is compatible with both.  Otherwise, return
15284   VOIDmode.  */
15285
15286static machine_mode
15287ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
15288{
15289  if (m1 == m2)
15290    return m1;
15291
15292  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
15293    return VOIDmode;
15294
15295  if ((m1 == CCGCmode && m2 == CCGOCmode)
15296      || (m1 == CCGOCmode && m2 == CCGCmode))
15297    return CCGCmode;
15298
15299  if ((m1 == CCNOmode && m2 == CCGOCmode)
15300      || (m1 == CCGOCmode && m2 == CCNOmode))
15301    return CCNOmode;
15302
15303  if (m1 == CCZmode
15304      && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
15305    return m2;
15306  else if (m2 == CCZmode
15307	   && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
15308    return m1;
15309
15310  switch (m1)
15311    {
15312    default:
15313      gcc_unreachable ();
15314
15315    case E_CCmode:
15316    case E_CCGCmode:
15317    case E_CCGOCmode:
15318    case E_CCNOmode:
15319    case E_CCAmode:
15320    case E_CCCmode:
15321    case E_CCOmode:
15322    case E_CCPmode:
15323    case E_CCSmode:
15324    case E_CCZmode:
15325      switch (m2)
15326	{
15327	default:
15328	  return VOIDmode;
15329
15330	case E_CCmode:
15331	case E_CCGCmode:
15332	case E_CCGOCmode:
15333	case E_CCNOmode:
15334	case E_CCAmode:
15335	case E_CCCmode:
15336	case E_CCOmode:
15337	case E_CCPmode:
15338	case E_CCSmode:
15339	case E_CCZmode:
15340	  return CCmode;
15341	}
15342
15343    case E_CCFPmode:
15344      /* These are only compatible with themselves, which we already
15345	 checked above.  */
15346      return VOIDmode;
15347    }
15348}
15349
15350/* Return strategy to use for floating-point.  We assume that fcomi is always
15351   preferable where available, since that is also true when looking at size
15352   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
15353
15354enum ix86_fpcmp_strategy
15355ix86_fp_comparison_strategy (enum rtx_code)
15356{
15357  /* Do fcomi/sahf based test when profitable.  */
15358
15359  if (TARGET_CMOVE)
15360    return IX86_FPCMP_COMI;
15361
15362  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
15363    return IX86_FPCMP_SAHF;
15364
15365  return IX86_FPCMP_ARITH;
15366}
15367
15368/* Convert comparison codes we use to represent FP comparison to integer
15369   code that will result in proper branch.  Return UNKNOWN if no such code
15370   is available.  */
15371
15372enum rtx_code
15373ix86_fp_compare_code_to_integer (enum rtx_code code)
15374{
15375  switch (code)
15376    {
15377    case GT:
15378      return GTU;
15379    case GE:
15380      return GEU;
15381    case ORDERED:
15382    case UNORDERED:
15383      return code;
15384    case UNEQ:
15385      return EQ;
15386    case UNLT:
15387      return LTU;
15388    case UNLE:
15389      return LEU;
15390    case LTGT:
15391      return NE;
15392    default:
15393      return UNKNOWN;
15394    }
15395}
15396
15397/* Zero extend possibly SImode EXP to Pmode register.  */
15398rtx
15399ix86_zero_extend_to_Pmode (rtx exp)
15400{
15401  return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
15402}
15403
15404/* Return true if the function being called was marked with attribute
15405   "noplt" or using -fno-plt and we are compiling for non-PIC.  We need
15406   to handle the non-PIC case in the backend because there is no easy
15407   interface for the front-end to force non-PLT calls to use the GOT.
15408   This is currently used only with 64-bit or 32-bit GOT32X ELF targets
15409   to call the function marked "noplt" indirectly.  */
15410
15411static bool
15412ix86_nopic_noplt_attribute_p (rtx call_op)
15413{
15414  if (flag_pic || ix86_cmodel == CM_LARGE
15415      || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
15416      || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
15417      || SYMBOL_REF_LOCAL_P (call_op))
15418    return false;
15419
15420  tree symbol_decl = SYMBOL_REF_DECL (call_op);
15421
15422  if (!flag_plt
15423      || (symbol_decl != NULL_TREE
15424          && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
15425    return true;
15426
15427  return false;
15428}
15429
15430/* Helper to output the jmp/call.  */
15431static void
15432ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
15433{
15434  if (thunk_name != NULL)
15435    {
15436      fprintf (asm_out_file, "\tjmp\t");
15437      assemble_name (asm_out_file, thunk_name);
15438      putc ('\n', asm_out_file);
15439    }
15440  else
15441    output_indirect_thunk (regno);
15442}
15443
15444/* Output indirect branch via a call and return thunk.  CALL_OP is a
15445   register which contains the branch target.  Branch is a tail call
15446   if SIBCALL_P is true.
15447   A normal call is converted to:
15448
15449	call __x86_indirect_thunk_reg
15450
15451   and a tail call is converted to:
15452
15453	jmp __x86_indirect_thunk_reg
15454 */
15455
15456static void
15457ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
15458{
15459  char thunk_name_buf[32];
15460  char *thunk_name;
15461  enum indirect_thunk_prefix need_prefix
15462    = indirect_thunk_need_prefix (current_output_insn);
15463  int regno = REGNO (call_op);
15464
15465  if (cfun->machine->indirect_branch_type
15466      != indirect_branch_thunk_inline)
15467    {
15468      if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
15469	{
15470	  int i = regno;
15471	  if (i >= FIRST_REX_INT_REG)
15472	    i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1);
15473	  indirect_thunks_used |= 1 << i;
15474	}
15475      indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
15476      thunk_name = thunk_name_buf;
15477    }
15478  else
15479    thunk_name = NULL;
15480
15481  if (sibcall_p)
15482    ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15483  else
15484    {
15485      if (thunk_name != NULL)
15486	{
15487	  fprintf (asm_out_file, "\tcall\t");
15488	  assemble_name (asm_out_file, thunk_name);
15489	  putc ('\n', asm_out_file);
15490	  return;
15491	}
15492
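      /* No thunk name means the thunk body is emitted inline: jump over
	 it to L2, place the thunk at L1, and call L1 from L2 so control
	 returns just past the call.  */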
15493      char indirectlabel1[32];
15494      char indirectlabel2[32];
15495
15496      ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
15497				   INDIRECT_LABEL,
15498				   indirectlabelno++);
15499      ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
15500				   INDIRECT_LABEL,
15501				   indirectlabelno++);
15502
15503      /* Jump.  */
15504      fputs ("\tjmp\t", asm_out_file);
15505      assemble_name_raw (asm_out_file, indirectlabel2);
15506      fputc ('\n', asm_out_file);
15507
15508      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
15509
15510      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15511
15512      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
15513
15514      /* Call.  */
15515      fputs ("\tcall\t", asm_out_file);
15516      assemble_name_raw (asm_out_file, indirectlabel1);
15517      fputc ('\n', asm_out_file);
15518    }
15519}
15520
15521/* Output indirect branch via a call and return thunk.  CALL_OP is
15522   the branch target.  XASM is the assembly template for CALL_OP.
15523   Branch is a tail call if SIBCALL_P is true.  A normal call is
15524   converted to:
15525
15526	jmp L2
15527   L1:
15528	push CALL_OP
15529	jmp __x86_indirect_thunk
15530   L2:
15531	call L1
15532
15533   and a tail call is converted to:
15534
15535	push CALL_OP
15536	jmp __x86_indirect_thunk
15537 */
15538
15539static void
15540ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
15541				      bool sibcall_p)
15542{
15543  char thunk_name_buf[32];
15544  char *thunk_name;
15545  char push_buf[64];
15546  enum indirect_thunk_prefix need_prefix
15547    = indirect_thunk_need_prefix (current_output_insn);
15548  int regno = -1;
15549
15550  if (cfun->machine->indirect_branch_type
15551      != indirect_branch_thunk_inline)
15552    {
15553      if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
15554	indirect_thunk_needed = true;
15555      indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
15556      thunk_name = thunk_name_buf;
15557    }
15558  else
15559    thunk_name = NULL;
15560
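  /* Build the push of the branch target; 'q' selects the 64-bit form
     and 'l' the 32-bit form.  */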
15561  snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
15562	    TARGET_64BIT ? 'q' : 'l', xasm);
15563
15564  if (sibcall_p)
15565    {
15566      output_asm_insn (push_buf, &call_op);
15567      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15568    }
15569  else
15570    {
15571      char indirectlabel1[32];
15572      char indirectlabel2[32];
15573
15574      ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
15575				   INDIRECT_LABEL,
15576				   indirectlabelno++);
15577      ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
15578				   INDIRECT_LABEL,
15579				   indirectlabelno++);
15580
15581      /* Jump.  */
15582      fputs ("\tjmp\t", asm_out_file);
15583      assemble_name_raw (asm_out_file, indirectlabel2);
15584      fputc ('\n', asm_out_file);
15585
15586      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
15587
15588      /* An external function may be called via GOT, instead of PLT.  */
15589      if (MEM_P (call_op))
15590	{
15591	  struct ix86_address parts;
15592	  rtx addr = XEXP (call_op, 0);
15593	  if (ix86_decompose_address (addr, &parts)
15594	      && parts.base == stack_pointer_rtx)
15595	    {
15596	      /* Since call will adjust stack by -UNITS_PER_WORD,
15597		 we must convert "disp(stack, index, scale)" to
15598		 "disp+UNITS_PER_WORD(stack, index, scale)".  */
15599	      if (parts.index)
15600		{
15601		  addr = gen_rtx_MULT (Pmode, parts.index,
15602				       GEN_INT (parts.scale));
15603		  addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15604				       addr);
15605		}
15606	      else
15607		addr = stack_pointer_rtx;
15608
15609	      rtx disp;
15610	      if (parts.disp != NULL_RTX)
15611		disp = plus_constant (Pmode, parts.disp,
15612				      UNITS_PER_WORD);
15613	      else
15614		disp = GEN_INT (UNITS_PER_WORD);
15615
15616	      addr = gen_rtx_PLUS (Pmode, addr, disp);
15617	      call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
15618	    }
15619	}
15620
15621      output_asm_insn (push_buf, &call_op);
15622
15623      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15624
15625      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
15626
15627      /* Call.  */
15628      fputs ("\tcall\t", asm_out_file);
15629      assemble_name_raw (asm_out_file, indirectlabel1);
15630      fputc ('\n', asm_out_file);
15631    }
15632}
15633
15634/* Output indirect branch via a call and return thunk.  CALL_OP is
15635   the branch target.  XASM is the assembly template for CALL_OP.
15636   Branch is a tail call if SIBCALL_P is true.   */
15637
15638static void
15639ix86_output_indirect_branch (rtx call_op, const char *xasm,
15640			     bool sibcall_p)
15641{
15642  if (REG_P (call_op))
15643    ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
15644  else
15645    ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
15646}
15647
15648/* Output indirect jump.  CALL_OP is the jump target.  */
15649
15650const char *
15651ix86_output_indirect_jmp (rtx call_op)
15652{
15653  if (cfun->machine->indirect_branch_type != indirect_branch_keep)
15654    {
15655      /* We can't have a red zone since the "call" in the indirect thunk
15656         pushes the return address onto the stack, destroying the red zone.  */
15657      if (ix86_red_zone_size != 0)
15658	gcc_unreachable ();
15659
15660      ix86_output_indirect_branch (call_op, "%0", true);
15661      return "";
15662    }
15663  else
15664    return "%!jmp\t%A0";
15665}
15666
15667/* Output return instrumentation for current function if needed.  */
15668
15669static void
15670output_return_instrumentation (void)
15671{
15672  if (ix86_instrument_return != instrument_return_none
15673      && flag_fentry
15674      && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
15675    {
15676      if (ix86_flag_record_return)
15677	fprintf (asm_out_file, "1:\n");
15678      switch (ix86_instrument_return)
15679	{
15680	case instrument_return_call:
15681	  fprintf (asm_out_file, "\tcall\t__return__\n");
15682	  break;
15683	case instrument_return_nop5:
15684	  /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1)  */
15685	  fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
15686	  break;
15687	case instrument_return_none:
15688	  break;
15689	}
15690
15691      if (ix86_flag_record_return)
15692	{
15693	  fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
15694	  fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
15695	  fprintf (asm_out_file, "\t.previous\n");
15696	}
15697    }
15698}
15699
/* Output the function return.  Add a REP prefix to RET if LONG_P is true
   and the function return is kept.  */
15702
15703const char *
15704ix86_output_function_return (bool long_p)
15705{
15706  output_return_instrumentation ();
15707
15708  if (cfun->machine->function_return_type != indirect_branch_keep)
15709    {
15710      char thunk_name[32];
15711      enum indirect_thunk_prefix need_prefix
15712	= indirect_thunk_need_prefix (current_output_insn);
15713
15714      if (cfun->machine->function_return_type
15715	  != indirect_branch_thunk_inline)
15716	{
15717	  bool need_thunk = (cfun->machine->function_return_type
15718			     == indirect_branch_thunk);
15719	  indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
15720			       true);
15721	  indirect_return_needed |= need_thunk;
15722	  fprintf (asm_out_file, "\tjmp\t");
15723	  assemble_name (asm_out_file, thunk_name);
15724	  putc ('\n', asm_out_file);
15725	}
15726      else
15727	output_indirect_thunk (INVALID_REGNUM);
15728
15729      return "";
15730    }
15731
15732  if (!long_p)
15733    return "%!ret";
15734
15735  return "rep%; ret";
15736}
15737
15738/* Output indirect function return.  RET_OP is the function return
15739   target.  */
15740
15741const char *
15742ix86_output_indirect_function_return (rtx ret_op)
15743{
15744  if (cfun->machine->function_return_type != indirect_branch_keep)
15745    {
15746      char thunk_name[32];
15747      enum indirect_thunk_prefix need_prefix
15748	= indirect_thunk_need_prefix (current_output_insn);
15749      unsigned int regno = REGNO (ret_op);
15750      gcc_assert (regno == CX_REG);
15751
15752      if (cfun->machine->function_return_type
15753	  != indirect_branch_thunk_inline)
15754	{
15755	  bool need_thunk = (cfun->machine->function_return_type
15756			     == indirect_branch_thunk);
15757	  indirect_thunk_name (thunk_name, regno, need_prefix, true);
15758
15759	  if (need_thunk)
15760	    {
15761	      indirect_return_via_cx = true;
15762	      indirect_thunks_used |= 1 << CX_REG;
15763	    }
15764	  fprintf (asm_out_file, "\tjmp\t");
15765	  assemble_name (asm_out_file, thunk_name);
15766	  putc ('\n', asm_out_file);
15767	}
15768      else
15769	output_indirect_thunk (regno);
15770
15771      return "";
15772    }
15773  else
15774    return "%!jmp\t%A0";
15775}
15776
15777/* Output the assembly for a call instruction.  */
15778
15779const char *
15780ix86_output_call_insn (rtx_insn *insn, rtx call_op)
15781{
15782  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
15783  bool output_indirect_p
15784    = (!TARGET_SEH
15785       && cfun->machine->indirect_branch_type != indirect_branch_keep);
15786  bool seh_nop_p = false;
15787  const char *xasm;
15788
15789  if (SIBLING_CALL_P (insn))
15790    {
15791      output_return_instrumentation ();
15792      if (direct_p)
15793	{
15794	  if (ix86_nopic_noplt_attribute_p (call_op))
15795	    {
15796	      direct_p = false;
15797	      if (TARGET_64BIT)
15798		{
15799		  if (output_indirect_p)
15800		    xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15801		  else
15802		    xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15803		}
15804	      else
15805		{
15806		  if (output_indirect_p)
15807		    xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
15808		  else
15809		    xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
15810		}
15811	    }
15812	  else
15813	    xasm = "%!jmp\t%P0";
15814	}
15815      /* SEH epilogue detection requires the indirect branch case
15816	 to include REX.W.  */
15817      else if (TARGET_SEH)
15818	xasm = "%!rex.W jmp\t%A0";
15819      else
15820	{
15821	  if (output_indirect_p)
15822	    xasm = "%0";
15823	  else
15824	    xasm = "%!jmp\t%A0";
15825	}
15826
15827      if (output_indirect_p && !direct_p)
15828	ix86_output_indirect_branch (call_op, xasm, true);
15829      else
15830	output_asm_insn (xasm, &call_op);
15831      return "";
15832    }
15833
15834  /* SEH unwinding can require an extra nop to be emitted in several
15835     circumstances.  Determine if we have one of those.  */
15836  if (TARGET_SEH)
15837    {
15838      rtx_insn *i;
15839
15840      for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
15841	{
15842	  /* Prevent a catch region from being adjacent to a jump that would
15843	     be interpreted as an epilogue sequence by the unwinder.  */
15844	  if (JUMP_P(i) && CROSSING_JUMP_P (i))
15845	    {
15846	      seh_nop_p = true;
15847	      break;
15848	    }
15849
15850	  /* If we get to another real insn, we don't need the nop.  */
15851	  if (INSN_P (i))
15852	    break;
15853
15854	  /* If we get to the epilogue note, prevent a catch region from
15855	     being adjacent to the standard epilogue sequence.  Note that,
15856	     if non-call exceptions are enabled, we already did it during
15857	     epilogue expansion, or else, if the insn can throw internally,
15858	     we already did it during the reorg pass.  */
15859	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
15860	      && !flag_non_call_exceptions
15861	      && !can_throw_internal (insn))
15862	    {
15863	      seh_nop_p = true;
15864	      break;
15865	    }
15866	}
15867
15868      /* If we didn't find a real insn following the call, prevent the
15869	 unwinder from looking into the next function.  */
15870      if (i == NULL)
15871	seh_nop_p = true;
15872    }
15873
15874  if (direct_p)
15875    {
15876      if (ix86_nopic_noplt_attribute_p (call_op))
15877	{
15878	  direct_p = false;
15879	  if (TARGET_64BIT)
15880	    {
15881	      if (output_indirect_p)
15882		xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15883	      else
15884		xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15885	    }
15886	  else
15887	    {
15888	      if (output_indirect_p)
15889		xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
15890	      else
15891		xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
15892	    }
15893	}
15894      else
15895	xasm = "%!call\t%P0";
15896    }
15897  else
15898    {
15899      if (output_indirect_p)
15900	xasm = "%0";
15901      else
15902	xasm = "%!call\t%A0";
15903    }
15904
15905  if (output_indirect_p && !direct_p)
15906    ix86_output_indirect_branch (call_op, xasm, false);
15907  else
15908    output_asm_insn (xasm, &call_op);
15909
15910  if (seh_nop_p)
15911    return "nop";
15912
15913  return "";
15914}
15915
15916/* Return a MEM corresponding to a stack slot with mode MODE.
15917   Allocate a new slot if necessary.
15918
15919   The RTL for a function can have several slots available: N is
15920   which slot to use.  */
15921
15922rtx
15923assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
15924{
15925  struct stack_local_entry *s;
15926
15927  gcc_assert (n < MAX_386_STACK_LOCALS);
15928
15929  for (s = ix86_stack_locals; s; s = s->next)
15930    if (s->mode == mode && s->n == n)
15931      return validize_mem (copy_rtx (s->rtl));
15932
15933  s = ggc_alloc<stack_local_entry> ();
15934  s->n = n;
15935  s->mode = mode;
15936  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15937
15938  s->next = ix86_stack_locals;
15939  ix86_stack_locals = s;
15940  return validize_mem (copy_rtx (s->rtl));
15941}
15942
15943static void
15944ix86_instantiate_decls (void)
15945{
15946  struct stack_local_entry *s;
15947
15948  for (s = ix86_stack_locals; s; s = s->next)
15949    if (s->rtl != NULL_RTX)
15950      instantiate_decl_rtl (s->rtl);
15951}
15952
/* Check whether the x86 address PARTS is a PC-relative address.  */
15954
15955bool
15956ix86_rip_relative_addr_p (struct ix86_address *parts)
15957{
15958  rtx base, index, disp;
15959
15960  base = parts->base;
15961  index = parts->index;
15962  disp = parts->disp;
15963
15964  if (disp && !base && !index)
15965    {
15966      if (TARGET_64BIT)
15967	{
15968	  rtx symbol = disp;
15969
15970	  if (GET_CODE (disp) == CONST)
15971	    symbol = XEXP (disp, 0);
15972	  if (GET_CODE (symbol) == PLUS
15973	      && CONST_INT_P (XEXP (symbol, 1)))
15974	    symbol = XEXP (symbol, 0);
15975
15976	  if (GET_CODE (symbol) == LABEL_REF
15977	      || (GET_CODE (symbol) == SYMBOL_REF
15978		  && SYMBOL_REF_TLS_MODEL (symbol) == 0)
15979	      || (GET_CODE (symbol) == UNSPEC
15980		  && (XINT (symbol, 1) == UNSPEC_GOTPCREL
15981		      || XINT (symbol, 1) == UNSPEC_PCREL
15982		      || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
15983	    return true;
15984	}
15985    }
15986  return false;
15987}
15988
/* Calculate the length of the memory address in the instruction encoding.
   This includes the addr32 prefix but does not include the one-byte modrm,
   opcode, or other prefixes.  We never generate an addr32 prefix for an LEA
   insn.  */
15992
15993int
15994memory_address_length (rtx addr, bool lea)
15995{
15996  struct ix86_address parts;
15997  rtx base, index, disp;
15998  int len;
15999  int ok;
16000
16001  if (GET_CODE (addr) == PRE_DEC
16002      || GET_CODE (addr) == POST_INC
16003      || GET_CODE (addr) == PRE_MODIFY
16004      || GET_CODE (addr) == POST_MODIFY)
16005    return 0;
16006
16007  ok = ix86_decompose_address (addr, &parts);
16008  gcc_assert (ok);
16009
16010  len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
16011
  /* If this is not an LEA instruction, add the length of the addr32 prefix.  */
16013  if (TARGET_64BIT && !lea
16014      && (SImode_address_operand (addr, VOIDmode)
16015	  || (parts.base && GET_MODE (parts.base) == SImode)
16016	  || (parts.index && GET_MODE (parts.index) == SImode)))
16017    len++;
16018
16019  base = parts.base;
16020  index = parts.index;
16021  disp = parts.disp;
16022
16023  if (base && SUBREG_P (base))
16024    base = SUBREG_REG (base);
16025  if (index && SUBREG_P (index))
16026    index = SUBREG_REG (index);
16027
16028  gcc_assert (base == NULL_RTX || REG_P (base));
16029  gcc_assert (index == NULL_RTX || REG_P (index));
16030
16031  /* Rule of thumb:
16032       - esp as the base always wants an index,
16033       - ebp as the base always wants a displacement,
16034       - r12 as the base always wants an index,
16035       - r13 as the base always wants a displacement.  */
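  /* For example, a plain (%esp) or (%r12) operand needs a SIB byte, and a
     plain (%ebp) or (%r13) operand needs a zero disp8, so each costs one
     byte more than, say, (%eax).  */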
16036
16037  /* Register Indirect.  */
16038  if (base && !index && !disp)
16039    {
16040      /* esp (for its index) and ebp (for its displacement) need
16041	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
16042	 code.  */
16043      if (base == arg_pointer_rtx
16044	  || base == frame_pointer_rtx
16045	  || REGNO (base) == SP_REG
16046	  || REGNO (base) == BP_REG
16047	  || REGNO (base) == R12_REG
16048	  || REGNO (base) == R13_REG)
16049	len++;
16050    }
16051
  /* Direct Addressing.  In 64-bit mode, mod 00 r/m 5
     is not disp32 but disp32(%rip), so for plain disp32
     a SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by an UNSPEC.  */
16057  else if (disp && !base && !index)
16058    {
16059      len += 4;
16060      if (!ix86_rip_relative_addr_p (&parts))
16061	len++;
16062    }
16063  else
16064    {
16065      /* Find the length of the displacement constant.  */
16066      if (disp)
16067	{
16068	  if (base && satisfies_constraint_K (disp))
16069	    len += 1;
16070	  else
16071	    len += 4;
16072	}
16073      /* ebp always wants a displacement.  Similarly r13.  */
16074      else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
16075	len++;
16076
16077      /* An index requires the two-byte modrm form....  */
16078      if (index
16079	  /* ...like esp (or r12), which always wants an index.  */
16080	  || base == arg_pointer_rtx
16081	  || base == frame_pointer_rtx
16082	  || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
16083	len++;
16084    }
16085
16086  return len;
16087}
16088
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */
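/* For example, with SHORTFORM set, "add $-1, %eax" counts 1 byte of
   immediate (the value fits in a sign-extended imm8), while
   "add $1000, %eax" counts 4.  */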
16091int
16092ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
16093{
16094  int len = 0;
16095  int i;
16096  extract_insn_cached (insn);
16097  for (i = recog_data.n_operands - 1; i >= 0; --i)
16098    if (CONSTANT_P (recog_data.operand[i]))
16099      {
16100        enum attr_mode mode = get_attr_mode (insn);
16101
16102	gcc_assert (!len);
16103	if (shortform && CONST_INT_P (recog_data.operand[i]))
16104	  {
16105	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
16106	    switch (mode)
16107	      {
16108	      case MODE_QI:
16109		len = 1;
16110		continue;
16111	      case MODE_HI:
16112		ival = trunc_int_for_mode (ival, HImode);
16113		break;
16114	      case MODE_SI:
16115		ival = trunc_int_for_mode (ival, SImode);
16116		break;
16117	      default:
16118		break;
16119	      }
16120	    if (IN_RANGE (ival, -128, 127))
16121	      {
16122		len = 1;
16123		continue;
16124	      }
16125	  }
16126	switch (mode)
16127	  {
16128	  case MODE_QI:
16129	    len = 1;
16130	    break;
16131	  case MODE_HI:
16132	    len = 2;
16133	    break;
16134	  case MODE_SI:
16135	    len = 4;
16136	    break;
	  /* Immediates for DImode instructions are encoded
	     as 32-bit sign-extended values.  */
16139	  case MODE_DI:
16140	    len = 4;
16141	    break;
16142	  default:
16143	    fatal_insn ("unknown insn mode", insn);
16144	}
16145      }
16146  return len;
16147}
16148
16149/* Compute default value for "length_address" attribute.  */
16150int
16151ix86_attr_length_address_default (rtx_insn *insn)
16152{
16153  int i;
16154
16155  if (get_attr_type (insn) == TYPE_LEA)
16156    {
16157      rtx set = PATTERN (insn), addr;
16158
16159      if (GET_CODE (set) == PARALLEL)
16160	set = XVECEXP (set, 0, 0);
16161
16162      gcc_assert (GET_CODE (set) == SET);
16163
16164      addr = SET_SRC (set);
16165
16166      return memory_address_length (addr, true);
16167    }
16168
16169  extract_insn_cached (insn);
16170  for (i = recog_data.n_operands - 1; i >= 0; --i)
16171    {
16172      rtx op = recog_data.operand[i];
16173      if (MEM_P (op))
16174	{
16175	  constrain_operands_cached (insn, reload_completed);
16176	  if (which_alternative != -1)
16177	    {
16178	      const char *constraints = recog_data.constraints[i];
16179	      int alt = which_alternative;
16180
16181	      while (*constraints == '=' || *constraints == '+')
16182		constraints++;
16183	      while (alt-- > 0)
16184	        while (*constraints++ != ',')
16185		  ;
16186	      /* Skip ignored operands.  */
16187	      if (*constraints == 'X')
16188		continue;
16189	    }
16190
16191	  int len = memory_address_length (XEXP (op, 0), false);
16192
16193	  /* Account for segment prefix for non-default addr spaces.  */
16194	  if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
16195	    len++;
16196
16197	  return len;
16198	}
16199    }
16200  return 0;
16201}
16202
/* Compute the default value for the "length_vex" attribute.  It includes
   the 2- or 3-byte VEX prefix and 1 opcode byte.  */
16205
16206int
16207ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
16208			      bool has_vex_w)
16209{
16210  int i;
16211
  /* Only the 0f opcode map can use the 2-byte VEX prefix; the VEX W bit
     requires the 3-byte VEX prefix.  */
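  /* For example, a VEX instruction in the 0f map that uses neither VEX.W
     nor any extended registers can use the 2-byte (C5) prefix; anything
     else needs the 3-byte (C4) form.  */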
16214  if (!has_0f_opcode || has_vex_w)
16215    return 3 + 1;
16216
  /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
16218  if (!TARGET_64BIT)
16219    return 2 + 1;
16220
16221  extract_insn_cached (insn);
16222
16223  for (i = recog_data.n_operands - 1; i >= 0; --i)
16224    if (REG_P (recog_data.operand[i]))
16225      {
16226	/* REX.W bit uses 3 byte VEX prefix.  */
16227	if (GET_MODE (recog_data.operand[i]) == DImode
16228	    && GENERAL_REG_P (recog_data.operand[i]))
16229	  return 3 + 1;
16230      }
16231    else
16232      {
16233	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
16234	if (MEM_P (recog_data.operand[i])
16235	    && x86_extended_reg_mentioned_p (recog_data.operand[i]))
16236	  return 3 + 1;
16237      }
16238
16239  return 2 + 1;
16240}
16241
16242
16243static bool
16244ix86_class_likely_spilled_p (reg_class_t);
16245
/* Return true if the lhs of INSN is a HW function argument register, and
   set IS_SPILLED to true if it is a likely-spilled HW register.  */
16248static bool
16249insn_is_function_arg (rtx insn, bool* is_spilled)
16250{
16251  rtx dst;
16252
16253  if (!NONDEBUG_INSN_P (insn))
16254    return false;
  /* Call instructions are not movable; ignore them.  */
16256  if (CALL_P (insn))
16257    return false;
16258  insn = PATTERN (insn);
16259  if (GET_CODE (insn) == PARALLEL)
16260    insn = XVECEXP (insn, 0, 0);
16261  if (GET_CODE (insn) != SET)
16262    return false;
16263  dst = SET_DEST (insn);
16264  if (REG_P (dst) && HARD_REGISTER_P (dst)
16265      && ix86_function_arg_regno_p (REGNO (dst)))
16266    {
      /* Is it a likely-spilled HW register?  */
16268      if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
16269	  && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
16270	*is_spilled = true;
16271      return true;
16272    }
16273  return false;
16274}
16275
/* Add output dependencies for a chain of adjacent function-argument moves,
   but only if there is a move to a likely-spilled HW register.  Return the
   first argument if at least one dependence was added, or NULL otherwise.  */
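/* For instance, roughly, before a call such as

	movl	$1, %edi
	movl	$2, %esi
	call	foo

   an output dependence is added between the two argument moves (they target
   likely-spilled hard registers), so the pre-reload scheduler keeps the
   argument block together.  */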
16279static rtx_insn *
16280add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
16281{
16282  rtx_insn *insn;
16283  rtx_insn *last = call;
16284  rtx_insn *first_arg = NULL;
16285  bool is_spilled = false;
16286
16287  head = PREV_INSN (head);
16288
  /* Find the argument-passing instruction nearest to the call.  */
16290  while (true)
16291    {
16292      last = PREV_INSN (last);
16293      if (last == head)
16294	return NULL;
16295      if (!NONDEBUG_INSN_P (last))
16296	continue;
16297      if (insn_is_function_arg (last, &is_spilled))
16298	break;
16299      return NULL;
16300    }
16301
16302  first_arg = last;
16303  while (true)
16304    {
16305      insn = PREV_INSN (last);
16306      if (!INSN_P (insn))
16307	break;
16308      if (insn == head)
16309	break;
16310      if (!NONDEBUG_INSN_P (insn))
16311	{
16312	  last = insn;
16313	  continue;
16314	}
16315      if (insn_is_function_arg (insn, &is_spilled))
16316	{
	  /* Add an output dependence between two function arguments if the
	     chain of output arguments contains likely-spilled HW registers.  */
16319	  if (is_spilled)
16320	    add_dependence (first_arg, insn, REG_DEP_OUTPUT);
16321	  first_arg = last = insn;
16322	}
16323      else
16324	break;
16325    }
16326  if (!is_spilled)
16327    return NULL;
16328  return first_arg;
16329}
16330
/* Add an output or anti dependency from INSN to FIRST_ARG to restrict its
   code motion.  */
16333static void
16334avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
16335{
16336  rtx set;
16337  rtx tmp;
16338
16339  set = single_set (insn);
16340  if (!set)
16341    return;
16342  tmp = SET_DEST (set);
16343  if (REG_P (tmp))
16344    {
16345      /* Add output dependency to the first function argument.  */
16346      add_dependence (first_arg, insn, REG_DEP_OUTPUT);
16347      return;
16348    }
16349  /* Add anti dependency.  */
16350  add_dependence (first_arg, insn, REG_DEP_ANTI);
16351}
16352
/* Avoid cross-block motion of a function argument by adding a dependency
   from the first non-jump instruction in BB.  */
16355static void
16356add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
16357{
16358  rtx_insn *insn = BB_END (bb);
16359
16360  while (insn)
16361    {
16362      if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
16363	{
16364	  rtx set = single_set (insn);
16365	  if (set)
16366	    {
16367	      avoid_func_arg_motion (arg, insn);
16368	      return;
16369	    }
16370	}
16371      if (insn == BB_HEAD (bb))
16372	return;
16373      insn = PREV_INSN (insn);
16374    }
16375}
16376
/* Hook for the pre-reload scheduler - avoid motion of function arguments
   passed in likely-spilled HW registers.  */
16379static void
16380ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
16381{
16382  rtx_insn *insn;
16383  rtx_insn *first_arg = NULL;
16384  if (reload_completed)
16385    return;
16386  while (head != tail && DEBUG_INSN_P (head))
16387    head = NEXT_INSN (head);
16388  for (insn = tail; insn != head; insn = PREV_INSN (insn))
16389    if (INSN_P (insn) && CALL_P (insn))
16390      {
16391	first_arg = add_parameter_dependencies (insn, head);
16392	if (first_arg)
16393	  {
	    /* Add a dependee for the first argument to predecessors, but only
	       if the region contains more than one block.  */
	    basic_block bb = BLOCK_FOR_INSN (insn);
16397	    int rgn = CONTAINING_RGN (bb->index);
16398	    int nr_blks = RGN_NR_BLOCKS (rgn);
16399	    /* Skip trivial regions and region head blocks that can have
16400	       predecessors outside of region.  */
16401	    if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
16402	      {
16403		edge e;
16404		edge_iterator ei;
16405
16406		/* Regions are SCCs with the exception of selective
16407		   scheduling with pipelining of outer blocks enabled.
16408		   So also check that immediate predecessors of a non-head
16409		   block are in the same region.  */
16410		FOR_EACH_EDGE (e, ei, bb->preds)
16411		  {
		    /* Avoid creating loop-carried dependencies by using the
		       topological ordering in the region.  */
16414		    if (rgn == CONTAINING_RGN (e->src->index)
16415			&& BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
16416		      add_dependee_for_func_arg (first_arg, e->src);
16417		  }
16418	      }
16419	    insn = first_arg;
16420	    if (insn == head)
16421	      break;
16422	  }
16423      }
16424    else if (first_arg)
16425      avoid_func_arg_motion (first_arg, insn);
16426}
16427
/* Hook for the pre-reload scheduler - set the priority of moves from
   likely-spilled HW registers to the maximum, to schedule them as soon as
   possible.  These are moves from function argument registers at the top of
   the function entry and moves from function return value registers after a
   call.  */
16432static int
16433ix86_adjust_priority (rtx_insn *insn, int priority)
16434{
16435  rtx set;
16436
16437  if (reload_completed)
16438    return priority;
16439
16440  if (!NONDEBUG_INSN_P (insn))
16441    return priority;
16442
16443  set = single_set (insn);
16444  if (set)
16445    {
16446      rtx tmp = SET_SRC (set);
16447      if (REG_P (tmp)
16448          && HARD_REGISTER_P (tmp)
16449          && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
16450          && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
16451	return current_sched_info->sched_max_insns_priority;
16452    }
16453
16454  return priority;
16455}
16456
16457/* Prepare for scheduling pass.  */
16458static void
16459ix86_sched_init_global (FILE *, int, int)
16460{
16461  /* Install scheduling hooks for current CPU.  Some of these hooks are used
16462     in time-critical parts of the scheduler, so we only set them up when
16463     they are actually used.  */
16464  switch (ix86_tune)
16465    {
16466    case PROCESSOR_CORE2:
16467    case PROCESSOR_NEHALEM:
16468    case PROCESSOR_SANDYBRIDGE:
16469    case PROCESSOR_HASWELL:
16470    case PROCESSOR_GENERIC:
      /* Do not perform multipass scheduling for the pre-reload schedule,
         to save compile time.  */
16473      if (reload_completed)
16474	{
16475	  ix86_core2i7_init_hooks ();
16476	  break;
16477	}
16478      /* Fall through.  */
16479    default:
16480      targetm.sched.dfa_post_advance_cycle = NULL;
16481      targetm.sched.first_cycle_multipass_init = NULL;
16482      targetm.sched.first_cycle_multipass_begin = NULL;
16483      targetm.sched.first_cycle_multipass_issue = NULL;
16484      targetm.sched.first_cycle_multipass_backtrack = NULL;
16485      targetm.sched.first_cycle_multipass_end = NULL;
16486      targetm.sched.first_cycle_multipass_fini = NULL;
16487      break;
16488    }
16489}
16490
16491
16492/* Implement TARGET_STATIC_RTX_ALIGNMENT.  */
16493
16494static HOST_WIDE_INT
16495ix86_static_rtx_alignment (machine_mode mode)
16496{
16497  if (mode == DFmode)
16498    return 64;
16499  if (ALIGN_MODE_128 (mode))
16500    return MAX (128, GET_MODE_ALIGNMENT (mode));
16501  return GET_MODE_ALIGNMENT (mode);
16502}
16503
16504/* Implement TARGET_CONSTANT_ALIGNMENT.  */
16505
16506static HOST_WIDE_INT
16507ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
16508{
16509  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
16510      || TREE_CODE (exp) == INTEGER_CST)
16511    {
16512      machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
16513      HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
16514      return MAX (mode_align, align);
16515    }
16516  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16517	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16518    return BITS_PER_WORD;
16519
16520  return align;
16521}
16522
16523/* Implement TARGET_EMPTY_RECORD_P.  */
16524
16525static bool
16526ix86_is_empty_record (const_tree type)
16527{
16528  if (!TARGET_64BIT)
16529    return false;
16530  return default_is_empty_record (type);
16531}
16532
16533/* Implement TARGET_WARN_PARAMETER_PASSING_ABI.  */
16534
16535static void
16536ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
16537{
16538  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
16539
16540  if (!cum->warn_empty)
16541    return;
16542
16543  if (!TYPE_EMPTY_P (type))
16544    return;
16545
16546  /* Don't warn if the function isn't visible outside of the TU.  */
16547  if (cum->decl && !TREE_PUBLIC (cum->decl))
16548    return;
16549
16550  const_tree ctx = get_ultimate_context (cum->decl);
16551  if (ctx != NULL_TREE
16552      && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
16553    return;
16554
16555  /* If the actual size of the type is zero, then there is no change
16556     in how objects of this size are passed.  */
16557  if (int_size_in_bytes (type) == 0)
16558    return;
16559
16560  warning (OPT_Wabi, "empty class %qT parameter passing ABI "
16561	   "changes in %<-fabi-version=12%> (GCC 8)", type);
16562
16563  /* Only warn once.  */
16564  cum->warn_empty = false;
16565}
16566
16567/* This hook returns name of multilib ABI.  */
16568
16569static const char *
16570ix86_get_multilib_abi_name (void)
16571{
16572  if (!(TARGET_64BIT_P (ix86_isa_flags)))
16573    return "i386";
16574  else if (TARGET_X32_P (ix86_isa_flags))
16575    return "x32";
16576  else
16577    return "x86_64";
16578}
16579
16580/* Compute the alignment for a variable for Intel MCU psABI.  TYPE is
16581   the data type, and ALIGN is the alignment that the object would
16582   ordinarily have.  */
16583
16584static int
16585iamcu_alignment (tree type, int align)
16586{
16587  machine_mode mode;
16588
16589  if (align < 32 || TYPE_USER_ALIGN (type))
16590    return align;
16591
  /* The Intel MCU psABI specifies that scalar types larger than 4 bytes are
     aligned to 4 bytes.  */
16594  mode = TYPE_MODE (strip_array_types (type));
16595  switch (GET_MODE_CLASS (mode))
16596    {
16597    case MODE_INT:
16598    case MODE_COMPLEX_INT:
16599    case MODE_COMPLEX_FLOAT:
16600    case MODE_FLOAT:
16601    case MODE_DECIMAL_FLOAT:
16602      return 32;
16603    default:
16604      return align;
16605    }
16606}
16607
16608/* Compute the alignment for a static variable.
16609   TYPE is the data type, and ALIGN is the alignment that
16610   the object would ordinarily have.  The value of this function is used
16611   instead of that alignment to align the object.  */
16612
16613int
16614ix86_data_alignment (tree type, unsigned int align, bool opt)
16615{
16616  /* GCC 4.8 and earlier used to incorrectly assume this alignment even
16617     for symbols from other compilation units or symbols that don't need
16618     to bind locally.  In order to preserve some ABI compatibility with
16619     those compilers, ensure we don't decrease alignment from what we
16620     used to assume.  */
16621
16622  unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
16623
  /* A data structure whose size is equal to or greater than the size of a
     cache line (64 bytes on the Pentium 4 and other recent Intel processors,
     including processors based on the Intel Core microarchitecture) should
     be aligned so that its base address is a multiple of the cache line
     size.  */
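  /* With a 64-byte prefetch block this makes max_align 512 bits, so e.g. a
     64-byte or larger aggregate gets 64-byte alignment when OPT is set.  */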
16628
16629  unsigned int max_align
16630    = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
16631
16632  if (max_align < BITS_PER_WORD)
16633    max_align = BITS_PER_WORD;
16634
16635  switch (ix86_align_data_type)
16636    {
16637    case ix86_align_data_type_abi: opt = false; break;
16638    case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
16639    case ix86_align_data_type_cacheline: break;
16640    }
16641
16642  if (TARGET_IAMCU)
16643    align = iamcu_alignment (type, align);
16644
16645  if (opt
16646      && AGGREGATE_TYPE_P (type)
16647      && TYPE_SIZE (type)
16648      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
16649    {
16650      if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
16651	  && align < max_align_compat)
16652	align = max_align_compat;
16653      if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
16654	  && align < max_align)
16655	align = max_align;
16656    }
16657
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
16660  if (TARGET_64BIT)
16661    {
16662      if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
16663	  && TYPE_SIZE (type)
16664	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16665	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
16666	  && align < 128)
16667	return 128;
16668    }
16669
16670  if (!opt)
16671    return align;
16672
16673  if (TREE_CODE (type) == ARRAY_TYPE)
16674    {
16675      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16676	return 64;
16677      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16678	return 128;
16679    }
16680  else if (TREE_CODE (type) == COMPLEX_TYPE)
16681    {
16682
16683      if (TYPE_MODE (type) == DCmode && align < 64)
16684	return 64;
16685      if ((TYPE_MODE (type) == XCmode
16686	   || TYPE_MODE (type) == TCmode) && align < 128)
16687	return 128;
16688    }
16689  else if ((TREE_CODE (type) == RECORD_TYPE
16690	    || TREE_CODE (type) == UNION_TYPE
16691	    || TREE_CODE (type) == QUAL_UNION_TYPE)
16692	   && TYPE_FIELDS (type))
16693    {
16694      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16695	return 64;
16696      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16697	return 128;
16698    }
16699  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16700	   || TREE_CODE (type) == INTEGER_TYPE)
16701    {
16702      if (TYPE_MODE (type) == DFmode && align < 64)
16703	return 64;
16704      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16705	return 128;
16706    }
16707
16708  return align;
16709}
16710
16711/* Compute the alignment for a local variable or a stack slot.  EXP is
16712   the data type or decl itself, MODE is the widest mode available and
16713   ALIGN is the alignment that the object would ordinarily have.  The
16714   value of this macro is used instead of that alignment to align the
16715   object.  */
16716
16717unsigned int
16718ix86_local_alignment (tree exp, machine_mode mode,
16719		      unsigned int align)
16720{
16721  tree type, decl;
16722
16723  if (exp && DECL_P (exp))
16724    {
16725      type = TREE_TYPE (exp);
16726      decl = exp;
16727    }
16728  else
16729    {
16730      type = exp;
16731      decl = NULL;
16732    }
16733
16734  /* Don't do dynamic stack realignment for long long objects with
16735     -mpreferred-stack-boundary=2.  */
16736  if (!TARGET_64BIT
16737      && align == 64
16738      && ix86_preferred_stack_boundary < 64
16739      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
16740      && (!type || !TYPE_USER_ALIGN (type))
16741      && (!decl || !DECL_USER_ALIGN (decl)))
16742    align = 32;
16743
  /* If TYPE is NULL, we are allocating a stack slot for a caller-save
     register in MODE.  We will return the larger of the XFmode and
     DFmode alignments.  */
16747  if (!type)
16748    {
16749      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
16750	align = GET_MODE_ALIGNMENT (DFmode);
16751      return align;
16752    }
16753
16754  /* Don't increase alignment for Intel MCU psABI.  */
16755  if (TARGET_IAMCU)
16756    return align;
16757
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  The exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions on arrays.  This
     rule is meant for static storage (where the compiler cannot do the
     analysis by itself).  We follow it for automatic variables only when it
     is convenient.  We fully control everything in the function being
     compiled, and functions from other units cannot rely on the alignment.

     Exclude the va_list type.  It is the common case of a local array where
     we cannot benefit from the alignment.

     TODO: Probably one should optimize for size only when the variable does
     not escape.  */
16775  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
16776      && TARGET_SSE)
16777    {
16778      if (AGGREGATE_TYPE_P (type)
16779	  && (va_list_type_node == NULL_TREE
16780	      || (TYPE_MAIN_VARIANT (type)
16781		  != TYPE_MAIN_VARIANT (va_list_type_node)))
16782	  && TYPE_SIZE (type)
16783	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16784	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
16785	  && align < 128)
16786	return 128;
16787    }
16788  if (TREE_CODE (type) == ARRAY_TYPE)
16789    {
16790      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16791	return 64;
16792      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16793	return 128;
16794    }
16795  else if (TREE_CODE (type) == COMPLEX_TYPE)
16796    {
16797      if (TYPE_MODE (type) == DCmode && align < 64)
16798	return 64;
16799      if ((TYPE_MODE (type) == XCmode
16800	   || TYPE_MODE (type) == TCmode) && align < 128)
16801	return 128;
16802    }
16803  else if ((TREE_CODE (type) == RECORD_TYPE
16804	    || TREE_CODE (type) == UNION_TYPE
16805	    || TREE_CODE (type) == QUAL_UNION_TYPE)
16806	   && TYPE_FIELDS (type))
16807    {
16808      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16809	return 64;
16810      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16811	return 128;
16812    }
16813  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16814	   || TREE_CODE (type) == INTEGER_TYPE)
16815    {
16816
16817      if (TYPE_MODE (type) == DFmode && align < 64)
16818	return 64;
16819      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16820	return 128;
16821    }
16822  return align;
16823}
16824
16825/* Compute the minimum required alignment for dynamic stack realignment
16826   purposes for a local variable, parameter or a stack slot.  EXP is
16827   the data type or decl itself, MODE is its mode and ALIGN is the
16828   alignment that the object would ordinarily have.  */
16829
16830unsigned int
16831ix86_minimum_alignment (tree exp, machine_mode mode,
16832			unsigned int align)
16833{
16834  tree type, decl;
16835
16836  if (exp && DECL_P (exp))
16837    {
16838      type = TREE_TYPE (exp);
16839      decl = exp;
16840    }
16841  else
16842    {
16843      type = exp;
16844      decl = NULL;
16845    }
16846
16847  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
16848    return align;
16849
16850  /* Don't do dynamic stack realignment for long long objects with
16851     -mpreferred-stack-boundary=2.  */
16852  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
16853      && (!type || !TYPE_USER_ALIGN (type))
16854      && (!decl || !DECL_USER_ALIGN (decl)))
16855    {
16856      gcc_checking_assert (!TARGET_STV);
16857      return 32;
16858    }
16859
16860  return align;
16861}
16862
16863/* Find a location for the static chain incoming to a nested function.
16864   This is a register, unless all free registers are used by arguments.  */
16865
16866static rtx
16867ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
16868{
16869  unsigned regno;
16870
16871  if (TARGET_64BIT)
16872    {
16873      /* We always use R10 in 64-bit mode.  */
16874      regno = R10_REG;
16875    }
16876  else
16877    {
16878      const_tree fntype, fndecl;
16879      unsigned int ccvt;
16880
16881      /* By default in 32-bit mode we use ECX to pass the static chain.  */
16882      regno = CX_REG;
16883
16884      if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
16885	{
16886          fntype = TREE_TYPE (fndecl_or_type);
16887	  fndecl = fndecl_or_type;
16888	}
16889      else
16890	{
16891	  fntype = fndecl_or_type;
16892	  fndecl = NULL;
16893	}
16894
16895      ccvt = ix86_get_callcvt (fntype);
16896      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
16897	{
16898	  /* Fastcall functions use ecx/edx for arguments, which leaves
16899	     us with EAX for the static chain.
16900	     Thiscall functions use ecx for arguments, which also
16901	     leaves us with EAX for the static chain.  */
16902	  regno = AX_REG;
16903	}
16904      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
16905	{
16906	  /* Thiscall functions use ecx for arguments, which leaves
16907	     us with EAX and EDX for the static chain.
	     We use EAX for ABI compatibility.  */
16909	  regno = AX_REG;
16910	}
16911      else if (ix86_function_regparm (fntype, fndecl) == 3)
16912	{
16913	  /* For regparm 3, we have no free call-clobbered registers in
16914	     which to store the static chain.  In order to implement this,
16915	     we have the trampoline push the static chain to the stack.
16916	     However, we can't push a value below the return address when
16917	     we call the nested function directly, so we have to use an
16918	     alternate entry point.  For this we use ESI, and have the
16919	     alternate entry point push ESI, so that things appear the
16920	     same once we're executing the nested function.  */
16921	  if (incoming_p)
16922	    {
16923	      if (fndecl == current_function_decl
16924		  && !ix86_static_chain_on_stack)
16925		{
16926		  gcc_assert (!reload_completed);
16927		  ix86_static_chain_on_stack = true;
16928		}
16929	      return gen_frame_mem (SImode,
16930				    plus_constant (Pmode,
16931						   arg_pointer_rtx, -8));
16932	    }
16933	  regno = SI_REG;
16934	}
16935    }
16936
16937  return gen_rtx_REG (Pmode, regno);
16938}
16939
16940/* Emit RTL insns to initialize the variable parts of a trampoline.
16941   FNDECL is the decl of the target address; M_TRAMP is a MEM for
16942   the trampoline, and CHAIN_VALUE is an RTX for the static chain
16943   to be passed to the target function.  */
16944
16945static void
16946ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
16947{
16948  rtx mem, fnaddr;
16949  int opcode;
16950  int offset = 0;
16951  bool need_endbr = (flag_cf_protection & CF_BRANCH);
16952
16953  fnaddr = XEXP (DECL_RTL (fndecl), 0);
16954
16955  if (TARGET_64BIT)
16956    {
16957      int size;
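      /* Roughly, the 64-bit trampoline emitted below is (when movabs is
	 needed, and optionally preceded by endbr64):

	    49 bb <fnaddr>	movabs	$<fnaddr>, %r11
	    49 ba <chain>	movabs	$<chain>, %r10
	    49 ff e3		rex.W jmp *%r11
	    90			nop  */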
16958
16959      if (need_endbr)
16960	{
16961	  /* Insert ENDBR64.  */
16962	  mem = adjust_address (m_tramp, SImode, offset);
16963	  emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
16964	  offset += 4;
16965	}
16966
      /* Load the function address into r11.  Try to load the address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but the kernel does not use trampolines at
	 the moment.  FNADDR is a 32-bit address and may not be in
	 DImode when ptr_mode == SImode.  Always use movl in this
	 case.  */
16973      if (ptr_mode == SImode
16974	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16975	{
16976	  fnaddr = copy_addr_to_reg (fnaddr);
16977
16978	  mem = adjust_address (m_tramp, HImode, offset);
16979	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
16980
16981	  mem = adjust_address (m_tramp, SImode, offset + 2);
16982	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
16983	  offset += 6;
16984	}
16985      else
16986	{
16987	  mem = adjust_address (m_tramp, HImode, offset);
16988	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
16989
16990	  mem = adjust_address (m_tramp, DImode, offset + 2);
16991	  emit_move_insn (mem, fnaddr);
16992	  offset += 10;
16993	}
16994
      /* Load the static chain into r10 using movabs.  Use the shorter movl
	 instead of movabs when ptr_mode == SImode.  */
16997      if (ptr_mode == SImode)
16998	{
16999	  opcode = 0xba41;
17000	  size = 6;
17001	}
17002      else
17003	{
17004	  opcode = 0xba49;
17005	  size = 10;
17006	}
17007
17008      mem = adjust_address (m_tramp, HImode, offset);
17009      emit_move_insn (mem, gen_int_mode (opcode, HImode));
17010
17011      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
17012      emit_move_insn (mem, chain_value);
17013      offset += size;
17014
17015      /* Jump to r11; the last (unused) byte is a nop, only there to
17016	 pad the write out to a single 32-bit store.  */
17017      mem = adjust_address (m_tramp, SImode, offset);
17018      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
17019      offset += 4;
17020    }
17021  else
17022    {
17023      rtx disp, chain;
17024
17025      /* Depending on the static chain location, either load a register
17026	 with a constant, or push the constant to the stack.  All of the
17027	 instructions are the same size.  */
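      /* For the common case of an ECX static chain, this yields roughly:

	    b9 <chain>		movl	$<chain>, %ecx
	    e9 <rel32>		jmp	<fnaddr>

	 optionally preceded by endbr32, or with 68 (push) or b8 (movl to
	 %eax) instead of b9, depending on the chain location.  */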
17028      chain = ix86_static_chain (fndecl, true);
17029      if (REG_P (chain))
17030	{
17031	  switch (REGNO (chain))
17032	    {
17033	    case AX_REG:
17034	      opcode = 0xb8; break;
17035	    case CX_REG:
17036	      opcode = 0xb9; break;
17037	    default:
17038	      gcc_unreachable ();
17039	    }
17040	}
17041      else
17042	opcode = 0x68;
17043
17044      if (need_endbr)
17045	{
17046	  /* Insert ENDBR32.  */
17047	  mem = adjust_address (m_tramp, SImode, offset);
17048	  emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
17049	  offset += 4;
17050	}
17051
17052      mem = adjust_address (m_tramp, QImode, offset);
17053      emit_move_insn (mem, gen_int_mode (opcode, QImode));
17054
17055      mem = adjust_address (m_tramp, SImode, offset + 1);
17056      emit_move_insn (mem, chain_value);
17057      offset += 5;
17058
17059      mem = adjust_address (m_tramp, QImode, offset);
17060      emit_move_insn (mem, gen_int_mode (0xe9, QImode));
17061
17062      mem = adjust_address (m_tramp, SImode, offset + 1);
17063
      /* Compute the offset from the end of the jmp to the target function.
	 When the trampoline stores the static chain on the stack, we need
	 to skip the first insn, which pushes the (call-saved) register
	 static chain; this push is 1 byte.  */
17068      offset += 5;
17069      int skip = MEM_P (chain) ? 1 : 0;
17070      /* Skip ENDBR32 at the entry of the target function.  */
17071      if (need_endbr
17072	  && !cgraph_node::get (fndecl)->only_called_directly_p ())
17073	skip += 4;
17074      disp = expand_binop (SImode, sub_optab, fnaddr,
17075			   plus_constant (Pmode, XEXP (m_tramp, 0),
17076					  offset - skip),
17077			   NULL_RTX, 1, OPTAB_DIRECT);
17078      emit_move_insn (mem, disp);
17079    }
17080
17081  gcc_assert (offset <= TRAMPOLINE_SIZE);
17082
17083#ifdef HAVE_ENABLE_EXECUTE_STACK
17084#ifdef CHECK_EXECUTE_STACK_ENABLED
17085  if (CHECK_EXECUTE_STACK_ENABLED)
17086#endif
17087  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
17088		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
17089#endif
17090}
17091
17092static bool
17093ix86_allocate_stack_slots_for_args (void)
17094{
17095  /* Naked functions should not allocate stack slots for arguments.  */
17096  return !ix86_function_naked (current_function_decl);
17097}
17098
17099static bool
17100ix86_warn_func_return (tree decl)
17101{
17102  /* Naked functions are implemented entirely in assembly, including the
17103     return sequence, so suppress warnings about this.  */
17104  return !ix86_function_naked (decl);
17105}
17106
/* Return the shift count of a vector-by-scalar shift builtin, given its
   second argument ARG1.  */
17109static tree
17110ix86_vector_shift_count (tree arg1)
17111{
17112  if (tree_fits_uhwi_p (arg1))
17113    return arg1;
17114  else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
17115    {
      /* The count argument is odd: it is passed in as various 128-bit
	 (or 64-bit) vectors, and the low 64 bits of it are the count.  */
17118      unsigned char buf[16];
17119      int len = native_encode_expr (arg1, buf, 16);
17120      if (len == 0)
17121	return NULL_TREE;
17122      tree t = native_interpret_expr (uint64_type_node, buf, len);
17123      if (t && tree_fits_uhwi_p (t))
17124	return t;
17125    }
17126  return NULL_TREE;
17127}
17128
17129static tree
17130ix86_fold_builtin (tree fndecl, int n_args,
17131		   tree *args, bool ignore ATTRIBUTE_UNUSED)
17132{
17133  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
17134    {
17135      enum ix86_builtins fn_code
17136	= (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
17137      enum rtx_code rcode;
17138      bool is_vshift;
17139      unsigned HOST_WIDE_INT mask;
17140
17141      switch (fn_code)
17142	{
17143	case IX86_BUILTIN_CPU_IS:
17144	case IX86_BUILTIN_CPU_SUPPORTS:
17145	  gcc_assert (n_args == 1);
17146	  return fold_builtin_cpu (fndecl, args);
17147
17148	case IX86_BUILTIN_NANQ:
17149	case IX86_BUILTIN_NANSQ:
17150	  {
17151	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
17152	    const char *str = c_getstr (*args);
17153	    int quiet = fn_code == IX86_BUILTIN_NANQ;
17154	    REAL_VALUE_TYPE real;
17155
17156	    if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
17157	      return build_real (type, real);
17158	    return NULL_TREE;
17159	  }
17160
17161	case IX86_BUILTIN_INFQ:
17162	case IX86_BUILTIN_HUGE_VALQ:
17163	  {
17164	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
17165	    REAL_VALUE_TYPE inf;
17166	    real_inf (&inf);
17167	    return build_real (type, inf);
17168	  }
17169
17170	case IX86_BUILTIN_TZCNT16:
17171	case IX86_BUILTIN_CTZS:
17172	case IX86_BUILTIN_TZCNT32:
17173	case IX86_BUILTIN_TZCNT64:
17174	  gcc_assert (n_args == 1);
17175	  if (TREE_CODE (args[0]) == INTEGER_CST)
17176	    {
17177	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
17178	      tree arg = args[0];
17179	      if (fn_code == IX86_BUILTIN_TZCNT16
17180		  || fn_code == IX86_BUILTIN_CTZS)
17181		arg = fold_convert (short_unsigned_type_node, arg);
17182	      if (integer_zerop (arg))
17183		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
17184	      else
17185		return fold_const_call (CFN_CTZ, type, arg);
17186	    }
17187	  break;
17188
17189	case IX86_BUILTIN_LZCNT16:
17190	case IX86_BUILTIN_CLZS:
17191	case IX86_BUILTIN_LZCNT32:
17192	case IX86_BUILTIN_LZCNT64:
17193	  gcc_assert (n_args == 1);
17194	  if (TREE_CODE (args[0]) == INTEGER_CST)
17195	    {
17196	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
17197	      tree arg = args[0];
17198	      if (fn_code == IX86_BUILTIN_LZCNT16
17199		  || fn_code == IX86_BUILTIN_CLZS)
17200		arg = fold_convert (short_unsigned_type_node, arg);
17201	      if (integer_zerop (arg))
17202		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
17203	      else
17204		return fold_const_call (CFN_CLZ, type, arg);
17205	    }
17206	  break;
17207
17208	case IX86_BUILTIN_BEXTR32:
17209	case IX86_BUILTIN_BEXTR64:
17210	case IX86_BUILTIN_BEXTRI32:
17211	case IX86_BUILTIN_BEXTRI64:
17212	  gcc_assert (n_args == 2);
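	  /* The start bit is in bits 0-7 of args[1] and the length in bits
	     8-15; e.g. folding a bextr of 0xabcd with control 0x0804
	     (length 8, start 4) yields 0xbc.  */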
17213	  if (tree_fits_uhwi_p (args[1]))
17214	    {
17215	      unsigned HOST_WIDE_INT res = 0;
17216	      unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
17217	      unsigned int start = tree_to_uhwi (args[1]);
17218	      unsigned int len = (start & 0xff00) >> 8;
17219	      start &= 0xff;
17220	      if (start >= prec || len == 0)
17221		res = 0;
17222	      else if (!tree_fits_uhwi_p (args[0]))
17223		break;
17224	      else
17225		res = tree_to_uhwi (args[0]) >> start;
17226	      if (len > prec)
17227		len = prec;
17228	      if (len < HOST_BITS_PER_WIDE_INT)
17229		res &= (HOST_WIDE_INT_1U << len) - 1;
17230	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17231	    }
17232	  break;
17233
17234	case IX86_BUILTIN_BZHI32:
17235	case IX86_BUILTIN_BZHI64:
17236	  gcc_assert (n_args == 2);
17237	  if (tree_fits_uhwi_p (args[1]))
17238	    {
17239	      unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
17240	      if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
17241		return args[0];
17242	      if (idx == 0)
17243		return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
17244	      if (!tree_fits_uhwi_p (args[0]))
17245		break;
17246	      unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
17247	      res &= ~(HOST_WIDE_INT_M1U << idx);
17248	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17249	    }
17250	  break;
17251
17252	case IX86_BUILTIN_PDEP32:
17253	case IX86_BUILTIN_PDEP64:
17254	  gcc_assert (n_args == 2);
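	  /* PDEP deposits the low source bits into the positions selected
	     by the mask; e.g. pdep (0x5, 0x1a) folds to 0x12.  */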
17255	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
17256	    {
17257	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
17258	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
17259	      unsigned HOST_WIDE_INT res = 0;
17260	      unsigned HOST_WIDE_INT m, k = 1;
17261	      for (m = 1; m; m <<= 1)
17262		if ((mask & m) != 0)
17263		  {
17264		    if ((src & k) != 0)
17265		      res |= m;
17266		    k <<= 1;
17267		  }
17268	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17269	    }
17270	  break;
17271
17272	case IX86_BUILTIN_PEXT32:
17273	case IX86_BUILTIN_PEXT64:
17274	  gcc_assert (n_args == 2);
17275	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
17276	    {
17277	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
17278	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
17279	      unsigned HOST_WIDE_INT res = 0;
17280	      unsigned HOST_WIDE_INT m, k = 1;
17281	      for (m = 1; m; m <<= 1)
17282		if ((mask & m) != 0)
17283		  {
17284		    if ((src & m) != 0)
17285		      res |= k;
17286		    k <<= 1;
17287		  }
17288	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17289	    }
17290	  break;
17291
17292	case IX86_BUILTIN_MOVMSKPS:
17293	case IX86_BUILTIN_PMOVMSKB:
17294	case IX86_BUILTIN_MOVMSKPD:
17295	case IX86_BUILTIN_PMOVMSKB128:
17296	case IX86_BUILTIN_MOVMSKPD256:
17297	case IX86_BUILTIN_MOVMSKPS256:
17298	case IX86_BUILTIN_PMOVMSKB256:
17299	  gcc_assert (n_args == 1);
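	  /* Each result bit is the sign bit of the corresponding element;
	     e.g. movmskps of { -1.0, 2.0, -3.0, 4.0 } folds to 0b0101 = 5.  */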
17300	  if (TREE_CODE (args[0]) == VECTOR_CST)
17301	    {
17302	      HOST_WIDE_INT res = 0;
17303	      for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
17304		{
17305		  tree e = VECTOR_CST_ELT (args[0], i);
17306		  if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
17307		    {
17308		      if (wi::neg_p (wi::to_wide (e)))
17309			res |= HOST_WIDE_INT_1 << i;
17310		    }
17311		  else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
17312		    {
17313		      if (TREE_REAL_CST (e).sign)
17314			res |= HOST_WIDE_INT_1 << i;
17315		    }
17316		  else
17317		    return NULL_TREE;
17318		}
17319	      return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
17320	    }
17321	  break;
17322
17323	case IX86_BUILTIN_PSLLD:
17324	case IX86_BUILTIN_PSLLD128:
17325	case IX86_BUILTIN_PSLLD128_MASK:
17326	case IX86_BUILTIN_PSLLD256:
17327	case IX86_BUILTIN_PSLLD256_MASK:
17328	case IX86_BUILTIN_PSLLD512:
17329	case IX86_BUILTIN_PSLLDI:
17330	case IX86_BUILTIN_PSLLDI128:
17331	case IX86_BUILTIN_PSLLDI128_MASK:
17332	case IX86_BUILTIN_PSLLDI256:
17333	case IX86_BUILTIN_PSLLDI256_MASK:
17334	case IX86_BUILTIN_PSLLDI512:
17335	case IX86_BUILTIN_PSLLQ:
17336	case IX86_BUILTIN_PSLLQ128:
17337	case IX86_BUILTIN_PSLLQ128_MASK:
17338	case IX86_BUILTIN_PSLLQ256:
17339	case IX86_BUILTIN_PSLLQ256_MASK:
17340	case IX86_BUILTIN_PSLLQ512:
17341	case IX86_BUILTIN_PSLLQI:
17342	case IX86_BUILTIN_PSLLQI128:
17343	case IX86_BUILTIN_PSLLQI128_MASK:
17344	case IX86_BUILTIN_PSLLQI256:
17345	case IX86_BUILTIN_PSLLQI256_MASK:
17346	case IX86_BUILTIN_PSLLQI512:
17347	case IX86_BUILTIN_PSLLW:
17348	case IX86_BUILTIN_PSLLW128:
17349	case IX86_BUILTIN_PSLLW128_MASK:
17350	case IX86_BUILTIN_PSLLW256:
17351	case IX86_BUILTIN_PSLLW256_MASK:
17352	case IX86_BUILTIN_PSLLW512_MASK:
17353	case IX86_BUILTIN_PSLLWI:
17354	case IX86_BUILTIN_PSLLWI128:
17355	case IX86_BUILTIN_PSLLWI128_MASK:
17356	case IX86_BUILTIN_PSLLWI256:
17357	case IX86_BUILTIN_PSLLWI256_MASK:
17358	case IX86_BUILTIN_PSLLWI512_MASK:
17359	  rcode = ASHIFT;
17360	  is_vshift = false;
17361	  goto do_shift;
17362	case IX86_BUILTIN_PSRAD:
17363	case IX86_BUILTIN_PSRAD128:
17364	case IX86_BUILTIN_PSRAD128_MASK:
17365	case IX86_BUILTIN_PSRAD256:
17366	case IX86_BUILTIN_PSRAD256_MASK:
17367	case IX86_BUILTIN_PSRAD512:
17368	case IX86_BUILTIN_PSRADI:
17369	case IX86_BUILTIN_PSRADI128:
17370	case IX86_BUILTIN_PSRADI128_MASK:
17371	case IX86_BUILTIN_PSRADI256:
17372	case IX86_BUILTIN_PSRADI256_MASK:
17373	case IX86_BUILTIN_PSRADI512:
17374	case IX86_BUILTIN_PSRAQ128_MASK:
17375	case IX86_BUILTIN_PSRAQ256_MASK:
17376	case IX86_BUILTIN_PSRAQ512:
17377	case IX86_BUILTIN_PSRAQI128_MASK:
17378	case IX86_BUILTIN_PSRAQI256_MASK:
17379	case IX86_BUILTIN_PSRAQI512:
17380	case IX86_BUILTIN_PSRAW:
17381	case IX86_BUILTIN_PSRAW128:
17382	case IX86_BUILTIN_PSRAW128_MASK:
17383	case IX86_BUILTIN_PSRAW256:
17384	case IX86_BUILTIN_PSRAW256_MASK:
17385	case IX86_BUILTIN_PSRAW512:
17386	case IX86_BUILTIN_PSRAWI:
17387	case IX86_BUILTIN_PSRAWI128:
17388	case IX86_BUILTIN_PSRAWI128_MASK:
17389	case IX86_BUILTIN_PSRAWI256:
17390	case IX86_BUILTIN_PSRAWI256_MASK:
17391	case IX86_BUILTIN_PSRAWI512:
17392	  rcode = ASHIFTRT;
17393	  is_vshift = false;
17394	  goto do_shift;
17395	case IX86_BUILTIN_PSRLD:
17396	case IX86_BUILTIN_PSRLD128:
17397	case IX86_BUILTIN_PSRLD128_MASK:
17398	case IX86_BUILTIN_PSRLD256:
17399	case IX86_BUILTIN_PSRLD256_MASK:
17400	case IX86_BUILTIN_PSRLD512:
17401	case IX86_BUILTIN_PSRLDI:
17402	case IX86_BUILTIN_PSRLDI128:
17403	case IX86_BUILTIN_PSRLDI128_MASK:
17404	case IX86_BUILTIN_PSRLDI256:
17405	case IX86_BUILTIN_PSRLDI256_MASK:
17406	case IX86_BUILTIN_PSRLDI512:
17407	case IX86_BUILTIN_PSRLQ:
17408	case IX86_BUILTIN_PSRLQ128:
17409	case IX86_BUILTIN_PSRLQ128_MASK:
17410	case IX86_BUILTIN_PSRLQ256:
17411	case IX86_BUILTIN_PSRLQ256_MASK:
17412	case IX86_BUILTIN_PSRLQ512:
17413	case IX86_BUILTIN_PSRLQI:
17414	case IX86_BUILTIN_PSRLQI128:
17415	case IX86_BUILTIN_PSRLQI128_MASK:
17416	case IX86_BUILTIN_PSRLQI256:
17417	case IX86_BUILTIN_PSRLQI256_MASK:
17418	case IX86_BUILTIN_PSRLQI512:
17419	case IX86_BUILTIN_PSRLW:
17420	case IX86_BUILTIN_PSRLW128:
17421	case IX86_BUILTIN_PSRLW128_MASK:
17422	case IX86_BUILTIN_PSRLW256:
17423	case IX86_BUILTIN_PSRLW256_MASK:
17424	case IX86_BUILTIN_PSRLW512:
17425	case IX86_BUILTIN_PSRLWI:
17426	case IX86_BUILTIN_PSRLWI128:
17427	case IX86_BUILTIN_PSRLWI128_MASK:
17428	case IX86_BUILTIN_PSRLWI256:
17429	case IX86_BUILTIN_PSRLWI256_MASK:
17430	case IX86_BUILTIN_PSRLWI512:
17431	  rcode = LSHIFTRT;
17432	  is_vshift = false;
17433	  goto do_shift;
17434	case IX86_BUILTIN_PSLLVV16HI:
17435	case IX86_BUILTIN_PSLLVV16SI:
17436	case IX86_BUILTIN_PSLLVV2DI:
17437	case IX86_BUILTIN_PSLLVV2DI_MASK:
17438	case IX86_BUILTIN_PSLLVV32HI:
17439	case IX86_BUILTIN_PSLLVV4DI:
17440	case IX86_BUILTIN_PSLLVV4DI_MASK:
17441	case IX86_BUILTIN_PSLLVV4SI:
17442	case IX86_BUILTIN_PSLLVV4SI_MASK:
17443	case IX86_BUILTIN_PSLLVV8DI:
17444	case IX86_BUILTIN_PSLLVV8HI:
17445	case IX86_BUILTIN_PSLLVV8SI:
17446	case IX86_BUILTIN_PSLLVV8SI_MASK:
17447	  rcode = ASHIFT;
17448	  is_vshift = true;
17449	  goto do_shift;
17450	case IX86_BUILTIN_PSRAVQ128:
17451	case IX86_BUILTIN_PSRAVQ256:
17452	case IX86_BUILTIN_PSRAVV16HI:
17453	case IX86_BUILTIN_PSRAVV16SI:
17454	case IX86_BUILTIN_PSRAVV32HI:
17455	case IX86_BUILTIN_PSRAVV4SI:
17456	case IX86_BUILTIN_PSRAVV4SI_MASK:
17457	case IX86_BUILTIN_PSRAVV8DI:
17458	case IX86_BUILTIN_PSRAVV8HI:
17459	case IX86_BUILTIN_PSRAVV8SI:
17460	case IX86_BUILTIN_PSRAVV8SI_MASK:
17461	  rcode = ASHIFTRT;
17462	  is_vshift = true;
17463	  goto do_shift;
17464	case IX86_BUILTIN_PSRLVV16HI:
17465	case IX86_BUILTIN_PSRLVV16SI:
17466	case IX86_BUILTIN_PSRLVV2DI:
17467	case IX86_BUILTIN_PSRLVV2DI_MASK:
17468	case IX86_BUILTIN_PSRLVV32HI:
17469	case IX86_BUILTIN_PSRLVV4DI:
17470	case IX86_BUILTIN_PSRLVV4DI_MASK:
17471	case IX86_BUILTIN_PSRLVV4SI:
17472	case IX86_BUILTIN_PSRLVV4SI_MASK:
17473	case IX86_BUILTIN_PSRLVV8DI:
17474	case IX86_BUILTIN_PSRLVV8HI:
17475	case IX86_BUILTIN_PSRLVV8SI:
17476	case IX86_BUILTIN_PSRLVV8SI_MASK:
17477	  rcode = LSHIFTRT;
17478	  is_vshift = true;
17479	  goto do_shift;
17480
17481	do_shift:
17482	  gcc_assert (n_args >= 2);
17483	  if (TREE_CODE (args[0]) != VECTOR_CST)
17484	    break;
17485	  mask = HOST_WIDE_INT_M1U;
17486	  if (n_args > 2)
17487	    {
17488	      /* This is a masked shift.  */
17489	      if (!tree_fits_uhwi_p (args[n_args - 1])
17490		  || TREE_SIDE_EFFECTS (args[n_args - 2]))
17491		break;
17492	      mask = tree_to_uhwi (args[n_args - 1]);
17493	      unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
17494	      mask |= HOST_WIDE_INT_M1U << elems;
17495	      if (mask != HOST_WIDE_INT_M1U
17496		  && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
17497		break;
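	      /* If the writemask selects no element at all, the result is
		 simply the merge (pass-through) operand.  */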
17498	      if (mask == (HOST_WIDE_INT_M1U << elems))
17499		return args[n_args - 2];
17500	    }
17501	  if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
17502	    break;
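	  /* For the scalar-count forms extract the count from args[1] now;
	     the variable-count forms use a dummy count of 1 here and read
	     the per-element counts inside the loop below.  */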
17503	  if (tree tem = (is_vshift ? integer_one_node
17504			  : ix86_vector_shift_count (args[1])))
17505	    {
17506	      unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
17507	      unsigned HOST_WIDE_INT prec
17508		= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
17509	      if (count == 0 && mask == HOST_WIDE_INT_M1U)
17510		return args[0];
17511	      if (count >= prec)
17512		{
17513		  if (rcode == ASHIFTRT)
17514		    count = prec - 1;
17515		  else if (mask == HOST_WIDE_INT_M1U)
17516		    return build_zero_cst (TREE_TYPE (args[0]));
17517		}
17518	      tree countt = NULL_TREE;
17519	      if (!is_vshift)
17520		{
17521		  if (count >= prec)
17522		    countt = integer_zero_node;
17523		  else
17524		    countt = build_int_cst (integer_type_node, count);
17525		}
17526	      tree_vector_builder builder;
17527	      if (mask != HOST_WIDE_INT_M1U || is_vshift)
17528		builder.new_vector (TREE_TYPE (args[0]),
17529				    TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
17530				    1);
17531	      else
17532		builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
17533					     false);
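	      /* Fold the shift element by element.  Elements whose writemask
		 bit is clear are taken from the merge operand instead of the
		 shifted value.  */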
17534	      unsigned int cnt = builder.encoded_nelts ();
17535	      for (unsigned int i = 0; i < cnt; ++i)
17536		{
17537		  tree elt = VECTOR_CST_ELT (args[0], i);
17538		  if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
17539		    return NULL_TREE;
17540		  tree type = TREE_TYPE (elt);
17541		  if (rcode == LSHIFTRT)
17542		    elt = fold_convert (unsigned_type_for (type), elt);
17543		  if (is_vshift)
17544		    {
17545		      countt = VECTOR_CST_ELT (args[1], i);
17546		      if (TREE_CODE (countt) != INTEGER_CST
17547			  || TREE_OVERFLOW (countt))
17548			return NULL_TREE;
17549		      if (wi::neg_p (wi::to_wide (countt))
17550			  || wi::to_widest (countt) >= prec)
17551			{
17552			  if (rcode == ASHIFTRT)
17553			    countt = build_int_cst (TREE_TYPE (countt),
17554						    prec - 1);
17555			  else
17556			    {
17557			      elt = build_zero_cst (TREE_TYPE (elt));
17558			      countt = build_zero_cst (TREE_TYPE (countt));
17559			    }
17560			}
17561		    }
17562		  else if (count >= prec)
17563		    elt = build_zero_cst (TREE_TYPE (elt));
17564		  elt = const_binop (rcode == ASHIFT
17565				     ? LSHIFT_EXPR : RSHIFT_EXPR,
17566				     TREE_TYPE (elt), elt, countt);
17567		  if (!elt || TREE_CODE (elt) != INTEGER_CST)
17568		    return NULL_TREE;
17569		  if (rcode == LSHIFTRT)
17570		    elt = fold_convert (type, elt);
17571		  if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
17572		    {
17573		      elt = VECTOR_CST_ELT (args[n_args - 2], i);
17574		      if (TREE_CODE (elt) != INTEGER_CST
17575			  || TREE_OVERFLOW (elt))
17576			return NULL_TREE;
17577		    }
17578		  builder.quick_push (elt);
17579		}
17580	      return builder.build ();
17581	    }
17582	  break;
17583
17584	default:
17585	  break;
17586	}
17587    }
17588
17589#ifdef SUBTARGET_FOLD_BUILTIN
17590  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
17591#endif
17592
17593  return NULL_TREE;
17594}
17595
17596/* Fold an MD builtin (use ix86_fold_builtin for folding into a
17597   constant) in GIMPLE.  */
17598
17599bool
17600ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
17601{
17602  gimple *stmt = gsi_stmt (*gsi);
17603  tree fndecl = gimple_call_fndecl (stmt);
17604  gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
17605  int n_args = gimple_call_num_args (stmt);
17606  enum ix86_builtins fn_code
17607    = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
17608  tree decl = NULL_TREE;
17609  tree arg0, arg1, arg2;
17610  enum rtx_code rcode;
17611  unsigned HOST_WIDE_INT count;
17612  bool is_vshift;
17613
17614  switch (fn_code)
17615    {
17616    case IX86_BUILTIN_TZCNT32:
17617      decl = builtin_decl_implicit (BUILT_IN_CTZ);
17618      goto fold_tzcnt_lzcnt;
17619
17620    case IX86_BUILTIN_TZCNT64:
17621      decl = builtin_decl_implicit (BUILT_IN_CTZLL);
17622      goto fold_tzcnt_lzcnt;
17623
17624    case IX86_BUILTIN_LZCNT32:
17625      decl = builtin_decl_implicit (BUILT_IN_CLZ);
17626      goto fold_tzcnt_lzcnt;
17627
17628    case IX86_BUILTIN_LZCNT64:
17629      decl = builtin_decl_implicit (BUILT_IN_CLZLL);
17630      goto fold_tzcnt_lzcnt;
17631
17632    fold_tzcnt_lzcnt:
17633      gcc_assert (n_args == 1);
17634      arg0 = gimple_call_arg (stmt, 0);
17635      if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
17636	{
17637	  int prec = TYPE_PRECISION (TREE_TYPE (arg0));
17638	  /* If arg0 is provably non-zero, optimize into the generic
17639	     __builtin_c[tl]z{,ll} function, which the middle-end handles
17640	     better.  */
17641	  if (!expr_not_equal_to (arg0, wi::zero (prec)))
17642	    return false;
17643
17644	  location_t loc = gimple_location (stmt);
17645	  gimple *g = gimple_build_call (decl, 1, arg0);
17646	  gimple_set_location (g, loc);
17647	  tree lhs = make_ssa_name (integer_type_node);
17648	  gimple_call_set_lhs (g, lhs);
17649	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
17650	  g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
17651	  gimple_set_location (g, loc);
17652	  gsi_replace (gsi, g, false);
17653	  return true;
17654	}
17655      break;
17656
17657    case IX86_BUILTIN_BZHI32:
17658    case IX86_BUILTIN_BZHI64:
17659      gcc_assert (n_args == 2);
17660      arg1 = gimple_call_arg (stmt, 1);
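      /* A constant bit index of at least the operand precision makes BZHI
	 an identity operation, so the call folds to its first argument.  */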
17661      if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
17662	{
17663	  unsigned int idx = tree_to_uhwi (arg1) & 0xff;
17664	  arg0 = gimple_call_arg (stmt, 0);
17665	  if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
17666	    break;
17667	  location_t loc = gimple_location (stmt);
17668	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17669	  gimple_set_location (g, loc);
17670	  gsi_replace (gsi, g, false);
17671	  return true;
17672	}
17673      break;
17674
17675    case IX86_BUILTIN_PDEP32:
17676    case IX86_BUILTIN_PDEP64:
17677    case IX86_BUILTIN_PEXT32:
17678    case IX86_BUILTIN_PEXT64:
17679      gcc_assert (n_args == 2);
17680      arg1 = gimple_call_arg (stmt, 1);
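      /* With an all-ones mask both PDEP and PEXT act as the identity, so
	 the call folds to its first argument.  */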
17681      if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
17682	{
17683	  location_t loc = gimple_location (stmt);
17684	  arg0 = gimple_call_arg (stmt, 0);
17685	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17686	  gimple_set_location (g, loc);
17687	  gsi_replace (gsi, g, false);
17688	  return true;
17689	}
17690      break;
17691
17692    case IX86_BUILTIN_PSLLD:
17693    case IX86_BUILTIN_PSLLD128:
17694    case IX86_BUILTIN_PSLLD128_MASK:
17695    case IX86_BUILTIN_PSLLD256:
17696    case IX86_BUILTIN_PSLLD256_MASK:
17697    case IX86_BUILTIN_PSLLD512:
17698    case IX86_BUILTIN_PSLLDI:
17699    case IX86_BUILTIN_PSLLDI128:
17700    case IX86_BUILTIN_PSLLDI128_MASK:
17701    case IX86_BUILTIN_PSLLDI256:
17702    case IX86_BUILTIN_PSLLDI256_MASK:
17703    case IX86_BUILTIN_PSLLDI512:
17704    case IX86_BUILTIN_PSLLQ:
17705    case IX86_BUILTIN_PSLLQ128:
17706    case IX86_BUILTIN_PSLLQ128_MASK:
17707    case IX86_BUILTIN_PSLLQ256:
17708    case IX86_BUILTIN_PSLLQ256_MASK:
17709    case IX86_BUILTIN_PSLLQ512:
17710    case IX86_BUILTIN_PSLLQI:
17711    case IX86_BUILTIN_PSLLQI128:
17712    case IX86_BUILTIN_PSLLQI128_MASK:
17713    case IX86_BUILTIN_PSLLQI256:
17714    case IX86_BUILTIN_PSLLQI256_MASK:
17715    case IX86_BUILTIN_PSLLQI512:
17716    case IX86_BUILTIN_PSLLW:
17717    case IX86_BUILTIN_PSLLW128:
17718    case IX86_BUILTIN_PSLLW128_MASK:
17719    case IX86_BUILTIN_PSLLW256:
17720    case IX86_BUILTIN_PSLLW256_MASK:
17721    case IX86_BUILTIN_PSLLW512_MASK:
17722    case IX86_BUILTIN_PSLLWI:
17723    case IX86_BUILTIN_PSLLWI128:
17724    case IX86_BUILTIN_PSLLWI128_MASK:
17725    case IX86_BUILTIN_PSLLWI256:
17726    case IX86_BUILTIN_PSLLWI256_MASK:
17727    case IX86_BUILTIN_PSLLWI512_MASK:
17728      rcode = ASHIFT;
17729      is_vshift = false;
17730      goto do_shift;
17731    case IX86_BUILTIN_PSRAD:
17732    case IX86_BUILTIN_PSRAD128:
17733    case IX86_BUILTIN_PSRAD128_MASK:
17734    case IX86_BUILTIN_PSRAD256:
17735    case IX86_BUILTIN_PSRAD256_MASK:
17736    case IX86_BUILTIN_PSRAD512:
17737    case IX86_BUILTIN_PSRADI:
17738    case IX86_BUILTIN_PSRADI128:
17739    case IX86_BUILTIN_PSRADI128_MASK:
17740    case IX86_BUILTIN_PSRADI256:
17741    case IX86_BUILTIN_PSRADI256_MASK:
17742    case IX86_BUILTIN_PSRADI512:
17743    case IX86_BUILTIN_PSRAQ128_MASK:
17744    case IX86_BUILTIN_PSRAQ256_MASK:
17745    case IX86_BUILTIN_PSRAQ512:
17746    case IX86_BUILTIN_PSRAQI128_MASK:
17747    case IX86_BUILTIN_PSRAQI256_MASK:
17748    case IX86_BUILTIN_PSRAQI512:
17749    case IX86_BUILTIN_PSRAW:
17750    case IX86_BUILTIN_PSRAW128:
17751    case IX86_BUILTIN_PSRAW128_MASK:
17752    case IX86_BUILTIN_PSRAW256:
17753    case IX86_BUILTIN_PSRAW256_MASK:
17754    case IX86_BUILTIN_PSRAW512:
17755    case IX86_BUILTIN_PSRAWI:
17756    case IX86_BUILTIN_PSRAWI128:
17757    case IX86_BUILTIN_PSRAWI128_MASK:
17758    case IX86_BUILTIN_PSRAWI256:
17759    case IX86_BUILTIN_PSRAWI256_MASK:
17760    case IX86_BUILTIN_PSRAWI512:
17761      rcode = ASHIFTRT;
17762      is_vshift = false;
17763      goto do_shift;
17764    case IX86_BUILTIN_PSRLD:
17765    case IX86_BUILTIN_PSRLD128:
17766    case IX86_BUILTIN_PSRLD128_MASK:
17767    case IX86_BUILTIN_PSRLD256:
17768    case IX86_BUILTIN_PSRLD256_MASK:
17769    case IX86_BUILTIN_PSRLD512:
17770    case IX86_BUILTIN_PSRLDI:
17771    case IX86_BUILTIN_PSRLDI128:
17772    case IX86_BUILTIN_PSRLDI128_MASK:
17773    case IX86_BUILTIN_PSRLDI256:
17774    case IX86_BUILTIN_PSRLDI256_MASK:
17775    case IX86_BUILTIN_PSRLDI512:
17776    case IX86_BUILTIN_PSRLQ:
17777    case IX86_BUILTIN_PSRLQ128:
17778    case IX86_BUILTIN_PSRLQ128_MASK:
17779    case IX86_BUILTIN_PSRLQ256:
17780    case IX86_BUILTIN_PSRLQ256_MASK:
17781    case IX86_BUILTIN_PSRLQ512:
17782    case IX86_BUILTIN_PSRLQI:
17783    case IX86_BUILTIN_PSRLQI128:
17784    case IX86_BUILTIN_PSRLQI128_MASK:
17785    case IX86_BUILTIN_PSRLQI256:
17786    case IX86_BUILTIN_PSRLQI256_MASK:
17787    case IX86_BUILTIN_PSRLQI512:
17788    case IX86_BUILTIN_PSRLW:
17789    case IX86_BUILTIN_PSRLW128:
17790    case IX86_BUILTIN_PSRLW128_MASK:
17791    case IX86_BUILTIN_PSRLW256:
17792    case IX86_BUILTIN_PSRLW256_MASK:
17793    case IX86_BUILTIN_PSRLW512:
17794    case IX86_BUILTIN_PSRLWI:
17795    case IX86_BUILTIN_PSRLWI128:
17796    case IX86_BUILTIN_PSRLWI128_MASK:
17797    case IX86_BUILTIN_PSRLWI256:
17798    case IX86_BUILTIN_PSRLWI256_MASK:
17799    case IX86_BUILTIN_PSRLWI512:
17800      rcode = LSHIFTRT;
17801      is_vshift = false;
17802      goto do_shift;
17803    case IX86_BUILTIN_PSLLVV16HI:
17804    case IX86_BUILTIN_PSLLVV16SI:
17805    case IX86_BUILTIN_PSLLVV2DI:
17806    case IX86_BUILTIN_PSLLVV2DI_MASK:
17807    case IX86_BUILTIN_PSLLVV32HI:
17808    case IX86_BUILTIN_PSLLVV4DI:
17809    case IX86_BUILTIN_PSLLVV4DI_MASK:
17810    case IX86_BUILTIN_PSLLVV4SI:
17811    case IX86_BUILTIN_PSLLVV4SI_MASK:
17812    case IX86_BUILTIN_PSLLVV8DI:
17813    case IX86_BUILTIN_PSLLVV8HI:
17814    case IX86_BUILTIN_PSLLVV8SI:
17815    case IX86_BUILTIN_PSLLVV8SI_MASK:
17816      rcode = ASHIFT;
17817      is_vshift = true;
17818      goto do_shift;
17819    case IX86_BUILTIN_PSRAVQ128:
17820    case IX86_BUILTIN_PSRAVQ256:
17821    case IX86_BUILTIN_PSRAVV16HI:
17822    case IX86_BUILTIN_PSRAVV16SI:
17823    case IX86_BUILTIN_PSRAVV32HI:
17824    case IX86_BUILTIN_PSRAVV4SI:
17825    case IX86_BUILTIN_PSRAVV4SI_MASK:
17826    case IX86_BUILTIN_PSRAVV8DI:
17827    case IX86_BUILTIN_PSRAVV8HI:
17828    case IX86_BUILTIN_PSRAVV8SI:
17829    case IX86_BUILTIN_PSRAVV8SI_MASK:
17830      rcode = ASHIFTRT;
17831      is_vshift = true;
17832      goto do_shift;
17833    case IX86_BUILTIN_PSRLVV16HI:
17834    case IX86_BUILTIN_PSRLVV16SI:
17835    case IX86_BUILTIN_PSRLVV2DI:
17836    case IX86_BUILTIN_PSRLVV2DI_MASK:
17837    case IX86_BUILTIN_PSRLVV32HI:
17838    case IX86_BUILTIN_PSRLVV4DI:
17839    case IX86_BUILTIN_PSRLVV4DI_MASK:
17840    case IX86_BUILTIN_PSRLVV4SI:
17841    case IX86_BUILTIN_PSRLVV4SI_MASK:
17842    case IX86_BUILTIN_PSRLVV8DI:
17843    case IX86_BUILTIN_PSRLVV8HI:
17844    case IX86_BUILTIN_PSRLVV8SI:
17845    case IX86_BUILTIN_PSRLVV8SI_MASK:
17846      rcode = LSHIFTRT;
17847      is_vshift = true;
17848      goto do_shift;
17849
17850    do_shift:
17851      gcc_assert (n_args >= 2);
17852      if (!gimple_call_lhs (stmt))
17853	break;
17854      arg0 = gimple_call_arg (stmt, 0);
17855      arg1 = gimple_call_arg (stmt, 1);
17856      if (n_args > 2)
17857	{
17858	  /* This is a masked shift.  Only optimize if the mask is all ones.  */
17859	  tree argl = gimple_call_arg (stmt, n_args - 1);
17860	  if (!tree_fits_uhwi_p (argl))
17861	    break;
17862	  unsigned HOST_WIDE_INT mask = tree_to_uhwi (argl);
17863	  unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
17864	  if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
17865	    break;
17866	}
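      /* Determine the effective shift count.  Variable-count shifts are
	 only folded when the whole count vector is zero, or when every
	 element's count is out of range and the shift is not an arithmetic
	 right shift, in which case the result is zero.  */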
17867      if (is_vshift)
17868	{
17869	  if (TREE_CODE (arg1) != VECTOR_CST)
17870	    break;
17871	  count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
17872	  if (integer_zerop (arg1))
17873	    count = 0;
17874	  else if (rcode == ASHIFTRT)
17875	    break;
17876	  else
17877	    for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
17878	      {
17879		tree elt = VECTOR_CST_ELT (arg1, i);
17880		if (!wi::neg_p (wi::to_wide (elt))
17881		    && wi::to_widest (elt) < count)
17882		  return false;
17883	      }
17884	}
17885      else
17886	{
17887	  arg1 = ix86_vector_shift_count (arg1);
17888	  if (!arg1)
17889	    break;
17890	  count = tree_to_uhwi (arg1);
17891	}
17892      if (count == 0)
17893	{
17894	  /* Just return the first argument for shift by 0.  */
17895	  location_t loc = gimple_location (stmt);
17896	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17897	  gimple_set_location (g, loc);
17898	  gsi_replace (gsi, g, false);
17899	  return true;
17900	}
17901      if (rcode != ASHIFTRT
17902	  && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
17903	{
17904	  /* For shift counts equal to or greater than the precision, the
17905	     result is zero, except for arithmetic right shifts.  */
17906	  location_t loc = gimple_location (stmt);
17907	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
17908					   build_zero_cst (TREE_TYPE (arg0)));
17909	  gimple_set_location (g, loc);
17910	  gsi_replace (gsi, g, false);
17911	  return true;
17912	}
17913      break;
17914
17915    case IX86_BUILTIN_SHUFPD:
17916      arg2 = gimple_call_arg (stmt, 2);
17917      if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
17918	{
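	  /* Lower the shuffle to a VEC_PERM_EXPR: bit 0 of the immediate
	     selects element 0 or 1 of the first operand and bit 1 selects
	     element 0 or 1 of the second operand (permute indices 2 or 3).
	     E.g. an immediate of 1 gives the permutation { 1, 2 }, i.e.
	     { arg0[1], arg1[0] }.  */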
17919	  location_t loc = gimple_location (stmt);
17920	  unsigned HOST_WIDE_INT imask = TREE_INT_CST_LOW (arg2);
17921	  arg0 = gimple_call_arg (stmt, 0);
17922	  arg1 = gimple_call_arg (stmt, 1);
17923	  tree itype = long_long_integer_type_node;
17924	  tree vtype = build_vector_type (itype, 2); /* V2DI */
17925	  tree_vector_builder elts (vtype, 2, 1);
17926	  /* Ignore bits other than the lowest 2.  */
17927	  elts.quick_push (build_int_cst (itype, imask & 1));
17928	  imask >>= 1;
17929	  elts.quick_push (build_int_cst (itype, 2 + (imask & 1)));
17930	  tree omask = elts.build ();
17931	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
17932					   VEC_PERM_EXPR,
17933					   arg0, arg1, omask);
17934	  gimple_set_location (g, loc);
17935	  gsi_replace (gsi, g, false);
17936	  return true;
17937	}
17938      // Do not error yet; the constant could be propagated later.
17939      break;
17940
17941    default:
17942      break;
17943    }
17944
17945  return false;
17946}
17947
17948/* Handler for an SVML-style interface to
17949   a library with vectorized intrinsics.  */
17950
17951tree
17952ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
17953{
17954  char name[20];
17955  tree fntype, new_fndecl, args;
17956  unsigned arity;
17957  const char *bname;
17958  machine_mode el_mode, in_mode;
17959  int n, in_n;
17960
17961  /* SVML is suitable for unsafe math only.  */
17962  if (!flag_unsafe_math_optimizations)
17963    return NULL_TREE;
17964
17965  el_mode = TYPE_MODE (TREE_TYPE (type_out));
17966  n = TYPE_VECTOR_SUBPARTS (type_out);
17967  in_mode = TYPE_MODE (TREE_TYPE (type_in));
17968  in_n = TYPE_VECTOR_SUBPARTS (type_in);
17969  if (el_mode != in_mode
17970      || n != in_n)
17971    return NULL_TREE;
17972
17973  switch (fn)
17974    {
17975    CASE_CFN_EXP:
17976    CASE_CFN_LOG:
17977    CASE_CFN_LOG10:
17978    CASE_CFN_POW:
17979    CASE_CFN_TANH:
17980    CASE_CFN_TAN:
17981    CASE_CFN_ATAN:
17982    CASE_CFN_ATAN2:
17983    CASE_CFN_ATANH:
17984    CASE_CFN_CBRT:
17985    CASE_CFN_SINH:
17986    CASE_CFN_SIN:
17987    CASE_CFN_ASINH:
17988    CASE_CFN_ASIN:
17989    CASE_CFN_COSH:
17990    CASE_CFN_COS:
17991    CASE_CFN_ACOSH:
17992    CASE_CFN_ACOS:
17993      if ((el_mode != DFmode || n != 2)
17994	  && (el_mode != SFmode || n != 4))
17995	return NULL_TREE;
17996      break;
17997
17998    default:
17999      return NULL_TREE;
18000    }
18001
18002  tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
18003  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
18004
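  /* Build the SVML entry point name: "vmls<Func>4" for the 4 x float
     variants and "vmld<Func>2" for the 2 x double variants (e.g.
     vmlsSin4 and vmldSin2), with log special-cased to vmlsLn4 / vmldLn2.
     The first letter of <Func> is uppercased below.  */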
18005  if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
18006    strcpy (name, "vmlsLn4");
18007  else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
18008    strcpy (name, "vmldLn2");
18009  else if (n == 4)
18010    {
18011      sprintf (name, "vmls%s", bname+10);
18012      name[strlen (name)-1] = '4';
18013    }
18014  else
18015    sprintf (name, "vmld%s2", bname+10);
18016
18017  /* Convert to uppercase. */
18018  name[4] &= ~0x20;
18019
18020  arity = 0;
18021  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
18022    arity++;
18023
18024  if (arity == 1)
18025    fntype = build_function_type_list (type_out, type_in, NULL);
18026  else
18027    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
18028
18029  /* Build a function declaration for the vectorized function.  */
18030  new_fndecl = build_decl (BUILTINS_LOCATION,
18031			   FUNCTION_DECL, get_identifier (name), fntype);
18032  TREE_PUBLIC (new_fndecl) = 1;
18033  DECL_EXTERNAL (new_fndecl) = 1;
18034  DECL_IS_NOVOPS (new_fndecl) = 1;
18035  TREE_READONLY (new_fndecl) = 1;
18036
18037  return new_fndecl;
18038}
18039
18040/* Handler for an ACML-style interface to
18041   a library with vectorized intrinsics.  */
18042
18043tree
18044ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
18045{
18046  char name[20] = "__vr.._";
18047  tree fntype, new_fndecl, args;
18048  unsigned arity;
18049  const char *bname;
18050  machine_mode el_mode, in_mode;
18051  int n, in_n;
18052
18053  /* ACML is 64-bit only and suitable for unsafe math only, as it
18054     does not correctly support parts of IEEE arithmetic with the
18055     required precision, such as denormals.  */
18056  if (!TARGET_64BIT
18057      || !flag_unsafe_math_optimizations)
18058    return NULL_TREE;
18059
18060  el_mode = TYPE_MODE (TREE_TYPE (type_out));
18061  n = TYPE_VECTOR_SUBPARTS (type_out);
18062  in_mode = TYPE_MODE (TREE_TYPE (type_in));
18063  in_n = TYPE_VECTOR_SUBPARTS (type_in);
18064  if (el_mode != in_mode
18065      || n != in_n)
18066    return NULL_TREE;
18067
18068  switch (fn)
18069    {
18070    CASE_CFN_SIN:
18071    CASE_CFN_COS:
18072    CASE_CFN_EXP:
18073    CASE_CFN_LOG:
18074    CASE_CFN_LOG2:
18075    CASE_CFN_LOG10:
18076      if (el_mode == DFmode && n == 2)
18077	{
18078	  name[4] = 'd';
18079	  name[5] = '2';
18080	}
18081      else if (el_mode == SFmode && n == 4)
18082	{
18083	  name[4] = 's';
18084	  name[5] = '4';
18085	}
18086      else
18087	return NULL_TREE;
18088      break;
18089
18090    default:
18091      return NULL_TREE;
18092    }
18093
18094  tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
18095  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
18096  sprintf (name + 7, "%s", bname+10);
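  /* The resulting names follow the ACML vector ABI, e.g. __vrd2_sin for
     the 2 x double variant and __vrs4_sinf for the 4 x float variant.  */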
18097
18098  arity = 0;
18099  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
18100    arity++;
18101
18102  if (arity == 1)
18103    fntype = build_function_type_list (type_out, type_in, NULL);
18104  else
18105    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
18106
18107  /* Build a function declaration for the vectorized function.  */
18108  new_fndecl = build_decl (BUILTINS_LOCATION,
18109			   FUNCTION_DECL, get_identifier (name), fntype);
18110  TREE_PUBLIC (new_fndecl) = 1;
18111  DECL_EXTERNAL (new_fndecl) = 1;
18112  DECL_IS_NOVOPS (new_fndecl) = 1;
18113  TREE_READONLY (new_fndecl) = 1;
18114
18115  return new_fndecl;
18116}
18117
18118/* Return a decl of a function that implements a scatter store with
18119   register type VECTYPE, index type INDEX_TYPE and scale SCALE.
18120   Return NULL_TREE if it is not available.  */
18121
18122static tree
18123ix86_vectorize_builtin_scatter (const_tree vectype,
18124				const_tree index_type, int scale)
18125{
18126  bool si;
18127  enum ix86_builtins code;
18128
18129  if (!TARGET_AVX512F)
18130    return NULL_TREE;
18131
18132  if ((TREE_CODE (index_type) != INTEGER_TYPE
18133       && !POINTER_TYPE_P (index_type))
18134      || (TYPE_MODE (index_type) != SImode
18135	  && TYPE_MODE (index_type) != DImode))
18136    return NULL_TREE;
18137
18138  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
18139    return NULL_TREE;
18140
18141  /* The v*scatter* insns sign-extend the index to pointer mode.  */
18142  if (TYPE_PRECISION (index_type) < POINTER_SIZE
18143      && TYPE_UNSIGNED (index_type))
18144    return NULL_TREE;
18145
18146  /* Scale can be 1, 2, 4 or 8.  */
18147  if (scale <= 0
18148      || scale > 8
18149      || (scale & (scale - 1)) != 0)
18150    return NULL_TREE;
18151
18152  si = TYPE_MODE (index_type) == SImode;
18153  switch (TYPE_MODE (vectype))
18154    {
18155    case E_V8DFmode:
18156      code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
18157      break;
18158    case E_V8DImode:
18159      code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
18160      break;
18161    case E_V16SFmode:
18162      code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
18163      break;
18164    case E_V16SImode:
18165      code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
18166      break;
18167    case E_V4DFmode:
18168      if (TARGET_AVX512VL)
18169	code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
18170      else
18171	return NULL_TREE;
18172      break;
18173    case E_V4DImode:
18174      if (TARGET_AVX512VL)
18175	code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
18176      else
18177	return NULL_TREE;
18178      break;
18179    case E_V8SFmode:
18180      if (TARGET_AVX512VL)
18181	code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
18182      else
18183	return NULL_TREE;
18184      break;
18185    case E_V8SImode:
18186      if (TARGET_AVX512VL)
18187	code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
18188      else
18189	return NULL_TREE;
18190      break;
18191    case E_V2DFmode:
18192      if (TARGET_AVX512VL)
18193	code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
18194      else
18195	return NULL_TREE;
18196      break;
18197    case E_V2DImode:
18198      if (TARGET_AVX512VL)
18199	code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
18200      else
18201	return NULL_TREE;
18202      break;
18203    case E_V4SFmode:
18204      if (TARGET_AVX512VL)
18205	code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
18206      else
18207	return NULL_TREE;
18208      break;
18209    case E_V4SImode:
18210      if (TARGET_AVX512VL)
18211	code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
18212      else
18213	return NULL_TREE;
18214      break;
18215    default:
18216      return NULL_TREE;
18217    }
18218
18219  return get_ix86_builtin (code);
18220}
18221
18222/* Return true if it is safe to use the rsqrt optabs to optimize
18223   1.0/sqrt.  */
18224
18225static bool
18226use_rsqrt_p ()
18227{
18228  return (TARGET_SSE && TARGET_SSE_MATH
18229	  && flag_finite_math_only
18230	  && !flag_trapping_math
18231	  && flag_unsafe_math_optimizations);
18232}
18233
18234/* Helper for avx_vpermilps256_operand et al.  This is also used by
18235   the expansion functions to turn the parallel back into a mask.
18236   The return value is 0 for no match and the imm8+1 for a match.  */
18237
18238int
18239avx_vpermilp_parallel (rtx par, machine_mode mode)
18240{
18241  unsigned i, nelt = GET_MODE_NUNITS (mode);
18242  unsigned mask = 0;
18243  unsigned char ipar[16] = {};  /* Silence -Wuninitialized warning.  */
18244
18245  if (XVECLEN (par, 0) != (int) nelt)
18246    return 0;
18247
18248  /* Validate that all of the elements are constants, and not totally
18249     out of range.  Copy the data into an integral array to make the
18250     subsequent checks easier.  */
18251  for (i = 0; i < nelt; ++i)
18252    {
18253      rtx er = XVECEXP (par, 0, i);
18254      unsigned HOST_WIDE_INT ei;
18255
18256      if (!CONST_INT_P (er))
18257	return 0;
18258      ei = INTVAL (er);
18259      if (ei >= nelt)
18260	return 0;
18261      ipar[i] = ei;
18262    }
18263
18264  switch (mode)
18265    {
18266    case E_V8DFmode:
18267      /* In the 512-bit DFmode case, we can only move elements within
18268         a 128-bit lane.  First fill the second part of the mask,
18269	 then fallthru.  */
18270      for (i = 4; i < 6; ++i)
18271	{
18272	  if (ipar[i] < 4 || ipar[i] >= 6)
18273	    return 0;
18274	  mask |= (ipar[i] - 4) << i;
18275	}
18276      for (i = 6; i < 8; ++i)
18277	{
18278	  if (ipar[i] < 6)
18279	    return 0;
18280	  mask |= (ipar[i] - 6) << i;
18281	}
18282      /* FALLTHRU */
18283
18284    case E_V4DFmode:
18285      /* In the 256-bit DFmode case, we can only move elements within
18286         a 128-bit lane.  */
18287      for (i = 0; i < 2; ++i)
18288	{
18289	  if (ipar[i] >= 2)
18290	    return 0;
18291	  mask |= ipar[i] << i;
18292	}
18293      for (i = 2; i < 4; ++i)
18294	{
18295	  if (ipar[i] < 2)
18296	    return 0;
18297	  mask |= (ipar[i] - 2) << i;
18298	}
18299      break;
18300
18301    case E_V16SFmode:
18302      /* In the 512-bit SFmode case, the permutation in the upper 256 bits
18303	 must mirror the permutation in the lower 256 bits.  */
18304      for (i = 0; i < 8; ++i)
18305	if (ipar[i] + 8 != ipar[i + 8])
18306	  return 0;
18307      /* FALLTHRU */
18308
18309    case E_V8SFmode:
18310      /* In the 256-bit SFmode case, we have full freedom of
18311         movement within the low 128-bit lane, but the high 128-bit
18312         lane must mirror the exact same pattern.  */
18313      for (i = 0; i < 4; ++i)
18314	if (ipar[i] + 4 != ipar[i + 4])
18315	  return 0;
18316      nelt = 4;
18317      /* FALLTHRU */
18318
18319    case E_V2DFmode:
18320    case E_V4SFmode:
18321      /* In the 128-bit case, we have full freedom in the placement of
18322	 the elements from the source operand.  */
18323      for (i = 0; i < nelt; ++i)
18324	mask |= ipar[i] << (i * (nelt / 2));
18325      break;
18326
18327    default:
18328      gcc_unreachable ();
18329    }
18330
18331  /* Make sure success has a non-zero value by adding one.  */
18332  return mask + 1;
18333}
18334
18335/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
18336   the expansion functions to turn the parallel back into a mask.
18337   The return value is 0 for no match and the imm8+1 for a match.  */
18338
18339int
18340avx_vperm2f128_parallel (rtx par, machine_mode mode)
18341{
18342  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
18343  unsigned mask = 0;
18344  unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */
18345
18346  if (XVECLEN (par, 0) != (int) nelt)
18347    return 0;
18348
18349  /* Validate that all of the elements are constants, and not totally
18350     out of range.  Copy the data into an integral array to make the
18351     subsequent checks easier.  */
18352  for (i = 0; i < nelt; ++i)
18353    {
18354      rtx er = XVECEXP (par, 0, i);
18355      unsigned HOST_WIDE_INT ei;
18356
18357      if (!CONST_INT_P (er))
18358	return 0;
18359      ei = INTVAL (er);
18360      if (ei >= 2 * nelt)
18361	return 0;
18362      ipar[i] = ei;
18363    }
18364
18365  /* Validate that each half of the permute selects consecutive elements.  */
18366  for (i = 0; i < nelt2 - 1; ++i)
18367    if (ipar[i] + 1 != ipar[i + 1])
18368      return 0;
18369  for (i = nelt2; i < nelt - 1; ++i)
18370    if (ipar[i] + 1 != ipar[i + 1])
18371      return 0;
18372
18373  /* Reconstruct the mask.  */
18374  for (i = 0; i < 2; ++i)
18375    {
18376      unsigned e = ipar[i * nelt2];
18377      if (e % nelt2)
18378	return 0;
18379      e /= nelt2;
18380      mask |= e << (i * 4);
18381    }
18382
18383  /* Make sure success has a non-zero value by adding one.  */
18384  return mask + 1;
18385}
18386
18387/* Return a register priority for hard reg REGNO.  */
18388static int
18389ix86_register_priority (int hard_regno)
18390{
18391  /* ebp and r13 as the base always want a displacement, and r12 as the
18392     base always wants an index.  So discourage their use in an
18393     address.  */
18394  if (hard_regno == R12_REG || hard_regno == R13_REG)
18395    return 0;
18396  if (hard_regno == BP_REG)
18397    return 1;
18398  /* New x86-64 int registers result in bigger code size.  Discourage
18399     them.  */
18400  if (IN_RANGE (hard_regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
18401    return 2;
18402  /* New x86-64 SSE registers result in bigger code size.  Discourage
18403     them.  */
18404  if (IN_RANGE (hard_regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
18405    return 2;
18406  if (IN_RANGE (hard_regno, FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG))
18407    return 1;
18408  /* Use of the AX register results in smaller code.  Prefer it.  */
18409  if (hard_regno == AX_REG)
18410    return 4;
18411  return 3;
18412}
18413
18414/* Implement TARGET_PREFERRED_RELOAD_CLASS.
18415
18416   Put float CONST_DOUBLE in the constant pool instead of fp regs.
18417   QImode must go into class Q_REGS.
18418   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
18419   movdf to do mem-to-mem moves through integer regs.  */
18420
18421static reg_class_t
18422ix86_preferred_reload_class (rtx x, reg_class_t regclass)
18423{
18424  machine_mode mode = GET_MODE (x);
18425
18426  /* We're only allowed to return a subclass of CLASS.  Many of the
18427     following checks fail for NO_REGS, so eliminate that early.  */
18428  if (regclass == NO_REGS)
18429    return NO_REGS;
18430
18431  /* All classes can load zeros.  */
18432  if (x == CONST0_RTX (mode))
18433    return regclass;
18434
18435  /* Force constants into memory if we are loading a (nonzero) constant into
18436     an MMX, SSE or MASK register.  This is because there are no MMX/SSE/MASK
18437     instructions to load from a constant.  */
18438  if (CONSTANT_P (x)
18439      && (MAYBE_MMX_CLASS_P (regclass)
18440	  || MAYBE_SSE_CLASS_P (regclass)
18441	  || MAYBE_MASK_CLASS_P (regclass)))
18442    return NO_REGS;
18443
18444  /* Floating-point constants need more complex checks.  */
18445  if (CONST_DOUBLE_P (x))
18446    {
18447      /* General regs can load everything.  */
18448      if (INTEGER_CLASS_P (regclass))
18449        return regclass;
18450
18451      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
18452	 zero above.  We only want to wind up preferring 80387 registers if
18453	 we plan on doing computation with them.  */
18454      if (IS_STACK_MODE (mode)
18455	  && standard_80387_constant_p (x) > 0)
18456	{
18457	  /* Limit class to FP regs.  */
18458	  if (FLOAT_CLASS_P (regclass))
18459	    return FLOAT_REGS;
18460	}
18461
18462      return NO_REGS;
18463    }
18464
18465  /* Prefer SSE regs only, if we can use them for math.  */
18466  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
18467    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
18468
18469  /* Generally when we see PLUS here, it's the function invariant
18470     (plus soft-fp const_int).  Which can only be computed into general
18471     regs.  */
18472  if (GET_CODE (x) == PLUS)
18473    return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
18474
18475  /* QImode constants are easy to load, but non-constant QImode data
18476     must go into Q_REGS.  */
18477  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18478    {
18479      if (Q_CLASS_P (regclass))
18480	return regclass;
18481      else if (reg_class_subset_p (Q_REGS, regclass))
18482	return Q_REGS;
18483      else
18484	return NO_REGS;
18485    }
18486
18487  return regclass;
18488}
18489
18490/* Discourage putting floating-point values in SSE registers unless
18491   SSE math is being used, and likewise for the 387 registers.  */
18492static reg_class_t
18493ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
18494{
18495  /* Restrict the output reload class to the register bank that we are doing
18496     math on.  If we would rather not return a subset of CLASS, reject this
18497     alternative: if reload cannot do this, it will still use its choice.  */
18498  machine_mode mode = GET_MODE (x);
18499  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
18500    return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
18501
18502  if (IS_STACK_MODE (mode))
18503    return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
18504
18505  return regclass;
18506}
18507
18508static reg_class_t
18509ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
18510		       machine_mode mode, secondary_reload_info *sri)
18511{
18512  /* Double-word spills from general registers to non-offsettable memory
18513     references (zero-extended addresses) require special handling.  */
18514  if (TARGET_64BIT
18515      && MEM_P (x)
18516      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
18517      && INTEGER_CLASS_P (rclass)
18518      && !offsettable_memref_p (x))
18519    {
18520      sri->icode = (in_p
18521		    ? CODE_FOR_reload_noff_load
18522		    : CODE_FOR_reload_noff_store);
18523      /* Add the cost of moving address to a temporary.  */
18524      sri->extra_cost = 1;
18525
18526      return NO_REGS;
18527    }
18528
18529  /* QImode spills from non-QI registers require an intermediate register
18530     on 32-bit targets, as do QImode mask-register moves without AVX512DQ.  */
18531  if (mode == QImode
18532      && ((!TARGET_64BIT && !in_p
18533	   && INTEGER_CLASS_P (rclass)
18534	   && MAYBE_NON_Q_CLASS_P (rclass))
18535	  || (!TARGET_AVX512DQ
18536	      && MAYBE_MASK_CLASS_P (rclass))))
18537    {
18538      int regno = true_regnum (x);
18539
18540      /* Return Q_REGS if the operand is in memory.  */
18541      if (regno == -1)
18542	return Q_REGS;
18543
18544      return NO_REGS;
18545    }
18546
18547  /* This condition handles the corner case where an expression involving
18548     pointers gets vectorized.  We're trying to use the address of a
18549     stack slot as a vector initializer.
18550
18551     (set (reg:V2DI 74 [ vect_cst_.2 ])
18552          (vec_duplicate:V2DI (reg/f:DI 20 frame)))
18553
18554     Eventually frame gets turned into sp+offset like this:
18555
18556     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18557          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18558	                               (const_int 392 [0x188]))))
18559
18560     That later gets turned into:
18561
18562     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18563          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18564	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
18565
18566     We'll have the following reload recorded:
18567
18568     Reload 0: reload_in (DI) =
18569           (plus:DI (reg/f:DI 7 sp)
18570            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
18571     reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18572     SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
18573     reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
18574     reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18575     reload_reg_rtx: (reg:V2DI 22 xmm1)
18576
18577     Which isn't going to work since SSE instructions can't handle scalar
18578     additions.  Returning GENERAL_REGS forces the addition into integer
18579     register and reload can handle subsequent reloads without problems.  */
18580
18581  if (in_p && GET_CODE (x) == PLUS
18582      && SSE_CLASS_P (rclass)
18583      && SCALAR_INT_MODE_P (mode))
18584    return GENERAL_REGS;
18585
18586  return NO_REGS;
18587}
18588
18589/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */
18590
18591static bool
18592ix86_class_likely_spilled_p (reg_class_t rclass)
18593{
18594  switch (rclass)
18595    {
18596      case AREG:
18597      case DREG:
18598      case CREG:
18599      case BREG:
18600      case AD_REGS:
18601      case SIREG:
18602      case DIREG:
18603      case SSE_FIRST_REG:
18604      case FP_TOP_REG:
18605      case FP_SECOND_REG:
18606	return true;
18607
18608      default:
18609	break;
18610    }
18611
18612  return false;
18613}
18614
18615/* If we are copying between registers from different register sets
18616   (e.g. FP and integer), we may need a memory location.
18617
18618   The function can't work reliably when one of the CLASSES is a class
18619   containing registers from multiple sets.  We avoid this by never combining
18620   different sets in a single alternative in the machine description.
18621   Ensure that this constraint holds to avoid unexpected surprises.
18622
18623   When STRICT is false, we are being called from REGISTER_MOVE_COST,
18624   so do not enforce these sanity checks.
18625
18626   To optimize register_move_cost performance, define inline variant.  */
18627
18628static inline bool
18629inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
18630				reg_class_t class2, int strict)
18631{
18632  if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
18633    return false;
18634
18635  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18636      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18637      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18638      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18639      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18640      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
18641      || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
18642      || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
18643    {
18644      gcc_assert (!strict || lra_in_progress);
18645      return true;
18646    }
18647
18648  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18649    return true;
18650
18651  /* ??? This is a lie.  We do have moves between mmx/general, and
18652     between mmx/sse2.  But by saying we need secondary memory we discourage
18653     the register allocator from using the mmx registers unless needed.  */
18654  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18655    return true;
18656
18657  /* Between mask and general, we have moves no larger than word size.  */
18658  if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
18659    {
18660      if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
18661	  || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18662	return true;
18663    }
18664
18665  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18666    {
18667      /* SSE1 doesn't have any direct moves from other classes.  */
18668      if (!TARGET_SSE2)
18669	return true;
18670
18671      /* Between SSE and general, we have moves no larger than word size.  */
18672      if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
18673	  || GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode)
18674	  || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18675	return true;
18676
18677      /* If the target says that inter-unit moves are more expensive
18678	 than moving through memory, then don't generate them.  */
18679      if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
18680	  || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
18681	return true;
18682    }
18683
18684  return false;
18685}
18686
18687/* Implement TARGET_SECONDARY_MEMORY_NEEDED.  */
18688
18689static bool
18690ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
18691			      reg_class_t class2)
18692{
18693  return inline_secondary_memory_needed (mode, class1, class2, true);
18694}
18695
18696/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
18697
18698   get_secondary_mem widens integral modes to BITS_PER_WORD.
18699   There is no need to emit a full 64-bit move on 64-bit targets
18700   for integral modes that can be moved using a 32-bit move.  */
18701
18702static machine_mode
18703ix86_secondary_memory_needed_mode (machine_mode mode)
18704{
18705  if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
18706    return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
18707  return mode;
18708}
18709
18710/* Implement the TARGET_CLASS_MAX_NREGS hook.
18711
18712   On the 80386, this is the size of MODE in words,
18713   except in the FP regs, where a single reg is always enough.  */
18714
18715static unsigned char
18716ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
18717{
18718  if (MAYBE_INTEGER_CLASS_P (rclass))
18719    {
18720      if (mode == XFmode)
18721	return (TARGET_64BIT ? 2 : 3);
18722      else if (mode == XCmode)
18723	return (TARGET_64BIT ? 4 : 6);
18724      else
18725	return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
18726    }
18727  else
18728    {
18729      if (COMPLEX_MODE_P (mode))
18730	return 2;
18731      else
18732	return 1;
18733    }
18734}
18735
18736/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
18737
18738static bool
18739ix86_can_change_mode_class (machine_mode from, machine_mode to,
18740			    reg_class_t regclass)
18741{
18742  if (from == to)
18743    return true;
18744
18745  /* x87 registers can't do subreg at all, as all values are reformatted
18746     to extended precision.  */
18747  if (MAYBE_FLOAT_CLASS_P (regclass))
18748    return false;
18749
18750  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
18751    {
18752      /* Vector registers do not support QI or HImode loads.  If we don't
18753	 disallow a change to these modes, reload will assume it's ok to
18754	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
18755	 the vec_dupv4hi pattern.  */
18756      if (GET_MODE_SIZE (from) < 4)
18757	return false;
18758    }
18759
18760  return true;
18761}
18762
18763/* Return index of MODE in the sse load/store tables.  */
18764
18765static inline int
18766sse_store_index (machine_mode mode)
18767{
18768  switch (GET_MODE_SIZE (mode))
18769    {
18770    case 4:
18771      return 0;
18772    case 8:
18773      return 1;
18774    case 16:
18775      return 2;
18776    case 32:
18777      return 3;
18778    case 64:
18779      return 4;
18780    default:
18781      return -1;
18782    }
18783}
18784
18785/* Return the cost of moving data of mode MODE between a
18786   register and memory.  A value of 2 is the default; this cost is
18787   relative to those in `REGISTER_MOVE_COST'.
18788
18789   This function is used extensively by register_move_cost, which is used
18790   to build tables at startup, so make it inline in that case.
18791   When IN is 2, return the maximum of the in and out move costs.
18792
18793   If moving between registers and memory is more expensive than
18794   between two registers, you should define this macro to express the
18795   relative cost.
18796
18797   Also model the increased cost of moving QImode registers in
18798   non-Q_REGS classes.
18799 */
18800static inline int
18801inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
18802{
18803  int cost;
18804  if (FLOAT_CLASS_P (regclass))
18805    {
18806      int index;
18807      switch (mode)
18808	{
18809	  case E_SFmode:
18810	    index = 0;
18811	    break;
18812	  case E_DFmode:
18813	    index = 1;
18814	    break;
18815	  case E_XFmode:
18816	    index = 2;
18817	    break;
18818	  default:
18819	    return 100;
18820	}
18821      if (in == 2)
18822        return MAX (ix86_cost->hard_register.fp_load [index],
18823		    ix86_cost->hard_register.fp_store [index]);
18824      return in ? ix86_cost->hard_register.fp_load [index]
18825		: ix86_cost->hard_register.fp_store [index];
18826    }
18827  if (SSE_CLASS_P (regclass))
18828    {
18829      int index = sse_store_index (mode);
18830      if (index == -1)
18831	return 100;
18832      if (in == 2)
18833        return MAX (ix86_cost->hard_register.sse_load [index],
18834		    ix86_cost->hard_register.sse_store [index]);
18835      return in ? ix86_cost->hard_register.sse_load [index]
18836		: ix86_cost->hard_register.sse_store [index];
18837    }
18838  if (MMX_CLASS_P (regclass))
18839    {
18840      int index;
18841      switch (GET_MODE_SIZE (mode))
18842	{
18843	  case 4:
18844	    index = 0;
18845	    break;
18846	  case 8:
18847	    index = 1;
18848	    break;
18849	  default:
18850	    return 100;
18851	}
18852      if (in == 2)
18853        return MAX (ix86_cost->hard_register.mmx_load [index],
18854		    ix86_cost->hard_register.mmx_store [index]);
18855      return in ? ix86_cost->hard_register.mmx_load [index]
18856		: ix86_cost->hard_register.mmx_store [index];
18857    }
18858  switch (GET_MODE_SIZE (mode))
18859    {
18860      case 1:
18861	if (Q_CLASS_P (regclass) || TARGET_64BIT)
18862	  {
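	    /* For loads, use the movzbl cost when partial register
	       dependencies matter and we optimize for speed; otherwise
	       use the plain byte load cost.  */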
18863	    if (!in)
18864	      return ix86_cost->hard_register.int_store[0];
18865	    if (TARGET_PARTIAL_REG_DEPENDENCY
18866	        && optimize_function_for_speed_p (cfun))
18867	      cost = ix86_cost->hard_register.movzbl_load;
18868	    else
18869	      cost = ix86_cost->hard_register.int_load[0];
18870	    if (in == 2)
18871	      return MAX (cost, ix86_cost->hard_register.int_store[0]);
18872	    return cost;
18873	  }
18874	else
18875	  {
18876	   if (in == 2)
18877	     return MAX (ix86_cost->hard_register.movzbl_load,
18878			 ix86_cost->hard_register.int_store[0] + 4);
18879	   if (in)
18880	     return ix86_cost->hard_register.movzbl_load;
18881	   else
18882	     return ix86_cost->hard_register.int_store[0] + 4;
18883	  }
18884	break;
18885      case 2:
18886	if (in == 2)
18887	  return MAX (ix86_cost->hard_register.int_load[1],
18888		      ix86_cost->hard_register.int_store[1]);
18889	return in ? ix86_cost->hard_register.int_load[1]
18890		  : ix86_cost->hard_register.int_store[1];
18891      default:
18892	if (in == 2)
18893	  cost = MAX (ix86_cost->hard_register.int_load[2],
18894		      ix86_cost->hard_register.int_store[2]);
18895	else if (in)
18896	  cost = ix86_cost->hard_register.int_load[2];
18897	else
18898	  cost = ix86_cost->hard_register.int_store[2];
18899	/* Multiply by the number of GPR moves needed.  */
18900	return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
18901    }
18902}
18903
18904static int
18905ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
18906{
18907  return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
18908}
18909
18910
18911/* Return the cost of moving data from a register in class CLASS1 to
18912   one in class CLASS2.
18913
18914   It is not required that the cost always equal 2 when FROM is the same as TO;
18915   on some machines it is expensive to move between registers if they are not
18916   general registers.  */
18917
18918static int
18919ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
18920			 reg_class_t class2_i)
18921{
18922  enum reg_class class1 = (enum reg_class) class1_i;
18923  enum reg_class class2 = (enum reg_class) class2_i;
18924
18925  /* In case we require secondary memory, compute the cost of the store
18926     followed by the load.  To avoid bad register allocation choices, we
18927     need this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
18928
18929  if (inline_secondary_memory_needed (mode, class1, class2, false))
18930    {
18931      int cost = 1;
18932
18933      cost += inline_memory_move_cost (mode, class1, 2);
18934      cost += inline_memory_move_cost (mode, class2, 2);
18935
18936      /* When copying from a general purpose register we may emit multiple
18937         stores followed by a single load, causing a memory size mismatch
18938         stall.  Count this as an arbitrarily high cost of 20.  */
18939      if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
18940	  && TARGET_MEMORY_MISMATCH_STALL
18941	  && targetm.class_max_nregs (class1, mode)
18942	     > targetm.class_max_nregs (class2, mode))
18943	cost += 20;
18944
18945      /* In the case of FP/MMX moves, the registers actually overlap, and we
18946	 have to switch modes in order to treat them differently.  */
18947      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18948          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18949	cost += 20;
18950
18951      return cost;
18952    }
18953
18954  /* Moves between MMX and non-MMX units require secondary memory.  */
18955  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18956    gcc_unreachable ();
18957
18958  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18959    return (SSE_CLASS_P (class1)
18960	    ? ix86_cost->hard_register.sse_to_integer
18961	    : ix86_cost->hard_register.integer_to_sse);
18962
18963  if (MAYBE_FLOAT_CLASS_P (class1))
18964    return ix86_cost->hard_register.fp_move;
18965  if (MAYBE_SSE_CLASS_P (class1))
18966    {
18967      if (GET_MODE_BITSIZE (mode) <= 128)
18968	return ix86_cost->hard_register.xmm_move;
18969      if (GET_MODE_BITSIZE (mode) <= 256)
18970	return ix86_cost->hard_register.ymm_move;
18971      return ix86_cost->hard_register.zmm_move;
18972    }
18973  if (MAYBE_MMX_CLASS_P (class1))
18974    return ix86_cost->hard_register.mmx_move;
18975  return 2;
18976}
18977
18978/* Implement TARGET_HARD_REGNO_NREGS.  This is ordinarily the length in
18979   words of a value of mode MODE but can be less for certain modes in
18980   special long registers.
18981
18982   Actually there are no two word move instructions for consecutive
18983   registers.  And only registers 0-3 may have mov byte instructions
18984   applied to them.  */
18985
18986static unsigned int
18987ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
18988{
18989  if (GENERAL_REGNO_P (regno))
18990    {
18991      if (mode == XFmode)
18992	return TARGET_64BIT ? 2 : 3;
18993      if (mode == XCmode)
18994	return TARGET_64BIT ? 4 : 6;
18995      return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
18996    }
18997  if (COMPLEX_MODE_P (mode))
18998    return 2;
18999  /* Register pair for mask registers.  */
19000  if (mode == P2QImode || mode == P2HImode)
19001    return 2;
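  /* The V64SF/V64SI modes used by AVX5124FMAPS/AVX5124VNNIW span four
     512-bit registers.  */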
19002  if (mode == V64SFmode || mode == V64SImode)
19003    return 4;
19004  return 1;
19005}
19006
19007/* Implement REGMODE_NATURAL_SIZE(MODE).  */
19008unsigned int
19009ix86_regmode_natural_size (machine_mode mode)
19010{
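  /* Mask register pairs (P2QI/P2HI) naturally split into their two
     single-register halves; everything else splits into word-sized
     pieces.  */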
19011  if (mode == P2HImode || mode == P2QImode)
19012    return GET_MODE_SIZE (mode) / 2;
19013  return UNITS_PER_WORD;
19014}
19015
19016/* Implement TARGET_HARD_REGNO_MODE_OK.  */
19017
19018static bool
19019ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
19020{
19021  /* Flags can hold only CCmode values, and only flags can hold them.  */
19022  if (CC_REGNO_P (regno))
19023    return GET_MODE_CLASS (mode) == MODE_CC;
19024  if (GET_MODE_CLASS (mode) == MODE_CC
19025      || GET_MODE_CLASS (mode) == MODE_RANDOM)
19026    return false;
19027  if (STACK_REGNO_P (regno))
19028    return VALID_FP_MODE_P (mode);
19029  if (MASK_REGNO_P (regno))
19030    {
19031      /* A register pair only starts at an even register number.  */
19032      if ((mode == P2QImode || mode == P2HImode))
19033	return MASK_PAIR_REGNO_P (regno);
19034
19035      return (VALID_MASK_REG_MODE (mode)
19036	      || (TARGET_AVX512BW
19037		  && VALID_MASK_AVX512BW_MODE (mode)));
19038    }
19039
19040  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
19041    return false;
19042
19043  if (SSE_REGNO_P (regno))
19044    {
19045      /* We implement the move patterns for all vector modes into and
19046	 out of SSE registers, even when no operation instructions
19047	 are available.  */
19048
19049      /* For AVX-512 we allow, regardless of regno:
19050	  - XI mode
19051	  - any 512-bit wide vector mode
19052	  - any scalar mode.  */
19053      if (TARGET_AVX512F
19054	  && (mode == XImode
19055	      || VALID_AVX512F_REG_MODE (mode)
19056	      || VALID_AVX512F_SCALAR_MODE (mode)))
19057	return true;
19058
19059      /* For AVX-5124FMAPS or AVX-5124VNNIW
19060	 allow V64SF and V64SI modes for special regnos.  */
19061      if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
19062	  && (mode == V64SFmode || mode == V64SImode)
19063	  && MOD4_SSE_REGNO_P (regno))
19064	return true;
19065
19066      /* TODO check for QI/HI scalars.  */
19067      /* AVX512VL allows SSE regs 16+ for 128/256-bit modes.  */
19068      if (TARGET_AVX512VL
19069	  && (mode == OImode
19070	      || mode == TImode
19071	      || VALID_AVX256_REG_MODE (mode)
19072	      || VALID_AVX512VL_128_REG_MODE (mode)))
19073	return true;
19074
19075      /* xmm16-xmm31 are only available for AVX-512.  */
19076      if (EXT_REX_SSE_REGNO_P (regno))
19077	return false;
19078
19079      /* OImode and AVX modes are available only when AVX is enabled.  */
19080      return ((TARGET_AVX
19081	       && VALID_AVX256_REG_OR_OI_MODE (mode))
19082	      || VALID_SSE_REG_MODE (mode)
19083	      || VALID_SSE2_REG_MODE (mode)
19084	      || VALID_MMX_REG_MODE (mode)
19085	      || VALID_MMX_REG_MODE_3DNOW (mode));
19086    }
19087  if (MMX_REGNO_P (regno))
19088    {
19089      /* We implement the move patterns for 3DNOW modes even in MMX mode,
19090	 so if the register is available at all, then we can move data of
19091	 the given mode into or out of it.  */
19092      return (VALID_MMX_REG_MODE (mode)
19093	      || VALID_MMX_REG_MODE_3DNOW (mode));
19094    }
19095
19096  if (mode == QImode)
19097    {
19098      /* Take care with QImode values - they can be in non-QI regs,
19099	 but then they do cause partial register stalls.  */
19100      if (ANY_QI_REGNO_P (regno))
19101	return true;
19102      if (!TARGET_PARTIAL_REG_STALL)
19103	return true;
19104      /* LRA checks if the hard register is OK for the given mode.
19105	 QImode values can live in non-QI regs, so we allow all
19106	 registers here.  */
19107      if (lra_in_progress)
19108       return true;
19109      return !can_create_pseudo_p ();
19110    }
  /* We handle both integers and floats in the general purpose registers.  */
19112  else if (VALID_INT_MODE_P (mode))
19113    return true;
19114  else if (VALID_FP_MODE_P (mode))
19115    return true;
19116  else if (VALID_DFP_MODE_P (mode))
19117    return true;
19118  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
19119     on to use that value in smaller contexts, this can easily force a
19120     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
19121     supporting DImode, allow it.  */
19122  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
19123    return true;
19124
19125  return false;
19126}
19127
19128/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The only ABI that
19129   saves SSE registers across calls is Win64 (thus no need to check the
19130   current ABI here), and with AVX enabled Win64 only guarantees that
19131   the low 16 bytes are saved.  */
19132
19133static bool
19134ix86_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
19135				     machine_mode mode)
19136{
19137  return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
19138}
19139
19140/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
19141   tieable integer mode.  */
19142
19143static bool
19144ix86_tieable_integer_mode_p (machine_mode mode)
19145{
19146  switch (mode)
19147    {
19148    case E_HImode:
19149    case E_SImode:
19150      return true;
19151
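    /* QImode can be tied with the wider integer modes only when writing
       byte subregisters does not risk partial register stalls.  */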
19152    case E_QImode:
19153      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
19154
19155    case E_DImode:
19156      return TARGET_64BIT;
19157
19158    default:
19159      return false;
19160    }
19161}
19162
19163/* Implement TARGET_MODES_TIEABLE_P.
19164
19165   Return true if MODE1 is accessible in a register that can hold MODE2
19166   without copying.  That is, all register classes that can hold MODE2
19167   can also hold MODE1.  */
19168
19169static bool
19170ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
19171{
19172  if (mode1 == mode2)
19173    return true;
19174
19175  if (ix86_tieable_integer_mode_p (mode1)
19176      && ix86_tieable_integer_mode_p (mode2))
19177    return true;
19178
19179  /* MODE2 being XFmode implies fp stack or general regs, which means we
19180     can tie any smaller floating point modes to it.  Note that we do not
19181     tie this with TFmode.  */
19182  if (mode2 == XFmode)
19183    return mode1 == SFmode || mode1 == DFmode;
19184
19185  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
19186     that we can tie it with SFmode.  */
19187  if (mode2 == DFmode)
19188    return mode1 == SFmode;
19189
19190  /* If MODE2 is only appropriate for an SSE register, then tie with
19191     any other mode acceptable to SSE registers.  */
19192  if (GET_MODE_SIZE (mode2) == 64
19193      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19194    return (GET_MODE_SIZE (mode1) == 64
19195	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19196  if (GET_MODE_SIZE (mode2) == 32
19197      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19198    return (GET_MODE_SIZE (mode1) == 32
19199	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19200  if (GET_MODE_SIZE (mode2) == 16
19201      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19202    return (GET_MODE_SIZE (mode1) == 16
19203	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19204
19205  /* If MODE2 is appropriate for an MMX register, then tie
19206     with any other mode acceptable to MMX registers.  */
19207  if (GET_MODE_SIZE (mode2) == 8
19208      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
19209    return (GET_MODE_SIZE (mode1) == 8
19210	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
19211
19212  return false;
19213}
19214
19215/* Return the cost of moving between two registers of mode MODE.  */
19216
19217static int
19218ix86_set_reg_reg_cost (machine_mode mode)
19219{
19220  unsigned int units = UNITS_PER_WORD;
19221
19222  switch (GET_MODE_CLASS (mode))
19223    {
19224    default:
19225      break;
19226
19227    case MODE_CC:
19228      units = GET_MODE_SIZE (CCmode);
19229      break;
19230
19231    case MODE_FLOAT:
19232      if ((TARGET_SSE && mode == TFmode)
19233	  || (TARGET_80387 && mode == XFmode)
19234	  || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
19235	  || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
19236	units = GET_MODE_SIZE (mode);
19237      break;
19238
19239    case MODE_COMPLEX_FLOAT:
19240      if ((TARGET_SSE && mode == TCmode)
19241	  || (TARGET_80387 && mode == XCmode)
19242	  || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
19243	  || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
19244	units = GET_MODE_SIZE (mode);
19245      break;
19246
19247    case MODE_VECTOR_INT:
19248    case MODE_VECTOR_FLOAT:
19249      if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
19250	  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
19251	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19252	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19253	  || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
19254	      && VALID_MMX_REG_MODE (mode)))
19255	units = GET_MODE_SIZE (mode);
19256    }
19257
19258  /* Return the cost of moving between two registers of mode MODE,
19259     assuming that the move will be in pieces of at most UNITS bytes.  */
19260  return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
19261}
19262
19263/* Return cost of vector operation in MODE given that scalar version has
19264   COST.  */
19265
19266static int
19267ix86_vec_cost (machine_mode mode, int cost)
19268{
19269  if (!VECTOR_MODE_P (mode))
19270    return cost;
19271
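  /* On processors that split 128-bit or 256-bit vector operations into
     narrower halves, scale the scalar cost by the number of pieces.  */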
19272  if (GET_MODE_BITSIZE (mode) == 128
19273      && TARGET_SSE_SPLIT_REGS)
19274    return cost * 2;
19275  if (GET_MODE_BITSIZE (mode) > 128
19276      && TARGET_AVX256_SPLIT_REGS)
19277    return cost * GET_MODE_BITSIZE (mode) / 128;
19278  return cost;
19279}
19280
19281/* Return cost of multiplication in MODE.  */
19282
19283static int
19284ix86_multiplication_cost (const struct processor_costs *cost,
19285			  enum machine_mode mode)
19286{
19287  machine_mode inner_mode = mode;
19288  if (VECTOR_MODE_P (mode))
19289    inner_mode = GET_MODE_INNER (mode);
19290
19291  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19292    return inner_mode == DFmode ? cost->mulsd : cost->mulss;
19293  else if (X87_FLOAT_MODE_P (mode))
19294    return cost->fmul;
19295  else if (FLOAT_MODE_P (mode))
19296    return  ix86_vec_cost (mode,
19297			   inner_mode == DFmode ? cost->mulsd : cost->mulss);
19298  else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19299    {
19300      /* vpmullq is used in this case. No emulation is needed.  */
19301      if (TARGET_AVX512DQ)
19302	return ix86_vec_cost (mode, cost->mulss);
19303
19304      /* V*QImode is emulated with 7-13 insns.  */
19305      if (mode == V16QImode || mode == V32QImode)
19306	{
19307	  int extra = 11;
19308	  if (TARGET_XOP && mode == V16QImode)
19309	    extra = 5;
19310	  else if (TARGET_SSSE3)
19311	    extra = 6;
19312	  return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * extra);
19313	}
19314      /* V*DImode is emulated with 5-8 insns.  */
19315      else if (mode == V2DImode || mode == V4DImode)
19316	{
19317	  if (TARGET_XOP && mode == V2DImode)
19318	    return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 3);
19319	  else
19320	    return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
19321	}
19322      /* Without sse4.1, we don't have PMULLD; it's emulated with 7
19323	 insns, including two PMULUDQ.  */
19324      else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
19325	return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
19326      else
19327	return ix86_vec_cost (mode, cost->mulss);
19328    }
19329  else
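    /* Scalar integer multiply: mode-dependent setup cost plus a per-bit
       cost, assuming on average 7 set bits in the multiplier (the same
       arbitrary guess used for the MULT case in ix86_rtx_costs).  */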
19330    return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
19331}
19332
/* Return cost of division in MODE.  */
19334
19335static int
19336ix86_division_cost (const struct processor_costs *cost,
19337			  enum machine_mode mode)
19338{
19339  machine_mode inner_mode = mode;
19340  if (VECTOR_MODE_P (mode))
19341    inner_mode = GET_MODE_INNER (mode);
19342
19343  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19344    return inner_mode == DFmode ? cost->divsd : cost->divss;
19345  else if (X87_FLOAT_MODE_P (mode))
19346    return cost->fdiv;
19347  else if (FLOAT_MODE_P (mode))
19348    return ix86_vec_cost (mode,
19349			  inner_mode == DFmode ? cost->divsd : cost->divss);
19350  else
19351    return cost->divide[MODE_INDEX (mode)];
19352}
19353
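/* When optimizing for size, costs are measured in approximate instruction
   bytes rather than in insn counts.  */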
19354#define COSTS_N_BYTES(N) ((N) * 2)
19355
/* Return cost of shift in MODE.
   If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
   AND_IN_OP1 specifies whether op1 is the result of an AND, and
   SHIFT_AND_TRUNCATE whether op1 is a SUBREG of such an AND result.

   SKIP_OP0/1 is set to true if the cost of OP0/1 should be ignored.  */
19362
19363static int
19364ix86_shift_rotate_cost (const struct processor_costs *cost,
19365			enum machine_mode mode, bool constant_op1,
19366			HOST_WIDE_INT op1_val,
19367			bool speed,
19368			bool and_in_op1,
19369			bool shift_and_truncate,
19370			bool *skip_op0, bool *skip_op1)
19371{
19372  if (skip_op0)
19373    *skip_op0 = *skip_op1 = false;
19374  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19375    {
19376      /* V*QImode is emulated with 1-11 insns.  */
19377      if (mode == V16QImode || mode == V32QImode)
19378	{
19379	  int count = 11;
19380	  if (TARGET_XOP && mode == V16QImode)
19381	    {
	      /* For XOP we use vpshab, which requires a broadcast of the
		 value to the variable shift insn.  For constants this
		 means a V16QImode constant in memory; even when we can
		 perform the shift with one insn, set the cost so as to
		 prefer paddb.  */
19386	      if (constant_op1)
19387		{
19388		  if (skip_op1)
19389		    *skip_op1 = true;
19390		  return ix86_vec_cost (mode,
19391					cost->sse_op
19392					+ (speed
19393					   ? 2
19394					   : COSTS_N_BYTES
19395					       (GET_MODE_UNIT_SIZE (mode))));
19396		}
19397	      count = 3;
19398	    }
19399	  else if (TARGET_SSSE3)
19400	    count = 7;
19401	  return ix86_vec_cost (mode, cost->sse_op * count);
19402	}
19403      else
19404	return ix86_vec_cost (mode, cost->sse_op);
19405    }
19406  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19407    {
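      /* Shifts of values wider than a word are costed as two single-word
	 shifts for a constant count of at most 32, and as one shift plus
	 two extra insns for larger counts; variable counts are costed much
	 higher unless the count has already been masked with an AND.  */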
19408      if (constant_op1)
19409	{
19410	  if (op1_val > 32)
19411	    return cost->shift_const + COSTS_N_INSNS (2);
19412	  else
19413	    return cost->shift_const * 2;
19414	}
19415      else
19416	{
19417	  if (and_in_op1)
19418	    return cost->shift_var * 2;
19419	  else
19420	    return cost->shift_var * 6 + COSTS_N_INSNS (2);
19421	}
19422    }
19423  else
19424    {
19425      if (constant_op1)
19426	return cost->shift_const;
19427      else if (shift_and_truncate)
19428	{
19429	  if (skip_op0)
19430	    *skip_op0 = *skip_op1 = true;
19431	  /* Return the cost after shift-and truncation.  */
19432	  return cost->shift_var;
19433	}
19434      else
19435	return cost->shift_var;
19436    }
19437  return cost->shift_const;
19438}
19439
19440/* Compute a (partial) cost for rtx X.  Return true if the complete
19441   cost has been computed, and false if subexpressions should be
19442   scanned.  In either case, *TOTAL contains the cost result.  */
19443
19444static bool
19445ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
19446		int *total, bool speed)
19447{
19448  rtx mask;
19449  enum rtx_code code = GET_CODE (x);
19450  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
19451  const struct processor_costs *cost
19452    = speed ? ix86_tune_cost : &ix86_size_cost;
19453  int src_cost;
19454
19455  switch (code)
19456    {
19457    case SET:
19458      if (register_operand (SET_DEST (x), VOIDmode)
19459	  && register_operand (SET_SRC (x), VOIDmode))
19460	{
19461	  *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
19462	  return true;
19463	}
19464
19465      if (register_operand (SET_SRC (x), VOIDmode))
19466	/* Avoid potentially incorrect high cost from rtx_costs
19467	   for non-tieable SUBREGs.  */
19468	src_cost = 0;
19469      else
19470	{
19471	  src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
19472
19473	  if (CONSTANT_P (SET_SRC (x)))
19474	    /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
19475	       a small value, possibly zero for cheap constants.  */
19476	    src_cost += COSTS_N_INSNS (1);
19477	}
19478
19479      *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
19480      return true;
19481
19482    case CONST_INT:
19483    case CONST:
19484    case LABEL_REF:
19485    case SYMBOL_REF:
19486      if (x86_64_immediate_operand (x, VOIDmode))
19487	*total = 0;
      else
19489	*total = 1;
19490      return true;
19491
19492    case CONST_DOUBLE:
19493      if (IS_STACK_MODE (mode))
19494	switch (standard_80387_constant_p (x))
19495	  {
19496	  case -1:
19497	  case 0:
19498	    break;
19499	  case 1: /* 0.0 */
19500	    *total = 1;
19501	    return true;
19502	  default: /* Other constants */
19503	    *total = 2;
19504	    return true;
19505	  }
19506      /* FALLTHRU */
19507
19508    case CONST_VECTOR:
19509      switch (standard_sse_constant_p (x, mode))
19510	{
19511	case 0:
19512	  break;
19513	case 1:  /* 0: xor eliminates false dependency */
19514	  *total = 0;
19515	  return true;
19516	default: /* -1: cmp contains false dependency */
19517	  *total = 1;
19518	  return true;
19519	}
19520      /* FALLTHRU */
19521
19522    case CONST_WIDE_INT:
19523      /* Fall back to (MEM (SYMBOL_REF)), since that's where
19524	 it'll probably end up.  Add a penalty for size.  */
19525      *total = (COSTS_N_INSNS (1)
19526		+ (!TARGET_64BIT && flag_pic)
19527		+ (GET_MODE_SIZE (mode) <= 4
19528		   ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
19529      return true;
19530
19531    case ZERO_EXTEND:
      /* The zero extension is often completely free on x86_64, so make
	 it as cheap as possible.  */
19534      if (TARGET_64BIT && mode == DImode
19535	  && GET_MODE (XEXP (x, 0)) == SImode)
19536	*total = 1;
19537      else if (TARGET_ZERO_EXTEND_WITH_AND)
19538	*total = cost->add;
19539      else
19540	*total = cost->movzx;
19541      return false;
19542
19543    case SIGN_EXTEND:
19544      *total = cost->movsx;
19545      return false;
19546
19547    case ASHIFT:
19548      if (SCALAR_INT_MODE_P (mode)
19549	  && GET_MODE_SIZE (mode) < UNITS_PER_WORD
19550	  && CONST_INT_P (XEXP (x, 1)))
19551	{
19552	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19553	  if (value == 1)
19554	    {
19555	      *total = cost->add;
19556	      return false;
19557	    }
19558	  if ((value == 2 || value == 3)
19559	      && cost->lea <= cost->shift_const)
19560	    {
19561	      *total = cost->lea;
19562	      return false;
19563	    }
19564	}
19565      /* FALLTHRU */
19566
19567    case ROTATE:
19568    case ASHIFTRT:
19569    case LSHIFTRT:
19570    case ROTATERT:
19571      bool skip_op0, skip_op1;
19572      *total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)),
19573				       CONST_INT_P (XEXP (x, 1))
19574					 ? INTVAL (XEXP (x, 1)) : -1,
19575				       speed,
19576				       GET_CODE (XEXP (x, 1)) == AND,
19577				       SUBREG_P (XEXP (x, 1))
19578				       && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND,
19579				       &skip_op0, &skip_op1);
19580      if (skip_op0 || skip_op1)
19581	{
19582	  if (!skip_op0)
19583	    *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
19584	  if (!skip_op1)
19585	    *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
19586	  return true;
19587	}
19588      return false;
19589
19590    case FMA:
19591      {
19592	rtx sub;
19593
19594        gcc_assert (FLOAT_MODE_P (mode));
19595        gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
19596
19597        *total = ix86_vec_cost (mode,
19598				GET_MODE_INNER (mode) == SFmode
19599				? cost->fmass : cost->fmasd);
19600	*total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
19601
19602        /* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
19603	sub = XEXP (x, 0);
19604	if (GET_CODE (sub) == NEG)
19605	  sub = XEXP (sub, 0);
19606	*total += rtx_cost (sub, mode, FMA, 0, speed);
19607
19608	sub = XEXP (x, 2);
19609	if (GET_CODE (sub) == NEG)
19610	  sub = XEXP (sub, 0);
19611	*total += rtx_cost (sub, mode, FMA, 2, speed);
19612	return true;
19613      }
19614
19615    case MULT:
19616      if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
19617	{
19618	  rtx op0 = XEXP (x, 0);
19619	  rtx op1 = XEXP (x, 1);
19620	  int nbits;
19621	  if (CONST_INT_P (XEXP (x, 1)))
19622	    {
19623	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19624	      for (nbits = 0; value != 0; value &= value - 1)
19625	        nbits++;
19626	    }
19627	  else
19628	    /* This is arbitrary.  */
19629	    nbits = 7;
19630
19631	  /* Compute costs correctly for widening multiplication.  */
19632	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19633	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19634	         == GET_MODE_SIZE (mode))
19635	    {
19636	      int is_mulwiden = 0;
19637	      machine_mode inner_mode = GET_MODE (op0);
19638
19639	      if (GET_CODE (op0) == GET_CODE (op1))
19640		is_mulwiden = 1, op1 = XEXP (op1, 0);
19641	      else if (CONST_INT_P (op1))
19642		{
19643		  if (GET_CODE (op0) == SIGN_EXTEND)
19644		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19645			          == INTVAL (op1);
19646		  else
19647		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19648	        }
19649
19650	      if (is_mulwiden)
19651	        op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19652	    }
19653
19654  	  *total = (cost->mult_init[MODE_INDEX (mode)]
19655		    + nbits * cost->mult_bit
19656	            + rtx_cost (op0, mode, outer_code, opno, speed)
19657		    + rtx_cost (op1, mode, outer_code, opno, speed));
19658
19659          return true;
19660	}
19661      *total = ix86_multiplication_cost (cost, mode);
19662      return false;
19663
19664    case DIV:
19665    case UDIV:
19666    case MOD:
19667    case UMOD:
19668      *total = ix86_division_cost (cost, mode);
19669      return false;
19670
19671    case PLUS:
19672      if (GET_MODE_CLASS (mode) == MODE_INT
19673	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
19674	{
19675	  if (GET_CODE (XEXP (x, 0)) == PLUS
19676	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19677	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19678	      && CONSTANT_P (XEXP (x, 1)))
19679	    {
19680	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19681	      if (val == 2 || val == 4 || val == 8)
19682		{
19683		  *total = cost->lea;
19684		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
19685				      outer_code, opno, speed);
19686		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
19687				      outer_code, opno, speed);
19688		  *total += rtx_cost (XEXP (x, 1), mode,
19689				      outer_code, opno, speed);
19690		  return true;
19691		}
19692	    }
19693	  else if (GET_CODE (XEXP (x, 0)) == MULT
19694		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19695	    {
19696	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19697	      if (val == 2 || val == 4 || val == 8)
19698		{
19699		  *total = cost->lea;
19700		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
19701				      outer_code, opno, speed);
19702		  *total += rtx_cost (XEXP (x, 1), mode,
19703				      outer_code, opno, speed);
19704		  return true;
19705		}
19706	    }
19707	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
19708	    {
19709	      /* Add with carry, ignore the cost of adding a carry flag.  */
19710	      if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode))
19711		*total = cost->add;
19712	      else
19713		{
19714		  *total = cost->lea;
19715		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
19716				      outer_code, opno, speed);
19717		}
19718
19719	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
19720				  outer_code, opno, speed);
19721	      *total += rtx_cost (XEXP (x, 1), mode,
19722				  outer_code, opno, speed);
19723	      return true;
19724	    }
19725	}
19726      /* FALLTHRU */
19727
19728    case MINUS:
19729      /* Subtract with borrow, ignore the cost of subtracting a carry flag.  */
19730      if (GET_MODE_CLASS (mode) == MODE_INT
19731	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
19732	  && GET_CODE (XEXP (x, 0)) == MINUS
19733	  && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode))
19734	{
19735	  *total = cost->add;
19736	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
19737			      outer_code, opno, speed);
19738	  *total += rtx_cost (XEXP (x, 1), mode,
19739			      outer_code, opno, speed);
19740	  return true;
19741	}
19742
19743      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19744	{
19745	  *total = cost->addss;
19746	  return false;
19747	}
19748      else if (X87_FLOAT_MODE_P (mode))
19749	{
19750	  *total = cost->fadd;
19751	  return false;
19752	}
19753      else if (FLOAT_MODE_P (mode))
19754	{
19755	  *total = ix86_vec_cost (mode, cost->addss);
19756	  return false;
19757	}
19758      /* FALLTHRU */
19759
19760    case AND:
19761    case IOR:
19762    case XOR:
19763      if (GET_MODE_CLASS (mode) == MODE_INT
19764	  && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19765	{
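	  /* Doubleword logical operations are split into two word-sized
	     insns; the cost of each operand is doubled unless the operand
	     is already DImode.  */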
19766	  *total = (cost->add * 2
19767		    + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
19768		       << (GET_MODE (XEXP (x, 0)) != DImode))
19769		    + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
19770	               << (GET_MODE (XEXP (x, 1)) != DImode)));
19771	  return true;
19772	}
19773      /* FALLTHRU */
19774
19775    case NEG:
19776      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19777	{
19778	  *total = cost->sse_op;
19779	  return false;
19780	}
19781      else if (X87_FLOAT_MODE_P (mode))
19782	{
19783	  *total = cost->fchs;
19784	  return false;
19785	}
19786      else if (FLOAT_MODE_P (mode))
19787	{
19788	  *total = ix86_vec_cost (mode, cost->sse_op);
19789	  return false;
19790	}
19791      /* FALLTHRU */
19792
19793    case NOT:
19794      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19795	*total = ix86_vec_cost (mode, cost->sse_op);
19796      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19797	*total = cost->add * 2;
19798      else
19799	*total = cost->add;
19800      return false;
19801
19802    case COMPARE:
19803      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19804	  && XEXP (XEXP (x, 0), 1) == const1_rtx
19805	  && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19806	  && XEXP (x, 1) == const0_rtx)
19807	{
19808	  /* This kind of construct is implemented using test[bwl].
19809	     Treat it as if we had an AND.  */
19810	  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
19811	  *total = (cost->add
19812		    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
19813				opno, speed)
19814		    + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
19815	  return true;
19816	}
19817
19818      if (GET_CODE (XEXP (x, 0)) == PLUS
19819	  && rtx_equal_p (XEXP (XEXP (x, 0), 0), XEXP (x, 1)))
19820	{
19821	  /* This is an overflow detection, count it as a normal compare.  */
19822	  *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
19823			     COMPARE, 0, speed);
19824	  return true;
19825	}
19826
19827      /* The embedded comparison operand is completely free.  */
19828      if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
19829	  && XEXP (x, 1) == const0_rtx)
19830	*total = 0;
19831
19832      return false;
19833
19834    case FLOAT_EXTEND:
19835      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
19836	*total = 0;
19837      else
19838        *total = ix86_vec_cost (mode, cost->addss);
19839      return false;
19840
19841    case FLOAT_TRUNCATE:
19842      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
19843	*total = cost->fadd;
19844      else
19845        *total = ix86_vec_cost (mode, cost->addss);
19846      return false;
19847
19848    case ABS:
      /* SSE requires a memory load for the constant operand.  It may make
	 sense to account for this.  Of course the constant operand may or
	 may not be reused.  */
19852      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19853	*total = cost->sse_op;
19854      else if (X87_FLOAT_MODE_P (mode))
19855	*total = cost->fabs;
19856      else if (FLOAT_MODE_P (mode))
19857	*total = ix86_vec_cost (mode, cost->sse_op);
19858      return false;
19859
19860    case SQRT:
19861      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19862	*total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
19863      else if (X87_FLOAT_MODE_P (mode))
19864	*total = cost->fsqrt;
19865      else if (FLOAT_MODE_P (mode))
19866	*total = ix86_vec_cost (mode,
19867				mode == SFmode ? cost->sqrtss : cost->sqrtsd);
19868      return false;
19869
19870    case UNSPEC:
19871      if (XINT (x, 1) == UNSPEC_TP)
19872	*total = 0;
19873      return false;
19874
19875    case VEC_SELECT:
19876    case VEC_CONCAT:
19877    case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
	 recognizable, in which case they all pretty much have the
	 same cost.  */
      *total = cost->sse_op;
      return true;
19883    case VEC_MERGE:
19884      mask = XEXP (x, 2);
      /* This is a masked instruction; assume the same cost
	 as the non-masked variant.  */
19887      if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
19888	*total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
19889      else
19890	*total = cost->sse_op;
19891      return true;
19892
19893    default:
19894      return false;
19895    }
19896}
19897
19898#if TARGET_MACHO
19899
19900static int current_machopic_label_num;
19901
19902/* Given a symbol name and its associated stub, write out the
19903   definition of the stub.  */
19904
19905void
19906machopic_output_stub (FILE *file, const char *symb, const char *stub)
19907{
19908  unsigned int length;
19909  char *binder_name, *symbol_name, lazy_ptr_name[32];
19910  int label = ++current_machopic_label_num;
19911
19912  /* For 64-bit we shouldn't get here.  */
19913  gcc_assert (!TARGET_64BIT);
19914
19915  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
19916  symb = targetm.strip_name_encoding (symb);
19917
19918  length = strlen (stub);
19919  binder_name = XALLOCAVEC (char, length + 32);
19920  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19921
19922  length = strlen (symb);
19923  symbol_name = XALLOCAVEC (char, length + 32);
19924  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19925
19926  sprintf (lazy_ptr_name, "L%d$lz", label);
19927
19928  if (MACHOPIC_ATT_STUB)
19929    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
19930  else if (MACHOPIC_PURE)
19931    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
19932  else
19933    switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19934
19935  fprintf (file, "%s:\n", stub);
19936  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19937
19938  if (MACHOPIC_ATT_STUB)
19939    {
19940      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
19941    }
19942  else if (MACHOPIC_PURE)
19943    {
19944      /* PIC stub.  */
19945      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
19946      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19947      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
19948      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
19949	       label, lazy_ptr_name, label);
19950      fprintf (file, "\tjmp\t*%%ecx\n");
19951    }
19952  else
19953    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19954
19955  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
19956     it needs no stub-binding-helper.  */
19957  if (MACHOPIC_ATT_STUB)
19958    return;
19959
19960  fprintf (file, "%s:\n", binder_name);
19961
19962  if (MACHOPIC_PURE)
19963    {
19964      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
19965      fprintf (file, "\tpushl\t%%ecx\n");
19966    }
19967  else
19968    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19969
19970  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
19971
19972  /* N.B. Keep the correspondence of these
19973     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
19974     old-pic/new-pic/non-pic stubs; altering this will break
19975     compatibility with existing dylibs.  */
19976  if (MACHOPIC_PURE)
19977    {
19978      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
19979      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
19980    }
19981  else
19982    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
19984
19985  fprintf (file, "%s:\n", lazy_ptr_name);
19986  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19987  fprintf (file, ASM_LONG "%s\n", binder_name);
19988}
19989#endif /* TARGET_MACHO */
19990
/* Order the registers for the register allocator.  */
19992
19993void
19994x86_order_regs_for_local_alloc (void)
19995{
19996   int pos = 0;
19997   int i;
19998
   /* First allocate the local (call-used) general purpose registers.  */
20000   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20001     if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
20002	reg_alloc_order [pos++] = i;
20003
   /* Then the global (call-saved) general purpose registers.  */
20005   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20006     if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
20007	reg_alloc_order [pos++] = i;
20008
20009   /* x87 registers come first in case we are doing FP math
20010      using them.  */
20011   if (!TARGET_SSE_MATH)
20012     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20013       reg_alloc_order [pos++] = i;
20014
20015   /* SSE registers.  */
20016   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
20017     reg_alloc_order [pos++] = i;
20018   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
20019     reg_alloc_order [pos++] = i;
20020
20021   /* Extended REX SSE registers.  */
20022   for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
20023     reg_alloc_order [pos++] = i;
20024
   /* Mask registers.  */
20026   for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
20027     reg_alloc_order [pos++] = i;
20028
20029   /* x87 registers.  */
20030   if (TARGET_SSE_MATH)
20031     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20032       reg_alloc_order [pos++] = i;
20033
20034   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
20035     reg_alloc_order [pos++] = i;
20036
   /* Initialize the rest of the array, as we do not allocate some
      registers at all.  */
20039   while (pos < FIRST_PSEUDO_REGISTER)
20040     reg_alloc_order [pos++] = 0;
20041}
20042
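/* Implement TARGET_MS_BITFIELD_LAYOUT_P.  Return true if bit-fields in
   RECORD_TYPE should be laid out following the MS rules.  */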
20043static bool
20044ix86_ms_bitfield_layout_p (const_tree record_type)
20045{
20046  return ((TARGET_MS_BITFIELD_LAYOUT
20047	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20048          || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
20049}
20050
/* Return an expression indicating where the `this' parameter is
   located on entry to FUNCTION.  */
20053
20054static rtx
20055x86_this_parameter (tree function)
20056{
20057  tree type = TREE_TYPE (function);
20058  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
20059  int nregs;
20060
20061  if (TARGET_64BIT)
20062    {
20063      const int *parm_regs;
20064
20065      if (ix86_function_type_abi (type) == MS_ABI)
20066        parm_regs = x86_64_ms_abi_int_parameter_registers;
20067      else
20068        parm_regs = x86_64_int_parameter_registers;
20069      return gen_rtx_REG (Pmode, parm_regs[aggr]);
20070    }
20071
20072  nregs = ix86_function_regparm (type, function);
20073
20074  if (nregs > 0 && !stdarg_p (type))
20075    {
20076      int regno;
20077      unsigned int ccvt = ix86_get_callcvt (type);
20078
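      /* With fastcall, `this' is passed in %ecx; when the return value is
	 an aggregate returned via a hidden pointer, that pointer takes
	 %ecx and `this' moves to %edx.  */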
20079      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
20080	regno = aggr ? DX_REG : CX_REG;
20081      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
20082        {
20083	  regno = CX_REG;
20084	  if (aggr)
20085	    return gen_rtx_MEM (SImode,
20086				plus_constant (Pmode, stack_pointer_rtx, 4));
20087	}
20088      else
20089        {
20090	  regno = AX_REG;
20091	  if (aggr)
20092	    {
20093	      regno = DX_REG;
20094	      if (nregs == 1)
20095		return gen_rtx_MEM (SImode,
20096				    plus_constant (Pmode,
20097						   stack_pointer_rtx, 4));
20098	    }
20099	}
20100      return gen_rtx_REG (SImode, regno);
20101    }
20102
20103  return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
20104					     aggr ? 8 : 4));
20105}
20106
20107/* Determine whether x86_output_mi_thunk can succeed.  */
20108
20109static bool
20110x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
20111			 const_tree function)
20112{
20113  /* 64-bit can handle anything.  */
20114  if (TARGET_64BIT)
20115    return true;
20116
20117  /* For 32-bit, everything's fine if we have one free register.  */
20118  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
20119    return true;
20120
20121  /* Need a free register for vcall_offset.  */
20122  if (vcall_offset)
20123    return false;
20124
20125  /* Need a free register for GOT references.  */
20126  if (flag_pic && !targetm.binds_local_p (function))
20127    return false;
20128
20129  /* Otherwise ok.  */
20130  return true;
20131}
20132
20133/* Output the assembler code for a thunk function.  THUNK_DECL is the
20134   declaration for the thunk function itself, FUNCTION is the decl for
20135   the target function.  DELTA is an immediate constant offset to be
20136   added to THIS.  If VCALL_OFFSET is nonzero, the word at
20137   *(*this + vcall_offset) should be added to THIS.  */
20138
20139static void
20140x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
20141		     HOST_WIDE_INT vcall_offset, tree function)
20142{
20143  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
20144  rtx this_param = x86_this_parameter (function);
20145  rtx this_reg, tmp, fnaddr;
20146  unsigned int tmp_regno;
20147  rtx_insn *insn;
20148
20149  if (TARGET_64BIT)
20150    tmp_regno = R10_REG;
20151  else
20152    {
20153      unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
20154      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
20155	tmp_regno = AX_REG;
20156      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
20157	tmp_regno = DX_REG;
20158      else
20159	tmp_regno = CX_REG;
20160    }
20161
20162  emit_note (NOTE_INSN_PROLOGUE_END);
20163
  /* If CET branch protection is enabled, insert an ENDBR instruction.  */
20165  if ((flag_cf_protection & CF_BRANCH))
20166    emit_insn (gen_nop_endbr ());
20167
20168  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
20169     pull it in now and let DELTA benefit.  */
20170  if (REG_P (this_param))
20171    this_reg = this_param;
20172  else if (vcall_offset)
20173    {
20174      /* Put the this parameter into %eax.  */
20175      this_reg = gen_rtx_REG (Pmode, AX_REG);
20176      emit_move_insn (this_reg, this_param);
20177    }
20178  else
20179    this_reg = NULL_RTX;
20180
20181  /* Adjust the this parameter by a fixed constant.  */
20182  if (delta)
20183    {
20184      rtx delta_rtx = GEN_INT (delta);
20185      rtx delta_dst = this_reg ? this_reg : this_param;
20186
20187      if (TARGET_64BIT)
20188	{
20189	  if (!x86_64_general_operand (delta_rtx, Pmode))
20190	    {
20191	      tmp = gen_rtx_REG (Pmode, tmp_regno);
20192	      emit_move_insn (tmp, delta_rtx);
20193	      delta_rtx = tmp;
20194	    }
20195	}
20196
20197      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
20198    }
20199
20200  /* Adjust the this parameter by a value stored in the vtable.  */
20201  if (vcall_offset)
20202    {
20203      rtx vcall_addr, vcall_mem, this_mem;
20204
20205      tmp = gen_rtx_REG (Pmode, tmp_regno);
20206
20207      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
20208      if (Pmode != ptr_mode)
20209	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
20210      emit_move_insn (tmp, this_mem);
20211
20212      /* Adjust the this parameter.  */
20213      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
20214      if (TARGET_64BIT
20215	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
20216	{
20217	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
20218	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
20219	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
20220	}
20221
20222      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
20223      if (Pmode != ptr_mode)
20224	emit_insn (gen_addsi_1_zext (this_reg,
20225				     gen_rtx_REG (ptr_mode,
20226						  REGNO (this_reg)),
20227				     vcall_mem));
20228      else
20229	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
20230    }
20231
20232  /* If necessary, drop THIS back to its stack slot.  */
20233  if (this_reg && this_reg != this_param)
20234    emit_move_insn (this_param, this_reg);
20235
20236  fnaddr = XEXP (DECL_RTL (function), 0);
20237  if (TARGET_64BIT)
20238    {
20239      if (!flag_pic || targetm.binds_local_p (function)
20240	  || TARGET_PECOFF)
20241	;
20242      else
20243	{
20244	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
20245	  tmp = gen_rtx_CONST (Pmode, tmp);
20246	  fnaddr = gen_const_mem (Pmode, tmp);
20247	}
20248    }
20249  else
20250    {
20251      if (!flag_pic || targetm.binds_local_p (function))
20252	;
20253#if TARGET_MACHO
20254      else if (TARGET_MACHO)
20255	{
20256	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
20257	  fnaddr = XEXP (fnaddr, 0);
20258	}
20259#endif /* TARGET_MACHO */
20260      else
20261	{
20262	  tmp = gen_rtx_REG (Pmode, CX_REG);
20263	  output_set_got (tmp, NULL_RTX);
20264
20265	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
20266	  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
20267	  fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
20268	  fnaddr = gen_const_mem (Pmode, fnaddr);
20269	}
20270    }
20271
20272  /* Our sibling call patterns do not allow memories, because we have no
20273     predicate that can distinguish between frame and non-frame memory.
20274     For our purposes here, we can get away with (ab)using a jump pattern,
20275     because we're going to do no optimization.  */
20276  if (MEM_P (fnaddr))
20277    {
20278      if (sibcall_insn_operand (fnaddr, word_mode))
20279	{
20280	  fnaddr = XEXP (DECL_RTL (function), 0);
20281	  tmp = gen_rtx_MEM (QImode, fnaddr);
20282	  tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
20283	  tmp = emit_call_insn (tmp);
20284	  SIBLING_CALL_P (tmp) = 1;
20285	}
20286      else
20287	emit_jump_insn (gen_indirect_jump (fnaddr));
20288    }
20289  else
20290    {
20291      if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
20292	{
	  // CM_LARGE_PIC always uses a pseudo PIC register, which is
	  // uninitialized here.  Since FUNCTION is local and calling it
	  // doesn't go through the PLT, we use scratch register %r11 as
	  // the PIC register and initialize it here.
20297	  pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
20298	  ix86_init_large_pic_reg (tmp_regno);
20299	  fnaddr = legitimize_pic_address (fnaddr,
20300					   gen_rtx_REG (Pmode, tmp_regno));
20301	}
20302
20303      if (!sibcall_insn_operand (fnaddr, word_mode))
20304	{
20305	  tmp = gen_rtx_REG (word_mode, tmp_regno);
20306	  if (GET_MODE (fnaddr) != word_mode)
20307	    fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
20308	  emit_move_insn (tmp, fnaddr);
20309	  fnaddr = tmp;
20310	}
20311
20312      tmp = gen_rtx_MEM (QImode, fnaddr);
20313      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
20314      tmp = emit_call_insn (tmp);
20315      SIBLING_CALL_P (tmp) = 1;
20316    }
20317  emit_barrier ();
20318
20319  /* Emit just enough of rest_of_compilation to get the insns emitted.  */
20320  insn = get_insns ();
20321  shorten_branches (insn);
20322  assemble_start_function (thunk_fndecl, fnname);
20323  final_start_function (insn, file, 1);
20324  final (insn, file, 1);
20325  final_end_function ();
20326  assemble_end_function (thunk_fndecl, fnname);
20327}
20328
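/* Output directives at the start of the assembly file (the
   TARGET_ASM_FILE_START hook).  */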
20329static void
20330x86_file_start (void)
20331{
20332  default_file_start ();
20333  if (TARGET_16BIT)
20334    fputs ("\t.code16gcc\n", asm_out_file);
20335#if TARGET_MACHO
20336  darwin_file_start ();
20337#endif
20338  if (X86_FILE_START_VERSION_DIRECTIVE)
20339    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
20340  if (X86_FILE_START_FLTUSED)
20341    fputs ("\t.global\t__fltused\n", asm_out_file);
20342  if (ix86_asm_dialect == ASM_INTEL)
20343    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
20344}
20345
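/* Return the alignment in bits to use for a field of TYPE whose natural
   alignment is COMPUTED.  On 32-bit targets without -malign-double,
   double, long long and similar fields are capped at 32-bit alignment.  */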
20346int
20347x86_field_alignment (tree type, int computed)
20348{
20349  machine_mode mode;
20350
20351  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
20352    return computed;
20353  if (TARGET_IAMCU)
20354    return iamcu_alignment (type, computed);
20355  mode = TYPE_MODE (strip_array_types (type));
20356  if (mode == DFmode || mode == DCmode
20357      || GET_MODE_CLASS (mode) == MODE_INT
20358      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
20359    return MIN (32, computed);
20360  return computed;
20361}
20362
20363/* Print call to TARGET to FILE.  */
20364
20365static void
20366x86_print_call_or_nop (FILE *file, const char *target)
20367{
20368  if (flag_nop_mcount || !strcmp (target, "nop"))
20369    /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
20370    fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
20371  else
20372    fprintf (file, "1:\tcall\t%s\n", target);
20373}
20374
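/* If the current function has a "fentry_name" attribute, store its string
   value in *NAME and return true.  */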
20375static bool
20376current_fentry_name (const char **name)
20377{
20378  tree attr = lookup_attribute ("fentry_name",
20379				DECL_ATTRIBUTES (current_function_decl));
20380  if (!attr)
20381    return false;
20382  *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
20383  return true;
20384}
20385
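/* If the current function has a "fentry_section" attribute, store its
   string value in *NAME and return true.  */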
20386static bool
20387current_fentry_section (const char **name)
20388{
20389  tree attr = lookup_attribute ("fentry_section",
20390				DECL_ATTRIBUTES (current_function_decl));
20391  if (!attr)
20392    return false;
20393  *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
20394  return true;
20395}
20396
20397/* Output assembler code to FILE to increment profiler label # LABELNO
20398   for profiling a function entry.  */
20399void
20400x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
20401{
20402  if (cfun->machine->endbr_queued_at_entrance)
20403    fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
20404
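  /* Pick the profiler entry point: a per-function "fentry_name" attribute
     takes precedence, then the -mfentry-name= option, then the
     before-prologue entry point when -mfentry is in effect.  */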
20405  const char *mcount_name = MCOUNT_NAME;
20406
20407  if (current_fentry_name (&mcount_name))
20408    ;
20409  else if (fentry_name)
20410    mcount_name = fentry_name;
20411  else if (flag_fentry)
20412    mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
20413
20414  if (TARGET_64BIT)
20415    {
20416#ifndef NO_PROFILE_COUNTERS
20417      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
20418#endif
20419
20420      if (!TARGET_PECOFF && flag_pic)
20421	fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
20422      else
20423	x86_print_call_or_nop (file, mcount_name);
20424    }
20425  else if (flag_pic)
20426    {
20427#ifndef NO_PROFILE_COUNTERS
20428      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
20429	       LPREFIX, labelno);
20430#endif
20431      fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
20432    }
20433  else
20434    {
20435#ifndef NO_PROFILE_COUNTERS
20436      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
20437	       LPREFIX, labelno);
20438#endif
20439      x86_print_call_or_nop (file, mcount_name);
20440    }
20441
20442  if (flag_record_mcount
20443	|| lookup_attribute ("fentry_section",
20444                                DECL_ATTRIBUTES (current_function_decl)))
20445    {
20446      const char *sname = "__mcount_loc";
20447
20448      if (current_fentry_section (&sname))
20449	;
20450      else if (fentry_section)
20451	sname = fentry_section;
20452
20453      fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
20454      fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
20455      fprintf (file, "\t.previous\n");
20456    }
20457}
20458
20459/* We don't have exact information about the insn sizes, but we may assume
20460   quite safely that we are informed about all 1 byte insns and memory
20461   address sizes.  This is enough to eliminate unnecessary padding in
20462   99% of cases.  */
20463
20464int
20465ix86_min_insn_size (rtx_insn *insn)
20466{
20467  int l = 0, len;
20468
20469  if (!INSN_P (insn) || !active_insn_p (insn))
20470    return 0;
20471
  /* Discard the alignment insns we've emitted ourselves.  */
20473  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
20474      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
20475    return 0;
20476
  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
20479  if (CALL_P (insn)
20480      && symbolic_reference_mentioned_p (PATTERN (insn))
20481      && !SIBLING_CALL_P (insn))
20482    return 5;
20483  len = get_attr_length (insn);
20484  if (len <= 1)
20485    return 1;
20486
20487  /* For normal instructions we rely on get_attr_length being exact,
20488     with a few exceptions.  */
20489  if (!JUMP_P (insn))
20490    {
20491      enum attr_type type = get_attr_type (insn);
20492
20493      switch (type)
20494	{
20495	case TYPE_MULTI:
20496	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
20497	      || asm_noperands (PATTERN (insn)) >= 0)
20498	    return 0;
20499	  break;
20500	case TYPE_OTHER:
20501	case TYPE_FCMP:
20502	  break;
20503	default:
20504	  /* Otherwise trust get_attr_length.  */
20505	  return len;
20506	}
20507
20508      l = get_attr_length_address (insn);
20509      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
20510	l = 4;
20511    }
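  /* Count one opcode byte plus the address bytes when we know them;
     otherwise assume a two-byte instruction.  */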
20512  if (l)
20513    return 1+l;
20514  else
20515    return 2;
20516}
20517
20518#ifdef ASM_OUTPUT_MAX_SKIP_PAD
20519
/* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
   16-byte window.  */
20522
20523static void
20524ix86_avoid_jump_mispredicts (void)
20525{
20526  rtx_insn *insn, *start = get_insns ();
20527  int nbytes = 0, njumps = 0;
20528  bool isjump = false;
20529
  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of the instructions in the interval, including INSN but not
     START.  When NBYTES is smaller than 16 bytes, it is possible that
     the ends of START and INSN end up in the same 16-byte page.

     The smallest offset at which INSN can start is the case where START
     ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
     We add a p2align to a 16-byte window with maxskip
     15 - NBYTES + sizeof (INSN).

     Don't consider an asm goto as a jump: while it can contain a jump, it
     doesn't have to - control transfer to its label(s) can be performed
     through other means - and we also estimate the minimum length of all
     asm stmts as 0.  */
20543  for (insn = start; insn; insn = NEXT_INSN (insn))
20544    {
20545      int min_size;
20546
20547      if (LABEL_P (insn))
20548	{
20549	  align_flags alignment = label_to_alignment (insn);
20550	  int align = alignment.levels[0].log;
20551	  int max_skip = alignment.levels[0].maxskip;
20552
20553	  if (max_skip > 15)
20554	    max_skip = 15;
20555	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
20556	     already in the current 16 byte page, because otherwise
20557	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
20558	     bytes to reach 16 byte boundary.  */
20559	  if (align <= 0
20560	      || (align <= 3 && max_skip != (1 << align) - 1))
20561	    max_skip = 0;
20562	  if (dump_file)
20563	    fprintf (dump_file, "Label %i with max_skip %i\n",
20564		     INSN_UID (insn), max_skip);
20565	  if (max_skip)
20566	    {
20567	      while (nbytes + max_skip >= 16)
20568		{
20569		  start = NEXT_INSN (start);
20570		  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
20571		      || CALL_P (start))
20572		    njumps--, isjump = true;
20573		  else
20574		    isjump = false;
20575		  nbytes -= ix86_min_insn_size (start);
20576		}
20577	    }
20578	  continue;
20579	}
20580
20581      min_size = ix86_min_insn_size (insn);
20582      nbytes += min_size;
20583      if (dump_file)
20584	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
20585		 INSN_UID (insn), min_size);
20586      if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
20587	  || CALL_P (insn))
20588	njumps++;
20589      else
20590	continue;
20591
20592      while (njumps > 3)
20593	{
20594	  start = NEXT_INSN (start);
20595	  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
20596	      || CALL_P (start))
20597	    njumps--, isjump = true;
20598	  else
20599	    isjump = false;
20600	  nbytes -= ix86_min_insn_size (start);
20601	}
20602      gcc_assert (njumps >= 0);
20603      if (dump_file)
20604        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
20605		 INSN_UID (start), INSN_UID (insn), nbytes);
20606
20607      if (njumps == 3 && isjump && nbytes < 16)
20608	{
20609	  int padsize = 15 - nbytes + ix86_min_insn_size (insn);
20610
20611	  if (dump_file)
20612	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
20613		     INSN_UID (insn), padsize);
20614          emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
20615	}
20616    }
20617}
20618#endif
20619
/* AMD Athlon works faster
   when RET is not the destination of a conditional jump or directly
   preceded by another jump instruction.  We avoid the penalty by
   inserting a NOP just before the RET instruction in such cases.  */
20624static void
20625ix86_pad_returns (void)
20626{
20627  edge e;
20628  edge_iterator ei;
20629
20630  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20631    {
20632      basic_block bb = e->src;
20633      rtx_insn *ret = BB_END (bb);
20634      rtx_insn *prev;
20635      bool replace = false;
20636
20637      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
20638	  || optimize_bb_for_size_p (bb))
20639	continue;
20640      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
20641	if (active_insn_p (prev) || LABEL_P (prev))
20642	  break;
20643      if (prev && LABEL_P (prev))
20644	{
20645	  edge e;
20646	  edge_iterator ei;
20647
20648	  FOR_EACH_EDGE (e, ei, bb->preds)
20649	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
20650		&& !(e->flags & EDGE_FALLTHRU))
20651	      {
20652		replace = true;
20653		break;
20654	      }
20655	}
20656      if (!replace)
20657	{
20658	  prev = prev_active_insn (ret);
20659	  if (prev
20660	      && ((JUMP_P (prev) && any_condjump_p (prev))
20661		  || CALL_P (prev)))
20662	    replace = true;
	  /* Empty functions get a branch mispredict even when
	     the jump destination is not visible to us.  */
20665	  if (!prev && !optimize_function_for_size_p (cfun))
20666	    replace = true;
20667	}
20668      if (replace)
20669	{
20670	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
20671	  delete_insn (ret);
20672	}
20673    }
20674}
20675
20676/* Count the minimum number of instructions in BB.  Return 4 if the
20677   number of instructions >= 4.  */
20678
20679static int
20680ix86_count_insn_bb (basic_block bb)
20681{
20682  rtx_insn *insn;
20683  int insn_count = 0;
20684
20685  /* Count number of instructions in this block.  Return 4 if the number
20686     of instructions >= 4.  */
20687  FOR_BB_INSNS (bb, insn)
20688    {
      /* This can only happen in exit blocks.  */
20690      if (JUMP_P (insn)
20691	  && ANY_RETURN_P (PATTERN (insn)))
20692	break;
20693
20694      if (NONDEBUG_INSN_P (insn)
20695	  && GET_CODE (PATTERN (insn)) != USE
20696	  && GET_CODE (PATTERN (insn)) != CLOBBER)
20697	{
20698	  insn_count++;
20699	  if (insn_count >= 4)
20700	    return insn_count;
20701	}
20702    }
20703
20704  return insn_count;
20705}
20706
20707
20708/* Count the minimum number of instructions in code path in BB.
20709   Return 4 if the number of instructions >= 4.  */
20710
20711static int
20712ix86_count_insn (basic_block bb)
20713{
20714  edge e;
20715  edge_iterator ei;
20716  int min_prev_count;
20717
20718  /* Only bother counting instructions along paths with no
20719     more than 2 basic blocks between entry and exit.  Given
20720     that BB has an edge to exit, determine if a predecessor
20721     of BB has an edge from entry.  If so, compute the number
20722     of instructions in the predecessor block.  If there
20723     happen to be multiple such blocks, compute the minimum.  */
20724  min_prev_count = 4;
20725  FOR_EACH_EDGE (e, ei, bb->preds)
20726    {
20727      edge prev_e;
20728      edge_iterator prev_ei;
20729
20730      if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
20731	{
20732	  min_prev_count = 0;
20733	  break;
20734	}
20735      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
20736	{
20737	  if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
20738	    {
20739	      int count = ix86_count_insn_bb (e->src);
20740	      if (count < min_prev_count)
20741		min_prev_count = count;
20742	      break;
20743	    }
20744	}
20745    }
20746
20747  if (min_prev_count < 4)
20748    min_prev_count += ix86_count_insn_bb (bb);
20749
20750  return min_prev_count;
20751}
20752
/* Pad short functions to 4 instructions.  */
20754
20755static void
20756ix86_pad_short_function (void)
20757{
20758  edge e;
20759  edge_iterator ei;
20760
20761  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20762    {
20763      rtx_insn *ret = BB_END (e->src);
20764      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
20765	{
20766	  int insn_count = ix86_count_insn (e->src);
20767
20768	  /* Pad short function.  */
20769	  if (insn_count < 4)
20770	    {
20771	      rtx_insn *insn = ret;
20772
20773	      /* Find epilogue.  */
20774	      while (insn
20775		     && (!NOTE_P (insn)
20776			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
20777		insn = PREV_INSN (insn);
20778
20779	      if (!insn)
20780		insn = ret;
20781
20782	      /* Two NOPs count as one instruction.  */
20783	      insn_count = 2 * (4 - insn_count);
20784	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
20785	    }
20786	}
20787    }
20788}
20789
20790/* Fix up a Windows system unwinder issue.  If an EH region falls through into
20791   the epilogue, the Windows system unwinder will apply epilogue logic and
20792   produce incorrect offsets.  This can be avoided by adding a nop between
20793   the last insn that can throw and the first insn of the epilogue.  */
20794
20795static void
20796ix86_seh_fixup_eh_fallthru (void)
20797{
20798  edge e;
20799  edge_iterator ei;
20800
20801  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20802    {
20803      rtx_insn *insn, *next;
20804
20805      /* Find the beginning of the epilogue.  */
20806      for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
20807	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
20808	  break;
20809      if (insn == NULL)
20810	continue;
20811
20812      /* We only care about preceding insns that can throw.  */
20813      insn = prev_active_insn (insn);
20814      if (insn == NULL || !can_throw_internal (insn))
20815	continue;
20816
20817      /* Do not separate calls from their debug information.  */
20818      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
20819	if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
20820	  insn = next;
20821	else
20822	  break;
20823
20824      emit_insn_after (gen_nops (const1_rtx), insn);
20825    }
20826}
20827
20828/* Implement machine specific optimizations.  We implement padding of returns
20829   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
20830static void
20831ix86_reorg (void)
20832{
20833  /* We are freeing block_for_insn in the toplev to keep compatibility
20834     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
20835  compute_bb_for_insn ();
20836
20837  if (TARGET_SEH && current_function_has_exception_handlers ())
20838    ix86_seh_fixup_eh_fallthru ();
20839
20840  if (optimize && optimize_function_for_speed_p (cfun))
20841    {
20842      if (TARGET_PAD_SHORT_FUNCTION)
20843	ix86_pad_short_function ();
20844      else if (TARGET_PAD_RETURNS)
20845	ix86_pad_returns ();
20846#ifdef ASM_OUTPUT_MAX_SKIP_PAD
20847      if (TARGET_FOUR_JUMP_LIMIT)
20848	ix86_avoid_jump_mispredicts ();
20849#endif
20850    }
20851}
20852
/* Return true when INSN uses a QImode register that can only be encoded
   with a REX prefix.  */
20855bool
20856x86_extended_QIreg_mentioned_p (rtx_insn *insn)
20857{
20858  int i;
20859  extract_insn_cached (insn);
20860  for (i = 0; i < recog_data.n_operands; i++)
20861    if (GENERAL_REG_P (recog_data.operand[i])
20862	&& !QI_REGNO_P (REGNO (recog_data.operand[i])))
      return true;
20864  return false;
20865}
20866
/* Return true when INSN mentions a register that must be encoded using a REX
   prefix.  */
20869bool
20870x86_extended_reg_mentioned_p (rtx insn)
20871{
20872  subrtx_iterator::array_type array;
20873  FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
20874    {
20875      const_rtx x = *iter;
20876      if (REG_P (x)
20877	  && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
20878	return true;
20879    }
20880  return false;
20881}
20882
20883/* If profitable, negate (without causing overflow) integer constant
20884   of mode MODE at location LOC.  Return true in this case.  */
20885bool
20886x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
20887{
20888  HOST_WIDE_INT val;
20889
20890  if (!CONST_INT_P (*loc))
20891    return false;
20892
20893  switch (mode)
20894    {
20895    case E_DImode:
20896      /* DImode x86_64 constants must fit in 32 bits.  */
20897      gcc_assert (x86_64_immediate_operand (*loc, mode));
20898
20899      mode = SImode;
20900      break;
20901
20902    case E_SImode:
20903    case E_HImode:
20904    case E_QImode:
20905      break;
20906
20907    default:
20908      gcc_unreachable ();
20909    }
20910
20911  /* Avoid overflows.  */
20912  if (mode_signbit_p (mode, *loc))
20913    return false;
20914
20915  val = INTVAL (*loc);
20916
  /* Prefer `subl $4,%eax' over `addl $-4,%eax'.
     Exception: -128 fits in a sign-extended 8-bit immediate while 128 does
     not, so keep -128 as-is and negate 128 to -128 instead.  */
20919  if ((val < 0 && val != -128)
20920      || val == 128)
20921    {
20922      *loc = GEN_INT (-val);
20923      return true;
20924    }
20925
20926  return false;
20927}
20928
20929/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
20930   optabs would emit if we didn't have TFmode patterns.  */
20931
20932void
20933x86_emit_floatuns (rtx operands[2])
20934{
20935  rtx_code_label *neglab, *donelab;
20936  rtx i0, i1, f0, in, out;
20937  machine_mode mode, inmode;
20938
20939  inmode = GET_MODE (operands[1]);
20940  gcc_assert (inmode == SImode || inmode == DImode);
20941
20942  out = operands[0];
20943  in = force_reg (inmode, operands[1]);
20944  mode = GET_MODE (out);
20945  neglab = gen_label_rtx ();
20946  donelab = gen_label_rtx ();
20947  f0 = gen_reg_rtx (mode);
20948
20949  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20950
20951  expand_float (out, in, 0);
20952
20953  emit_jump_insn (gen_jump (donelab));
20954  emit_barrier ();
20955
20956  emit_label (neglab);
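  /* The input has its sign bit set when viewed as a signed value.  Halve it
     with a logical shift, OR the discarded low bit back in so the final
     rounding stays correct, convert, and then double the result.  */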
20957
20958  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20959			    1, OPTAB_DIRECT);
20960  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20961			    1, OPTAB_DIRECT);
20962  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20963
20964  expand_float (f0, i0, 0);
20965
20966  emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
20967
20968  emit_label (donelab);
20969}
20970
20971/* Target hook for scalar_mode_supported_p.  */
20972static bool
20973ix86_scalar_mode_supported_p (scalar_mode mode)
20974{
20975  if (DECIMAL_FLOAT_MODE_P (mode))
20976    return default_decimal_float_supported_p ();
20977  else if (mode == TFmode)
20978    return true;
20979  else
20980    return default_scalar_mode_supported_p (mode);
20981}
20982
20983/* Implements target hook vector_mode_supported_p.  */
20984static bool
20985ix86_vector_mode_supported_p (machine_mode mode)
20986{
20987  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20988    return true;
20989  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20990    return true;
20991  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
20992    return true;
20993  if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
20994    return true;
20995  if ((TARGET_MMX || TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode))
20996    return true;
20997  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20998    return true;
20999  return false;
21000}
21001
21002/* Target hook for c_mode_for_suffix.  */
21003static machine_mode
21004ix86_c_mode_for_suffix (char suffix)
21005{
21006  if (suffix == 'q')
21007    return TFmode;
21008  if (suffix == 'w')
21009    return XFmode;
21010
21011  return VOIDmode;
21012}
21013
21014/* Worker function for TARGET_MD_ASM_ADJUST.
21015
21016   We implement asm flag outputs, and maintain source compatibility
21017   with the old cc0-based compiler.  */
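
/* As an illustration (the asm statement below is only an example), a flag
   output constraint such as

     int eq;
     asm ("cmpl %2, %1" : "=@ccz" (eq) : "r" (a), "r" (b));

   requests the zero flag; the 'z' case below maps it to CCZmode with an EQ
   comparison against the flags register.  */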
21018
21019static rtx_insn *
21020ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
21021		    vec<const char *> &constraints,
21022		    vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
21023{
21024  bool saw_asm_flag = false;
21025
21026  start_sequence ();
21027  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
21028    {
21029      const char *con = constraints[i];
21030      if (strncmp (con, "=@cc", 4) != 0)
21031	continue;
21032      con += 4;
21033      if (strchr (con, ',') != NULL)
21034	{
21035	  error ("alternatives not allowed in %<asm%> flag output");
21036	  continue;
21037	}
21038
21039      bool invert = false;
21040      if (con[0] == 'n')
21041	invert = true, con++;
21042
21043      machine_mode mode = CCmode;
21044      rtx_code code = UNKNOWN;
21045
21046      switch (con[0])
21047	{
21048	case 'a':
21049	  if (con[1] == 0)
21050	    mode = CCAmode, code = EQ;
21051	  else if (con[1] == 'e' && con[2] == 0)
21052	    mode = CCCmode, code = NE;
21053	  break;
21054	case 'b':
21055	  if (con[1] == 0)
21056	    mode = CCCmode, code = EQ;
21057	  else if (con[1] == 'e' && con[2] == 0)
21058	    mode = CCAmode, code = NE;
21059	  break;
21060	case 'c':
21061	  if (con[1] == 0)
21062	    mode = CCCmode, code = EQ;
21063	  break;
21064	case 'e':
21065	  if (con[1] == 0)
21066	    mode = CCZmode, code = EQ;
21067	  break;
21068	case 'g':
21069	  if (con[1] == 0)
21070	    mode = CCGCmode, code = GT;
21071	  else if (con[1] == 'e' && con[2] == 0)
21072	    mode = CCGCmode, code = GE;
21073	  break;
21074	case 'l':
21075	  if (con[1] == 0)
21076	    mode = CCGCmode, code = LT;
21077	  else if (con[1] == 'e' && con[2] == 0)
21078	    mode = CCGCmode, code = LE;
21079	  break;
21080	case 'o':
21081	  if (con[1] == 0)
21082	    mode = CCOmode, code = EQ;
21083	  break;
21084	case 'p':
21085	  if (con[1] == 0)
21086	    mode = CCPmode, code = EQ;
21087	  break;
21088	case 's':
21089	  if (con[1] == 0)
21090	    mode = CCSmode, code = EQ;
21091	  break;
21092	case 'z':
21093	  if (con[1] == 0)
21094	    mode = CCZmode, code = EQ;
21095	  break;
21096	}
21097      if (code == UNKNOWN)
21098	{
21099	  error ("unknown %<asm%> flag output %qs", constraints[i]);
21100	  continue;
21101	}
21102      if (invert)
21103	code = reverse_condition (code);
21104
21105      rtx dest = outputs[i];
21106      if (!saw_asm_flag)
21107	{
21108	  /* This is the first asm flag output.  Here we put the flags
21109	     register in as the real output and adjust the condition to
21110	     allow it.  */
21111	  constraints[i] = "=Bf";
21112	  outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
21113	  saw_asm_flag = true;
21114	}
21115      else
21116	{
21117	  /* We don't need the flags register as output twice.  */
21118	  constraints[i] = "=X";
21119	  outputs[i] = gen_rtx_SCRATCH (SImode);
21120	}
21121
21122      rtx x = gen_rtx_REG (mode, FLAGS_REG);
21123      x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
21124
21125      machine_mode dest_mode = GET_MODE (dest);
21126      if (!SCALAR_INT_MODE_P (dest_mode))
21127	{
21128	  error ("invalid type for %<asm%> flag output");
21129	  continue;
21130	}
21131
21132      if (dest_mode == QImode)
21133	emit_insn (gen_rtx_SET (dest, x));
21134      else
21135	{
21136	  rtx reg = gen_reg_rtx (QImode);
21137	  emit_insn (gen_rtx_SET (reg, x));
21138
21139	  reg = convert_to_mode (dest_mode, reg, 1);
21140	  emit_move_insn (dest, reg);
21141	}
21142    }
21143
21144  rtx_insn *seq = get_insns ();
21145  end_sequence ();
21146
21147  if (saw_asm_flag)
21148    return seq;
21149  else
21150    {
21151      /* If we had no asm flag outputs, clobber the flags.  */
21152      clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
21153      SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
21154      return NULL;
21155    }
21156}
21157
/* Implement the targetm.encode_section_info hook.  */
21159
21160static void ATTRIBUTE_UNUSED
21161ix86_encode_section_info (tree decl, rtx rtl, int first)
21162{
21163  default_encode_section_info (decl, rtl, first);
21164
21165  if (ix86_in_large_data_p (decl))
21166    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
21167}
21168
21169/* Worker function for REVERSE_CONDITION.  */
21170
21171enum rtx_code
21172ix86_reverse_condition (enum rtx_code code, machine_mode mode)
21173{
21174  return (mode == CCFPmode
21175	  ? reverse_condition_maybe_unordered (code)
21176	  : reverse_condition (code));
21177}
21178
21179/* Output code to perform an x87 FP register move, from OPERANDS[1]
21180   to OPERANDS[0].  */
21181
21182const char *
21183output_387_reg_move (rtx_insn *insn, rtx *operands)
21184{
21185  if (REG_P (operands[0]))
21186    {
21187      if (REG_P (operands[1])
21188	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21189	{
21190	  if (REGNO (operands[0]) == FIRST_STACK_REG)
21191	    return output_387_ffreep (operands, 0);
21192	  return "fstp\t%y0";
21193	}
21194      if (STACK_TOP_P (operands[0]))
21195	return "fld%Z1\t%y1";
21196      return "fst\t%y0";
21197    }
21198  else if (MEM_P (operands[0]))
21199    {
21200      gcc_assert (REG_P (operands[1]));
21201      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21202	return "fstp%Z0\t%y0";
21203      else
21204	{
21205	  /* There is no non-popping store to memory for XFmode.
21206	     So if we need one, follow the store with a load.  */
21207	  if (GET_MODE (operands[0]) == XFmode)
21208	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
21209	  else
21210	    return "fst%Z0\t%y0";
21211	}
21212    }
21213  else
    gcc_unreachable ();
21215}
21216#ifdef TARGET_SOLARIS
21217/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
21218
21219static void
21220i386_solaris_elf_named_section (const char *name, unsigned int flags,
21221				tree decl)
21222{
21223  /* With Binutils 2.15, the "@unwind" marker must be specified on
21224     every occurrence of the ".eh_frame" section, not just the first
21225     one.  */
21226  if (TARGET_64BIT
21227      && strcmp (name, ".eh_frame") == 0)
21228    {
21229      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
21230	       flags & SECTION_WRITE ? "aw" : "a");
21231      return;
21232    }
21233
21234#ifndef USE_GAS
21235  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
21236    {
21237      solaris_elf_asm_comdat_section (name, flags, decl);
21238      return;
21239    }
21240
21241  /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
21242     SPARC assembler.  One cannot mix single-letter flags and #exclude, so
21243     only emit the latter here.  */
21244  if (flags & SECTION_EXCLUDE)
21245    {
21246      fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
21247      return;
21248    }
21249#endif
21250
21251  default_elf_asm_named_section (name, flags, decl);
21252}
21253#endif /* TARGET_SOLARIS */
21254
21255/* Return the mangling of TYPE if it is an extended fundamental type.  */
21256
21257static const char *
21258ix86_mangle_type (const_tree type)
21259{
21260  type = TYPE_MAIN_VARIANT (type);
21261
21262  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
21263      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
21264    return NULL;
21265
21266  switch (TYPE_MODE (type))
21267    {
21268    case E_TFmode:
21269      /* __float128 is "g".  */
21270      return "g";
21271    case E_XFmode:
21272      /* "long double" or __float80 is "e".  */
21273      return "e";
21274    default:
21275      return NULL;
21276    }
21277}
21278
21279static GTY(()) tree ix86_tls_stack_chk_guard_decl;
21280
21281static tree
21282ix86_stack_protect_guard (void)
21283{
21284  if (TARGET_SSP_TLS_GUARD)
21285    {
21286      tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
21287      int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
21288      tree type = build_qualified_type (type_node, qual);
21289      tree t;
21290
21291      if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
21292	{
21293	  t = ix86_tls_stack_chk_guard_decl;
21294
21295	  if (t == NULL)
21296	    {
21297	      rtx x;
21298
21299	      t = build_decl
21300		(UNKNOWN_LOCATION, VAR_DECL,
21301		 get_identifier (ix86_stack_protector_guard_symbol_str),
21302		 type);
21303	      TREE_STATIC (t) = 1;
21304	      TREE_PUBLIC (t) = 1;
21305	      DECL_EXTERNAL (t) = 1;
21306	      TREE_USED (t) = 1;
21307	      TREE_THIS_VOLATILE (t) = 1;
21308	      DECL_ARTIFICIAL (t) = 1;
21309	      DECL_IGNORED_P (t) = 1;
21310
21311	      /* Do not share RTL as the declaration is visible outside of
21312		 current function.  */
21313	      x = DECL_RTL (t);
21314	      RTX_FLAG (x, used) = 1;
21315
21316	      ix86_tls_stack_chk_guard_decl = t;
21317	    }
21318	}
21319      else
21320	{
21321	  tree asptrtype = build_pointer_type (type);
21322
21323	  t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
21324	  t = build2 (MEM_REF, asptrtype, t,
21325		      build_int_cst (asptrtype, 0));
21326	  TREE_THIS_VOLATILE (t) = 1;
21327	}
21328
21329      return t;
21330    }
21331
21332  return default_stack_protect_guard ();
21333}
21334
21335/* For 32-bit code we can save PIC register setup by using
21336   __stack_chk_fail_local hidden function instead of calling
21337   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
21338   register, so it is better to call __stack_chk_fail directly.  */
21339
21340static tree ATTRIBUTE_UNUSED
21341ix86_stack_protect_fail (void)
21342{
21343  return TARGET_64BIT
21344	 ? default_external_stack_protect_fail ()
21345	 : default_hidden_stack_protect_fail ();
21346}
21347
21348/* Select a format to encode pointers in exception handling data.  CODE
21349   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
21350   true if the symbol may be affected by dynamic relocations.
21351
21352   ??? All x86 object file formats are capable of representing this.
21353   After all, the relocation needed is the same as for the call insn.
21354   Whether or not a particular assembler allows us to enter such, I
21355   guess we'll have to see.  */
21356
21357int
21358asm_preferred_eh_data_format (int code, int global)
21359{
21360  /* PE-COFF is effectively always -fPIC because of the .reloc section.  */
21361  if (flag_pic || TARGET_PECOFF)
21362    {
21363      int type = DW_EH_PE_sdata8;
21364      if (!TARGET_64BIT
21365	  || ix86_cmodel == CM_SMALL_PIC
21366	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21367	type = DW_EH_PE_sdata4;
21368      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21369    }
21370
21371  if (ix86_cmodel == CM_SMALL
21372      || (ix86_cmodel == CM_MEDIUM && code))
21373    return DW_EH_PE_udata4;
21374
21375  return DW_EH_PE_absptr;
21376}
21377
21378/* Implement targetm.vectorize.builtin_vectorization_cost.  */
21379static int
21380ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
21381                                 tree vectype, int)
21382{
21383  bool fp = false;
21384  machine_mode mode = TImode;
21385  int index;
21386  if (vectype != NULL)
21387    {
21388      fp = FLOAT_TYPE_P (vectype);
21389      mode = TYPE_MODE (vectype);
21390    }
21391
21392  switch (type_of_cost)
21393    {
21394      case scalar_stmt:
21395        return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
21396
21397      case scalar_load:
	/* Load/store costs are relative to a register move, which is 2.
	   Rescale to COSTS_N_INSNS so everything has the same base.  */
21400        return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
21401			      : ix86_cost->int_load [2]) / 2;
21402
21403      case scalar_store:
21404        return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
21405			      : ix86_cost->int_store [2]) / 2;
21406
21407      case vector_stmt:
21408        return ix86_vec_cost (mode,
21409			      fp ? ix86_cost->addss : ix86_cost->sse_op);
21410
21411      case vector_load:
21412	index = sse_store_index (mode);
21413	/* See PR82713 - we may end up being called on non-vector type.  */
21414	if (index < 0)
21415	  index = 2;
21416        return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
21417
21418      case vector_store:
21419	index = sse_store_index (mode);
21420	/* See PR82713 - we may end up being called on non-vector type.  */
21421	if (index < 0)
21422	  index = 2;
21423        return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
21424
21425      case vec_to_scalar:
21426      case scalar_to_vec:
21427        return ix86_vec_cost (mode, ix86_cost->sse_op);
21428
21429      /* We should have separate costs for unaligned loads and gather/scatter.
21430	 Do that incrementally.  */
21431      case unaligned_load:
21432	index = sse_store_index (mode);
21433	/* See PR82713 - we may end up being called on non-vector type.  */
21434	if (index < 0)
21435	  index = 2;
21436        return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
21437
21438      case unaligned_store:
21439	index = sse_store_index (mode);
21440	/* See PR82713 - we may end up being called on non-vector type.  */
21441	if (index < 0)
21442	  index = 2;
21443        return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
21444
21445      case vector_gather_load:
21446        return ix86_vec_cost (mode,
21447			      COSTS_N_INSNS
21448				 (ix86_cost->gather_static
21449				  + ix86_cost->gather_per_elt
21450				    * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
21451
21452      case vector_scatter_store:
21453        return ix86_vec_cost (mode,
21454			      COSTS_N_INSNS
21455				 (ix86_cost->scatter_static
21456				  + ix86_cost->scatter_per_elt
21457				    * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
21458
21459      case cond_branch_taken:
21460        return ix86_cost->cond_taken_branch_cost;
21461
21462      case cond_branch_not_taken:
21463        return ix86_cost->cond_not_taken_branch_cost;
21464
21465      case vec_perm:
21466      case vec_promote_demote:
21467        return ix86_vec_cost (mode, ix86_cost->sse_op);
21468
21469      case vec_construct:
21470	{
21471	  /* N element inserts into SSE vectors.  */
21472	  int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op;
21473	  /* One vinserti128 for combining two SSE vectors for AVX256.  */
21474	  if (GET_MODE_BITSIZE (mode) == 256)
21475	    cost += ix86_vec_cost (mode, ix86_cost->addss);
21476	  /* One vinserti64x4 and two vinserti128 for combining SSE
21477	     and AVX256 vectors to AVX512.  */
21478	  else if (GET_MODE_BITSIZE (mode) == 512)
21479	    cost += 3 * ix86_vec_cost (mode, ix86_cost->addss);
21480	  return cost;
21481	}
21482
21483      default:
21484        gcc_unreachable ();
21485    }
21486}
21487
21488
/* Return the calling-ABI-specific va_list type node appropriate
   for FNDECL.  */
21491
21492static tree
21493ix86_fn_abi_va_list (tree fndecl)
21494{
21495  if (!TARGET_64BIT)
21496    return va_list_type_node;
21497  gcc_assert (fndecl != NULL_TREE);
21498
21499  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
21500    return ms_va_list_type_node;
21501  else
21502    return sysv_va_list_type_node;
21503}
21504
/* Return the canonical va_list type specified by TYPE.  If no valid TYPE is
   provided, return NULL_TREE.  */
21507
21508static tree
21509ix86_canonical_va_list_type (tree type)
21510{
21511  if (TARGET_64BIT)
21512    {
21513      if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
21514	return ms_va_list_type_node;
21515
21516      if ((TREE_CODE (type) == ARRAY_TYPE
21517	   && integer_zerop (array_type_nelts (type)))
21518	  || POINTER_TYPE_P (type))
21519	{
21520	  tree elem_type = TREE_TYPE (type);
21521	  if (TREE_CODE (elem_type) == RECORD_TYPE
21522	      && lookup_attribute ("sysv_abi va_list",
21523				   TYPE_ATTRIBUTES (elem_type)))
21524	    return sysv_va_list_type_node;
21525	}
21526
21527      return NULL_TREE;
21528    }
21529
21530  return std_canonical_va_list_type (type);
21531}
21532
21533/* Iterate through the target-specific builtin types for va_list.
21534   IDX denotes the iterator, *PTREE is set to the result type of
21535   the va_list builtin, and *PNAME to its internal type.
21536   Returns zero if there is no element for this index, otherwise
21537   IDX should be increased upon the next call.
21538   Note, do not iterate a base builtin's name like __builtin_va_list.
21539   Used from c_common_nodes_and_builtins.  */
21540
21541static int
21542ix86_enum_va_list (int idx, const char **pname, tree *ptree)
21543{
21544  if (TARGET_64BIT)
21545    {
21546      switch (idx)
21547	{
21548	default:
21549	  break;
21550
21551	case 0:
21552	  *ptree = ms_va_list_type_node;
21553	  *pname = "__builtin_ms_va_list";
21554	  return 1;
21555
21556	case 1:
21557	  *ptree = sysv_va_list_type_node;
21558	  *pname = "__builtin_sysv_va_list";
21559	  return 1;
21560	}
21561    }
21562
21563  return 0;
21564}
21565
21566#undef TARGET_SCHED_DISPATCH
21567#define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
21568#undef TARGET_SCHED_DISPATCH_DO
21569#define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
21570#undef TARGET_SCHED_REASSOCIATION_WIDTH
21571#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
21572#undef TARGET_SCHED_REORDER
21573#define TARGET_SCHED_REORDER ix86_atom_sched_reorder
21574#undef TARGET_SCHED_ADJUST_PRIORITY
21575#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
21576#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
21577#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
21578  ix86_dependencies_evaluation_hook
21579
21580
/* Implementation of the reassociation_width target hook, used by the
   reassoc pass to identify the level of parallelism in a reassociated
   tree.  The statement's tree_code is passed in OP and the operands'
   mode in MODE.  */
21585
21586static int
21587ix86_reassociation_width (unsigned int op, machine_mode mode)
21588{
21589  int width = 1;
21590  /* Vector part.  */
21591  if (VECTOR_MODE_P (mode))
21592    {
21593      int div = 1;
21594      if (INTEGRAL_MODE_P (mode))
21595	width = ix86_cost->reassoc_vec_int;
21596      else if (FLOAT_MODE_P (mode))
21597	width = ix86_cost->reassoc_vec_fp;
21598
21599      if (width == 1)
21600	return 1;
21601
21602      /* Integer vector instructions execute in FP unit
21603	 and can execute 3 additions and one multiplication per cycle.  */
21604      if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
21605	   || ix86_tune == PROCESSOR_ZNVER3)
21606   	  && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
21607	return 1;
21608
      /* Account for targets that split wide vectors into multiple parts.  */
21610      if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
21611	div = GET_MODE_BITSIZE (mode) / 128;
21612      else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
21613	div = GET_MODE_BITSIZE (mode) / 64;
21614      width = (width + div - 1) / div;
21615    }
21616  /* Scalar part.  */
21617  else if (INTEGRAL_MODE_P (mode))
21618    width = ix86_cost->reassoc_int;
21619  else if (FLOAT_MODE_P (mode))
21620    width = ix86_cost->reassoc_fp;
21621
21622  /* Avoid using too many registers in 32bit mode.  */
21623  if (!TARGET_64BIT && width > 2)
21624    width = 2;
21625  return width;
21626}
21627
21628/* ??? No autovectorization into MMX or 3DNOW until we can reliably
21629   place emms and femms instructions.  */
21630
21631static machine_mode
21632ix86_preferred_simd_mode (scalar_mode mode)
21633{
21634  if (!TARGET_SSE)
21635    return word_mode;
21636
21637  switch (mode)
21638    {
21639    case E_QImode:
21640      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
21641	return V64QImode;
21642      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21643	return V32QImode;
21644      else
21645	return V16QImode;
21646
21647    case E_HImode:
21648      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
21649	return V32HImode;
21650      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21651	return V16HImode;
21652      else
21653	return V8HImode;
21654
21655    case E_SImode:
21656      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21657	return V16SImode;
21658      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21659	return V8SImode;
21660      else
21661	return V4SImode;
21662
21663    case E_DImode:
21664      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21665	return V8DImode;
21666      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21667	return V4DImode;
21668      else
21669	return V2DImode;
21670
21671    case E_SFmode:
21672      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21673	return V16SFmode;
21674      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21675	return V8SFmode;
21676      else
21677	return V4SFmode;
21678
21679    case E_DFmode:
21680      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21681	return V8DFmode;
21682      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21683	return V4DFmode;
21684      else if (TARGET_SSE2)
21685	return V2DFmode;
21686      /* FALLTHRU */
21687
21688    default:
21689      return word_mode;
21690    }
21691}
21692
/* If AVX is enabled then try vectorizing with both 256-bit and 128-bit
   vectors.  If AVX512F is enabled then try vectorizing with 512-bit,
   256-bit and 128-bit vectors.  */
21696
21697static unsigned int
21698ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
21699{
21700  if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21701    {
21702      modes->safe_push (V64QImode);
21703      modes->safe_push (V32QImode);
21704      modes->safe_push (V16QImode);
21705    }
21706  else if (TARGET_AVX512F && all)
21707    {
21708      modes->safe_push (V32QImode);
21709      modes->safe_push (V16QImode);
21710      modes->safe_push (V64QImode);
21711    }
21712  else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21713    {
21714      modes->safe_push (V32QImode);
21715      modes->safe_push (V16QImode);
21716    }
21717  else if (TARGET_AVX && all)
21718    {
21719      modes->safe_push (V16QImode);
21720      modes->safe_push (V32QImode);
21721    }
21722  else if (TARGET_MMX_WITH_SSE)
21723    modes->safe_push (V16QImode);
21724
21725  if (TARGET_MMX_WITH_SSE)
21726    modes->safe_push (V8QImode);
21727
21728  return 0;
21729}
21730
/* Implementation of targetm.vectorize.get_mask_mode.  */
21732
21733static opt_machine_mode
21734ix86_get_mask_mode (machine_mode data_mode)
21735{
21736  unsigned vector_size = GET_MODE_SIZE (data_mode);
21737  unsigned nunits = GET_MODE_NUNITS (data_mode);
21738  unsigned elem_size = vector_size / nunits;
21739
21740  /* Scalar mask case.  */
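  /* For example, V16SFmode under AVX512F has 16 four-byte elements, so the
     mask mode is the 16-bit HImode, one bit per element.  */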
21741  if ((TARGET_AVX512F && vector_size == 64)
21742      || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
21743    {
21744      if (elem_size == 4
21745	  || elem_size == 8
21746	  || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
21747	return smallest_int_mode_for_size (nunits);
21748    }
21749
21750  scalar_int_mode elem_mode
21751    = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
21752
21753  gcc_assert (elem_size * nunits == vector_size);
21754
21755  return mode_for_vector (elem_mode, nunits);
21756}
21757
21758
21759
21760/* Return class of registers which could be used for pseudo of MODE
21761   and of class RCLASS for spilling instead of memory.  Return NO_REGS
21762   if it is not possible or non-profitable.  */
21763
21764/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
21765
21766static reg_class_t
21767ix86_spill_class (reg_class_t rclass, machine_mode mode)
21768{
21769  if (0 && TARGET_GENERAL_REGS_SSE_SPILL
21770      && TARGET_SSE2
21771      && TARGET_INTER_UNIT_MOVES_TO_VEC
21772      && TARGET_INTER_UNIT_MOVES_FROM_VEC
21773      && (mode == SImode || (TARGET_64BIT && mode == DImode))
21774      && INTEGER_CLASS_P (rclass))
21775    return ALL_SSE_REGS;
21776  return NO_REGS;
21777}
21778
21779/* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST.  Like the default implementation,
21780   but returns a lower bound.  */
21781
21782static unsigned int
21783ix86_max_noce_ifcvt_seq_cost (edge e)
21784{
21785  bool predictable_p = predictable_edge_p (e);
21786  if (predictable_p)
21787    {
21788      if (global_options_set.x_param_max_rtl_if_conversion_predictable_cost)
21789	return param_max_rtl_if_conversion_predictable_cost;
21790    }
21791  else
21792    {
21793      if (global_options_set.x_param_max_rtl_if_conversion_unpredictable_cost)
21794	return param_max_rtl_if_conversion_unpredictable_cost;
21795    }
21796
21797  return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
21798}
21799
21800/* Return true if SEQ is a good candidate as a replacement for the
21801   if-convertible sequence described in IF_INFO.  */
21802
21803static bool
21804ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
21805{
21806  if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
21807    {
21808      int cmov_cnt = 0;
21809      /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
21810	 Maybe we should allow even more conditional moves as long as they
21811	 are used far enough not to stall the CPU, or also consider
21812	 IF_INFO->TEST_BB succ edge probabilities.  */
21813      for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
21814	{
21815	  rtx set = single_set (insn);
21816	  if (!set)
21817	    continue;
21818	  if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
21819	    continue;
21820	  rtx src = SET_SRC (set);
21821	  machine_mode mode = GET_MODE (src);
21822	  if (GET_MODE_CLASS (mode) != MODE_INT
21823	      && GET_MODE_CLASS (mode) != MODE_FLOAT)
21824	    continue;
21825	  if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
21826	      || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
21827	    continue;
21828	  /* insn is CMOV or FCMOV.  */
21829	  if (++cmov_cnt > 1)
21830	    return false;
21831	}
21832    }
21833  return default_noce_conversion_profitable_p (seq, if_info);
21834}
21835
21836/* Implement targetm.vectorize.init_cost.  */
21837
21838static void *
21839ix86_init_cost (class loop *)
21840{
21841  unsigned *cost = XNEWVEC (unsigned, 3);
21842  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
21843  return cost;
21844}
21845
21846/* Implement targetm.vectorize.add_stmt_cost.  */
21847
21848static unsigned
21849ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
21850		    class _stmt_vec_info *stmt_info, int misalign,
21851		    enum vect_cost_model_location where)
21852{
21853  unsigned *cost = (unsigned *) data;
21854  unsigned retval = 0;
21855  bool scalar_p
21856    = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
21857
21858  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
  int stmt_cost = -1;
21860
21861  bool fp = false;
21862  machine_mode mode = scalar_p ? SImode : TImode;
21863
21864  if (vectype != NULL)
21865    {
21866      fp = FLOAT_TYPE_P (vectype);
21867      mode = TYPE_MODE (vectype);
21868      if (scalar_p)
21869	mode = TYPE_MODE (TREE_TYPE (vectype));
21870    }
21871
21872  if ((kind == vector_stmt || kind == scalar_stmt)
21873      && stmt_info
21874      && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
21875    {
21876      tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
21877      /*machine_mode inner_mode = mode;
21878      if (VECTOR_MODE_P (mode))
21879	inner_mode = GET_MODE_INNER (mode);*/
21880
21881      switch (subcode)
21882	{
21883	case PLUS_EXPR:
21884	case POINTER_PLUS_EXPR:
21885	case MINUS_EXPR:
21886	  if (kind == scalar_stmt)
21887	    {
21888	      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21889		stmt_cost = ix86_cost->addss;
21890	      else if (X87_FLOAT_MODE_P (mode))
21891		stmt_cost = ix86_cost->fadd;
21892	      else
21893	        stmt_cost = ix86_cost->add;
21894	    }
21895	  else
21896	    stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
21897				       : ix86_cost->sse_op);
21898	  break;
21899
21900	case MULT_EXPR:
21901	case WIDEN_MULT_EXPR:
21902	case MULT_HIGHPART_EXPR:
21903	  stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
21904	  break;
21905	case NEGATE_EXPR:
21906	  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21907	    stmt_cost = ix86_cost->sse_op;
21908	  else if (X87_FLOAT_MODE_P (mode))
21909	    stmt_cost = ix86_cost->fchs;
21910	  else if (VECTOR_MODE_P (mode))
21911	    stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
21912	  else
21913	    stmt_cost = ix86_cost->add;
21914	  break;
21915	case TRUNC_DIV_EXPR:
21916	case CEIL_DIV_EXPR:
21917	case FLOOR_DIV_EXPR:
21918	case ROUND_DIV_EXPR:
21919	case TRUNC_MOD_EXPR:
21920	case CEIL_MOD_EXPR:
21921	case FLOOR_MOD_EXPR:
21922	case RDIV_EXPR:
21923	case ROUND_MOD_EXPR:
21924	case EXACT_DIV_EXPR:
21925	  stmt_cost = ix86_division_cost (ix86_cost, mode);
21926	  break;
21927
21928	case RSHIFT_EXPR:
21929	case LSHIFT_EXPR:
21930	case LROTATE_EXPR:
21931	case RROTATE_EXPR:
21932	  {
21933	    tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
21934	    stmt_cost = ix86_shift_rotate_cost
21935			   (ix86_cost, mode,
21936		            TREE_CODE (op2) == INTEGER_CST,
21937			    cst_and_fits_in_hwi (op2) ? int_cst_value (op2) : -1,
21938		            true, false, false, NULL, NULL);
21939	  }
21940	  break;
21941	case NOP_EXPR:
21942	  /* Only sign-conversions are free.  */
21943	  if (tree_nop_conversion_p
21944	        (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
21945		 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
21946	    stmt_cost = 0;
21947	  break;
21948
21949	case BIT_IOR_EXPR:
21950	case ABS_EXPR:
21951	case ABSU_EXPR:
21952	case MIN_EXPR:
21953	case MAX_EXPR:
21954	case BIT_XOR_EXPR:
21955	case BIT_AND_EXPR:
21956	case BIT_NOT_EXPR:
21957	  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21958	    stmt_cost = ix86_cost->sse_op;
21959	  else if (VECTOR_MODE_P (mode))
21960	    stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
21961	  else
21962	    stmt_cost = ix86_cost->add;
21963	  break;
21964	default:
21965	  break;
21966	}
21967    }
21968
21969  combined_fn cfn;
21970  if ((kind == vector_stmt || kind == scalar_stmt)
21971      && stmt_info
21972      && stmt_info->stmt
21973      && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
21974    switch (cfn)
21975      {
21976      case CFN_FMA:
21977	stmt_cost = ix86_vec_cost (mode,
21978				   mode == SFmode ? ix86_cost->fmass
21979				   : ix86_cost->fmasd);
21980	break;
21981      default:
21982	break;
21983      }
21984
21985  /* If we do elementwise loads into a vector then we are bound by
21986     latency and execution resources for the many scalar loads
21987     (AGU and load ports).  Try to account for this by scaling the
21988     construction cost by the number of elements involved.  */
21989  if ((kind == vec_construct || kind == vec_to_scalar)
21990      && stmt_info
21991      && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
21992	  || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
21993      && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
21994      && TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info))) != INTEGER_CST)
21995    {
21996      stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
21997      stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
21998    }
21999  if (stmt_cost == -1)
22000    stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
22001
22002  /* Penalize DFmode vector operations for Bonnell.  */
22003  if (TARGET_BONNELL && kind == vector_stmt
22004      && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
22005    stmt_cost *= 5;  /* FIXME: The value here is arbitrary.  */
22006
22007  /* Statements in an inner loop relative to the loop being
22008     vectorized are weighted more heavily.  The value here is
22009     arbitrary and could potentially be improved with analysis.  */
22010  if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
22011    count *= 50;  /* FIXME.  */
22012
22013  retval = (unsigned) (count * stmt_cost);
22014
  /* Multiply all vector statement costs by 1.7 (an estimate) for
     Silvermont-class cores: they have an out-of-order integer pipeline that
     can execute 2 scalar instructions per tick, but an in-order SIMD
     pipeline.  */
22018  if ((TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS
22019       || TARGET_TREMONT || TARGET_INTEL) && stmt_info && stmt_info->stmt)
22020    {
22021      tree lhs_op = gimple_get_lhs (stmt_info->stmt);
22022      if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
22023	retval = (retval * 17) / 10;
22024    }
22025
22026  cost[where] += retval;
22027
22028  return retval;
22029}
22030
22031/* Implement targetm.vectorize.finish_cost.  */
22032
22033static void
22034ix86_finish_cost (void *data, unsigned *prologue_cost,
22035		  unsigned *body_cost, unsigned *epilogue_cost)
22036{
22037  unsigned *cost = (unsigned *) data;
22038  *prologue_cost = cost[vect_prologue];
22039  *body_cost     = cost[vect_body];
22040  *epilogue_cost = cost[vect_epilogue];
22041}
22042
22043/* Implement targetm.vectorize.destroy_cost_data.  */
22044
22045static void
22046ix86_destroy_cost_data (void *data)
22047{
22048  free (data);
22049}
22050
22051/* Validate target specific memory model bits in VAL. */
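/* For instance, user code may pass a model such as
   __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE to __atomic_exchange_n; the HLE
   bits are target specific and are checked here.  */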
22052
22053static unsigned HOST_WIDE_INT
22054ix86_memmodel_check (unsigned HOST_WIDE_INT val)
22055{
22056  enum memmodel model = memmodel_from_int (val);
22057  bool strong;
22058
22059  if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
22060				      |MEMMODEL_MASK)
22061      || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
22062    {
22063      warning (OPT_Winvalid_memory_model,
22064	       "unknown architecture specific memory model");
22065      return MEMMODEL_SEQ_CST;
22066    }
22067  strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
22068  if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
22069    {
22070      warning (OPT_Winvalid_memory_model,
22071	      "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
22072	       "memory model");
22073      return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
22074    }
22075  if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
22076    {
22077      warning (OPT_Winvalid_memory_model,
22078	      "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
22079	       "memory model");
22080      return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
22081    }
22082  return val;
22083}
22084
22085/* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
22086   CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
22087   CLONEI->simdlen.  Return 0 if SIMD clones shouldn't be emitted,
22088   or number of vecsize_mangle variants that should be emitted.  */
22089
22090static int
22091ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
22092					     struct cgraph_simd_clone *clonei,
22093					     tree base_type, int num)
22094{
22095  int ret = 1;
22096
22097  if (clonei->simdlen
22098      && (clonei->simdlen < 2
22099	  || clonei->simdlen > 1024
22100	  || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
22101    {
22102      warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22103		  "unsupported simdlen %d", clonei->simdlen);
22104      return 0;
22105    }
22106
22107  tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
22108  if (TREE_CODE (ret_type) != VOID_TYPE)
22109    switch (TYPE_MODE (ret_type))
22110      {
22111      case E_QImode:
22112      case E_HImode:
22113      case E_SImode:
22114      case E_DImode:
22115      case E_SFmode:
22116      case E_DFmode:
22117      /* case E_SCmode: */
22118      /* case E_DCmode: */
22119	if (!AGGREGATE_TYPE_P (ret_type))
22120	  break;
22121	/* FALLTHRU */
22122      default:
22123	warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22124		    "unsupported return type %qT for simd", ret_type);
22125	return 0;
22126      }
22127
22128  tree t;
22129  int i;
22130  tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
22131  bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
22132
22133  for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
22134       t && t != void_list_node; t = TREE_CHAIN (t), i++)
22135    {
22136      tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
22137      switch (TYPE_MODE (arg_type))
22138	{
22139	case E_QImode:
22140	case E_HImode:
22141	case E_SImode:
22142	case E_DImode:
22143	case E_SFmode:
22144	case E_DFmode:
22145	/* case E_SCmode: */
22146	/* case E_DCmode: */
22147	  if (!AGGREGATE_TYPE_P (arg_type))
22148	    break;
22149	  /* FALLTHRU */
22150	default:
22151	  if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
22152	    break;
22153	  warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22154		      "unsupported argument type %qT for simd", arg_type);
22155	  return 0;
22156	}
22157    }
22158
22159  if (!TREE_PUBLIC (node->decl))
22160    {
22161      /* If the function isn't exported, we can pick up just one ISA
22162	 for the clones.  */
22163      if (TARGET_AVX512F)
22164	clonei->vecsize_mangle = 'e';
22165      else if (TARGET_AVX2)
22166	clonei->vecsize_mangle = 'd';
22167      else if (TARGET_AVX)
22168	clonei->vecsize_mangle = 'c';
22169      else
22170	clonei->vecsize_mangle = 'b';
22171      ret = 1;
22172    }
22173  else
22174    {
22175      clonei->vecsize_mangle = "bcde"[num];
22176      ret = 4;
22177    }
22178  clonei->mask_mode = VOIDmode;
22179  switch (clonei->vecsize_mangle)
22180    {
22181    case 'b':
22182      clonei->vecsize_int = 128;
22183      clonei->vecsize_float = 128;
22184      break;
22185    case 'c':
22186      clonei->vecsize_int = 128;
22187      clonei->vecsize_float = 256;
22188      break;
22189    case 'd':
22190      clonei->vecsize_int = 256;
22191      clonei->vecsize_float = 256;
22192      break;
22193    case 'e':
22194      clonei->vecsize_int = 512;
22195      clonei->vecsize_float = 512;
22196      if (TYPE_MODE (base_type) == QImode)
22197	clonei->mask_mode = DImode;
22198      else
22199	clonei->mask_mode = SImode;
22200      break;
22201    }
22202  if (clonei->simdlen == 0)
22203    {
22204      if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
22205	clonei->simdlen = clonei->vecsize_int;
22206      else
22207	clonei->simdlen = clonei->vecsize_float;
22208      clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
22209    }
22210  else if (clonei->simdlen > 16)
22211    {
      /* For compatibility with ICC, use the same upper bounds
	 for simdlen.  In particular, for CTYPE below, use the return type,
	 unless the function returns void, in which case use the
	 characteristic type.  If it is possible for the given SIMDLEN to
	 pass a CTYPE value in registers (8 [XYZ]MM* regs for 32-bit code,
	 16 [XYZ]MM* regs for 64-bit code), accept that SIMDLEN, otherwise
	 warn and don't emit the corresponding clone.  */
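      /* For example, simdlen 64 with a float CTYPE on an SSE ('b') clone
	 needs 32 * 64 / 128 = 16 [XYZ]MM registers, which is only
	 acceptable for 64-bit code.  */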
22219      tree ctype = ret_type;
22220      if (TREE_CODE (ret_type) == VOID_TYPE)
22221	ctype = base_type;
22222      int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
22223      if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
22224	cnt /= clonei->vecsize_int;
22225      else
22226	cnt /= clonei->vecsize_float;
22227      if (cnt > (TARGET_64BIT ? 16 : 8))
22228	{
22229	  warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22230		      "unsupported simdlen %d", clonei->simdlen);
22231	  return 0;
22232	}
22233      }
22234  return ret;
22235}
22236
22237/* If SIMD clone NODE can't be used in a vectorized loop
22238   in current function, return -1, otherwise return a badness of using it
22239   (0 if it is most desirable from vecsize_mangle point of view, 1
22240   slightly less desirable, etc.).  */
22241
22242static int
22243ix86_simd_clone_usable (struct cgraph_node *node)
22244{
22245  switch (node->simdclone->vecsize_mangle)
22246    {
22247    case 'b':
22248      if (!TARGET_SSE2)
22249	return -1;
22250      if (!TARGET_AVX)
22251	return 0;
22252      return TARGET_AVX2 ? 2 : 1;
22253    case 'c':
22254      if (!TARGET_AVX)
22255	return -1;
22256      return TARGET_AVX2 ? 1 : 0;
22257    case 'd':
22258      if (!TARGET_AVX2)
22259	return -1;
22260      return 0;
22261    case 'e':
22262      if (!TARGET_AVX512F)
22263	return -1;
22264      return 0;
22265    default:
22266      gcc_unreachable ();
22267    }
22268}
22269
/* This function adjusts the unroll factor based on hardware capabilities.
   For example, bdver3 has a loop buffer which makes unrolling of smaller
   loops less important.  This function chooses the unroll factor so that
   the unrolled loop body contains at most about 32 (weighted) memory
   references.  */
22276
22277static unsigned
22278ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
22279{
22280  basic_block *bbs;
22281  rtx_insn *insn;
22282  unsigned i;
22283  unsigned mem_count = 0;
22284
22285  if (!TARGET_ADJUST_UNROLL)
    return nunroll;
22287
22288  /* Count the number of memory references within the loop body.
22289     This value determines the unrolling factor for bdver3 and bdver4
22290     architectures. */
22291  subrtx_iterator::array_type array;
22292  bbs = get_loop_body (loop);
22293  for (i = 0; i < loop->num_nodes; i++)
22294    FOR_BB_INSNS (bbs[i], insn)
22295      if (NONDEBUG_INSN_P (insn))
22296	FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
22297	  if (const_rtx x = *iter)
22298	    if (MEM_P (x))
22299	      {
22300		machine_mode mode = GET_MODE (x);
22301		unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
22302		if (n_words > 4)
22303		  mem_count += 2;
22304		else
22305		  mem_count += 1;
22306	      }
22307  free (bbs);
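  /* For example, a loop body with 8 counted memory references is unrolled
     at most 32 / 8 = 4 times.  */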
22308
  if (mem_count && mem_count <= 32)
22310    return MIN (nunroll, 32 / mem_count);
22311
22312  return nunroll;
22313}
22314
22315
22316/* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P.  */
22317
22318static bool
22319ix86_float_exceptions_rounding_supported_p (void)
22320{
22321  /* For x87 floating point with standard excess precision handling,
22322     there is no adddf3 pattern (since x87 floating point only has
22323     XFmode operations) so the default hook implementation gets this
22324     wrong.  */
22325  return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
22326}
22327
22328/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */
22329
22330static void
22331ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
22332{
22333  if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
22334    return;
22335  tree exceptions_var = create_tmp_var_raw (integer_type_node);
22336  if (TARGET_80387)
22337    {
22338      tree fenv_index_type = build_index_type (size_int (6));
22339      tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
22340      tree fenv_var = create_tmp_var_raw (fenv_type);
22341      TREE_ADDRESSABLE (fenv_var) = 1;
22342      tree fenv_ptr = build_pointer_type (fenv_type);
22343      tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
22344      fenv_addr = fold_convert (ptr_type_node, fenv_addr);
22345      tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
22346      tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
22347      tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
22348      tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
22349      tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
22350      tree hold_fnclex = build_call_expr (fnclex, 0);
22351      fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
22352			 NULL_TREE, NULL_TREE);
22353      *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
22354		      hold_fnclex);
22355      *clear = build_call_expr (fnclex, 0);
22356      tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
22357      tree fnstsw_call = build_call_expr (fnstsw, 0);
22358      tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
22359			    fnstsw_call, NULL_TREE, NULL_TREE);
22360      tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
22361      tree update_mod = build4 (TARGET_EXPR, integer_type_node,
22362				exceptions_var, exceptions_x87,
22363				NULL_TREE, NULL_TREE);
22364      *update = build2 (COMPOUND_EXPR, integer_type_node,
22365			sw_mod, update_mod);
22366      tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
22367      *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
22368    }
22369  if (TARGET_SSE && TARGET_SSE_MATH)
22370    {
22371      tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
22372      tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
22373      tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
22374      tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
22375      tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
22376      tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
22377				      mxcsr_orig_var, stmxcsr_hold_call,
22378				      NULL_TREE, NULL_TREE);
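      /* OR in 0x1f80 to set all six exception mask bits (MXCSR bits 7..12)
	 and AND with 0xffffffc0 to clear the sticky exception flags
	 (bits 0..5).  */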
22379      tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
22380				  mxcsr_orig_var,
22381				  build_int_cst (unsigned_type_node, 0x1f80));
22382      hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
22383			     build_int_cst (unsigned_type_node, 0xffffffc0));
22384      tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
22385				     mxcsr_mod_var, hold_mod_val,
22386				     NULL_TREE, NULL_TREE);
22387      tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
22388      tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
22389			      hold_assign_orig, hold_assign_mod);
22390      hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
22391			 ldmxcsr_hold_call);
22392      if (*hold)
22393	*hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
22394      else
22395	*hold = hold_all;
22396      tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
22397      if (*clear)
22398	*clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
22399			 ldmxcsr_clear_call);
22400      else
22401	*clear = ldmxcsr_clear_call;
22402      tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
22403      tree exceptions_sse = fold_convert (integer_type_node,
22404					  stxmcsr_update_call);
22405      if (*update)
22406	{
22407	  tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
22408					exceptions_var, exceptions_sse);
22409	  tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
22410					   exceptions_var, exceptions_mod);
22411	  *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
22412			    exceptions_assign);
22413	}
22414      else
22415	*update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
22416			  exceptions_sse, NULL_TREE, NULL_TREE);
22417      tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
22418      *update = build2 (COMPOUND_EXPR, void_type_node, *update,
22419			ldmxcsr_update_call);
22420    }
22421  tree atomic_feraiseexcept
22422    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
22423  tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
22424						    1, exceptions_var);
22425  *update = build2 (COMPOUND_EXPR, void_type_node, *update,
22426		    atomic_feraiseexcept_call);
22427}
22428
22429#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* For i386, a common symbol is local only for non-PIE binaries.  For
   x86-64, a common symbol is local only for non-PIE binaries or when the
   linker supports copy relocations in PIE binaries.  */
22433
22434static bool
22435ix86_binds_local_p (const_tree exp)
22436{
22437  return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
22438				  (!flag_pic
22439				   || (TARGET_64BIT
22440				       && HAVE_LD_PIE_COPYRELOC != 0)));
22441}
22442#endif
22443
/* If MEM is in the form of [base+offset], extract the two parts of the
   address into BASE and OFFSET and return true; otherwise return false.  */
22446
22447static bool
22448extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
22449{
22450  rtx addr;
22451
22452  gcc_assert (MEM_P (mem));
22453
22454  addr = XEXP (mem, 0);
22455
22456  if (GET_CODE (addr) == CONST)
22457    addr = XEXP (addr, 0);
22458
22459  if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
22460    {
22461      *base = addr;
22462      *offset = const0_rtx;
22463      return true;
22464    }
22465
22466  if (GET_CODE (addr) == PLUS
22467      && (REG_P (XEXP (addr, 0))
22468	  || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
22469      && CONST_INT_P (XEXP (addr, 1)))
22470    {
22471      *base = XEXP (addr, 0);
22472      *offset = XEXP (addr, 1);
22473      return true;
22474    }
22475
22476  return false;
22477}
22478
22479/* Given OPERANDS of consecutive load/store, check if we can merge
22480   them into move multiple.  LOAD is true if they are load instructions.
22481   MODE is the mode of memory operands.  */
22482
22483bool
22484ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
22485				    machine_mode mode)
22486{
22487  HOST_WIDE_INT offval_1, offval_2, msize;
22488  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
22489
22490  if (load)
22491    {
22492      mem_1 = operands[1];
22493      mem_2 = operands[3];
22494      reg_1 = operands[0];
22495      reg_2 = operands[2];
22496    }
22497  else
22498    {
22499      mem_1 = operands[0];
22500      mem_2 = operands[2];
22501      reg_1 = operands[1];
22502      reg_2 = operands[3];
22503    }
22504
22505  gcc_assert (REG_P (reg_1) && REG_P (reg_2));
22506
22507  if (REGNO (reg_1) != REGNO (reg_2))
22508    return false;
22509
22510  /* Check if the addresses are in the form of [base+offset].  */
22511  if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
22512    return false;
22513  if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
22514    return false;
22515
22516  /* Check if the bases are the same.  */
22517  if (!rtx_equal_p (base_1, base_2))
22518    return false;
22519
22520  offval_1 = INTVAL (offset_1);
22521  offval_2 = INTVAL (offset_2);
22522  msize = GET_MODE_SIZE (mode);
  /* Check if mem_1 is adjacent to mem_2 and mem_1 has the lower address.  */
22524  if (offval_1 + msize != offval_2)
22525    return false;
22526
22527  return true;
22528}
22529
22530/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */
22531
22532static bool
22533ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
22534			optimization_type opt_type)
22535{
22536  switch (op)
22537    {
22538    case asin_optab:
22539    case acos_optab:
22540    case log1p_optab:
22541    case exp_optab:
22542    case exp10_optab:
22543    case exp2_optab:
22544    case expm1_optab:
22545    case ldexp_optab:
22546    case scalb_optab:
22547    case round_optab:
22548      return opt_type == OPTIMIZE_FOR_SPEED;
22549
22550    case rint_optab:
22551      if (SSE_FLOAT_MODE_P (mode1)
22552	  && TARGET_SSE_MATH
22553	  && !flag_trapping_math
22554	  && !TARGET_SSE4_1)
22555	return opt_type == OPTIMIZE_FOR_SPEED;
22556      return true;
22557
22558    case floor_optab:
22559    case ceil_optab:
22560    case btrunc_optab:
22561      if (SSE_FLOAT_MODE_P (mode1)
22562	  && TARGET_SSE_MATH
22563	  && !flag_trapping_math
22564	  && TARGET_SSE4_1)
22565	return true;
22566      return opt_type == OPTIMIZE_FOR_SPEED;
22567
22568    case rsqrt_optab:
22569      return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
22570
22571    default:
22572      return true;
22573    }
22574}
22575
22576/* Address space support.
22577
22578   This is not "far pointers" in the 16-bit sense, but an easy way
22579   to use %fs and %gs segment prefixes.  Therefore:
22580
22581    (a) All address spaces have the same modes,
    (b) All address spaces have the same address forms,
22583    (c) While %fs and %gs are technically subsets of the generic
22584        address space, they are probably not subsets of each other.
22585    (d) Since we have no access to the segment base register values
22586        without resorting to a system call, we cannot convert a
22587        non-default address space to a default address space.
22588        Therefore we do not claim %fs or %gs are subsets of generic.
22589
22590   Therefore we can (mostly) use the default hooks.  */
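
/* In C these address spaces are available through the __seg_fs and
   __seg_gs qualifiers (an illustrative use; see the "Named Address
   Spaces" section of the GCC manual):

     int __seg_gs *p = (int __seg_gs *) 0x10;
     int x = *p;			// loads from %gs:0x10

   Such pointers have the same size and representation as generic
   pointers; only the segment prefix on the access differs.  */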
22591
22592/* All use of segmentation is assumed to make address 0 valid.  */
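
/* Consequently a dereference at address zero in one of these spaces,
   e.g. *(int __seg_gs *) 0, is not assumed to trap and is not treated
   as an erroneous null-pointer access.  */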
22593
22594static bool
22595ix86_addr_space_zero_address_valid (addr_space_t as)
22596{
22597  return as != ADDR_SPACE_GENERIC;
22598}
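
/* Implement TARGET_INIT_LIBFUNCS.  Point the double-word signed and
   unsigned divmod optabs at libgcc's combined divmod routines, so that
   a quotient and remainder of the same operands can be obtained with a
   single libcall: TImode on 64-bit targets, DImode on 32-bit ones.  */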
22599
22600static void
22601ix86_init_libfuncs (void)
22602{
22603  if (TARGET_64BIT)
22604    {
22605      set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
22606      set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
22607    }
22608  else
22609    {
22610      set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
22611      set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
22612    }
22613
22614#if TARGET_MACHO
22615  darwin_rename_builtins ();
22616#endif
22617}
22618
22619/* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
22620   FPU, assume that the fpcw is set to extended precision; when using
22621   only SSE, rounding is correct; when using both SSE and the FPU,
22622   the rounding precision is indeterminate, since either may be chosen
22623   apparently at random.  */
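
/* Concretely: with x87 math (and no SSE math) float and double are
   evaluated in long double precision, i.e. FLT_EVAL_METHOD == 2, while
   with -mfpmath=sse and SSE2 they are evaluated in their nominal types,
   i.e. FLT_EVAL_METHOD == 0.  */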
22624
22625static enum flt_eval_method
22626ix86_get_excess_precision (enum excess_precision_type type)
22627{
22628  switch (type)
22629    {
22630      case EXCESS_PRECISION_TYPE_FAST:
22631	/* The fastest type to promote to will always be the native type,
22632	   whether that occurs with implicit excess precision or
22633	   otherwise.  */
22634	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
22635      case EXCESS_PRECISION_TYPE_STANDARD:
22636      case EXCESS_PRECISION_TYPE_IMPLICIT:
	/* Otherwise, the excess precision we want when we are
	   in a standards-compliant mode and the implicit precision we
	   provide would be identical, were it not for the unpredictable
	   cases.  */
22641	if (!TARGET_80387)
22642	  return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
22643	else if (!TARGET_MIX_SSE_I387)
22644	  {
22645	    if (!(TARGET_SSE && TARGET_SSE_MATH))
22646	      return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
22647	    else if (TARGET_SSE2)
22648	      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
22649	  }
22650
	/* If we are in standards compliant mode, but we know we will
	   calculate in unpredictable precision, return
	   FLT_EVAL_METHOD_PROMOTE_TO_FLOAT.  There is no reason to
	   introduce explicit excess precision if the target can't
	   guarantee it will honor it.  */
22656	return (type == EXCESS_PRECISION_TYPE_STANDARD
22657		? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
22658		: FLT_EVAL_METHOD_UNPREDICTABLE);
22659      default:
22660	gcc_unreachable ();
22661    }
22662
22663  return FLT_EVAL_METHOD_UNPREDICTABLE;
22664}
22665
/* Implement PUSH_ROUNDING.  On the 386 we have a pushw instruction that
   always decrements the stack pointer by exactly 2; there is no pushb.

   But as the CIE data alignment factor on this arch is -4 for 32-bit
   targets and -8 for 64-bit targets, we need to make sure all stack
   pointer adjustments are a multiple of 4 for 32-bit targets and 8 for
   64-bit targets.  */
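
/* For example, pushing a single QImode byte is rounded up to a full
   word: ix86_push_rounding (1) is 4 for 32-bit targets and 8 for 64-bit
   targets, since UNITS_PER_WORD is 4 and 8 respectively.  */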
22672
22673poly_int64
22674ix86_push_rounding (poly_int64 bytes)
22675{
22676  return ROUND_UP (bytes, UNITS_PER_WORD);
22677}
22678
22679/* Target-specific selftests.  */
22680
22681#if CHECKING_P
22682
22683namespace selftest {
22684
22685/* Verify that hard regs are dumped as expected (in compact mode).  */
22686
22687static void
22688ix86_test_dumping_hard_regs ()
22689{
22690  ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
22691  ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
22692}
22693
22694/* Test dumping an insn with repeated references to the same SCRATCH,
22695   to verify the rtx_reuse code.  */
22696
22697static void
22698ix86_test_dumping_memory_blockage ()
22699{
22700  set_new_first_and_last_insn (NULL, NULL);
22701
22702  rtx pat = gen_memory_blockage ();
22703  rtx_reuse_manager r;
22704  r.preprocess (pat);
22705
  /* Verify that the repeated references to the SCRATCH are dumped
     using reuse IDs.  The first should be prefixed with a reuse ID,
22708     and the second should be dumped as a "reuse_rtx" of that ID.
22709     The expected string assumes Pmode == DImode.  */
22710  if (Pmode == DImode)
22711    ASSERT_RTL_DUMP_EQ_WITH_REUSE
22712      ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0  A8])\n"
22713       "        (unspec:BLK [\n"
22714       "                (mem/v:BLK (reuse_rtx 0) [0  A8])\n"
22715       "            ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
22716}
22717
22718/* Verify loading an RTL dump; specifically a dump of copying
22719   a param on x86_64 from a hard reg into the frame.
22720   This test is target-specific since the dump contains target-specific
22721   hard reg names.  */
22722
22723static void
22724ix86_test_loading_dump_fragment_1 ()
22725{
22726  rtl_dump_test t (SELFTEST_LOCATION,
22727		   locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
22728
22729  rtx_insn *insn = get_insn_by_uid (1);
22730
22731  /* The block structure and indentation here is purely for
22732     readability; it mirrors the structure of the rtx.  */
22733  tree mem_expr;
22734  {
22735    rtx pat = PATTERN (insn);
22736    ASSERT_EQ (SET, GET_CODE (pat));
22737    {
22738      rtx dest = SET_DEST (pat);
22739      ASSERT_EQ (MEM, GET_CODE (dest));
22740      /* Verify the "/c" was parsed.  */
22741      ASSERT_TRUE (RTX_FLAG (dest, call));
22742      ASSERT_EQ (SImode, GET_MODE (dest));
22743      {
22744	rtx addr = XEXP (dest, 0);
22745	ASSERT_EQ (PLUS, GET_CODE (addr));
22746	ASSERT_EQ (DImode, GET_MODE (addr));
22747	{
22748	  rtx lhs = XEXP (addr, 0);
22749	  /* Verify that the "frame" REG was consolidated.  */
22750	  ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
22751	}
22752	{
22753	  rtx rhs = XEXP (addr, 1);
22754	  ASSERT_EQ (CONST_INT, GET_CODE (rhs));
22755	  ASSERT_EQ (-4, INTVAL (rhs));
22756	}
22757      }
22758      /* Verify the "[1 i+0 S4 A32]" was parsed.  */
22759      ASSERT_EQ (1, MEM_ALIAS_SET (dest));
22760      /* "i" should have been handled by synthesizing a global int
22761	 variable named "i".  */
22762      mem_expr = MEM_EXPR (dest);
22763      ASSERT_NE (mem_expr, NULL);
22764      ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
22765      ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
22766      ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
22767      ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
22768      /* "+0".  */
22769      ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
22770      ASSERT_EQ (0, MEM_OFFSET (dest));
22771      /* "S4".  */
22772      ASSERT_EQ (4, MEM_SIZE (dest));
      /* "A32".  */
22774      ASSERT_EQ (32, MEM_ALIGN (dest));
22775    }
22776    {
22777      rtx src = SET_SRC (pat);
22778      ASSERT_EQ (REG, GET_CODE (src));
22779      ASSERT_EQ (SImode, GET_MODE (src));
22780      ASSERT_EQ (5, REGNO (src));
22781      tree reg_expr = REG_EXPR (src);
22782      /* "i" here should point to the same var as for the MEM_EXPR.  */
22783      ASSERT_EQ (reg_expr, mem_expr);
22784    }
22785  }
22786}
22787
22788/* Verify that the RTL loader copes with a call_insn dump.
22789   This test is target-specific since the dump contains a target-specific
22790   hard reg name.  */
22791
22792static void
22793ix86_test_loading_call_insn ()
22794{
  /* The test dump includes register "xmm0", which requires TARGET_SSE
     in order to exist.  */
22797  if (!TARGET_SSE)
22798    return;
22799
22800  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
22801
22802  rtx_insn *insn = get_insns ();
22803  ASSERT_EQ (CALL_INSN, GET_CODE (insn));
22804
22805  /* "/j".  */
22806  ASSERT_TRUE (RTX_FLAG (insn, jump));
22807
22808  rtx pat = PATTERN (insn);
22809  ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
22810
22811  /* Verify REG_NOTES.  */
22812  {
22813    /* "(expr_list:REG_CALL_DECL".   */
22814    ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
22815    rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
22816    ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
22817
22818    /* "(expr_list:REG_EH_REGION (const_int 0 [0])".  */
22819    rtx_expr_list *note1 = note0->next ();
22820    ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
22821
22822    ASSERT_EQ (NULL, note1->next ());
22823  }
22824
22825  /* Verify CALL_INSN_FUNCTION_USAGE.  */
22826  {
22827    /* "(expr_list:DF (use (reg:DF 21 xmm0))".  */
22828    rtx_expr_list *usage
22829      = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
22830    ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
22831    ASSERT_EQ (DFmode, GET_MODE (usage));
22832    ASSERT_EQ (USE, GET_CODE (usage->element ()));
22833    ASSERT_EQ (NULL, usage->next ());
22834  }
22835}
22836
/* Verify that the RTL loader copes with a dump from print_rtx_function.
22838   This test is target-specific since the dump contains target-specific
22839   hard reg names.  */
22840
22841static void
22842ix86_test_loading_full_dump ()
22843{
22844  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
22845
22846  ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
22847
22848  rtx_insn *insn_1 = get_insn_by_uid (1);
22849  ASSERT_EQ (NOTE, GET_CODE (insn_1));
22850
22851  rtx_insn *insn_7 = get_insn_by_uid (7);
22852  ASSERT_EQ (INSN, GET_CODE (insn_7));
22853  ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
22854
22855  rtx_insn *insn_15 = get_insn_by_uid (15);
22856  ASSERT_EQ (INSN, GET_CODE (insn_15));
22857  ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
22858
22859  /* Verify crtl->return_rtx.  */
22860  ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
22861  ASSERT_EQ (0, REGNO (crtl->return_rtx));
22862  ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
22863}
22864
22865/* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
22866   In particular, verify that it correctly loads the 2nd operand.
22867   This test is target-specific since these are machine-specific
22868   operands (and enums).  */
22869
22870static void
22871ix86_test_loading_unspec ()
22872{
22873  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
22874
22875  ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
22876
22877  ASSERT_TRUE (cfun);
22878
22879  /* Test of an UNSPEC.  */
  rtx_insn *insn = get_insns ();
22881  ASSERT_EQ (INSN, GET_CODE (insn));
22882  rtx set = single_set (insn);
22883  ASSERT_NE (NULL, set);
22884  rtx dst = SET_DEST (set);
22885  ASSERT_EQ (MEM, GET_CODE (dst));
22886  rtx src = SET_SRC (set);
22887  ASSERT_EQ (UNSPEC, GET_CODE (src));
22888  ASSERT_EQ (BLKmode, GET_MODE (src));
22889  ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
22890
22891  rtx v0 = XVECEXP (src, 0, 0);
22892
22893  /* Verify that the two uses of the first SCRATCH have pointer
22894     equality.  */
22895  rtx scratch_a = XEXP (dst, 0);
22896  ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
22897
22898  rtx scratch_b = XEXP (v0, 0);
22899  ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
22900
22901  ASSERT_EQ (scratch_a, scratch_b);
22902
22903  /* Verify that the two mems are thus treated as equal.  */
22904  ASSERT_TRUE (rtx_equal_p (dst, v0));
22905
22906  /* Verify that the insn is recognized.  */
  ASSERT_NE (-1, recog_memoized (insn));
22908
22909  /* Test of an UNSPEC_VOLATILE, which has its own enum values.  */
22910  insn = NEXT_INSN (insn);
22911  ASSERT_EQ (INSN, GET_CODE (insn));
22912
22913  set = single_set (insn);
22914  ASSERT_NE (NULL, set);
22915
22916  src = SET_SRC (set);
22917  ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
22918  ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
22919}
22920
22921/* Run all target-specific selftests.  */
22922
22923static void
22924ix86_run_selftests (void)
22925{
22926  ix86_test_dumping_hard_regs ();
22927  ix86_test_dumping_memory_blockage ();
22928
22929  /* Various tests of loading RTL dumps, here because they contain
22930     ix86-isms (e.g. names of hard regs).  */
22931  ix86_test_loading_dump_fragment_1 ();
22932  ix86_test_loading_call_insn ();
22933  ix86_test_loading_full_dump ();
22934  ix86_test_loading_unspec ();
22935}
22936
22937} // namespace selftest
22938
22939#endif /* CHECKING_P */
22940
22941/* Initialize the GCC target structure.  */
22942#undef TARGET_RETURN_IN_MEMORY
22943#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
22944
22945#undef TARGET_LEGITIMIZE_ADDRESS
22946#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
22947
22948#undef TARGET_ATTRIBUTE_TABLE
22949#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
22950#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
22951#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
22952#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22953#  undef TARGET_MERGE_DECL_ATTRIBUTES
22954#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
22955#endif
22956
22957#undef TARGET_COMP_TYPE_ATTRIBUTES
22958#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
22959
22960#undef TARGET_INIT_BUILTINS
22961#define TARGET_INIT_BUILTINS ix86_init_builtins
22962#undef TARGET_BUILTIN_DECL
22963#define TARGET_BUILTIN_DECL ix86_builtin_decl
22964#undef TARGET_EXPAND_BUILTIN
22965#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
22966
22967#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
22968#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
22969  ix86_builtin_vectorized_function
22970
22971#undef TARGET_VECTORIZE_BUILTIN_GATHER
22972#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
22973
22974#undef TARGET_VECTORIZE_BUILTIN_SCATTER
22975#define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
22976
22977#undef TARGET_BUILTIN_RECIPROCAL
22978#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
22979
22980#undef TARGET_ASM_FUNCTION_EPILOGUE
22981#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
22982
22983#undef TARGET_ENCODE_SECTION_INFO
22984#ifndef SUBTARGET_ENCODE_SECTION_INFO
22985#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
22986#else
22987#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
22988#endif
22989
22990#undef TARGET_ASM_OPEN_PAREN
22991#define TARGET_ASM_OPEN_PAREN ""
22992#undef TARGET_ASM_CLOSE_PAREN
22993#define TARGET_ASM_CLOSE_PAREN ""
22994
22995#undef TARGET_ASM_BYTE_OP
22996#define TARGET_ASM_BYTE_OP ASM_BYTE
22997
22998#undef TARGET_ASM_ALIGNED_HI_OP
22999#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
23000#undef TARGET_ASM_ALIGNED_SI_OP
23001#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
23002#ifdef ASM_QUAD
23003#undef TARGET_ASM_ALIGNED_DI_OP
23004#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
23005#endif
23006
23007#undef TARGET_PROFILE_BEFORE_PROLOGUE
23008#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
23009
23010#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
23011#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
23012
23013#undef TARGET_ASM_UNALIGNED_HI_OP
23014#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
23015#undef TARGET_ASM_UNALIGNED_SI_OP
23016#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
23017#undef TARGET_ASM_UNALIGNED_DI_OP
23018#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
23019
23020#undef TARGET_PRINT_OPERAND
23021#define TARGET_PRINT_OPERAND ix86_print_operand
23022#undef TARGET_PRINT_OPERAND_ADDRESS
23023#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
23024#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
23025#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
23026#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
23027#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
23028
23029#undef TARGET_SCHED_INIT_GLOBAL
23030#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
23031#undef TARGET_SCHED_ADJUST_COST
23032#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
23033#undef TARGET_SCHED_ISSUE_RATE
23034#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
23035#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
23036#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
23037  ia32_multipass_dfa_lookahead
23038#undef TARGET_SCHED_MACRO_FUSION_P
23039#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
23040#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
23041#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
23042
23043#undef TARGET_FUNCTION_OK_FOR_SIBCALL
23044#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
23045
23046#undef TARGET_MEMMODEL_CHECK
23047#define TARGET_MEMMODEL_CHECK ix86_memmodel_check
23048
23049#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
23050#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
23051
23052#ifdef HAVE_AS_TLS
23053#undef TARGET_HAVE_TLS
23054#define TARGET_HAVE_TLS true
23055#endif
23056#undef TARGET_CANNOT_FORCE_CONST_MEM
23057#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
23058#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
23059#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
23060
23061#undef TARGET_DELEGITIMIZE_ADDRESS
23062#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
23063
23064#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
23065#define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
23066
23067#undef TARGET_MS_BITFIELD_LAYOUT_P
23068#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
23069
23070#if TARGET_MACHO
23071#undef TARGET_BINDS_LOCAL_P
23072#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
23073#else
23074#undef TARGET_BINDS_LOCAL_P
23075#define TARGET_BINDS_LOCAL_P ix86_binds_local_p
23076#endif
23077#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23078#undef TARGET_BINDS_LOCAL_P
23079#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
23080#endif
23081
23082#undef TARGET_ASM_OUTPUT_MI_THUNK
23083#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
23084#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
23085#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
23086
23087#undef TARGET_ASM_FILE_START
23088#define TARGET_ASM_FILE_START x86_file_start
23089
23090#undef TARGET_OPTION_OVERRIDE
23091#define TARGET_OPTION_OVERRIDE ix86_option_override
23092
23093#undef TARGET_REGISTER_MOVE_COST
23094#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
23095#undef TARGET_MEMORY_MOVE_COST
23096#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
23097#undef TARGET_RTX_COSTS
23098#define TARGET_RTX_COSTS ix86_rtx_costs
23099#undef TARGET_ADDRESS_COST
23100#define TARGET_ADDRESS_COST ix86_address_cost
23101
23102#undef TARGET_FLAGS_REGNUM
23103#define TARGET_FLAGS_REGNUM FLAGS_REG
23104#undef TARGET_FIXED_CONDITION_CODE_REGS
23105#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
23106#undef TARGET_CC_MODES_COMPATIBLE
23107#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
23108
23109#undef TARGET_MACHINE_DEPENDENT_REORG
23110#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
23111
23112#undef TARGET_BUILD_BUILTIN_VA_LIST
23113#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
23114
23115#undef TARGET_FOLD_BUILTIN
23116#define TARGET_FOLD_BUILTIN ix86_fold_builtin
23117
23118#undef TARGET_GIMPLE_FOLD_BUILTIN
23119#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
23120
23121#undef TARGET_COMPARE_VERSION_PRIORITY
23122#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
23123
23124#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
23125#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
23126  ix86_generate_version_dispatcher_body
23127
23128#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
23129#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
23130  ix86_get_function_versions_dispatcher
23131
23132#undef TARGET_ENUM_VA_LIST_P
23133#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
23134
23135#undef TARGET_FN_ABI_VA_LIST
23136#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
23137
23138#undef TARGET_CANONICAL_VA_LIST_TYPE
23139#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
23140
23141#undef TARGET_EXPAND_BUILTIN_VA_START
23142#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
23143
23144#undef TARGET_MD_ASM_ADJUST
23145#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
23146
23147#undef TARGET_C_EXCESS_PRECISION
23148#define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
23149#undef TARGET_PROMOTE_PROTOTYPES
23150#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
23151#undef TARGET_SETUP_INCOMING_VARARGS
23152#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
23153#undef TARGET_MUST_PASS_IN_STACK
23154#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
23155#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
23156#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
23157#undef TARGET_FUNCTION_ARG_ADVANCE
23158#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
23159#undef TARGET_FUNCTION_ARG
23160#define TARGET_FUNCTION_ARG ix86_function_arg
23161#undef TARGET_INIT_PIC_REG
23162#define TARGET_INIT_PIC_REG ix86_init_pic_reg
23163#undef TARGET_USE_PSEUDO_PIC_REG
23164#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
23165#undef TARGET_FUNCTION_ARG_BOUNDARY
23166#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
23167#undef TARGET_PASS_BY_REFERENCE
23168#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
23169#undef TARGET_INTERNAL_ARG_POINTER
23170#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
23171#undef TARGET_UPDATE_STACK_BOUNDARY
23172#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
23173#undef TARGET_GET_DRAP_RTX
23174#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
23175#undef TARGET_STRICT_ARGUMENT_NAMING
23176#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
23177#undef TARGET_STATIC_CHAIN
23178#define TARGET_STATIC_CHAIN ix86_static_chain
23179#undef TARGET_TRAMPOLINE_INIT
23180#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
23181#undef TARGET_RETURN_POPS_ARGS
23182#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
23183
23184#undef TARGET_WARN_FUNC_RETURN
23185#define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
23186
23187#undef TARGET_LEGITIMATE_COMBINED_INSN
23188#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
23189
23190#undef TARGET_ASAN_SHADOW_OFFSET
23191#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
23192
23193#undef TARGET_GIMPLIFY_VA_ARG_EXPR
23194#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
23195
23196#undef TARGET_SCALAR_MODE_SUPPORTED_P
23197#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
23198
23199#undef TARGET_VECTOR_MODE_SUPPORTED_P
23200#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
23201
23202#undef TARGET_C_MODE_FOR_SUFFIX
23203#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
23204
23205#ifdef HAVE_AS_TLS
23206#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
23207#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
23208#endif
23209
23210#ifdef SUBTARGET_INSERT_ATTRIBUTES
23211#undef TARGET_INSERT_ATTRIBUTES
23212#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
23213#endif
23214
23215#undef TARGET_MANGLE_TYPE
23216#define TARGET_MANGLE_TYPE ix86_mangle_type
23217
23218#undef TARGET_STACK_PROTECT_GUARD
23219#define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
23220
23221#if !TARGET_MACHO
23222#undef TARGET_STACK_PROTECT_FAIL
23223#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
23224#endif
23225
23226#undef TARGET_FUNCTION_VALUE
23227#define TARGET_FUNCTION_VALUE ix86_function_value
23228
23229#undef TARGET_FUNCTION_VALUE_REGNO_P
23230#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
23231
23232#undef TARGET_PROMOTE_FUNCTION_MODE
23233#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
23234
23235#undef  TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
23236#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
23237
23238#undef TARGET_MEMBER_TYPE_FORCES_BLK
23239#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
23240
23241#undef TARGET_INSTANTIATE_DECLS
23242#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
23243
23244#undef TARGET_SECONDARY_RELOAD
23245#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
23246#undef TARGET_SECONDARY_MEMORY_NEEDED
23247#define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
23248#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
23249#define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
23250
23251#undef TARGET_CLASS_MAX_NREGS
23252#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
23253
23254#undef TARGET_PREFERRED_RELOAD_CLASS
23255#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
23256#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
23257#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
23258#undef TARGET_CLASS_LIKELY_SPILLED_P
23259#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
23260
23261#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
23262#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
23263  ix86_builtin_vectorization_cost
23264#undef TARGET_VECTORIZE_VEC_PERM_CONST
23265#define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
23266#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
23267#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
23268  ix86_preferred_simd_mode
23269#undef TARGET_VECTORIZE_SPLIT_REDUCTION
23270#define TARGET_VECTORIZE_SPLIT_REDUCTION \
23271  ix86_split_reduction
23272#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
23273#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
23274  ix86_autovectorize_vector_modes
23275#undef TARGET_VECTORIZE_GET_MASK_MODE
23276#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
23277#undef TARGET_VECTORIZE_INIT_COST
23278#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
23279#undef TARGET_VECTORIZE_ADD_STMT_COST
23280#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
23281#undef TARGET_VECTORIZE_FINISH_COST
23282#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
23283#undef TARGET_VECTORIZE_DESTROY_COST_DATA
23284#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
23285
23286#undef TARGET_SET_CURRENT_FUNCTION
23287#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
23288
23289#undef TARGET_OPTION_VALID_ATTRIBUTE_P
23290#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
23291
23292#undef TARGET_OPTION_SAVE
23293#define TARGET_OPTION_SAVE ix86_function_specific_save
23294
23295#undef TARGET_OPTION_RESTORE
23296#define TARGET_OPTION_RESTORE ix86_function_specific_restore
23297
23298#undef TARGET_OPTION_POST_STREAM_IN
23299#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
23300
23301#undef TARGET_OPTION_PRINT
23302#define TARGET_OPTION_PRINT ix86_function_specific_print
23303
23304#undef TARGET_OPTION_FUNCTION_VERSIONS
23305#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
23306
23307#undef TARGET_CAN_INLINE_P
23308#define TARGET_CAN_INLINE_P ix86_can_inline_p
23309
23310#undef TARGET_LEGITIMATE_ADDRESS_P
23311#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
23312
23313#undef TARGET_REGISTER_PRIORITY
23314#define TARGET_REGISTER_PRIORITY ix86_register_priority
23315
23316#undef TARGET_REGISTER_USAGE_LEVELING_P
23317#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
23318
23319#undef TARGET_LEGITIMATE_CONSTANT_P
23320#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
23321
23322#undef TARGET_COMPUTE_FRAME_LAYOUT
23323#define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
23324
23325#undef TARGET_FRAME_POINTER_REQUIRED
23326#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
23327
23328#undef TARGET_CAN_ELIMINATE
23329#define TARGET_CAN_ELIMINATE ix86_can_eliminate
23330
23331#undef TARGET_EXTRA_LIVE_ON_ENTRY
23332#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
23333
23334#undef TARGET_ASM_CODE_END
23335#define TARGET_ASM_CODE_END ix86_code_end
23336
23337#undef TARGET_CONDITIONAL_REGISTER_USAGE
23338#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
23339
23340#undef TARGET_CANONICALIZE_COMPARISON
23341#define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
23342
23343#undef TARGET_LOOP_UNROLL_ADJUST
23344#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
23345
23346/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
23347#undef TARGET_SPILL_CLASS
23348#define TARGET_SPILL_CLASS ix86_spill_class
23349
23350#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
23351#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
23352  ix86_simd_clone_compute_vecsize_and_simdlen
23353
23354#undef TARGET_SIMD_CLONE_ADJUST
23355#define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
23356
23357#undef TARGET_SIMD_CLONE_USABLE
23358#define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
23359
23360#undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
23361#define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
23362
23363#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
23364#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
23365  ix86_float_exceptions_rounding_supported_p
23366
23367#undef TARGET_MODE_EMIT
23368#define TARGET_MODE_EMIT ix86_emit_mode_set
23369
23370#undef TARGET_MODE_NEEDED
23371#define TARGET_MODE_NEEDED ix86_mode_needed
23372
23373#undef TARGET_MODE_AFTER
23374#define TARGET_MODE_AFTER ix86_mode_after
23375
23376#undef TARGET_MODE_ENTRY
23377#define TARGET_MODE_ENTRY ix86_mode_entry
23378
23379#undef TARGET_MODE_EXIT
23380#define TARGET_MODE_EXIT ix86_mode_exit
23381
23382#undef TARGET_MODE_PRIORITY
23383#define TARGET_MODE_PRIORITY ix86_mode_priority
23384
23385#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
23386#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
23387
23388#undef TARGET_OFFLOAD_OPTIONS
23389#define TARGET_OFFLOAD_OPTIONS \
23390  ix86_offload_options
23391
23392#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
23393#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
23394
23395#undef TARGET_OPTAB_SUPPORTED_P
23396#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
23397
23398#undef TARGET_HARD_REGNO_SCRATCH_OK
23399#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
23400
23401#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
23402#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
23403
23404#undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
23405#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
23406
23407#undef TARGET_INIT_LIBFUNCS
23408#define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
23409
23410#undef TARGET_EXPAND_DIVMOD_LIBFUNC
23411#define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
23412
23413#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
23414#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
23415
23416#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
23417#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
23418
23419#undef TARGET_HARD_REGNO_NREGS
23420#define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
23421#undef TARGET_HARD_REGNO_MODE_OK
23422#define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
23423
23424#undef TARGET_MODES_TIEABLE_P
23425#define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
23426
23427#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
23428#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
23429  ix86_hard_regno_call_part_clobbered
23430
23431#undef TARGET_CAN_CHANGE_MODE_CLASS
23432#define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
23433
23434#undef TARGET_STATIC_RTX_ALIGNMENT
23435#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
23436#undef TARGET_CONSTANT_ALIGNMENT
23437#define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
23438
23439#undef TARGET_EMPTY_RECORD_P
23440#define TARGET_EMPTY_RECORD_P ix86_is_empty_record
23441
23442#undef TARGET_WARN_PARAMETER_PASSING_ABI
23443#define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
23444
23445#undef TARGET_GET_MULTILIB_ABI_NAME
23446#define TARGET_GET_MULTILIB_ABI_NAME \
23447  ix86_get_multilib_abi_name
23448
/* Implement TARGET_LIBC_HAS_FAST_FUNCTION.  */

static bool
ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
23450{
23451#ifdef OPTION_GLIBC
23452  if (OPTION_GLIBC)
23453    return (built_in_function)fcode == BUILT_IN_MEMPCPY;
23454  else
23455    return false;
23456#else
23457  return false;
23458#endif
23459}
23460
23461#undef TARGET_LIBC_HAS_FAST_FUNCTION
23462#define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
23463
23464#if CHECKING_P
23465#undef TARGET_RUN_TARGET_SELFTESTS
23466#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
23467#endif /* #if CHECKING_P */
23468
23469struct gcc_target targetm = TARGET_INITIALIZER;
23470
23471#include "gt-i386.h"
23472