/* Copyright (C) 1988-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "cfgbuild.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "calls.h"
#include "stor-layout.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "cfgrtl.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "tm-constrs.h"
#include "cselib.h"
#include "sched-int.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"
#include "pass_manager.h"
#include "target-globals.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tree-iterator.h"
#include "dbgcnt.h"
#include "case-cfn-macros.h"
#include "dojump.h"
#include "fold-const-call.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "selftest.h"
#include "selftest-rtl.h"
#include "print-rtl.h"
#include "intl.h"
#include "ifcvt.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "wide-int-bitmask.h"
#include "tree-vector-builder.h"
#include "debug.h"
#include "dwarf2out.h"
#include "i386-builtins.h"
#include "i386-features.h"

const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
  "savms64",
  "resms64",
  "resms64x",
  "savms64f",
  "resms64f",
  "resms64fx"
};

const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
/* The below offset values are where each register is stored for the layout
   relative to the incoming stack pointer.  The value of each m_regs[].offset
   will be relative to the incoming base pointer (rax or rsi) used by the
   stub.

    s_instances:   0		1		2		3
    Offset:					realigned or	aligned + 8
    Register	   aligned	aligned + 8	aligned w/HFP	w/HFP	*/
    XMM15_REG,	/* 0x10		0x18		0x10		0x18	*/
    XMM14_REG,	/* 0x20		0x28		0x20		0x28	*/
    XMM13_REG,	/* 0x30		0x38		0x30		0x38	*/
    XMM12_REG,	/* 0x40		0x48		0x40		0x48	*/
    XMM11_REG,	/* 0x50		0x58		0x50		0x58	*/
    XMM10_REG,	/* 0x60		0x68		0x60		0x68	*/
    XMM9_REG,	/* 0x70		0x78		0x70		0x78	*/
    XMM8_REG,	/* 0x80		0x88		0x80		0x88	*/
    XMM7_REG,	/* 0x90		0x98		0x90		0x98	*/
    XMM6_REG,	/* 0xa0		0xa8		0xa0		0xa8	*/
    SI_REG,	/* 0xa8		0xb0		0xa8		0xb0	*/
    DI_REG,	/* 0xb0		0xb8		0xb0		0xb8	*/
    BX_REG,	/* 0xb8		0xc0		0xb8		0xc0	*/
    BP_REG,	/* 0xc0		0xc8		N/A		N/A	*/
    R12_REG,	/* 0xc8		0xd0		0xc0		0xc8	*/
    R13_REG,	/* 0xd0		0xd8		0xc8		0xd0	*/
    R14_REG,	/* 0xd8		0xe0		0xd0		0xd8	*/
    R15_REG,	/* 0xe0		0xe8		0xd8		0xe0	*/
};

/* Instantiate static const values.  */
const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
const unsigned xlogue_layout::MIN_REGS;
const unsigned xlogue_layout::MAX_REGS;
const unsigned xlogue_layout::MAX_EXTRA_REGS;
const unsigned xlogue_layout::VARIANT_COUNT;
const unsigned xlogue_layout::STUB_NAME_MAX_LEN;

/* Initialize xlogue_layout::s_stub_names to zero.  */
char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
				[STUB_NAME_MAX_LEN];

/* Instantiates all xlogue_layout instances.  */
const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
  xlogue_layout (0, false),
  xlogue_layout (8, false),
  xlogue_layout (0, true),
  xlogue_layout (8, true)
};

/* Return an appropriate const instance of xlogue_layout based upon values
   in cfun->machine and crtl.  */
const class xlogue_layout &
xlogue_layout::get_instance ()
{
  enum xlogue_stub_sets stub_set;
  bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;

  if (stack_realign_fp)
    stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
  else if (frame_pointer_needed)
    stub_set = aligned_plus_8
	      ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
	      : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
  else
    stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;

  return s_instances[stub_set];
}
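
/* For example (assuming the xlogue_stub_sets enumerators match the
   s_instances ordering above), a function with frame_pointer_needed,
   no stack realignment, and call_ms2sysv_pad_in set selects
   XLOGUE_SET_HFP_ALIGNED_PLUS_8, i.e. the xlogue_layout (8, true)
   instance.  */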

/* Determine how many clobbered registers can be saved by the stub.
   Returns the count of registers the stub will save and restore.  */
unsigned
xlogue_layout::count_stub_managed_regs ()
{
  bool hfp = frame_pointer_needed || stack_realign_fp;
  unsigned i, count;
  unsigned regno;

  for (count = i = MIN_REGS; i < MAX_REGS; ++i)
    {
      regno = REG_ORDER[i];
      if (regno == BP_REG && hfp)
	continue;
      if (!ix86_save_reg (regno, false, false))
	break;
      ++count;
    }
  return count;
}

/* Determine if register REGNO is a stub managed register given the
   total COUNT of stub managed registers.  */
bool
xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
{
  bool hfp = frame_pointer_needed || stack_realign_fp;
  unsigned i;

  for (i = 0; i < count; ++i)
    {
      gcc_assert (i < MAX_REGS);
      if (REG_ORDER[i] == BP_REG && hfp)
	++count;
      else if (REG_ORDER[i] == regno)
	return true;
    }
  return false;
}

/* Constructor for xlogue_layout.  */
xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
  : m_hfp (hfp), m_nregs (hfp ? 17 : 18),
    m_stack_align_off_in (stack_align_off_in)
{
  HOST_WIDE_INT offset = stack_align_off_in;
  unsigned i, j;

  for (i = j = 0; i < MAX_REGS; ++i)
    {
      unsigned regno = REG_ORDER[i];

      if (regno == BP_REG && hfp)
	continue;
      if (SSE_REGNO_P (regno))
	{
	  offset += 16;
	  /* Verify that SSE regs are always aligned.  */
	  gcc_assert (!((stack_align_off_in + offset) & 15));
	}
      else
	offset += 8;

      m_regs[j].regno    = regno;
      m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
    }
  gcc_assert (j == m_nregs);
}

const char *
xlogue_layout::get_stub_name (enum xlogue_stub stub,
			      unsigned n_extra_regs)
{
  const int have_avx = TARGET_AVX;
  char *name = s_stub_names[!!have_avx][stub][n_extra_regs];

  /* Lazy init.  */
  if (!*name)
    {
      int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
			  (have_avx ? "avx" : "sse"),
			  STUB_BASE_NAMES[stub],
			  MIN_REGS + n_extra_regs);
      gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
    }

  return name;
}
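
/* As an illustrative sketch of the naming scheme: with TARGET_AVX and
   stub XLOGUE_STUB_SAVMS64, two extra registers would yield a name
   like "__avx_savms64_14", assuming MIN_REGS is 12; without AVX the
   prefix is "sse".  The exact counts depend on MIN_REGS as defined in
   the class declaration.  */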

/* Return rtx of a symbol ref for the entry point (based upon
   cfun->machine->call_ms2sysv_extra_regs) of the specified stub.  */
rtx
xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
{
  const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
  gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
  gcc_assert (stub < XLOGUE_STUB_COUNT);
  gcc_assert (crtl->stack_realign_finalized);

  return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
}

unsigned scalar_chain::max_id = 0;

namespace {

/* Initialize new chain.  */

scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
{
  smode = smode_;
  vmode = vmode_;

  chain_id = ++max_id;

  if (dump_file)
    fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);

  bitmap_obstack_initialize (NULL);
  insns = BITMAP_ALLOC (NULL);
  defs = BITMAP_ALLOC (NULL);
  defs_conv = BITMAP_ALLOC (NULL);
  queue = NULL;
}

/* Free chain's data.  */

scalar_chain::~scalar_chain ()
{
  BITMAP_FREE (insns);
  BITMAP_FREE (defs);
  BITMAP_FREE (defs_conv);
  bitmap_obstack_release (NULL);
}
/* Add instruction into chain's queue.  */

void
scalar_chain::add_to_queue (unsigned insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid)
      || bitmap_bit_p (queue, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, "  Adding insn %d into chain's #%d queue\n",
	     insn_uid, chain_id);
  bitmap_set_bit (queue, insn_uid);
}

general_scalar_chain::general_scalar_chain (enum machine_mode smode_,
					    enum machine_mode vmode_)
     : scalar_chain (smode_, vmode_)
{
  insns_conv = BITMAP_ALLOC (NULL);
  n_sse_to_integer = 0;
  n_integer_to_sse = 0;
}

general_scalar_chain::~general_scalar_chain ()
{
  BITMAP_FREE (insns_conv);
}

/* For DImode conversion, mark register defined by DEF as requiring
   conversion.  */

void
general_scalar_chain::mark_dual_mode_def (df_ref def)
{
  gcc_assert (DF_REF_REG_DEF_P (def));

  /* Record the def/insn pair so we can later efficiently iterate over
     the defs to convert on insns not in the chain.  */
  bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
  if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))
    {
      if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
	  && !reg_new)
	return;
      n_integer_to_sse++;
    }
  else
    {
      if (!reg_new)
	return;
      n_sse_to_integer++;
    }

  if (dump_file)
    fprintf (dump_file,
	     "  Mark r%d def in insn %d as requiring both modes in chain #%d\n",
	     DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
}
/* mark_dual_mode_def is not used for the TImode conversion.  */

void
timode_scalar_chain::mark_dual_mode_def (df_ref)
{
  gcc_unreachable ();
}

/* Check REF's chain to add new insns into a queue
   and find registers requiring conversion.  */

void
scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
{
  df_link *chain;

  gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
	      || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
  add_to_queue (DF_REF_INSN_UID (ref));

  for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
    {
      unsigned uid = DF_REF_INSN_UID (chain->ref);

      if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
	continue;

      if (!DF_REF_REG_MEM_P (chain->ref))
	{
	  if (bitmap_bit_p (insns, uid))
	    continue;

	  if (bitmap_bit_p (candidates, uid))
	    {
	      add_to_queue (uid);
	      continue;
	    }
	}

      if (DF_REF_REG_DEF_P (chain->ref))
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d def in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (chain->ref);
	}
      else
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d use in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (ref);
	}
    }
}

/* Add instruction into a chain.  */

void
scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, chain_id);

  bitmap_set_bit (insns, insn_uid);

  rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
  rtx def_set = single_set (insn);
  if (def_set && REG_P (SET_DEST (def_set))
      && !HARD_REGISTER_P (SET_DEST (def_set)))
    bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));

  /* ???  The following is quadratic since analyze_register_chain
     iterates over all refs to look for dual-mode regs.  Instead this
     should be done separately for all regs mentioned in the chain once.  */
  df_ref ref;
  for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!HARD_REGISTER_P (DF_REF_REG (ref)))
      analyze_register_chain (candidates, ref);
  for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!DF_REF_REG_MEM_P (ref))
      analyze_register_chain (candidates, ref);
}

/* Build new chain starting from insn INSN_UID recursively
   adding all dependent uses and definitions.  */

void
scalar_chain::build (bitmap candidates, unsigned insn_uid)
{
  queue = BITMAP_ALLOC (NULL);
  bitmap_set_bit (queue, insn_uid);

  if (dump_file)
    fprintf (dump_file, "Building chain #%d...\n", chain_id);

  while (!bitmap_empty_p (queue))
    {
      insn_uid = bitmap_first_set_bit (queue);
      bitmap_clear_bit (queue, insn_uid);
      bitmap_clear_bit (candidates, insn_uid);
      add_insn (candidates, insn_uid);
    }

  if (dump_file)
    {
      fprintf (dump_file, "Collected chain #%d...\n", chain_id);
      fprintf (dump_file, "  insns: ");
      dump_bitmap (dump_file, insns);
      if (!bitmap_empty_p (defs_conv))
	{
	  bitmap_iterator bi;
	  unsigned id;
	  const char *comma = "";
	  fprintf (dump_file, "  defs to convert: ");
	  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
	    {
	      fprintf (dump_file, "%sr%d", comma, id);
	      comma = ", ";
	    }
	  fprintf (dump_file, "\n");
	}
    }

  BITMAP_FREE (queue);
}

/* Return the cost of building a vector constant
   instead of using a scalar one.  */

int
general_scalar_chain::vector_const_cost (rtx exp)
{
  gcc_assert (CONST_INT_P (exp));

  if (standard_sse_constant_p (exp, vmode))
    return ix86_cost->sse_op;
  /* We have separate costs for SImode and DImode, use SImode costs
     for smaller modes.  */
  return ix86_cost->sse_load[smode == DImode ? 1 : 0];
}

/* Compute a gain for chain conversion.  */

int
general_scalar_chain::compute_convert_gain ()
{
  bitmap_iterator bi;
  unsigned insn_uid;
  int gain = 0;
  int cost = 0;

  if (dump_file)
    fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);

  /* SSE costs distinguish between SImode and DImode loads/stores; the
     int costs factor in the number of GPRs involved.  When supporting
     smaller modes than SImode the int load/store costs need to be
     adjusted as well.  */
  unsigned sse_cost_idx = smode == DImode ? 1 : 0;
  unsigned m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;

  EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
    {
      rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
      rtx def_set = single_set (insn);
      rtx src = SET_SRC (def_set);
      rtx dst = SET_DEST (def_set);
      int igain = 0;

      if (REG_P (src) && REG_P (dst))
	igain += 2 * m - ix86_cost->xmm_move;
      else if (REG_P (src) && MEM_P (dst))
	igain
	  += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx];
      else if (MEM_P (src) && REG_P (dst))
	igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx];
      else if (GET_CODE (src) == ASHIFT
	       || GET_CODE (src) == ASHIFTRT
	       || GET_CODE (src) == LSHIFTRT)
	{
	  if (m == 2)
	    {
	      if (INTVAL (XEXP (src, 1)) >= 32)
		igain += ix86_cost->add;
	      else
		igain += ix86_cost->shift_const;
	    }

	  igain += ix86_cost->shift_const - ix86_cost->sse_op;

	  if (CONST_INT_P (XEXP (src, 0)))
	    igain -= vector_const_cost (XEXP (src, 0));
	}
      else if (GET_CODE (src) == PLUS
	       || GET_CODE (src) == MINUS
	       || GET_CODE (src) == IOR
	       || GET_CODE (src) == XOR
	       || GET_CODE (src) == AND)
	{
	  igain += m * ix86_cost->add - ix86_cost->sse_op;
	  /* Additional gain for andnot for targets without BMI.  */
	  if (GET_CODE (XEXP (src, 0)) == NOT
	      && !TARGET_BMI)
	    igain += m * ix86_cost->add;

	  if (CONST_INT_P (XEXP (src, 0)))
	    igain -= vector_const_cost (XEXP (src, 0));
	  if (CONST_INT_P (XEXP (src, 1)))
	    igain -= vector_const_cost (XEXP (src, 1));
	}
      else if (GET_CODE (src) == NEG
	       || GET_CODE (src) == NOT)
	igain += m * ix86_cost->add - ix86_cost->sse_op - COSTS_N_INSNS (1);
      else if (GET_CODE (src) == SMAX
	       || GET_CODE (src) == SMIN
	       || GET_CODE (src) == UMAX
	       || GET_CODE (src) == UMIN)
	{
	  /* We do not have any conditional move cost, estimate it as a
	     reg-reg move.  Comparisons are costed as adds.  */
	  igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
	  /* Integer SSE ops are all costed the same.  */
	  igain -= ix86_cost->sse_op;
	}
      else if (GET_CODE (src) == COMPARE)
	{
	  /* Assume comparison cost is the same.  */
	}
      else if (CONST_INT_P (src))
	{
	  if (REG_P (dst))
	    /* A constant is a valid immediate for SImode always, and for
	       DImode only with TARGET_64BIT.  */
	    igain += m * COSTS_N_INSNS (1);
	  else if (MEM_P (dst))
	    igain += (m * ix86_cost->int_store[2]
		     - ix86_cost->sse_store[sse_cost_idx]);
	  igain -= vector_const_cost (src);
	}
      else
	gcc_unreachable ();

      if (igain != 0 && dump_file)
	{
	  fprintf (dump_file, "  Instruction gain %d for ", igain);
	  dump_insn_slim (dump_file, insn);
	}
      gain += igain;
    }

  if (dump_file)
    fprintf (dump_file, "  Instruction conversion gain: %d\n", gain);

  /* Cost the integer to sse and sse to integer moves.  */
  cost += n_sse_to_integer * ix86_cost->sse_to_integer;
  /* ???  integer_to_sse but we only have that in the RA cost table.
     Assume sse_to_integer/integer_to_sse are the same which they
     are at the moment.  */
  cost += n_integer_to_sse * ix86_cost->sse_to_integer;

  if (dump_file)
    fprintf (dump_file, "  Registers conversion cost: %d\n", cost);

  gain -= cost;

  if (dump_file)
    fprintf (dump_file, "  Total gain: %d\n", gain);

  return gain;
}
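
/* An illustrative gain computation, not taken from the sources: for a
   DImode PLUS on a 32-bit target (m == 2), the scalar form needs two
   "add" insns while the vector form is a single SSE op, so its igain
   is 2 * ix86_cost->add - ix86_cost->sse_op.  The chain is converted
   only when the summed instruction gain exceeds the cost of the
   GPR<->XMM moves accounted above.  */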

/* Insert generated conversion instruction sequence INSNS
   after instruction AFTER.  New BB may be required in case
   instruction has EH region attached.  */

void
scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
{
  if (!control_flow_insn_p (after))
    {
      emit_insn_after (insns, after);
      return;
    }

  basic_block bb = BLOCK_FOR_INSN (after);
  edge e = find_fallthru_edge (bb->succs);
  gcc_assert (e);

  basic_block new_bb = split_edge (e);
  emit_insn_after (insns, BB_HEAD (new_bb));
}

} // anon namespace

/* Generate the canonical SET_SRC to move GPR to a VMODE vector register,
   zeroing the upper parts.  */

static rtx
gen_gpr_to_xmm_move_src (enum machine_mode vmode, rtx gpr)
{
  switch (GET_MODE_NUNITS (vmode))
    {
    case 1:
      /* We are not using this case currently.  */
      gcc_unreachable ();
    case 2:
      return gen_rtx_VEC_CONCAT (vmode, gpr,
				 CONST0_RTX (GET_MODE_INNER (vmode)));
    default:
      return gen_rtx_VEC_MERGE (vmode, gen_rtx_VEC_DUPLICATE (vmode, gpr),
				CONST0_RTX (vmode), GEN_INT (HOST_WIDE_INT_1U));
    }
}
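
/* For example, for VMODE == V2DImode this generates

     (vec_concat:V2DI (reg:DI gpr) (const_int 0))

   and for VMODE == V4SImode

     (vec_merge:V4SI (vec_duplicate:V4SI (reg:SI gpr))
		     (const_vector:V4SI [0, 0, 0, 0])
		     (const_int 1))

   both of which set the low element from GPR and zero the rest.  */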

/* Copy scalar register REG, which is defined by INSN outside of the
   chain, into the vector register that defs_map assigns to it; the
   copy is emitted right after INSN.  */

void
general_scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg)
{
  rtx vreg = *defs_map.get (reg);

  start_sequence ();
  if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
    {
      rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
      if (smode == DImode && !TARGET_64BIT)
	{
	  emit_move_insn (adjust_address (tmp, SImode, 0),
			  gen_rtx_SUBREG (SImode, reg, 0));
	  emit_move_insn (adjust_address (tmp, SImode, 4),
			  gen_rtx_SUBREG (SImode, reg, 4));
	}
      else
	emit_move_insn (copy_rtx (tmp), reg);
      emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
			      gen_gpr_to_xmm_move_src (vmode, tmp)));
    }
  else if (!TARGET_64BIT && smode == DImode)
    {
      if (TARGET_SSE4_1)
	{
	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
				      CONST0_RTX (V4SImode),
				      gen_rtx_SUBREG (SImode, reg, 0)));
	  emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
					gen_rtx_SUBREG (V4SImode, vreg, 0),
					gen_rtx_SUBREG (SImode, reg, 4),
					GEN_INT (2)));
	}
      else
	{
	  rtx tmp = gen_reg_rtx (DImode);
	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
				      CONST0_RTX (V4SImode),
				      gen_rtx_SUBREG (SImode, reg, 0)));
	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
				      CONST0_RTX (V4SImode),
				      gen_rtx_SUBREG (SImode, reg, 4)));
	  emit_insn (gen_vec_interleave_lowv4si
		     (gen_rtx_SUBREG (V4SImode, vreg, 0),
		      gen_rtx_SUBREG (V4SImode, vreg, 0),
		      gen_rtx_SUBREG (V4SImode, tmp, 0)));
	}
    }
  else
    emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
			    gen_gpr_to_xmm_move_src (vmode, reg)));
  rtx_insn *seq = get_insns ();
  end_sequence ();
  emit_conversion_insns (seq, insn);

  if (dump_file)
    fprintf (dump_file,
	     "  Copied r%d to a vector register r%d for insn %d\n",
	     REGNO (reg), REGNO (vreg), INSN_UID (insn));
}

/* Copy the definition SRC of INSN inside the chain to DST for
   scalar uses outside of the chain.  */

void
general_scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src)
{
  start_sequence ();
  if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
    {
      rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
      emit_move_insn (tmp, src);
      if (!TARGET_64BIT && smode == DImode)
	{
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
			  adjust_address (tmp, SImode, 0));
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
			  adjust_address (tmp, SImode, 4));
	}
      else
	emit_move_insn (dst, copy_rtx (tmp));
    }
  else if (!TARGET_64BIT && smode == DImode)
    {
      if (TARGET_SSE4_1)
	{
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode,
				      gen_rtvec (1, const0_rtx));
	  emit_insn
	      (gen_rtx_SET
	       (gen_rtx_SUBREG (SImode, dst, 0),
		gen_rtx_VEC_SELECT (SImode,
				    gen_rtx_SUBREG (V4SImode, src, 0),
				    tmp)));

	  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
	  emit_insn
	      (gen_rtx_SET
	       (gen_rtx_SUBREG (SImode, dst, 4),
		gen_rtx_VEC_SELECT (SImode,
				    gen_rtx_SUBREG (V4SImode, src, 0),
				    tmp)));
	}
      else
	{
	  rtx vcopy = gen_reg_rtx (V2DImode);
	  emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, src, 0));
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
			  gen_rtx_SUBREG (SImode, vcopy, 0));
	  emit_move_insn (vcopy,
			  gen_rtx_LSHIFTRT (V2DImode,
					    vcopy, GEN_INT (32)));
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
			  gen_rtx_SUBREG (SImode, vcopy, 0));
	}
    }
  else
    emit_move_insn (dst, src);

  rtx_insn *seq = get_insns ();
  end_sequence ();
  emit_conversion_insns (seq, insn);

  if (dump_file)
    fprintf (dump_file,
	     "  Copied r%d to a scalar register r%d for insn %d\n",
	     REGNO (src), REGNO (dst), INSN_UID (insn));
}

/* Convert operand OP in INSN.  We should handle
   memory operands and uninitialized registers.
   All other register uses are converted during
   registers conversion.  */

void
general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
{
  *op = copy_rtx_if_shared (*op);

  if (GET_CODE (*op) == NOT)
    {
      convert_op (&XEXP (*op, 0), insn);
      PUT_MODE (*op, vmode);
    }
  else if (MEM_P (*op))
    {
      rtx tmp = gen_reg_rtx (GET_MODE (*op));

      /* Handle movabs.  */
      if (!memory_operand (*op, GET_MODE (*op)))
	{
	  rtx tmp2 = gen_reg_rtx (GET_MODE (*op));

	  emit_insn_before (gen_rtx_SET (tmp2, *op), insn);
	  *op = tmp2;
	}

      emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (vmode, tmp, 0),
				     gen_gpr_to_xmm_move_src (vmode, *op)),
			insn);
      *op = gen_rtx_SUBREG (vmode, tmp, 0);

      if (dump_file)
	fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
		 INSN_UID (insn), REGNO (tmp));
    }
  else if (REG_P (*op))
    {
      *op = gen_rtx_SUBREG (vmode, *op, 0);
    }
  else if (CONST_INT_P (*op))
    {
      rtx vec_cst;
      rtx tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (smode), 0);

      /* Prefer all ones vector in case of -1.  */
      if (constm1_operand (*op, GET_MODE (*op)))
	vec_cst = CONSTM1_RTX (vmode);
      else
	{
	  unsigned n = GET_MODE_NUNITS (vmode);
	  rtx *v = XALLOCAVEC (rtx, n);
	  v[0] = *op;
	  for (unsigned i = 1; i < n; ++i)
	    v[i] = const0_rtx;
	  vec_cst = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
	}

      if (!standard_sse_constant_p (vec_cst, vmode))
	{
	  start_sequence ();
	  vec_cst = validize_mem (force_const_mem (vmode, vec_cst));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);
	}

      emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
      *op = tmp;
    }
  else
    {
      gcc_assert (SUBREG_P (*op));
      gcc_assert (GET_MODE (*op) == vmode);
    }
}
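
/* As an example of the CONST_INT case above: converting (const_int 42)
   in a V2DImode chain builds the vector constant {42, 0}; since that
   is not a standard SSE constant, it is forced to the constant pool
   and loaded into a fresh pseudo before INSN (illustrative values).  */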

/* Convert INSN to vector mode.  */

void
general_scalar_chain::convert_insn (rtx_insn *insn)
{
  /* Generate copies for out-of-chain uses of defs and adjust debug uses.  */
  for (df_ref ref = DF_INSN_DEFS (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
      {
	df_link *use;
	for (use = DF_REF_CHAIN (ref); use; use = use->next)
	  if (NONDEBUG_INSN_P (DF_REF_INSN (use->ref))
	      && (DF_REF_REG_MEM_P (use->ref)
		  || !bitmap_bit_p (insns, DF_REF_INSN_UID (use->ref))))
	    break;
	if (use)
	  convert_reg (insn, DF_REF_REG (ref),
		       *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]));
	else if (MAY_HAVE_DEBUG_BIND_INSNS)
	  {
	    /* If we generated a scalar copy we can leave debug-insns
	       as-is, if not, we have to adjust them.  */
	    auto_vec<rtx_insn *, 5> to_reset_debug_insns;
	    for (use = DF_REF_CHAIN (ref); use; use = use->next)
	      if (DEBUG_INSN_P (DF_REF_INSN (use->ref)))
		{
		  rtx_insn *debug_insn = DF_REF_INSN (use->ref);
		  /* If there's a reaching definition outside of the
		     chain we have to reset.  */
		  df_link *def;
		  for (def = DF_REF_CHAIN (use->ref); def; def = def->next)
		    if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def->ref)))
		      break;
		  if (def)
		    to_reset_debug_insns.safe_push (debug_insn);
		  else
		    {
		      *DF_REF_REAL_LOC (use->ref)
			= *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]);
		      df_insn_rescan (debug_insn);
		    }
		}
	    /* Have to do the reset outside of the DF_CHAIN walk to not
	       disrupt it.  */
	    while (!to_reset_debug_insns.is_empty ())
	      {
		rtx_insn *debug_insn = to_reset_debug_insns.pop ();
		INSN_VAR_LOCATION_LOC (debug_insn) = gen_rtx_UNKNOWN_VAR_LOC ();
		df_insn_rescan_debug_internal (debug_insn);
	      }
	  }
      }

  /* Replace uses in this insn with the defs we use in the chain.  */
  for (df_ref ref = DF_INSN_USES (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!DF_REF_REG_MEM_P (ref))
      if (rtx *vreg = defs_map.get (regno_reg_rtx[DF_REF_REGNO (ref)]))
	{
	  /* Also update a corresponding REG_DEAD note.  */
	  rtx note = find_reg_note (insn, REG_DEAD, DF_REF_REG (ref));
	  if (note)
	    XEXP (note, 0) = *vreg;
	  *DF_REF_REAL_LOC (ref) = *vreg;
	}

  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);
  rtx subreg;

  if (MEM_P (dst) && !REG_P (src))
    {
      /* There are no scalar integer instructions and therefore
	 temporary register usage is required.  */
      rtx tmp = gen_reg_rtx (smode);
      emit_conversion_insns (gen_move_insn (dst, tmp), insn);
      dst = gen_rtx_SUBREG (vmode, tmp, 0);
    }
  else if (REG_P (dst))
    {
      /* Replace the definition with a SUBREG to the definition we
	 use inside the chain.  */
      rtx *vdef = defs_map.get (dst);
      if (vdef)
	dst = *vdef;
      dst = gen_rtx_SUBREG (vmode, dst, 0);
      /* IRA doesn't like to have REG_EQUAL/EQUIV notes when the SET_DEST
	 is a non-REG_P.  So kill those off.  */
      rtx note = find_reg_equal_equiv_note (insn);
      if (note)
	remove_note (insn, note);
    }

  switch (GET_CODE (src))
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      convert_op (&XEXP (src, 0), insn);
      PUT_MODE (src, vmode);
      break;

    case PLUS:
    case MINUS:
    case IOR:
    case XOR:
    case AND:
    case SMAX:
    case SMIN:
    case UMAX:
    case UMIN:
      convert_op (&XEXP (src, 0), insn);
      convert_op (&XEXP (src, 1), insn);
      PUT_MODE (src, vmode);
      break;

    case NEG:
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (vmode);
      emit_insn_before (gen_move_insn (subreg, CONST0_RTX (vmode)), insn);
      src = gen_rtx_MINUS (vmode, subreg, src);
      break;

    case NOT:
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (vmode);
      emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (vmode)), insn);
      src = gen_rtx_XOR (vmode, src, subreg);
      break;

    case MEM:
      if (!REG_P (dst))
	convert_op (&src, insn);
      break;

    case REG:
      if (!MEM_P (dst))
	convert_op (&src, insn);
      break;

    case SUBREG:
      gcc_assert (GET_MODE (src) == vmode);
      break;

    case COMPARE:
      src = SUBREG_REG (XEXP (XEXP (src, 0), 0));

      gcc_assert (REG_P (src) && GET_MODE (src) == DImode);
      subreg = gen_rtx_SUBREG (V2DImode, src, 0);
      emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
						    copy_rtx_if_shared (subreg),
						    copy_rtx_if_shared (subreg)),
			insn);
      dst = gen_rtx_REG (CCmode, FLAGS_REG);
      src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (subreg),
					       copy_rtx_if_shared (subreg)),
			    UNSPEC_PTEST);
      break;

    case CONST_INT:
      convert_op (&src, insn);
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  INSN_CODE (insn) = -1;
  int patt = recog_memoized (insn);
  if (patt == -1)
    fatal_insn_not_found (insn);
  df_insn_rescan (insn);
}

/* Fix uses of converted REG in debug insns.  */

void
timode_scalar_chain::fix_debug_reg_uses (rtx reg)
{
  if (!flag_var_tracking)
    return;

  df_ref ref, next;
  for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
    {
      rtx_insn *insn = DF_REF_INSN (ref);
      /* Make sure the next ref is for a different instruction,
	 so that we're not affected by the rescan.  */
      next = DF_REF_NEXT_REG (ref);
      while (next && DF_REF_INSN (next) == insn)
	next = DF_REF_NEXT_REG (next);

      if (DEBUG_INSN_P (insn))
	{
	  /* It may be a debug insn with a TImode variable in
	     a register.  */
	  bool changed = false;
	  for (; ref != next; ref = DF_REF_NEXT_REG (ref))
	    {
	      rtx *loc = DF_REF_LOC (ref);
	      if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
		{
		  *loc = gen_rtx_SUBREG (TImode, *loc, 0);
		  changed = true;
		}
	    }
	  if (changed)
	    df_insn_rescan (insn);
	}
    }
}

/* Convert INSN from TImode to V1TImode.  */

void
timode_scalar_chain::convert_insn (rtx_insn *insn)
{
  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  switch (GET_CODE (dst))
    {
    case REG:
      {
	rtx tmp = find_reg_equal_equiv_note (insn);
	if (tmp)
	  PUT_MODE (XEXP (tmp, 0), V1TImode);
	PUT_MODE (dst, V1TImode);
	fix_debug_reg_uses (dst);
      }
      break;
    case MEM:
      PUT_MODE (dst, V1TImode);
      break;

    default:
      gcc_unreachable ();
    }

  switch (GET_CODE (src))
    {
    case REG:
      PUT_MODE (src, V1TImode);
      /* Call fix_debug_reg_uses only if SRC is never defined.  */
      if (!DF_REG_DEF_CHAIN (REGNO (src)))
	fix_debug_reg_uses (src);
      break;

    case MEM:
      PUT_MODE (src, V1TImode);
      break;

    case CONST_WIDE_INT:
      if (NONDEBUG_INSN_P (insn))
	{
	  /* Since there are no instructions to store a 128-bit constant,
	     temporary register usage is required.  */
	  rtx tmp = gen_reg_rtx (V1TImode);
	  start_sequence ();
	  src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
	  src = validize_mem (force_const_mem (V1TImode, src));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  if (seq)
	    emit_insn_before (seq, insn);
	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
	  dst = tmp;
	}
      break;

    case CONST_INT:
      switch (standard_sse_constant_p (src, TImode))
	{
	case 1:
	  src = CONST0_RTX (GET_MODE (dst));
	  break;
	case 2:
	  src = CONSTM1_RTX (GET_MODE (dst));
	  break;
	default:
	  gcc_unreachable ();
	}
      if (NONDEBUG_INSN_P (insn))
	{
	  rtx tmp = gen_reg_rtx (V1TImode);
	  /* Since there are no instructions to store a standard SSE
	     constant, temporary register usage is required.  */
	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
	  dst = tmp;
	}
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  INSN_CODE (insn) = -1;
  recog_memoized (insn);
  df_insn_rescan (insn);
}

/* Generate copies from defs used by the chain but not defined therein.
   Also populates defs_map which is used later by convert_insn.  */

void
general_scalar_chain::convert_registers ()
{
  bitmap_iterator bi;
  unsigned id;
  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
    {
      rtx chain_reg = gen_reg_rtx (smode);
      defs_map.put (regno_reg_rtx[id], chain_reg);
    }
  EXECUTE_IF_SET_IN_BITMAP (insns_conv, 0, id, bi)
    for (df_ref ref = DF_INSN_UID_DEFS (id); ref; ref = DF_REF_NEXT_LOC (ref))
      if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
	make_vector_copies (DF_REF_INSN (ref), DF_REF_REAL_REG (ref));
}

/* Convert the whole chain, creating required register
   conversions and copies.  */

int
scalar_chain::convert ()
{
  bitmap_iterator bi;
  unsigned id;
  int converted_insns = 0;

  if (!dbg_cnt (stv_conversion))
    return 0;

  if (dump_file)
    fprintf (dump_file, "Converting chain #%d...\n", chain_id);

  convert_registers ();

  EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
    {
      convert_insn (DF_INSN_UID_GET (id)->insn);
      converted_insns++;
    }

  return converted_insns;
}

/* Return true if INSN uses or defines a hard register.
   Hard register uses in a memory address are ignored.
   Clobbers and flags definitions are ignored.  */

static bool
has_non_address_hard_reg (rtx_insn *insn)
{
  df_ref ref;
  FOR_EACH_INSN_DEF (ref, insn)
    if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
	&& !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
	&& DF_REF_REGNO (ref) != FLAGS_REG)
      return true;

  FOR_EACH_INSN_USE (ref, insn)
    if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
      return true;

  return false;
}

/* Check if comparison INSN may be transformed
   into vector comparison.  Currently we transform
   zero checks only which look like:

   (set (reg:CCZ 17 flags)
	(compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
			     (subreg:SI (reg:DI x) 0))
		     (const_int 0 [0])))  */

static bool
convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
{
  /* ??? Currently convertible for double-word DImode chain only.  */
  if (TARGET_64BIT || mode != DImode)
    return false;

  if (!TARGET_SSE4_1)
    return false;

  rtx def_set = single_set (insn);

  gcc_assert (def_set);

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  gcc_assert (GET_CODE (src) == COMPARE);

  if (GET_CODE (dst) != REG
      || REGNO (dst) != FLAGS_REG
      || GET_MODE (dst) != CCZmode)
    return false;

  rtx op1 = XEXP (src, 0);
  rtx op2 = XEXP (src, 1);

  if (op2 != CONST0_RTX (GET_MODE (op2)))
    return false;

  if (GET_CODE (op1) != IOR)
    return false;

  op2 = XEXP (op1, 1);
  op1 = XEXP (op1, 0);

  if (!SUBREG_P (op1)
      || !SUBREG_P (op2)
      || GET_MODE (op1) != SImode
      || GET_MODE (op2) != SImode
      || ((SUBREG_BYTE (op1) != 0
	   || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
	  && (SUBREG_BYTE (op2) != 0
	      || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
    return false;

  op1 = SUBREG_REG (op1);
  op2 = SUBREG_REG (op2);

  if (op1 != op2
      || !REG_P (op1)
      || GET_MODE (op1) != DImode)
    return false;

  return true;
}
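
/* Illustrative source-level example: on !TARGET_64BIT such a pattern
   typically results from testing a 64-bit value against zero, e.g.

     long long x;
     if (x == 0) ...

   which is expanded as an IOR of the two SImode halves of X compared
   against zero.  */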

/* The general version of scalar_to_vector_candidate_p.  */

static bool
general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
{
  rtx def_set = single_set (insn);

  if (!def_set)
    return false;

  if (has_non_address_hard_reg (insn))
    return false;

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  if (GET_CODE (src) == COMPARE)
    return convertible_comparison_p (insn, mode);

  /* We are interested in "mode" only.  */
  if ((GET_MODE (src) != mode
       && !CONST_INT_P (src))
      || GET_MODE (dst) != mode)
    return false;

  if (!REG_P (dst) && !MEM_P (dst))
    return false;

  switch (GET_CODE (src))
    {
    case ASHIFTRT:
      if (!TARGET_AVX512VL)
	return false;
      /* FALLTHRU */

    case ASHIFT:
    case LSHIFTRT:
      if (!CONST_INT_P (XEXP (src, 1))
	  || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, GET_MODE_BITSIZE (mode)-1))
	return false;
      break;

    case SMAX:
    case SMIN:
    case UMAX:
    case UMIN:
      if ((mode == DImode && !TARGET_AVX512VL)
	  || (mode == SImode && !TARGET_SSE4_1))
	return false;
      /* FALLTHRU */

    case PLUS:
    case MINUS:
    case IOR:
    case XOR:
    case AND:
      if (!REG_P (XEXP (src, 1))
	  && !MEM_P (XEXP (src, 1))
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;

      if (GET_MODE (XEXP (src, 1)) != mode
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;
      break;

    case NEG:
    case NOT:
      break;

    case REG:
      return true;

    case MEM:
    case CONST_INT:
      return REG_P (dst);

    default:
      return false;
    }

  if (!REG_P (XEXP (src, 0))
      && !MEM_P (XEXP (src, 0))
      && !CONST_INT_P (XEXP (src, 0))
      /* Check for andnot case.  */
      && (GET_CODE (src) != AND
	  || GET_CODE (XEXP (src, 0)) != NOT
	  || !REG_P (XEXP (XEXP (src, 0), 0))))
      return false;

  if (GET_MODE (XEXP (src, 0)) != mode
      && !CONST_INT_P (XEXP (src, 0)))
    return false;

  return true;
}

/* The TImode version of scalar_to_vector_candidate_p.  */

static bool
timode_scalar_to_vector_candidate_p (rtx_insn *insn)
{
  rtx def_set = single_set (insn);

  if (!def_set)
    return false;

  if (has_non_address_hard_reg (insn))
    return false;

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  /* Only TImode load and store are allowed.  */
  if (GET_MODE (dst) != TImode)
    return false;

  if (MEM_P (dst))
    {
      /* Check for a store.  The memory must be aligned, or an unaligned
	 store must be optimal on the target.  Only support stores from a
	 register, a standard SSE constant, or a CONST_WIDE_INT generated
	 from a piecewise store.

	 ??? Verify performance impact before enabling CONST_INT for
	 __int128 store.  */
      if (misaligned_operand (dst, TImode)
	  && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
	return false;

      switch (GET_CODE (src))
	{
	default:
	  return false;

	case REG:
	case CONST_WIDE_INT:
	  return true;

	case CONST_INT:
	  return standard_sse_constant_p (src, TImode);
	}
    }
  else if (MEM_P (src))
    {
      /* Check for a load.  The memory must be aligned, or an unaligned
	 load must be optimal on the target.  */
      return (REG_P (dst)
	      && (!misaligned_operand (src, TImode)
		  || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
    }

  return false;
}
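
/* A sketch of what qualifies: a simple __int128 copy such as "*p = *q"
   is a TImode load followed by a TImode store; when the memory is
   aligned (or unaligned accesses are optimal), both insns pass the
   checks above and can be rewritten as V1TImode vector moves
   (illustrative example).  */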

/* For a register REGNO, scan instructions for its defs and uses.
   Put REGNO in REGS if a def or use isn't in CANDIDATES.  */

static void
timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
				   unsigned int regno)
{
  for (df_ref def = DF_REG_DEF_CHAIN (regno);
       def;
       def = DF_REF_NEXT_REG (def))
    {
      if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "r%d has non convertible def in insn %d\n",
		     regno, DF_REF_INSN_UID (def));

	  bitmap_set_bit (regs, regno);
	  break;
	}
    }

  for (df_ref ref = DF_REG_USE_CHAIN (regno);
       ref;
       ref = DF_REF_NEXT_REG (ref))
    {
      /* Debug instructions are skipped.  */
      if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
	  && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "r%d has non convertible use in insn %d\n",
		     regno, DF_REF_INSN_UID (ref));

	  bitmap_set_bit (regs, regno);
	  break;
	}
    }
}

/* The TImode version of remove_non_convertible_regs.  */

static void
timode_remove_non_convertible_regs (bitmap candidates)
{
  bitmap_iterator bi;
  unsigned id;
  bitmap regs = BITMAP_ALLOC (NULL);

  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
    {
      rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
      rtx dest = SET_DEST (def_set);
      rtx src = SET_SRC (def_set);

      if ((!REG_P (dest)
	   || bitmap_bit_p (regs, REGNO (dest))
	   || HARD_REGISTER_P (dest))
	  && (!REG_P (src)
	      || bitmap_bit_p (regs, REGNO (src))
	      || HARD_REGISTER_P (src)))
	continue;

      if (REG_P (dest))
	timode_check_non_convertible_regs (candidates, regs,
					   REGNO (dest));

      if (REG_P (src))
	timode_check_non_convertible_regs (candidates, regs,
					   REGNO (src));
    }

  EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
    {
      for (df_ref def = DF_REG_DEF_CHAIN (id);
	   def;
	   def = DF_REF_NEXT_REG (def))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (def));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
	  }

      for (df_ref ref = DF_REG_USE_CHAIN (id);
	   ref;
	   ref = DF_REF_NEXT_REG (ref))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (ref));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
	  }
    }

  BITMAP_FREE (regs);
}

/* Main STV pass function.  Find and convert scalar
   instructions into vector mode when profitable.  */

static unsigned int
convert_scalars_to_vector (bool timode_p)
{
  basic_block bb;
  int converted_insns = 0;

  bitmap_obstack_initialize (NULL);
  const machine_mode cand_mode[3] = { SImode, DImode, TImode };
  const machine_mode cand_vmode[3] = { V4SImode, V2DImode, V1TImode };
  bitmap_head candidates[3];  /* { SImode, DImode, TImode } */
  for (unsigned i = 0; i < 3; ++i)
    bitmap_initialize (&candidates[i], &bitmap_default_obstack);

  calculate_dominance_info (CDI_DOMINATORS);
  df_set_flags (DF_DEFER_INSN_RESCAN | DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();

  /* Find all instructions we want to convert into vector mode.  */
  if (dump_file)
    fprintf (dump_file, "Searching for mode conversion candidates...\n");

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
	if (timode_p
	    && timode_scalar_to_vector_candidate_p (insn))
	  {
	    if (dump_file)
	      fprintf (dump_file, "  insn %d is marked as a TImode candidate\n",
		       INSN_UID (insn));

	    bitmap_set_bit (&candidates[2], INSN_UID (insn));
	  }
	else if (!timode_p)
	  {
	    /* Check {SI,DI}mode.  */
	    for (unsigned i = 0; i <= 1; ++i)
	      if (general_scalar_to_vector_candidate_p (insn, cand_mode[i]))
		{
		  if (dump_file)
		    fprintf (dump_file, "  insn %d is marked as a %s candidate\n",
			     INSN_UID (insn), i == 0 ? "SImode" : "DImode");

		  bitmap_set_bit (&candidates[i], INSN_UID (insn));
		  break;
		}
	  }
    }

  if (timode_p)
    timode_remove_non_convertible_regs (&candidates[2]);

  for (unsigned i = 0; i <= 2; ++i)
    if (!bitmap_empty_p (&candidates[i]))
      break;
    else if (i == 2 && dump_file)
      fprintf (dump_file, "There are no candidates for optimization.\n");

  for (unsigned i = 0; i <= 2; ++i)
    while (!bitmap_empty_p (&candidates[i]))
      {
	unsigned uid = bitmap_first_set_bit (&candidates[i]);
	scalar_chain *chain;

	if (cand_mode[i] == TImode)
	  chain = new timode_scalar_chain;
	else
	  chain = new general_scalar_chain (cand_mode[i], cand_vmode[i]);

	/* Find instructions chain we want to convert to vector mode.
	   Check all uses and definitions to estimate all required
	   conversions.  */
	chain->build (&candidates[i], uid);

	if (chain->compute_convert_gain () > 0)
	  converted_insns += chain->convert ();
	else
	  if (dump_file)
	    fprintf (dump_file, "Chain #%d conversion is not profitable\n",
		     chain->chain_id);

	delete chain;
      }

  if (dump_file)
    fprintf (dump_file, "Total insns converted: %d\n", converted_insns);

  for (unsigned i = 0; i <= 2; ++i)
    bitmap_release (&candidates[i]);
  bitmap_obstack_release (NULL);
  df_process_deferred_rescans ();

  /* Conversion means we may have 128bit register spills/fills
     which require aligned stack.  */
  if (converted_insns)
    {
      if (crtl->stack_alignment_needed < 128)
	crtl->stack_alignment_needed = 128;
      if (crtl->stack_alignment_estimated < 128)
	crtl->stack_alignment_estimated = 128;

      crtl->stack_realign_needed
	= INCOMING_STACK_BOUNDARY < crtl->stack_alignment_estimated;
      crtl->stack_realign_tried = crtl->stack_realign_needed;

      crtl->stack_realign_processed = true;

      if (!crtl->drap_reg)
	{
	  rtx drap_rtx = targetm.calls.get_drap_rtx ();

	  /* stack_realign_drap and drap_rtx must match.  */
	  gcc_assert ((stack_realign_drap != 0) == (drap_rtx != NULL));

	  /* Do nothing if NULL is returned,
	     which means DRAP is not needed.  */
	  if (drap_rtx != NULL)
	    {
	      crtl->args.internal_arg_pointer = drap_rtx;

	      /* Call fixup_tail_calls to clean up
		 REG_EQUIV note if DRAP is needed.  */
	      fixup_tail_calls ();
	    }
	}

      /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments.  */
      if (TARGET_64BIT)
	for (tree parm = DECL_ARGUMENTS (current_function_decl);
	     parm; parm = DECL_CHAIN (parm))
	  {
	    if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
	      continue;
	    if (DECL_RTL_SET_P (parm)
		&& GET_MODE (DECL_RTL (parm)) == V1TImode)
	      {
		rtx r = DECL_RTL (parm);
		if (REG_P (r))
		  SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
	      }
	    if (DECL_INCOMING_RTL (parm)
		&& GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
	      {
		rtx r = DECL_INCOMING_RTL (parm);
		if (REG_P (r))
		  DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
	      }
	  }
    }

  return 0;
}

/* Modify the vzeroupper pattern in INSN so that it describes the effect
   that the instruction has on the SSE registers.  LIVE_REGS are the set
   of registers that are live across the instruction.

   For a live register R we use:

     (set (reg:V2DI R) (reg:V2DI R))

   which preserves the low 128 bits but clobbers the upper bits.  */

static void
ix86_add_reg_usage_to_vzeroupper (rtx_insn *insn, bitmap live_regs)
{
  rtx pattern = PATTERN (insn);
  unsigned int nregs = TARGET_64BIT ? 16 : 8;
  unsigned int npats = nregs;
  for (unsigned int i = 0; i < nregs; ++i)
    {
      unsigned int regno = GET_SSE_REGNO (i);
      if (!bitmap_bit_p (live_regs, regno))
	npats--;
    }
  if (npats == 0)
    return;
  rtvec vec = rtvec_alloc (npats + 1);
  RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);
  for (unsigned int i = 0, j = 0; i < nregs; ++i)
    {
      unsigned int regno = GET_SSE_REGNO (i);
      if (!bitmap_bit_p (live_regs, regno))
	continue;
      rtx reg = gen_rtx_REG (V2DImode, regno);
      ++j;
      RTVEC_ELT (vec, j) = gen_rtx_SET (reg, reg);
    }
  XVEC (pattern, 0) = vec;
  INSN_CODE (insn) = -1;
  df_insn_rescan (insn);
}
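
/* E.g. if only xmm0 is live across the vzeroupper, the annotated
   pattern becomes roughly

     (parallel [(unspec_volatile ... UNSPECV_VZEROUPPER)
		(set (reg:V2DI 20 xmm0) (reg:V2DI 20 xmm0))])

   (illustrative; element 0 is whatever the original pattern's first
   element was, and hard register numbers may differ).  */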

/* Walk the vzeroupper instructions in the function and annotate them
   with the effect that they have on the SSE registers.  */

static void
ix86_add_reg_usage_to_vzerouppers (void)
{
  basic_block bb;
  rtx_insn *insn;
  auto_bitmap live_regs;

  df_analyze ();
  FOR_EACH_BB_FN (bb, cfun)
    {
      bitmap_copy (live_regs, df_get_live_out (bb));
      df_simulate_initialize_backwards (bb, live_regs);
      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  if (!NONDEBUG_INSN_P (insn))
	    continue;
	  if (vzeroupper_pattern (PATTERN (insn), VOIDmode))
	    ix86_add_reg_usage_to_vzeroupper (insn, live_regs);
	  df_simulate_one_insn_backwards (bb, insn, live_regs);
	}
    }
}

static unsigned int
rest_of_handle_insert_vzeroupper (void)
{
  if (TARGET_VZEROUPPER)
    {
      /* vzeroupper instructions are inserted immediately after reload to
	 account for possible spills from 256bit or 512bit registers.  The pass
	 reuses mode switching infrastructure by re-running mode insertion
	 pass, so disable entities that have already been processed.  */
      for (int i = 0; i < MAX_386_ENTITIES; i++)
	ix86_optimize_mode_switching[i] = 0;

      ix86_optimize_mode_switching[AVX_U128] = 1;

      /* Call optimize_mode_switching.  */
      g->get_passes ()->execute_pass_mode_switching ();
    }
  ix86_add_reg_usage_to_vzerouppers ();
  return 0;
}

namespace {

const pass_data pass_data_insert_vzeroupper =
{
  RTL_PASS, /* type */
  "vzeroupper", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_insert_vzeroupper : public rtl_opt_pass
{
public:
  pass_insert_vzeroupper (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_insert_vzeroupper, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return TARGET_AVX
	     && (TARGET_VZEROUPPER
		 || cfun->machine->has_explicit_vzeroupper);
    }

  virtual unsigned int execute (function *)
    {
      return rest_of_handle_insert_vzeroupper ();
    }

}; // class pass_insert_vzeroupper

const pass_data pass_data_stv =
{
  RTL_PASS, /* type */
  "stv", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_stv : public rtl_opt_pass
{
public:
  pass_stv (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_stv, ctxt),
      timode_p (false)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return ((!timode_p || TARGET_64BIT)
	      && TARGET_STV && TARGET_SSE2 && optimize > 1);
    }

  virtual unsigned int execute (function *)
    {
      return convert_scalars_to_vector (timode_p);
    }

  opt_pass *clone ()
    {
      return new pass_stv (m_ctxt);
    }

  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      timode_p = param;
    }

private:
  bool timode_p;
}; // class pass_stv

} // anon namespace
1932
1933rtl_opt_pass *
1934make_pass_insert_vzeroupper (gcc::context *ctxt)
1935{
1936  return new pass_insert_vzeroupper (ctxt);
1937}
1938
1939rtl_opt_pass *
1940make_pass_stv (gcc::context *ctxt)
1941{
1942  return new pass_stv (ctxt);
1943}
1944

/* Inserting ENDBRANCH instructions.  */

static unsigned int
rest_of_insert_endbranch (void)
{
  timevar_push (TV_MACH_DEP);

  rtx cet_eb;
  rtx_insn *insn;
  basic_block bb;

  /* Currently emit ENDBR if the function is tracked, i.e. if the
     'nocf_check' attribute is absent from the function type.  Later an
     optimization will be introduced that analyzes whether the address
     of a static function is taken; a static function whose address is
     never taken will get a nocf_check attribute, which will reduce the
     number of ENDBR instructions emitted.  */

  if (!lookup_attribute ("nocf_check",
			 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
      && (!flag_manual_endbr
	  || lookup_attribute ("cf_check",
			       DECL_ATTRIBUTES (cfun->decl)))
      && (!cgraph_node::get (cfun->decl)->only_called_directly_p ()
	  || ix86_cmodel == CM_LARGE
	  || ix86_cmodel == CM_LARGE_PIC
	  || flag_force_indirect_call
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	      && DECL_DLLIMPORT_P (cfun->decl))))
    {
      /* Queue ENDBR insertion to x86_function_profiler.  */
      if (crtl->profile && flag_fentry)
	cfun->machine->endbr_queued_at_entrance = true;
      else
	{
	  cet_eb = gen_nop_endbr ();

	  bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
	  insn = BB_HEAD (bb);
	  emit_insn_before (cet_eb, insn);
	}
    }

  bb = 0;
  FOR_EACH_BB_FN (bb, cfun)
    {
      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
	   insn = NEXT_INSN (insn))
	{
	  if (CALL_P (insn))
	    {
	      bool need_endbr;
	      need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL;
	      if (!need_endbr && !SIBLING_CALL_P (insn))
		{
		  rtx call = get_call_rtx_from (insn);
		  rtx fnaddr = XEXP (call, 0);
		  tree fndecl = NULL_TREE;

		  /* Also generate ENDBRANCH for a non-tail call that
		     may return via an indirect branch.  */
		  if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
		    fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
		  if (fndecl == NULL_TREE)
		    fndecl = MEM_EXPR (fnaddr);
		  if (fndecl
		      && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
		      && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
		    fndecl = NULL_TREE;
		  if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
		    {
		      tree fntype = TREE_TYPE (fndecl);
		      if (lookup_attribute ("indirect_return",
					    TYPE_ATTRIBUTES (fntype)))
			need_endbr = true;
		    }
		}
	      if (!need_endbr)
		continue;
	      /* Generate ENDBRANCH after a CALL that can return more
		 than once (setjmp-like functions).  */

	      cet_eb = gen_nop_endbr ();
	      emit_insn_after_setloc (cet_eb, insn, INSN_LOCATION (insn));
	      continue;
	    }

	  if (JUMP_P (insn) && flag_cet_switch)
	    {
	      rtx target = JUMP_LABEL (insn);
	      if (target == NULL_RTX || ANY_RETURN_P (target))
		continue;

	      /* Check whether the jump is through a switch table.  */
	      rtx_insn *label = as_a<rtx_insn *> (target);
	      rtx_insn *table = next_insn (label);
	      if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
		continue;

	      /* For an indirect jump through a switch table, find all
		 of its targets and insert an ENDBRANCH at each one.
		 This is guarded by a separate flag that controls
		 ENDBRANCH generation for switch statements.  */
	      edge_iterator ei;
	      edge e;
	      basic_block dest_blk;

	      FOR_EACH_EDGE (e, ei, bb->succs)
		{
		  rtx_insn *insn;

		  dest_blk = e->dest;
		  insn = BB_HEAD (dest_blk);
		  gcc_assert (LABEL_P (insn));
		  cet_eb = gen_nop_endbr ();
		  emit_insn_after (cet_eb, insn);
		}
	      continue;
	    }

	  if (LABEL_P (insn) && LABEL_PRESERVE_P (insn))
	    {
	      cet_eb = gen_nop_endbr ();
	      emit_insn_after (cet_eb, insn);
	      continue;
	    }
	}
    }

  timevar_pop (TV_MACH_DEP);
  return 0;
}
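
/* To illustrate, with -fcf-protection=branch the usual outcome is an
   ENDBR at the function entry and after each setjmp-like call; a
   sketch of the generated assembly (endbr32 in 32-bit code):

	foo:
		endbr64
		...
		call	setjmp
		endbr64
		...
   */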

namespace {

const pass_data pass_data_insert_endbranch =
{
  RTL_PASS, /* type.  */
  "cet", /* name.  */
  OPTGROUP_NONE, /* optinfo_flags.  */
  TV_MACH_DEP, /* tv_id.  */
  0, /* properties_required.  */
  0, /* properties_provided.  */
  0, /* properties_destroyed.  */
  0, /* todo_flags_start.  */
  0, /* todo_flags_finish.  */
};

class pass_insert_endbranch : public rtl_opt_pass
{
public:
  pass_insert_endbranch (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_insert_endbranch, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return (flag_cf_protection & CF_BRANCH);
    }

  virtual unsigned int execute (function *)
    {
      return rest_of_insert_endbranch ();
    }

}; // class pass_insert_endbranch

} // anon namespace

rtl_opt_pass *
make_pass_insert_endbranch (gcc::context *ctxt)
{
  return new pass_insert_endbranch (ctxt);
}

/* At entry of the nearest common dominator for basic blocks with
   conversions, generate a single
	vxorps %xmmN, %xmmN, %xmmN
   for all
	vcvtss2sd  op, %xmmN, %xmmX
	vcvtsd2ss  op, %xmmN, %xmmX
	vcvtsi2ss  op, %xmmN, %xmmX
	vcvtsi2sd  op, %xmmN, %xmmX

   NB: We want to generate only a single vxorps to cover the whole
   function.  The LCM algorithm isn't appropriate here since it may
   place a vxorps inside the loop.  */
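
/* As a sketch of the rewrite performed below, an SF <- DF truncation

     (set (reg:SF x) (float_truncate:SF (reg:DF y)))

   becomes

     (set (reg:V4SF tmp)
	  (vec_merge:V4SF
	    (vec_duplicate:V4SF (float_truncate:SF (reg:DF y)))
	    (reg:V4SF zero)
	    (const_int 1)))
     (set (reg:SF x) (subreg:SF (reg:V4SF tmp) 0))

   where ZERO is the pseudo cleared by the single vxorps, so the whole
   destination vector is written and the partial update disappears.  */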

static unsigned int
remove_partial_avx_dependency (void)
{
  timevar_push (TV_MACH_DEP);

  bitmap_obstack_initialize (NULL);
  bitmap convert_bbs = BITMAP_ALLOC (NULL);

  basic_block bb;
  rtx_insn *insn, *set_insn;
  rtx set;
  rtx v4sf_const0 = NULL_RTX;

  auto_vec<rtx_insn *> control_flow_insns;

  /* We create invalid RTL initially, so defer rescans until the new
     patterns are in place.  */
  df_set_flags (DF_DEFER_INSN_RESCAN);

  FOR_EACH_BB_FN (bb, cfun)
    {
      FOR_BB_INSNS (bb, insn)
	{
	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  set = single_set (insn);
	  if (!set)
	    continue;

	  if (get_attr_avx_partial_xmm_update (insn)
	      != AVX_PARTIAL_XMM_UPDATE_TRUE)
	    continue;

	  if (!v4sf_const0)
	    v4sf_const0 = gen_reg_rtx (V4SFmode);

	  /* Convert AVX_PARTIAL_XMM_UPDATE_TRUE insns (DF -> SF,
	     SF -> DF, SI -> SF, SI -> DF, DI -> SF and DI -> DF
	     conversions) to vec_duplicate and vec_merge, accessed
	     through a subreg.  */
	  rtx src = SET_SRC (set);
	  rtx dest = SET_DEST (set);
	  machine_mode dest_mode = GET_MODE (dest);

	  rtx zero;
	  machine_mode dest_vecmode;
	  if (dest_mode == E_SFmode)
	    {
	      dest_vecmode = V4SFmode;
	      zero = v4sf_const0;
	    }
	  else
	    {
	      dest_vecmode = V2DFmode;
	      zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0);
	    }

	  /* Change source to vector mode.  */
	  src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src);
	  src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero,
				   GEN_INT (HOST_WIDE_INT_1U));
	  /* Change destination to vector mode.  */
	  rtx vec = gen_reg_rtx (dest_vecmode);
	  /* Generate an XMM vector SET.  */
	  set = gen_rtx_SET (vec, src);
	  set_insn = emit_insn_before (set, insn);
	  df_insn_rescan (set_insn);

	  if (cfun->can_throw_non_call_exceptions)
	    {
	      /* Handle REG_EH_REGION note.  */
	      rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
	      if (note)
		{
		  control_flow_insns.safe_push (set_insn);
		  add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0));
		}
	    }

	  src = gen_rtx_SUBREG (dest_mode, vec, 0);
	  set = gen_rtx_SET (dest, src);

	  /* Drop possible dead definitions.  */
	  PATTERN (insn) = set;

	  INSN_CODE (insn) = -1;
	  recog_memoized (insn);
	  df_insn_rescan (insn);
	  bitmap_set_bit (convert_bbs, bb->index);
	}
    }

  if (v4sf_const0)
    {
      /* (Re-)discover loops so that bb->loop_father can be used in the
	 analysis below.  */
      calculate_dominance_info (CDI_DOMINATORS);
      loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

      /* Generate a single vxorps at the entry of the nearest common
	 dominator of the basic blocks with conversions.  Walk up to the
	 fake loop that contains the whole function, so that the vxorps
	 is not placed inside any real loop and there is only one in the
	 whole function.  */
      bb = nearest_common_dominator_for_set (CDI_DOMINATORS,
					     convert_bbs);
      while (bb->loop_father->latch
	     != EXIT_BLOCK_PTR_FOR_FN (cfun))
	bb = get_immediate_dominator (CDI_DOMINATORS,
				      bb->loop_father->header);

      set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode));

      insn = BB_HEAD (bb);
      while (insn && !NONDEBUG_INSN_P (insn))
	{
	  if (insn == BB_END (bb))
	    {
	      insn = NULL;
	      break;
	    }
	  insn = NEXT_INSN (insn);
	}
      if (insn == BB_HEAD (bb))
	set_insn = emit_insn_before (set, insn);
      else
	set_insn = emit_insn_after (set,
				    insn ? PREV_INSN (insn) : BB_END (bb));
      df_insn_rescan (set_insn);
      loop_optimizer_finalize ();

      if (!control_flow_insns.is_empty ())
	{
	  free_dominance_info (CDI_DOMINATORS);

	  unsigned int i;
	  FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
	    if (control_flow_insn_p (insn))
	      {
		/* Split the block after insn.  There will be a fallthru
		   edge, which is OK so we keep it.  We have to create
		   the exception edges ourselves.  */
		bb = BLOCK_FOR_INSN (insn);
		split_block (bb, insn);
		rtl_make_eh_edge (NULL, bb, BB_END (bb));
	      }
	}
    }

  df_process_deferred_rescans ();
  df_clear_flags (DF_DEFER_INSN_RESCAN);
  bitmap_obstack_release (NULL);
  BITMAP_FREE (convert_bbs);

  timevar_pop (TV_MACH_DEP);
  return 0;
}

namespace {

const pass_data pass_data_remove_partial_avx_dependency =
{
  RTL_PASS, /* type */
  "rpad", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_remove_partial_avx_dependency : public rtl_opt_pass
{
public:
  pass_remove_partial_avx_dependency (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return (TARGET_AVX
	      && TARGET_SSE_PARTIAL_REG_DEPENDENCY
	      && TARGET_SSE_MATH
	      && optimize
	      && optimize_function_for_speed_p (cfun));
    }

  virtual unsigned int execute (function *)
    {
      return remove_partial_avx_dependency ();
    }
}; // class pass_remove_partial_avx_dependency

} // anon namespace

rtl_opt_pass *
make_pass_remove_partial_avx_dependency (gcc::context *ctxt)
{
  return new pass_remove_partial_avx_dependency (ctxt);
}

/* Compare the priority of the target features used by functions DECL1
   and DECL2.  Return a positive value if DECL1 has higher priority, a
   negative value if DECL2 has higher priority, and 0 if their
   priorities are the same.  */

int
ix86_compare_version_priority (tree decl1, tree decl2)
{
  unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
  unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);

  return (int)priority1 - (int)priority2;
}
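
/* A usage sketch, assuming the feature priorities assigned by
   get_builtin_code_for_version, where AVX2 outranks SSE4.2: given

     __attribute__ ((target ("avx2"))) int foo (void);     // DECL1
     __attribute__ ((target ("sse4.2"))) int foo (void);   // DECL2

   ix86_compare_version_priority (DECL1, DECL2) is positive, so the
   avx2 version is checked before the sse4.2 one when dispatching.  */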

/* V1 and V2 point to function versions with different priorities
   based on the target ISA.  This function compares their priorities.  */

static int
feature_compare (const void *v1, const void *v2)
{
  typedef struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    } function_version_info;

  const function_version_info c1 = *(const function_version_info *)v1;
  const function_version_info c2 = *(const function_version_info *)v2;
  return (c2.dispatch_priority - c1.dispatch_priority);
}

/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
   to return a pointer to VERSION_DECL if the outcome of the expression
   formed by PREDICATE_CHAIN is true.  This function will be called during
   version dispatch to decide which function version to execute.  It returns
   the basic block at the end, to which more conditions can be added.  */
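
/* The statements added for a single version guarded by two predicates
   look roughly like this GIMPLE-level sketch (predicate1/predicate2
   stand for the decls taken from PREDICATE_CHAIN):

     cond1 = predicate1 (arg1);
     cond2 = predicate2 (arg2);
     cond1 = MIN_EXPR <cond2, cond1>;
     if (cond1 > 0)
       return (void *) &version_decl;
     <fall through to the conditions added for the next version>  */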

static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
		     tree predicate_chain, basic_block new_bb)
{
  gimple *return_stmt;
  tree convert_expr, result_var;
  gimple *convert_stmt;
  gimple *call_cond_stmt;
  gimple *if_else_stmt;

  basic_block bb1, bb2, bb3;
  edge e12, e23;

  tree cond_var, and_expr_var = NULL_TREE;
  gimple_seq gseq;

  tree predicate_decl, predicate_arg;

  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gseq = bb_seq (new_bb);

  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
			 build_fold_addr_expr (version_decl));
  result_var = create_tmp_var (ptr_type_node);
  convert_stmt = gimple_build_assign (result_var, convert_expr);
  return_stmt = gimple_build_return (result_var);

  if (predicate_chain == NULL_TREE)
    {
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
      pop_cfun ();
      return new_bb;
    }

  while (predicate_chain != NULL)
    {
      cond_var = create_tmp_var (integer_type_node);
      predicate_decl = TREE_PURPOSE (predicate_chain);
      predicate_arg = TREE_VALUE (predicate_chain);
      call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
      gimple_call_set_lhs (call_cond_stmt, cond_var);

      gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
      gimple_set_bb (call_cond_stmt, new_bb);
      gimple_seq_add_stmt (&gseq, call_cond_stmt);

      predicate_chain = TREE_CHAIN (predicate_chain);

      if (and_expr_var == NULL)
	and_expr_var = cond_var;
      else
	{
	  gimple *assign_stmt;
	  /* Use MIN_EXPR to check whether any of the conditions is zero:
	     and_expr_var = MIN_EXPR <cond_var, and_expr_var>.  */
	  assign_stmt = gimple_build_assign (and_expr_var,
			  build2 (MIN_EXPR, integer_type_node,
				  cond_var, and_expr_var));

	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
	  gimple_set_bb (assign_stmt, new_bb);
	  gimple_seq_add_stmt (&gseq, assign_stmt);
	}
    }

  if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
				    integer_zero_node,
				    NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  bb1 = new_bb;
  e12 = split_block (bb1, if_else_stmt);
  bb2 = e12->dest;
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  e23 = split_block (bb2, return_stmt);

  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  bb3 = e23->dest;
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  remove_edge (e23);
  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);

  pop_cfun ();

  return bb3;
}

/* This function generates the dispatch function for multi-versioned
   functions.  DISPATCH_DECL is the function which will contain the
   dispatch logic.  FNDECLS_P is a vector of the function choices for
   dispatch.  EMPTY_BB is the basic block in DISPATCH_DECL in which the
   dispatch code is generated.  */

static int
dispatch_function_versions (tree dispatch_decl,
			    void *fndecls_p,
			    basic_block *empty_bb)
{
  tree default_decl;
  gimple *ifunc_cpu_init_stmt;
  gimple_seq gseq;
  int ix;
  tree ele;
  vec<tree> *fndecls;
  unsigned int num_versions = 0;
  unsigned int actual_versions = 0;
  unsigned int i;

  struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    } *function_version_info;

  gcc_assert (dispatch_decl != NULL
	      && fndecls_p != NULL
	      && empty_bb != NULL);

  /* FNDECLS_P is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* There must be at least one version other than the default.  */
  num_versions = fndecls->length ();
  gcc_assert (num_versions >= 2);

  function_version_info = (struct _function_version_info *)
    XNEWVEC (struct _function_version_info, (num_versions - 1));

  /* The first version in the vector is the default decl.  */
  default_decl = (*fndecls)[0];

  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));

  gseq = bb_seq (*empty_bb);
  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
     constructors, so explicitly call __builtin_cpu_init here.  */
  ifunc_cpu_init_stmt
    = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT), vNULL);
  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
  set_bb_seq (*empty_bb, gseq);

  pop_cfun ();

  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    {
      tree version_decl = ele;
      tree predicate_chain = NULL_TREE;
      unsigned int priority;
      /* Get the attribute string, parse it and find the right predicate
	 decl.  The predicate function could be a lengthy combination of
	 many features, such as the arch type and various ISA variants.  */
      priority = get_builtin_code_for_version (version_decl,
					       &predicate_chain);

      if (predicate_chain == NULL_TREE)
	continue;

      function_version_info [actual_versions].version_decl = version_decl;
      function_version_info [actual_versions].predicate_chain
	= predicate_chain;
      function_version_info [actual_versions].dispatch_priority = priority;
      actual_versions++;
    }

  /* Sort the versions in descending order of dispatch priority.  The
     priority is based on the ISA, which is not a perfect solution:
     there could still be ambiguity.  If more than one function version
     is suitable to execute, which one should be dispatched?  In the
     future, allow the user to specify a dispatch priority next to the
     version.  */
  qsort (function_version_info, actual_versions,
	 sizeof (struct _function_version_info), feature_compare);

  for (i = 0; i < actual_versions; ++i)
    *empty_bb = add_condition_to_bb (dispatch_decl,
				     function_version_info[i].version_decl,
				     function_version_info[i].predicate_chain,
				     *empty_bb);

  /* Dispatch the default version at the end.  */
  *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
				   NULL, *empty_bb);

  free (function_version_info);
  return 0;
}
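
/* Putting the pieces together, the generated resolver body for a
   function foo with a single avx2 version looks roughly like the
   sketch below; the predicate call is an assumption based on the
   chains built by get_builtin_code_for_version:

     void *foo.resolver (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2"))
	 return (void *) &foo.avx2;
       return (void *) &foo;
     }
   */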

/* This function changes the assembler name for functions that are
   versions.  If DECL is a function version and has a "target"
   attribute, it appends the attribute string to its assembler name.  */

static tree
ix86_mangle_function_version_assembler_name (tree decl, tree id)
{
  tree version_attr;
  const char *orig_name, *version_string;
  char *attr_str, *assembler_name;

  if (DECL_DECLARED_INLINE_P (decl)
      && lookup_attribute ("gnu_inline",
			   DECL_ATTRIBUTES (decl)))
    error_at (DECL_SOURCE_LOCATION (decl),
	      "function versions cannot be marked as %<gnu_inline%>,"
	      " bodies have to be generated");

  if (DECL_VIRTUAL_P (decl)
      || DECL_VINDEX (decl))
    sorry ("virtual function multiversioning not supported");

  version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));

  /* The target attribute string cannot be NULL.  */
  gcc_assert (version_attr != NULL_TREE);

  orig_name = IDENTIFIER_POINTER (id);
  version_string
    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));

  if (strcmp (version_string, "default") == 0)
    return id;

  attr_str = sorted_attr_string (TREE_VALUE (version_attr));
  assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);

  sprintf (assembler_name, "%s.%s", orig_name, attr_str);

  /* Allow the assembler name to be modified if it is already set.  */
  if (DECL_ASSEMBLER_NAME_SET_P (decl))
    SET_DECL_RTL (decl, NULL);

  tree ret = get_identifier (assembler_name);
  XDELETEVEC (attr_str);
  XDELETEVEC (assembler_name);
  return ret;
}
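
/* For example, a version declared as

     __attribute__ ((target ("sse4.2"))) int foo (void);

   gets the assembler name "foo.sse4.2" (modulo the sorting and
   normalization done by sorted_attr_string), while the "default"
   version keeps its original assembler name.  */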

tree
ix86_mangle_decl_assembler_name (tree decl, tree id)
{
  /* For function versions, add the target suffix to the assembler name.  */
  if (TREE_CODE (decl) == FUNCTION_DECL
      && DECL_FUNCTION_VERSIONED (decl))
    id = ix86_mangle_function_version_assembler_name (decl, id);
#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
  id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
#endif

  return id;
}

/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL will be replaced with calls to the dispatcher by the
   front end.  Return the decl of the dispatcher function.  */

tree
ix86_get_function_versions_dispatcher (void *decl)
{
  tree fn = (tree) decl;
  struct cgraph_node *node = NULL;
  struct cgraph_node *default_node = NULL;
  struct cgraph_function_version_info *node_v = NULL;
  struct cgraph_function_version_info *first_v = NULL;

  tree dispatch_decl = NULL;

  struct cgraph_function_version_info *default_version_info = NULL;

  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));

  node = cgraph_node::get (fn);
  gcc_assert (node != NULL);

  node_v = node->function_version ();
  gcc_assert (node_v != NULL);

  if (node_v->dispatcher_resolver != NULL)
    return node_v->dispatcher_resolver;

  /* Find the default version and make it the first node.  */
  first_v = node_v;
  /* Go to the beginning of the chain.  */
  while (first_v->prev != NULL)
    first_v = first_v->prev;
  default_version_info = first_v;
  while (default_version_info != NULL)
    {
      if (is_function_default_version
	    (default_version_info->this_node->decl))
	break;
      default_version_info = default_version_info->next;
    }

  /* If there is no default node, just return NULL.  */
  if (default_version_info == NULL)
    return NULL;

  /* Make the default info the first node.  */
  if (first_v != default_version_info)
    {
      default_version_info->prev->next = default_version_info->next;
      if (default_version_info->next)
	default_version_info->next->prev = default_version_info->prev;
      first_v->prev = default_version_info;
      default_version_info->next = first_v;
      default_version_info->prev = NULL;
    }

  default_node = default_version_info->this_node;

#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
  if (targetm.has_ifunc_p ())
    {
      struct cgraph_function_version_info *it_v = NULL;
      struct cgraph_node *dispatcher_node = NULL;
      struct cgraph_function_version_info *dispatcher_version_info = NULL;

      /* Right now, the dispatching is done via ifunc.  */
      dispatch_decl = make_dispatcher_decl (default_node->decl);

      dispatcher_node = cgraph_node::get_create (dispatch_decl);
      gcc_assert (dispatcher_node != NULL);
      dispatcher_node->dispatcher_function = 1;
      dispatcher_version_info
	= dispatcher_node->insert_new_function_version ();
      dispatcher_version_info->next = default_version_info;
      dispatcher_node->definition = 1;

      /* Set the dispatcher for all the versions.  */
      it_v = default_version_info;
      while (it_v != NULL)
	{
	  it_v->dispatcher_resolver = dispatch_decl;
	  it_v = it_v->next;
	}
    }
  else
#endif
    {
      error_at (DECL_SOURCE_LOCATION (default_node->decl),
		"multiversioning needs %<ifunc%> which is not supported "
		"on this target");
    }

  return dispatch_decl;
}
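
/* For reference, a typical source-level use is

     __attribute__ ((target ("default"))) int foo (void) { return 0; }
     __attribute__ ((target ("avx2"))) int foo (void) { return 1; }

   where every call to foo is redirected by the front end to the ifunc
   dispatcher created here, and the right version is resolved at load
   time.  */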

/* Make the resolver function decl to dispatch the versions of the
   multi-versioned function DEFAULT_DECL.  IFUNC_ALIAS_DECL is the
   ifunc alias that will point to the created resolver.  Create an
   empty basic block in the resolver and store its pointer in
   EMPTY_BB.  Return the decl of the resolver function.  */

static tree
make_resolver_func (const tree default_decl,
		    const tree ifunc_alias_decl,
		    basic_block *empty_bb)
{
  tree decl, type, t;

  /* Create the resolver function name based on DEFAULT_DECL.  */
  tree decl_name = clone_function_name (default_decl, "resolver");
  const char *resolver_name = IDENTIFIER_POINTER (decl_name);

  /* The resolver function should return a (void *).  */
  type = build_function_type_list (ptr_type_node, NULL_TREE);

  decl = build_fn_decl (resolver_name, type);
  SET_DECL_ASSEMBLER_NAME (decl, decl_name);

  DECL_NAME (decl) = decl_name;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 1;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;

  /* The resolver is not external; its body is generated.  */
  DECL_EXTERNAL (decl) = 0;
  DECL_EXTERNAL (ifunc_alias_decl) = 0;

  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  DECL_STATIC_CONSTRUCTOR (decl) = 0;

  if (DECL_COMDAT_GROUP (default_decl)
      || TREE_PUBLIC (default_decl))
    {
      /* In this case, each translation unit with a call to this
	 versioned function will put out a resolver.  Ensure it
	 is comdat to keep just one copy.  */
      DECL_COMDAT (decl) = 1;
      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    }
  else
    TREE_PUBLIC (ifunc_alias_decl) = 0;

  /* Build the result decl and add it to the function decl.  */
  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
  DECL_CONTEXT (t) = decl;
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  gimplify_function_tree (decl);
  push_cfun (DECL_STRUCT_FUNCTION (decl));
  *empty_bb = init_lowered_empty_function (decl, false,
					   profile_count::uninitialized ());

  cgraph_node::add_new_function (decl, true);
  symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));

  pop_cfun ();

  gcc_assert (ifunc_alias_decl != NULL);
  /* Mark IFUNC_ALIAS_DECL as "ifunc" with RESOLVER_NAME as its
     resolver.  */
  DECL_ATTRIBUTES (ifunc_alias_decl)
    = make_attribute ("ifunc", resolver_name,
		      DECL_ATTRIBUTES (ifunc_alias_decl));

  /* Create the alias for dispatch to the resolver here.  */
  cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
  return decl;
}
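
/* For a default decl foo this ends up producing, roughly (a sketch
   assuming the ".resolver" suffix appended by clone_function_name):

     foo.resolver:  the resolver body, filled in later by
		    dispatch_function_versions;
     foo:           an ifunc alias, as if declared

		      extern int foo (void)
			__attribute__ ((ifunc ("foo.resolver")));
   */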

/* Generate the dispatching code body to dispatch the multi-versioned
   function DECL.  The target hook is called to process the "target"
   attributes and provide the code to dispatch the right function at
   run time.  NODE_P points to the dispatcher decl whose body will be
   created.  */

tree
ix86_generate_version_dispatcher_body (void *node_p)
{
  tree resolver_decl;
  basic_block empty_bb;
  tree default_ver_decl;
  struct cgraph_node *versn;
  struct cgraph_node *node;

  struct cgraph_function_version_info *node_version_info = NULL;
  struct cgraph_function_version_info *versn_info = NULL;

  node = (cgraph_node *) node_p;

  node_version_info = node->function_version ();
  gcc_assert (node->dispatcher_function
	      && node_version_info != NULL);

  if (node_version_info->dispatcher_resolver)
    return node_version_info->dispatcher_resolver;

  /* The first version in the chain corresponds to the default version.  */
  default_ver_decl = node_version_info->next->this_node->decl;

  /* NODE is going to be an alias, so remove the finalized bit.  */
  node->definition = false;

  resolver_decl = make_resolver_func (default_ver_decl,
				      node->decl, &empty_bb);

  node_version_info->dispatcher_resolver = resolver_decl;

  push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));

  auto_vec<tree, 2> fn_ver_vec;

  for (versn_info = node_version_info->next; versn_info;
       versn_info = versn_info->next)
    {
      versn = versn_info->this_node;
      /* Check for virtual functions here again, as by this time it should
	 have been determined whether this function needs a vtable index
	 or not.  This happens for methods in derived classes that override
	 virtual methods in base classes but are not explicitly marked as
	 virtual.  */
      if (DECL_VINDEX (versn->decl))
	sorry ("virtual function multiversioning not supported");

      fn_ver_vec.safe_push (versn->decl);
    }

  dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
  cgraph_edge::rebuild_edges ();
  pop_cfun ();
  return resolver_decl;
}