// SPDX-License-Identifier: GPL-3.0-or-later
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2022 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "ssa.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "intl.h"
#include "tm-constrs.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* Get declarations of xcoff_*_section_name.  */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "rs6000-internal.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"

extern tree rs6000_builtin_mask_for_load (void);
extern tree rs6000_builtin_md_vectorized_function (tree, tree, tree);
extern tree rs6000_builtin_reciprocal (tree);

/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD) && !defined (POWERPC_NETBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS	0
#endif

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use a variant of the AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Track use of r13 in 64-bit AIX TLS.  */
static bool xcoff_tls_exec_model_detected = false;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached result of the rs6000_variable_issue hook; it is set there and
   returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	 RECIP_ALL },
  { "none",	 RECIP_NONE },
  { "div",	 (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		  | RECIP_V2DF_DIV) },
  { "divf",	 (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	 (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	 (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		  | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	 (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	 (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
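
/* A minimal sketch (not the actual option-handling code in this file) of how
   a comma-separated -mrecip=<opt>,<opt> argument could be folded into a mask
   with a table shaped like recip_options; it ignores the "!" negation form:

     static unsigned int
     parse_recip_mask (char *arg)
     {
       unsigned int mask = 0;
       for (char *tok = strtok (arg, ","); tok; tok = strtok (NULL, ","))
	 for (size_t i = 0; i < ARRAY_SIZE (recip_options); i++)
	   if (strcmp (tok, recip_options[i].string) == 0)
	     mask |= recip_options[i].mask;
       return mask;
     }

   With that, parse_recip_mask ("divf,rsqrtf") would yield
   RECIP_SF_DIV | RECIP_V4SF_DIV | RECIP_SF_RSQRT | RECIP_V4SF_RSQRT.  */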

/* On PowerPC, we have a limited number of target clones that we care about,
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT		= 0,		/* default clone.  */
  CLONE_ISA_2_05,			/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,			/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,			/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,			/* ISA 3.0 (power9).  */
  CLONE_ISA_3_1,			/* ISA 3.1 (power10).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,				"" },		/* Default options.  */
  { OPTION_MASK_CMPB,		"arch_2_05" },	/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,	"arch_2_06" },	/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,	"arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,	"arch_3_00" },	/* ISA 3.0 (power9).  */
  { OPTION_MASK_POWER10,	"arch_3_1" },	/* ISA 3.1 (power10).  */
};
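
/* The names above are the strings a user-level caller passes to
   __builtin_cpu_supports when testing for a given ISA level, e.g.
   (illustrative):

     if (__builtin_cpu_supports ("arch_3_00"))
       use_power9_path ();
     else
       use_generic_path ();

   The target_clones multiversioning support emits essentially this kind of
   test to dispatch among the clone bodies.  */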


/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.cc) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.cc, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)


/* Register classes we care about for secondary reload and legitimate-address
   handling.  We only need to worry about GPR, FPR, and Altivec registers
   here, along with an ANY field that is the OR of the three register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,			/* General purpose registers.  */
  RELOAD_REG_FPR,			/* Traditional floating point regs.  */
  RELOAD_REG_VMX,			/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,			/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;			/* Register class name.  */
  int reg;				/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* Quad offset is limited.  */
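
/* For example, a mode that is valid in a register class and supports both
   reg+reg and reg+offset addressing there would carry the mask value
   (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET).  */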

/* Per-mode reload information: the reload insns to use, and the masks of
   valid addressing modes for each register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
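
/* Illustrative use of the table (a sketch, not code from this file): testing
   whether SFmode allows reg+offset addressing in a GPR reads the same bits
   directly:

     bool sf_gpr_offset_p
       = ((reg_addr[E_SFmode].addr_mask[RELOAD_REG_GPR] & RELOAD_REG_OFFSET)
	  != 0);

   The mode_supports_* helpers above are just this pattern specialized for
   the RELOAD_REG_ANY and RELOAD_REG_VMX entries.  */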

/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  return store_data_bypass_p (out_insn, in_insn);
}
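
/* This predicate is used as the guard of define_bypass entries in the
   processor pipeline descriptions; a representative, illustrative form is:

     (define_bypass 9 "power4-load-ext" "power4-store"
		    "rs6000_store_data_bypass_p")

   so a problematic insn pair simply fails the bypass check instead of
   tripping the assertions inside the generic store_data_bypass_p.  */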


/* Processor costs (relative to an add).  */

const struct processor_costs *rs6000_cost;

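/* COSTS_N_INSNS is the standard rtl.h scaling macro (N times the cost of a
   single fast instruction), so COSTS_N_INSNS (1) in the tables below means
   "as cheap as an add".  */
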
/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),    /* mulsi_const */
  COSTS_N_INSNS (6/2),    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),   /* divsi */
  COSTS_N_INSNS (70/2),   /* divdi */
  COSTS_N_INSNS (10/2),   /* fp */
  COSTS_N_INSNS (10/2),   /* dmul */
  COSTS_N_INSNS (74/2),   /* sdiv */
  COSTS_N_INSNS (74/2),   /* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  6,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER10 processors.  */
static const
struct processor_costs power10_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (2),	/* fp */
  COSTS_N_INSNS (2),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (13),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  16,			/* prefetch streams */
  COSTS_N_INSNS (2),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);


static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
bool easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
#if TARGET_MACHO
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;



/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
     "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  /* vrsave vscr sfp */
      "vrsave", "vscr", "sfp",
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
    "lr",  "ctr",   "ca",   "ap",
  /* cr0..cr7 */
  "%cr0",  "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif
1259
1260/* Table of valid machine attributes.  */
1261
1262static const struct attribute_spec rs6000_attribute_table[] =
1263{
1264  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1265       affects_type_identity, handler, exclude } */
1266  { "altivec",   1, 1, false, true,  false, false,
1267    rs6000_handle_altivec_attribute, NULL },
1268  { "longcall",  0, 0, false, true,  true,  false,
1269    rs6000_handle_longcall_attribute, NULL },
1270  { "shortcall", 0, 0, false, true,  true,  false,
1271    rs6000_handle_longcall_attribute, NULL },
1272  { "ms_struct", 0, 0, false, false, false, false,
1273    rs6000_handle_struct_attribute, NULL },
1274  { "gcc_struct", 0, 0, false, false, false, false,
1275    rs6000_handle_struct_attribute, NULL },
1276#ifdef SUBTARGET_ATTRIBUTE_TABLE
1277  SUBTARGET_ATTRIBUTE_TABLE,
1278#endif
1279  { NULL,        0, 0, false, false, false, false, NULL, NULL }
1280};
1281
1282#ifndef TARGET_PROFILE_KERNEL
1283#define TARGET_PROFILE_KERNEL 0
1284#endif
1285
1286/* Initialize the GCC target structure.  */
1287#undef TARGET_ATTRIBUTE_TABLE
1288#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1289#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1290#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1291#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1292#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1293
1294#undef TARGET_ASM_ALIGNED_DI_OP
1295#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1296
1297/* Default unaligned ops are only provided for ELF.  Find the ops needed
1298   for non-ELF systems.  */
1299#ifndef OBJECT_FORMAT_ELF
1300#if TARGET_XCOFF
1301/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
1302   64-bit targets.  */
1303#undef TARGET_ASM_UNALIGNED_HI_OP
1304#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1305#undef TARGET_ASM_UNALIGNED_SI_OP
1306#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1307#undef TARGET_ASM_UNALIGNED_DI_OP
1308#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1309#else
1310/* For Darwin.  */
1311#undef TARGET_ASM_UNALIGNED_HI_OP
1312#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1313#undef TARGET_ASM_UNALIGNED_SI_OP
1314#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1315#undef TARGET_ASM_UNALIGNED_DI_OP
1316#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1317#undef TARGET_ASM_ALIGNED_DI_OP
1318#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1319#endif
1320#endif
1321
1322/* This hook deals with fixups for relocatable code and DI-mode objects
1323   in 64-bit code.  */
1324#undef TARGET_ASM_INTEGER
1325#define TARGET_ASM_INTEGER rs6000_assemble_integer
1326
1327#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1328#undef TARGET_ASM_ASSEMBLE_VISIBILITY
1329#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1330#endif
1331
1332#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
1333#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
1334  rs6000_print_patchable_function_entry
1335
1336#undef TARGET_SET_UP_BY_PROLOGUE
1337#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1338
1339#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1340#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1341#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1342#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1343#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1344#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1345#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1346#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1347#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1348#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1349#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1350#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1351
1352#undef TARGET_EXTRA_LIVE_ON_ENTRY
1353#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1354
1355#undef TARGET_INTERNAL_ARG_POINTER
1356#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1357
1358#undef TARGET_HAVE_TLS
1359#define TARGET_HAVE_TLS HAVE_AS_TLS
1360
1361#undef TARGET_CANNOT_FORCE_CONST_MEM
1362#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1363
1364#undef TARGET_DELEGITIMIZE_ADDRESS
1365#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1366
1367#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1368#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1369
1370#undef TARGET_LEGITIMATE_COMBINED_INSN
1371#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1372
1373#undef TARGET_ASM_FUNCTION_PROLOGUE
1374#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1375#undef TARGET_ASM_FUNCTION_EPILOGUE
1376#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1377
1378#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1379#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1380
1381#undef  TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
1382#define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec
1383
1384#undef TARGET_LEGITIMIZE_ADDRESS
1385#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1386
1387#undef  TARGET_SCHED_VARIABLE_ISSUE
1388#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1389
1390#undef TARGET_SCHED_ISSUE_RATE
1391#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1392#undef TARGET_SCHED_ADJUST_COST
1393#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1394#undef TARGET_SCHED_ADJUST_PRIORITY
1395#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1396#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1397#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1398#undef TARGET_SCHED_INIT
1399#define TARGET_SCHED_INIT rs6000_sched_init
1400#undef TARGET_SCHED_FINISH
1401#define TARGET_SCHED_FINISH rs6000_sched_finish
1402#undef TARGET_SCHED_REORDER
1403#define TARGET_SCHED_REORDER rs6000_sched_reorder
1404#undef TARGET_SCHED_REORDER2
1405#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1406
1407#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1408#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1409
1410#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1411#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1412
1413#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1414#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1415#undef TARGET_SCHED_INIT_SCHED_CONTEXT
1416#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1417#undef TARGET_SCHED_SET_SCHED_CONTEXT
1418#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1419#undef TARGET_SCHED_FREE_SCHED_CONTEXT
1420#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1421
1422#undef TARGET_SCHED_CAN_SPECULATE_INSN
1423#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1424
1425#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1426#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1427#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1428#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT		\
1429  rs6000_builtin_support_vector_misalignment
1430#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1431#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1432#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1433#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1434  rs6000_builtin_vectorization_cost
1435#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1436#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1437  rs6000_preferred_simd_mode
1438#undef TARGET_VECTORIZE_CREATE_COSTS
1439#define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs
1440
1441#undef TARGET_LOOP_UNROLL_ADJUST
1442#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1443
1444#undef TARGET_INIT_BUILTINS
1445#define TARGET_INIT_BUILTINS rs6000_init_builtins
1446#undef TARGET_BUILTIN_DECL
1447#define TARGET_BUILTIN_DECL rs6000_builtin_decl
1448
1449#undef TARGET_FOLD_BUILTIN
1450#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  rs6000_libgcc_floating_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif

/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
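/* Note: the minimum anchor offset is written as -0x7fffffff - 1 rather than
   as -0x80000000, because the literal 0x80000000 does not fit in a 32-bit
   int and would be given an unsigned type, so negating it would not yield
   the intended INT_MIN value.  */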
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p

#undef TARGET_HAVE_COUNT_REG_DECR_P
#define TARGET_HAVE_COUNT_REG_DECR_P true

/* 1000000000 is infinite cost in IVOPTs.  */
#undef TARGET_DOLOOP_COST_FOR_GENERIC
#define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000

#undef TARGET_DOLOOP_COST_FOR_ADDRESS
#define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000

#undef TARGET_PREFERRED_DOLOOP_MODE
#define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS rs6000_offload_options

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY				\
  rs6000_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER				\
  rs6000_get_function_versions_dispatcher

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  rs6000_hard_regno_call_part_clobbered

#undef TARGET_SLOW_UNALIGNED_ACCESS
#define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset

#undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
#define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name

#undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
#define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
  rs6000_cannot_substitute_mem_equiv_p

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION rs6000_invalid_conversion

#undef TARGET_NEED_IPA_FN_TARGET_INFO
#define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info

#undef TARGET_UPDATE_IPA_FN_TARGET_INFO
#define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info


/* Processor table.  */
struct rs6000_ptt
{
  const char *const name;		/* Canonical processor name.  */
  const enum processor_type processor;	/* Processor type enum value.  */
  const HOST_WIDE_INT target_enable;	/* Target flags to enable.  */
};

static struct rs6000_ptt const processor_target_table[] =
{
#define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
#include "rs6000-cpus.def"
#undef RS6000_CPU
};

/* Look up a processor name for -mcpu=xxx and -mtune=xxx.  Return -1 if the
   name is invalid.  */

static int
rs6000_cpu_name_lookup (const char *name)
{
  size_t i;

  if (name != NULL)
    {
      for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
	if (! strcmp (name, processor_target_table[i].name))
	  return (int)i;
    }

  return -1;
}
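/* For example (illustrative only):

     int idx = rs6000_cpu_name_lookup ("power9");

   returns the index of the "power9" entry in processor_target_table, while
   an unrecognized name such as "power99" returns -1.  */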


/* Return number of consecutive hard regs needed starting at reg REGNO
   to hold something of mode MODE.
   This is ordinarily the length in words of a value of mode MODE
   but can be less for certain modes in special long registers.

   POWER and PowerPC GPRs hold 32 bits worth;
   PowerPC64 GPRs and FPRs hold 64 bits worth.  */

static int
rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
{
  unsigned HOST_WIDE_INT reg_size;

  /* 128-bit floating point usually takes 2 registers, unless it is IEEE
     128-bit floating point that can go in vector registers, which has VSX
     memory addressing.  */
  if (FP_REGNO_P (regno))
    reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
		? UNITS_PER_VSX_WORD
		: UNITS_PER_FP_WORD);

  else if (ALTIVEC_REGNO_P (regno))
    reg_size = UNITS_PER_ALTIVEC_WORD;

  else
    reg_size = UNITS_PER_WORD;

  return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
}
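/* As a worked example of the rounding-up division above (illustrative):
   IBM long double (16 bytes) held in FPRs takes (16 + 8 - 1) / 8 = 2
   registers, while V2DFmode (also 16 bytes) held in a VSX register takes
   (16 + 16 - 1) / 16 = 1 register.  */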

/* Value is 1 if hard register REGNO can hold a value of machine-mode
   MODE.  */
static int
rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
{
  int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;

  if (COMPLEX_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  /* Vector pair modes need even/odd VSX register pairs.  Only allow vector
     registers.  */
  if (mode == OOmode)
    return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);

  /* MMA accumulator modes need FPR registers divisible by 4.  */
  if (mode == XOmode)
    return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);

  /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
     register pairs, and we use PTImode where we need to deal with quad word
     memory operations.  Don't allow quad words in the argument or frame
     pointer registers, just registers 0..31.  */
  if (mode == PTImode)
    return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
	    && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
	    && ((regno & 1) == 0));

  /* VSX registers that overlap the FPR registers are larger than for non-VSX
     implementations.  Don't allow an item to be split between a FP register
     and an Altivec register.  Allow TImode in all VSX registers if the user
     asked for it.  */
  if (TARGET_VSX && VSX_REGNO_P (regno)
      && (VECTOR_MEM_VSX_P (mode)
	  || VECTOR_ALIGNMENT_P (mode)
	  || reg_addr[mode].scalar_in_vmx_p
	  || mode == TImode
	  || (TARGET_VADDUQM && mode == V1TImode)))
    {
      if (FP_REGNO_P (regno))
	return FP_REGNO_P (last_regno);

      if (ALTIVEC_REGNO_P (regno))
	{
	  if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
	    return 0;

	  return ALTIVEC_REGNO_P (last_regno);
	}
    }

  /* The GPRs can hold any mode, but values bigger than one register
     cannot go past R31.  */
  if (INT_REGNO_P (regno))
    return INT_REGNO_P (last_regno);

  /* The float registers (except for VSX vector modes) can only hold floating
     modes and DImode.  */
  if (FP_REGNO_P (regno))
    {
      if (VECTOR_ALIGNMENT_P (mode))
	return false;

      if (SCALAR_FLOAT_MODE_P (mode)
	  && (mode != TDmode || (regno % 2) == 0)
	  && FP_REGNO_P (last_regno))
	return 1;

      if (GET_MODE_CLASS (mode) == MODE_INT)
	{
	  if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
	    return 1;

	  if (TARGET_P8_VECTOR && (mode == SImode))
	    return 1;

	  if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
	    return 1;
	}

      return 0;
    }

  /* The CR register can only hold CC modes.  */
  if (CR_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;

  if (CA_REGNO_P (regno))
    return mode == Pmode || mode == SImode;

  /* AltiVec modes can only go in AltiVec registers.  */
  if (ALTIVEC_REGNO_P (regno))
    return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
	    || mode == V1TImode);

  /* We cannot put non-VSX TImode or PTImode anywhere except in the general
     registers, and the value must fit within the register set.  */

  return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
}
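/* Some illustrative consequences of the checks above: TDmode is accepted
   only in even-numbered FPRs, OOmode (a vector pair) only in even-numbered
   VSX registers when MMA is enabled, and the CR fields accept only CC
   modes.  */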

/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
{
  return rs6000_hard_regno_nregs[mode][regno];
}

/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  return rs6000_hard_regno_mode_ok_p[mode][regno];
}

/* Implement TARGET_MODES_TIEABLE_P.

   PTImode cannot tie with other modes because PTImode is restricted to even
   GPR registers, and TImode can go in any GPR as well as VSX registers (PR
   57744).

   Similarly, don't allow OOmode (vector pair, restricted to even VSX
   registers) or XOmode (vector quad, restricted to FPR registers divisible
   by 4) to tie with other modes.

   The Altivec/VSX vector tests come ahead of the scalar float mode tests, so
   that IEEE 128-bit floating point on VSX systems ties with other vector
   modes.  */

static bool
rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
      || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
    return mode1 == mode2;

  if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
    return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
  if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode1))
    return SCALAR_FLOAT_MODE_P (mode2);
  if (SCALAR_FLOAT_MODE_P (mode2))
    return false;

  if (GET_MODE_CLASS (mode1) == MODE_CC)
    return GET_MODE_CLASS (mode2) == MODE_CC;
  if (GET_MODE_CLASS (mode2) == MODE_CC)
    return false;

  return true;
}
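/* Illustrative results of the ordering above: SFmode ties with DFmode (both
   scalar float), CCmode ties with CCUNSmode (both MODE_CC), and TImode ties
   with DImode (both fall through to the final return), but PTImode, OOmode
   and XOmode tie only with themselves.  */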

/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  */

static bool
rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
				       machine_mode mode)
{
  if (TARGET_32BIT
      && TARGET_POWERPC64
      && GET_MODE_SIZE (mode) > 4
      && INT_REGNO_P (regno))
    return true;

  if (TARGET_VSX
      && FP_REGNO_P (regno)
      && GET_MODE_SIZE (mode) > 8
      && !FLOAT128_2REG_P (mode))
    return true;

  return false;
}
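/* For example, with -m32 -mpowerpc64 a DImode value fits in a single 64-bit
   GPR, but the 32-bit ABI only preserves the low 32 bits of the nonvolatile
   GPRs across calls, so the first test above reports such a register as
   part-clobbered.  */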

/* Print interesting facts about registers.  */
static void
rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
{
  int r, m;

  for (r = first_regno; r <= last_regno; ++r)
    {
      const char *comma = "";
      int len;

      if (first_regno == last_regno)
	fprintf (stderr, "%s:\t", reg_name);
      else
	fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);

      len = 8;
      for (m = 0; m < NUM_MACHINE_MODES; ++m)
	if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
	  {
	    if (len > 70)
	      {
		fprintf (stderr, ",\n\t");
		len = 8;
		comma = "";
	      }

	    if (rs6000_hard_regno_nregs[m][r] > 1)
	      len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
			     rs6000_hard_regno_nregs[m][r]);
	    else
	      len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));

	    comma = ", ";
	  }

      if (call_used_or_fixed_reg_p (r))
	{
	  if (len > 70)
	    {
	      fprintf (stderr, ",\n\t");
	      len = 8;
	      comma = "";
	    }

	  len += fprintf (stderr, "%s%s", comma, "call-used");
	  comma = ", ";
	}

      if (fixed_regs[r])
	{
	  if (len > 70)
	    {
	      fprintf (stderr, ",\n\t");
	      len = 8;
	      comma = "";
	    }

	  len += fprintf (stderr, "%s%s", comma, "fixed");
	  comma = ", ";
	}

      if (len > 70)
	{
	  fprintf (stderr, ",\n\t");
	  comma = "";
	}

      len += fprintf (stderr, "%sreg-class = %s", comma,
		      reg_class_names[(int)rs6000_regno_regclass[r]]);
      comma = ", ";

      if (len > 70)
	{
	  fprintf (stderr, ",\n\t");
	  comma = "";
	}

      fprintf (stderr, "%sregno = %d\n", comma, r);
    }
}

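/* Return a descriptive name for vector unit V, for use in -mdebug=reg
   output.  */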
static const char *
rs6000_debug_vector_unit (enum rs6000_vector v)
{
  const char *ret;

  switch (v)
    {
    case VECTOR_NONE:	   ret = "none";      break;
    case VECTOR_ALTIVEC:   ret = "altivec";   break;
    case VECTOR_VSX:	   ret = "vsx";       break;
    case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
    default:		   ret = "unknown";   break;
    }

  return ret;
}

/* Inner function printing just the address mask for a particular reload
   register class.  */
DEBUG_FUNCTION char *
rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
{
  static char ret[8];
  char *p = ret;

  if ((mask & RELOAD_REG_VALID) != 0)
    *p++ = 'v';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_MULTIPLE) != 0)
    *p++ = 'm';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_INDEXED) != 0)
    *p++ = 'i';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
    *p++ = 'O';
  else if ((mask & RELOAD_REG_OFFSET) != 0)
    *p++ = 'o';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
    *p++ = '+';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
    *p++ = '+';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_AND_M16) != 0)
    *p++ = '&';
  else if (keep_spaces)
    *p++ = ' ';

  *p = '\0';

  return ret;
}
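/* For instance, a mask with RELOAD_REG_VALID, RELOAD_REG_INDEXED and
   RELOAD_REG_OFFSET set is printed as "vio" when KEEP_SPACES is false, and
   with blanks in the unused character positions when it is true; the
   character order is fixed by the sequence of tests above.  */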

/* Print the address masks in a human-readable fashion.  */
DEBUG_FUNCTION void
rs6000_debug_print_mode (ssize_t m)
{
  ssize_t rc;
  int spaces = 0;

  fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
  for (rc = 0; rc < N_RELOAD_REG; rc++)
    fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
	     rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));

  if ((reg_addr[m].reload_store != CODE_FOR_nothing)
      || (reg_addr[m].reload_load != CODE_FOR_nothing))
    {
      fprintf (stderr, "%*s  Reload=%c%c", spaces, "",
	       (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
	       (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
      spaces = 0;
    }
  else
    spaces += strlen ("  Reload=sl");

  if (reg_addr[m].scalar_in_vmx_p)
    {
      fprintf (stderr, "%*s  Upper=y", spaces, "");
      spaces = 0;
    }
  else
    spaces += strlen ("  Upper=y");

  if (rs6000_vector_unit[m] != VECTOR_NONE
      || rs6000_vector_mem[m] != VECTOR_NONE)
    {
      fprintf (stderr, "%*s  vector: arith=%-10s mem=%s",
	       spaces, "",
	       rs6000_debug_vector_unit (rs6000_vector_unit[m]),
	       rs6000_debug_vector_unit (rs6000_vector_mem[m]));
    }

  fputs ("\n", stderr);
}

#define DEBUG_FMT_ID "%-32s= "
#define DEBUG_FMT_D   DEBUG_FMT_ID "%d\n"
#define DEBUG_FMT_WX  DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
#define DEBUG_FMT_S   DEBUG_FMT_ID "%s\n"

/* Print various interesting information with -mdebug=reg.  */
static void
rs6000_debug_reg_global (void)
{
  static const char *const tf[2] = { "false", "true" };
  const char *nl = (const char *)0;
  int m;
  size_t m1, m2, v;
  char costly_num[20];
  char nop_num[20];
  char flags_buffer[40];
  const char *costly_str;
  const char *nop_str;
  const char *trace_str;
  const char *abi_str;
  const char *cmodel_str;
  struct cl_target_option cl_opts;

  /* Modes we want tieable information on.  */
  static const machine_mode print_tieable_modes[] = {
    QImode,
    HImode,
    SImode,
    DImode,
    TImode,
    PTImode,
    SFmode,
    DFmode,
    TFmode,
    IFmode,
    KFmode,
    SDmode,
    DDmode,
    TDmode,
    V2SImode,
    V2SFmode,
    V16QImode,
    V8HImode,
    V4SImode,
    V2DImode,
    V1TImode,
    V32QImode,
    V16HImode,
    V8SImode,
    V4DImode,
    V2TImode,
    V4SFmode,
    V2DFmode,
    V8SFmode,
    V4DFmode,
    OOmode,
    XOmode,
    CCmode,
    CCUNSmode,
    CCEQmode,
    CCFPmode,
  };

  /* Virtual regs we are interested in.  */
  const static struct {
    int regno;			/* register number.  */
    const char *name;		/* register name.  */
  } virtual_regs[] = {
    { STACK_POINTER_REGNUM,			"stack pointer:" },
    { TOC_REGNUM,				"toc:          " },
    { STATIC_CHAIN_REGNUM,			"static chain: " },
    { RS6000_PIC_OFFSET_TABLE_REGNUM,		"pic offset:   " },
    { HARD_FRAME_POINTER_REGNUM,		"hard frame:   " },
    { ARG_POINTER_REGNUM,			"arg pointer:  " },
    { FRAME_POINTER_REGNUM,			"frame pointer:" },
    { FIRST_PSEUDO_REGISTER,			"first pseudo: " },
    { FIRST_VIRTUAL_REGISTER,			"first virtual:" },
    { VIRTUAL_INCOMING_ARGS_REGNUM,		"incoming_args:" },
    { VIRTUAL_STACK_VARS_REGNUM,		"stack_vars:   " },
    { VIRTUAL_STACK_DYNAMIC_REGNUM,		"stack_dynamic:" },
    { VIRTUAL_OUTGOING_ARGS_REGNUM,		"outgoing_args:" },
    { VIRTUAL_CFA_REGNUM,			"cfa (frame):  " },
    { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM,	"stack boundary:" },
    { LAST_VIRTUAL_REGISTER,			"last virtual: " },
  };

  fputs ("\nHard register information:\n", stderr);
  rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
  rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
  rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
			  LAST_ALTIVEC_REGNO,
			  "vs");
  rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
  rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
  rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
  rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
  rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
  rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");

  fputs ("\nVirtual/stack/frame registers:\n", stderr);
  for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
    fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name,
	     virtual_regs[v].regno);

  fprintf (stderr,
	   "\n"
	   "d  reg_class = %s\n"
	   "f  reg_class = %s\n"
	   "v  reg_class = %s\n"
	   "wa reg_class = %s\n"
	   "we reg_class = %s\n"
	   "wr reg_class = %s\n"
	   "wx reg_class = %s\n"
	   "wA reg_class = %s\n"
	   "\n",
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);

  nl = "\n";
  for (m = 0; m < NUM_MACHINE_MODES; ++m)
    rs6000_debug_print_mode (m);

  fputs ("\n", stderr);

  for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
    {
      machine_mode mode1 = print_tieable_modes[m1];
      bool first_time = true;

      nl = (const char *)0;
      for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
	{
	  machine_mode mode2 = print_tieable_modes[m2];
	  if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
	    {
	      if (first_time)
		{
		  fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
		  nl = "\n";
		  first_time = false;
		}

	      fprintf (stderr, " %s", GET_MODE_NAME (mode2));
	    }
	}

      if (!first_time)
	fputs ("\n", stderr);
    }

  if (nl)
    fputs (nl, stderr);

  if (rs6000_recip_control)
    {
      fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);

      for (m = 0; m < NUM_MACHINE_MODES; ++m)
	if (rs6000_recip_bits[m])
	  {
	    fprintf (stderr,
		     "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
		     GET_MODE_NAME (m),
		     (RS6000_RECIP_AUTO_RE_P (m)
		      ? "auto"
		      : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
		     (RS6000_RECIP_AUTO_RSQRTE_P (m)
		      ? "auto"
		      : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
	  }

      fputs ("\n", stderr);
    }

  if (rs6000_cpu_index >= 0)
    {
      const char *name = processor_target_table[rs6000_cpu_index].name;
      HOST_WIDE_INT flags
	= processor_target_table[rs6000_cpu_index].target_enable;

      sprintf (flags_buffer, "-mcpu=%s flags", name);
      rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
    }
  else
    fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");

  if (rs6000_tune_index >= 0)
    {
      const char *name = processor_target_table[rs6000_tune_index].name;
      HOST_WIDE_INT flags
	= processor_target_table[rs6000_tune_index].target_enable;

      sprintf (flags_buffer, "-mtune=%s flags", name);
      rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
    }
  else
    fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");

  cl_target_option_save (&cl_opts, &global_options, &global_options_set);
  rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
			    rs6000_isa_flags);

  rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
			    rs6000_isa_flags_explicit);

  rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
				rs6000_builtin_mask);

  rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);

  fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
	   OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");

  switch (rs6000_sched_costly_dep)
    {
    case max_dep_latency:
      costly_str = "max_dep_latency";
      break;

    case no_dep_costly:
      costly_str = "no_dep_costly";
      break;

    case all_deps_costly:
      costly_str = "all_deps_costly";
      break;

    case true_store_to_load_dep_costly:
      costly_str = "true_store_to_load_dep_costly";
      break;

    case store_to_load_dep_costly:
      costly_str = "store_to_load_dep_costly";
      break;

    default:
      costly_str = costly_num;
      sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
      break;
    }

  fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);

  switch (rs6000_sched_insert_nops)
    {
    case sched_finish_regroup_exact:
      nop_str = "sched_finish_regroup_exact";
      break;

    case sched_finish_pad_groups:
      nop_str = "sched_finish_pad_groups";
      break;

    case sched_finish_none:
      nop_str = "sched_finish_none";
      break;

    default:
      nop_str = nop_num;
      sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
      break;
    }

  fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);

  switch (rs6000_sdata)
    {
    default:
    case SDATA_NONE:
      break;

    case SDATA_DATA:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
      break;

    case SDATA_SYSV:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
      break;

    case SDATA_EABI:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
      break;

    }

  switch (rs6000_traceback)
    {
    case traceback_default:	trace_str = "default";	break;
    case traceback_none:	trace_str = "none";	break;
    case traceback_part:	trace_str = "part";	break;
    case traceback_full:	trace_str = "full";	break;
    default:			trace_str = "unknown";	break;
    }

  fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);

  switch (rs6000_current_cmodel)
    {
    case CMODEL_SMALL:	cmodel_str = "small";	break;
    case CMODEL_MEDIUM:	cmodel_str = "medium";	break;
    case CMODEL_LARGE:	cmodel_str = "large";	break;
    default:		cmodel_str = "unknown";	break;
    }

  fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);

  switch (rs6000_current_abi)
    {
    case ABI_NONE:	abi_str = "none";	break;
    case ABI_AIX:	abi_str = "aix";	break;
    case ABI_ELFv2:	abi_str = "ELFv2";	break;
    case ABI_V4:	abi_str = "V4";		break;
    case ABI_DARWIN:	abi_str = "darwin";	break;
    default:		abi_str = "unknown";	break;
    }

  fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);

  if (rs6000_altivec_abi)
    fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");

  if (rs6000_aix_extabi)
    fprintf (stderr, DEBUG_FMT_S, "AIX vec-extabi", "true");

  if (rs6000_darwin64_abi)
    fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");

  fprintf (stderr, DEBUG_FMT_S, "soft_float",
	   (TARGET_SOFT_FLOAT ? "true" : "false"));

  if (TARGET_LINK_STACK)
    fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");

  if (TARGET_P8_FUSION)
    {
      char options[80];

      strcpy (options, "power8");
      if (TARGET_P8_FUSION_SIGN)
	strcat (options, ", sign");

      fprintf (stderr, DEBUG_FMT_S, "fusion", options);
    }

  fprintf (stderr, DEBUG_FMT_S, "plt-format",
	   TARGET_SECURE_PLT ? "secure" : "bss");
  fprintf (stderr, DEBUG_FMT_S, "struct-return",
	   aix_struct_return ? "aix" : "sysv");
  fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
  fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
  fprintf (stderr, DEBUG_FMT_S, "align_branch",
	   tf[!!rs6000_align_branch_targets]);
  fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
  fprintf (stderr, DEBUG_FMT_D, "long_double_size",
	   rs6000_long_double_type_size);
  if (rs6000_long_double_type_size > 64)
    {
      fprintf (stderr, DEBUG_FMT_S, "long double type",
	       TARGET_IEEEQUAD ? "IEEE" : "IBM");
      fprintf (stderr, DEBUG_FMT_S, "default long double type",
	       TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
    }
  fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
	   (int)rs6000_sched_restricted_insns_priority);
  fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
	   (int)END_BUILTINS);

  fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
	   (int)TARGET_FLOAT128_ENABLE_TYPE);

  if (TARGET_VSX)
    fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
	     (int)VECTOR_ELEMENT_SCALAR_64BIT);

  if (TARGET_DIRECT_MOVE_128)
    fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
	     (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
}


/* Update the addr mask bits in reg_addr to help secondary reload and the
   legitimate address support figure out the appropriate addressing to
   use.  */

static void
rs6000_setup_reg_addr_masks (void)
{
  ssize_t rc, reg, m, nregs;
  addr_mask_type any_addr_mask, addr_mask;

  for (m = 0; m < NUM_MACHINE_MODES; ++m)
    {
      machine_mode m2 = (machine_mode) m;
      bool complex_p = false;
      bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
      size_t msize;

      if (COMPLEX_MODE_P (m2))
	{
	  complex_p = true;
	  m2 = GET_MODE_INNER (m2);
	}

      msize = GET_MODE_SIZE (m2);

      /* SDmode is special in that we want to access it only via REG+REG
	 addressing on power7 and above, since we want to use the LFIWZX and
	 STFIWZX instructions to load it.  */
      bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);

      any_addr_mask = 0;
      for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
	{
	  addr_mask = 0;
	  reg = reload_reg_map[rc].reg;

	  /* Can mode values go in the GPR/FPR/Altivec registers?  */
	  if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
	    {
	      bool small_int_vsx_p = (small_int_p
				      && (rc == RELOAD_REG_FPR
					  || rc == RELOAD_REG_VMX));

	      nregs = rs6000_hard_regno_nregs[m][reg];
	      addr_mask |= RELOAD_REG_VALID;

	      /* Indicate if the mode takes more than 1 physical register.  If
		 it takes a single register, indicate it can do REG+REG
		 addressing.  Small integers in VSX registers can only do
		 REG+REG addressing.  */
	      if (small_int_vsx_p)
		addr_mask |= RELOAD_REG_INDEXED;
	      else if (nregs > 1 || m == BLKmode || complex_p)
		addr_mask |= RELOAD_REG_MULTIPLE;
	      else
		addr_mask |= RELOAD_REG_INDEXED;

	      /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
		 addressing.  If we allow scalars into Altivec registers,
		 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.

		 For VSX systems, we don't allow update addressing for
		 DFmode/SFmode if those registers can go in both the
		 traditional floating point registers and Altivec registers.
		 The load/store instructions for the Altivec registers do not
		 have update forms.  If we allowed update addressing, it seems
		 to break IV-OPT code using floating point if the index type is
		 int instead of long (PR target/81550 and target/84042).  */

	      if (TARGET_UPDATE
		  && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
		  && msize <= 8
		  && !VECTOR_MODE_P (m2)
		  && !VECTOR_ALIGNMENT_P (m2)
		  && !complex_p
		  && (m != E_DFmode || !TARGET_VSX)
		  && (m != E_SFmode || !TARGET_P8_VECTOR)
		  && !small_int_vsx_p)
		{
		  addr_mask |= RELOAD_REG_PRE_INCDEC;

		  /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
		     we don't allow PRE_MODIFY for some multi-register
		     operations.  */
		  switch (m)
		    {
		    default:
		      addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;

		    case E_DImode:
		      if (TARGET_POWERPC64)
			addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;

		    case E_DFmode:
		    case E_DDmode:
		      if (TARGET_HARD_FLOAT)
			addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;
		    }
		}
	    }

	  /* GPR and FPR registers can do REG+OFFSET addressing, except
	     possibly for SDmode.  ISA 3.0 (i.e. power9) adds D-form addressing
	     for 64-bit scalars and 32-bit SFmode to altivec registers.  */
	  if ((addr_mask != 0) && !indexed_only_p
	      && msize <= 8
	      && (rc == RELOAD_REG_GPR
		  || ((msize == 8 || m2 == SFmode)
		      && (rc == RELOAD_REG_FPR
			  || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
	    addr_mask |= RELOAD_REG_OFFSET;

	  /* VSX registers can do REG+OFFSET addressing if ISA 3.0
	     instructions are enabled.  The offset for 128-bit VSX registers is
	     only 12 bits.  While GPRs can handle the full offset range, VSX
	     registers can only handle the restricted range.  */
	  else if ((addr_mask != 0) && !indexed_only_p
		   && msize == 16 && TARGET_P9_VECTOR
		   && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
		       || (m2 == TImode && TARGET_VSX)))
	    {
	      addr_mask |= RELOAD_REG_OFFSET;
	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
		addr_mask |= RELOAD_REG_QUAD_OFFSET;
	    }

	  /* Vector pairs can do both indexed and offset loads if the
	     instructions are enabled; otherwise they can only do offset loads,
	     since the access will be broken into two vector moves.  Vector
	     quads can only do offset loads.  */
	  else if ((addr_mask != 0) && TARGET_MMA
		   && (m2 == OOmode || m2 == XOmode))
	    {
	      addr_mask |= RELOAD_REG_OFFSET;
	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
		{
		  addr_mask |= RELOAD_REG_QUAD_OFFSET;
		  if (m2 == OOmode)
		    addr_mask |= RELOAD_REG_INDEXED;
		}
	    }

	  /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
	     addressing on 128-bit types.  */
	  if (rc == RELOAD_REG_VMX && msize == 16
	      && (addr_mask & RELOAD_REG_VALID) != 0)
	    addr_mask |= RELOAD_REG_AND_M16;

	  reg_addr[m].addr_mask[rc] = addr_mask;
	  any_addr_mask |= addr_mask;
	}

      reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
    }
}
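/* As a rough sketch of the result: on a 64-bit VSX target, DFmode typically
   ends up valid, indexed and offsettable in both the GPR and FPR classes,
   while the update (PRE_INC/PRE_MODIFY) forms are withheld because DFmode
   values can also live in the Altivec registers, whose load/store
   instructions have no update forms, as explained above.  */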


/* Initialize the various global tables that are based on register size.  */
static void
rs6000_init_hard_regno_mode_ok (bool global_init_p)
{
  ssize_t r, m, c;
  int align64;
  int align32;

  /* Precalculate REGNO_REG_CLASS.  */
  rs6000_regno_regclass[0] = GENERAL_REGS;
  for (r = 1; r < 32; ++r)
    rs6000_regno_regclass[r] = BASE_REGS;

  for (r = 32; r < 64; ++r)
    rs6000_regno_regclass[r] = FLOAT_REGS;

  for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
    rs6000_regno_regclass[r] = NO_REGS;

  for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
    rs6000_regno_regclass[r] = ALTIVEC_REGS;

  rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
  for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
    rs6000_regno_regclass[r] = CR_REGS;

  rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
  rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
  rs6000_regno_regclass[CA_REGNO] = NO_REGS;
  rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
  rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
  rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
  rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;

  /* Precalculate the mapping from register class to the simpler reload
     register class.  We don't need all of the register classes that are
     combinations of different classes, just the simple ones that have
     constraint letters.  */
  for (c = 0; c < N_REG_CLASSES; c++)
    reg_class_to_reg_type[c] = NO_REG_TYPE;

  reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
  reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
  reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
  reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
  reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;

  if (TARGET_VSX)
    {
      reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
      reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
    }
  else
    {
      reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
      reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
    }

  /* Precalculate the valid memory formats as well as the vector information;
     this must be set up before the rs6000_hard_regno_nregs_internal calls
     below.  */
  gcc_assert ((int)VECTOR_NONE == 0);
  memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
  memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));

  gcc_assert ((int)CODE_FOR_nothing == 0);
  memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));

  gcc_assert ((int)NO_REGS == 0);
  memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));

  /* The VSX hardware allows native alignment for vectors, but control
     whether the compiler believes it can use native alignment or must still
     use 128-bit alignment.  */
  if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
    {
      align64 = 64;
      align32 = 32;
    }
  else
    {
      align64 = 128;
      align32 = 128;
    }
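  /* For example: with TARGET_VSX and TARGET_VSX_ALIGN_128 unset, the
     assignments below give V2DFmode 64-bit alignment and V4SFmode 32-bit
     alignment, rather than the full 128-bit alignment that Altivec
     requires.  */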

  /* KF mode (IEEE 128-bit in VSX registers).  We do not have arithmetic, so
     only set the memory modes.  Include TFmode if -mabi=ieeelongdouble.  */
  if (TARGET_FLOAT128_TYPE)
    {
      rs6000_vector_mem[KFmode] = VECTOR_VSX;
      rs6000_vector_align[KFmode] = 128;

      if (FLOAT128_IEEE_P (TFmode))
	{
	  rs6000_vector_mem[TFmode] = VECTOR_VSX;
	  rs6000_vector_align[TFmode] = 128;
	}
    }

  /* V2DF mode, VSX only.  */
  if (TARGET_VSX)
    {
      rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
      rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
      rs6000_vector_align[V2DFmode] = align64;
    }

  /* V4SF mode, either VSX or Altivec.  */
  if (TARGET_VSX)
    {
      rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
      rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
      rs6000_vector_align[V4SFmode] = align32;
    }
  else if (TARGET_ALTIVEC)
    {
      rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
      rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
      rs6000_vector_align[V4SFmode] = align32;
    }

  /* V16QImode, V8HImode and V4SImode are Altivec-only for arithmetic, but may
     use VSX loads and stores.  */
  if (TARGET_ALTIVEC)
    {
      rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
      rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
      rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
      rs6000_vector_align[V4SImode] = align32;
      rs6000_vector_align[V8HImode] = align32;
      rs6000_vector_align[V16QImode] = align32;

      if (TARGET_VSX)
	{
	  rs6000_vector_mem[V4SImode] = VECTOR_VSX;
	  rs6000_vector_mem[V8HImode] = VECTOR_VSX;
	  rs6000_vector_mem[V16QImode] = VECTOR_VSX;
	}
      else
	{
	  rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
	  rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
	  rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
	}
    }

  /* V2DImode: full arithmetic support depends on the ISA 2.07 vector
     instructions.  Allow it under VSX to do insert/splat/extract.  Altivec
     doesn't have 64-bit integer support.  */
  if (TARGET_VSX)
    {
      rs6000_vector_mem[V2DImode] = VECTOR_VSX;
      rs6000_vector_unit[V2DImode]
	= (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
      rs6000_vector_align[V2DImode] = align64;

      rs6000_vector_mem[V1TImode] = VECTOR_VSX;
      rs6000_vector_unit[V1TImode]
	= (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
      rs6000_vector_align[V1TImode] = 128;
    }

  /* DFmode, see if we want to use the VSX unit.  Memory is handled
     differently, so don't set rs6000_vector_mem.  */
  if (TARGET_VSX)
    {
      rs6000_vector_unit[DFmode] = VECTOR_VSX;
      rs6000_vector_align[DFmode] = 64;
    }

  /* SFmode, see if we want to use the VSX unit.  */
  if (TARGET_P8_VECTOR)
    {
      rs6000_vector_unit[SFmode] = VECTOR_VSX;
      rs6000_vector_align[SFmode] = 32;
    }

  /* Allow TImode in VSX registers and set the VSX memory macros.  */
  if (TARGET_VSX)
    {
      rs6000_vector_mem[TImode] = VECTOR_VSX;
      rs6000_vector_align[TImode] = align64;
    }

  /* Add support for vector pairs and vector quad registers.  */
  if (TARGET_MMA)
    {
      rs6000_vector_unit[OOmode] = VECTOR_NONE;
      rs6000_vector_mem[OOmode] = VECTOR_VSX;
      rs6000_vector_align[OOmode] = 256;

      rs6000_vector_unit[XOmode] = VECTOR_NONE;
      rs6000_vector_mem[XOmode] = VECTOR_VSX;
      rs6000_vector_align[XOmode] = 512;
    }

  /* Register class constraints for the constraints that depend on compile
     switches.  When the VSX code was added, different constraints were added
     based on the type (DFmode, V2DFmode, V4SFmode).  For the vector types, all
     of the VSX registers are used.  The register classes for scalar floating
     point types are set based on whether we allow that type into the upper
     (Altivec) registers.  GCC has register classes to target the Altivec
     registers for load/store operations, to select using a VSX memory
     operation instead of the traditional floating point operation.  The
     constraints are:

	d  - Register class to use with traditional DFmode instructions.
	f  - Register class to use with traditional SFmode instructions.
	v  - Altivec register.
	wa - Any VSX register.
	wc - Reserved to represent individual CR bits (used in LLVM).
	wn - always NO_REGS.
	wr - GPR if 64-bit mode is permitted.
	wx - Float register if we can do 32-bit int stores.  */

  if (TARGET_HARD_FLOAT)
    {
      rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS;	/* SFmode  */
      rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS;	/* DFmode  */
    }

  if (TARGET_VSX)
    rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;

  /* Add conditional constraints based on various options, to allow us to
     collapse multiple insn patterns.  */
  if (TARGET_ALTIVEC)
    rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;

  if (TARGET_POWERPC64)
    {
      rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
      rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
    }

  if (TARGET_STFIWX)
    rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS;	/* DImode  */

  /* Support for new direct moves (ISA 3.0 and 64-bit).  */
  if (TARGET_DIRECT_MOVE_128)
    rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;

  /* Set up the reload helper and direct move functions.  */
  if (TARGET_VSX || TARGET_ALTIVEC)
    {
      if (TARGET_64BIT)
	{
	  reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
	  reg_addr[V16QImode].reload_load  = CODE_FOR_reload_v16qi_di_load;
	  reg_addr[V8HImode].reload_store  = CODE_FOR_reload_v8hi_di_store;
	  reg_addr[V8HImode].reload_load   = CODE_FOR_reload_v8hi_di_load;
	  reg_addr[V4SImode].reload_store  = CODE_FOR_reload_v4si_di_store;
	  reg_addr[V4SImode].reload_load   = CODE_FOR_reload_v4si_di_load;
	  reg_addr[V2DImode].reload_store  = CODE_FOR_reload_v2di_di_store;
	  reg_addr[V2DImode].reload_load   = CODE_FOR_reload_v2di_di_load;
	  reg_addr[V1TImode].reload_store  = CODE_FOR_reload_v1ti_di_store;
	  reg_addr[V1TImode].reload_load   = CODE_FOR_reload_v1ti_di_load;
	  reg_addr[V4SFmode].reload_store  = CODE_FOR_reload_v4sf_di_store;
	  reg_addr[V4SFmode].reload_load   = CODE_FOR_reload_v4sf_di_load;
	  reg_addr[V2DFmode].reload_store  = CODE_FOR_reload_v2df_di_store;
	  reg_addr[V2DFmode].reload_load   = CODE_FOR_reload_v2df_di_load;
	  reg_addr[DFmode].reload_store    = CODE_FOR_reload_df_di_store;
	  reg_addr[DFmode].reload_load     = CODE_FOR_reload_df_di_load;
	  reg_addr[DDmode].reload_store    = CODE_FOR_reload_dd_di_store;
	  reg_addr[DDmode].reload_load     = CODE_FOR_reload_dd_di_load;
	  reg_addr[SFmode].reload_store    = CODE_FOR_reload_sf_di_store;
	  reg_addr[SFmode].reload_load     = CODE_FOR_reload_sf_di_load;

	  if (FLOAT128_VECTOR_P (KFmode))
	    {
	      reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
	      reg_addr[KFmode].reload_load  = CODE_FOR_reload_kf_di_load;
	    }

	  if (FLOAT128_VECTOR_P (TFmode))
	    {
	      reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
	      reg_addr[TFmode].reload_load  = CODE_FOR_reload_tf_di_load;
	    }

	  /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
	     available.  */
	  if (TARGET_NO_SDMODE_STACK)
	    {
	      reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
	      reg_addr[SDmode].reload_load  = CODE_FOR_reload_sd_di_load;
	    }

	  if (TARGET_VSX)
	    {
	      reg_addr[TImode].reload_store  = CODE_FOR_reload_ti_di_store;
	      reg_addr[TImode].reload_load   = CODE_FOR_reload_ti_di_load;
	    }

	  if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
	    {
	      reg_addr[TImode].reload_gpr_vsx    = CODE_FOR_reload_gpr_from_vsxti;
	      reg_addr[V1TImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv1ti;
	      reg_addr[V2DFmode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv2df;
	      reg_addr[V2DImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv2di;
	      reg_addr[V4SFmode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv4sf;
	      reg_addr[V4SImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv4si;
	      reg_addr[V8HImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv8hi;
	      reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
	      reg_addr[SFmode].reload_gpr_vsx    = CODE_FOR_reload_gpr_from_vsxsf;

	      reg_addr[TImode].reload_vsx_gpr    = CODE_FOR_reload_vsx_from_gprti;
	      reg_addr[V1TImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv1ti;
	      reg_addr[V2DFmode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv2df;
	      reg_addr[V2DImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv2di;
	      reg_addr[V4SFmode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv4sf;
	      reg_addr[V4SImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv4si;
	      reg_addr[V8HImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv8hi;
	      reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
	      reg_addr[SFmode].reload_vsx_gpr    = CODE_FOR_reload_vsx_from_gprsf;

	      if (FLOAT128_VECTOR_P (KFmode))
		{
		  reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
		  reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
		}

	      if (FLOAT128_VECTOR_P (TFmode))
		{
		  reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
		  reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
		}

	      if (TARGET_MMA)
		{
		  reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
		  reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
		  reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
		  reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
		}
	    }
	}
      else
	{
	  reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
	  reg_addr[V16QImode].reload_load  = CODE_FOR_reload_v16qi_si_load;
	  reg_addr[V8HImode].reload_store  = CODE_FOR_reload_v8hi_si_store;
	  reg_addr[V8HImode].reload_load   = CODE_FOR_reload_v8hi_si_load;
	  reg_addr[V4SImode].reload_store  = CODE_FOR_reload_v4si_si_store;
	  reg_addr[V4SImode].reload_load   = CODE_FOR_reload_v4si_si_load;
	  reg_addr[V2DImode].reload_store  = CODE_FOR_reload_v2di_si_store;
	  reg_addr[V2DImode].reload_load   = CODE_FOR_reload_v2di_si_load;
	  reg_addr[V1TImode].reload_store  = CODE_FOR_reload_v1ti_si_store;
	  reg_addr[V1TImode].reload_load   = CODE_FOR_reload_v1ti_si_load;
	  reg_addr[V4SFmode].reload_store  = CODE_FOR_reload_v4sf_si_store;
	  reg_addr[V4SFmode].reload_load   = CODE_FOR_reload_v4sf_si_load;
	  reg_addr[V2DFmode].reload_store  = CODE_FOR_reload_v2df_si_store;
	  reg_addr[V2DFmode].reload_load   = CODE_FOR_reload_v2df_si_load;
	  reg_addr[DFmode].reload_store    = CODE_FOR_reload_df_si_store;
	  reg_addr[DFmode].reload_load     = CODE_FOR_reload_df_si_load;
	  reg_addr[DDmode].reload_store    = CODE_FOR_reload_dd_si_store;
	  reg_addr[DDmode].reload_load     = CODE_FOR_reload_dd_si_load;
	  reg_addr[SFmode].reload_store    = CODE_FOR_reload_sf_si_store;
	  reg_addr[SFmode].reload_load     = CODE_FOR_reload_sf_si_load;

	  if (FLOAT128_VECTOR_P (KFmode))
	    {
	      reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
	      reg_addr[KFmode].reload_load  = CODE_FOR_reload_kf_si_load;
	    }

	  if (FLOAT128_IEEE_P (TFmode))
	    {
	      reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
	      reg_addr[TFmode].reload_load  = CODE_FOR_reload_tf_si_load;
	    }

	  /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
	     available.  */
	  if (TARGET_NO_SDMODE_STACK)
	    {
	      reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
	      reg_addr[SDmode].reload_load  = CODE_FOR_reload_sd_si_load;
	    }

	  if (TARGET_VSX)
	    {
	      reg_addr[TImode].reload_store  = CODE_FOR_reload_ti_si_store;
	      reg_addr[TImode].reload_load   = CODE_FOR_reload_ti_si_load;
	    }

	  if (TARGET_DIRECT_MOVE)
	    {
	      reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
	      reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
	      reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
	    }
	}

      reg_addr[DFmode].scalar_in_vmx_p = true;
      reg_addr[DImode].scalar_in_vmx_p = true;

      if (TARGET_P8_VECTOR)
	{
	  reg_addr[SFmode].scalar_in_vmx_p = true;
	  reg_addr[SImode].scalar_in_vmx_p = true;

	  if (TARGET_P9_VECTOR)
	    {
	      reg_addr[HImode].scalar_in_vmx_p = true;
	      reg_addr[QImode].scalar_in_vmx_p = true;
	    }
	}
    }
3158
3159  /* Precalculate HARD_REGNO_NREGS.  */
3160  for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3161    for (m = 0; m < NUM_MACHINE_MODES; ++m)
3162      rs6000_hard_regno_nregs[m][r]
3163	= rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3164
3165  /* Precalculate TARGET_HARD_REGNO_MODE_OK.  */
3166  for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3167    for (m = 0; m < NUM_MACHINE_MODES; ++m)
3168      rs6000_hard_regno_mode_ok_p[m][r]
3169	= rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3170
3171  /* Precalculate CLASS_MAX_NREGS sizes.  */
3172  for (c = 0; c < LIM_REG_CLASSES; ++c)
3173    {
3174      int reg_size;
3175
3176      if (TARGET_VSX && VSX_REG_CLASS_P (c))
3177	reg_size = UNITS_PER_VSX_WORD;
3178
3179      else if (c == ALTIVEC_REGS)
3180	reg_size = UNITS_PER_ALTIVEC_WORD;
3181
3182      else if (c == FLOAT_REGS)
3183	reg_size = UNITS_PER_FP_WORD;
3184
3185      else
3186	reg_size = UNITS_PER_WORD;
3187
3188      for (m = 0; m < NUM_MACHINE_MODES; ++m)
3189	{
3190	  machine_mode m2 = (machine_mode)m;
3191	  int reg_size2 = reg_size;
3192
3193	  /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3194	     in VSX.  */
3195	  if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3196	    reg_size2 = UNITS_PER_FP_WORD;
3197
3198	  rs6000_class_max_nregs[m][c]
3199	    = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3200	}
3201    }
3202
  /* Calculate which modes should automatically generate code to use the
     reciprocal divide and square root instructions.  In the future, possibly
     automatically generate the instructions even if the user did not specify
     -mrecip.  The older machines' double precision reciprocal sqrt estimate is
     not accurate enough.  */
3208  memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3209  if (TARGET_FRES)
3210    rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3211  if (TARGET_FRE)
3212    rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3213  if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3214    rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3215  if (VECTOR_UNIT_VSX_P (V2DFmode))
3216    rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3217
3218  if (TARGET_FRSQRTES)
3219    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3220  if (TARGET_FRSQRTE)
3221    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3222  if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3223    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3224  if (VECTOR_UNIT_VSX_P (V2DFmode))
3225    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3226
3227  if (rs6000_recip_control)
3228    {
3229      if (!flag_finite_math_only)
	warning (0, "%qs requires %qs or %qs", "-mrecip",
		 "-ffinite-math-only", "-ffast-math");
3232      if (flag_trapping_math)
3233	warning (0, "%qs requires %qs or %qs", "-mrecip",
3234		 "-fno-trapping-math", "-ffast-math");
3235      if (!flag_reciprocal_math)
3236	warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3237		 "-ffast-math");
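      /* If the necessary math flags are all in effect, honor the -mrecip
	 selections by enabling automatic generation of the reciprocal
	 estimate sequences for each requested mode.  */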
3238      if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3239	{
3240	  if (RS6000_RECIP_HAVE_RE_P (SFmode)
3241	      && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3242	    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3243
3244	  if (RS6000_RECIP_HAVE_RE_P (DFmode)
3245	      && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3246	    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3247
3248	  if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3249	      && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3250	    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3251
3252	  if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3253	      && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3254	    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3255
3256	  if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3257	      && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3258	    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3259
3260	  if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3261	      && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3262	    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3263
3264	  if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3265	      && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3266	    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3267
3268	  if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3269	      && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3270	    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3271	}
3272    }
3273
  /* Update the addr mask bits in reg_addr to help secondary reload and the
     legitimate address support figure out the appropriate addressing to
     use.  */
3277  rs6000_setup_reg_addr_masks ();
3278
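  /* Dump the register and cost information if the debug options are
     enabled.  */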
3279  if (global_init_p || TARGET_DEBUG_TARGET)
3280    {
3281      if (TARGET_DEBUG_REG)
3282	rs6000_debug_reg_global ();
3283
3284      if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3285	fprintf (stderr,
3286		 "SImode variable mult cost       = %d\n"
3287		 "SImode constant mult cost       = %d\n"
3288		 "SImode short constant mult cost = %d\n"
		 "DImode multiplication cost      = %d\n"
3290		 "SImode division cost            = %d\n"
3291		 "DImode division cost            = %d\n"
3292		 "Simple fp operation cost        = %d\n"
3293		 "DFmode multiplication cost      = %d\n"
3294		 "SFmode division cost            = %d\n"
3295		 "DFmode division cost            = %d\n"
3296		 "cache line size                 = %d\n"
3297		 "l1 cache size                   = %d\n"
3298		 "l2 cache size                   = %d\n"
3299		 "simultaneous prefetches         = %d\n"
3300		 "\n",
3301		 rs6000_cost->mulsi,
3302		 rs6000_cost->mulsi_const,
3303		 rs6000_cost->mulsi_const9,
3304		 rs6000_cost->muldi,
3305		 rs6000_cost->divsi,
3306		 rs6000_cost->divdi,
3307		 rs6000_cost->fp,
3308		 rs6000_cost->dmul,
3309		 rs6000_cost->sdiv,
3310		 rs6000_cost->ddiv,
3311		 rs6000_cost->cache_line_size,
3312		 rs6000_cost->l1_cache_size,
3313		 rs6000_cost->l2_cache_size,
3314		 rs6000_cost->simultaneous_prefetches);
3315    }
3316}
3317
3318#if TARGET_MACHO
3319/* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS.  */
3320
3321static void
3322darwin_rs6000_override_options (void)
3323{
  /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
     off.  */
3326  rs6000_altivec_abi = 1;
3327  TARGET_ALTIVEC_VRSAVE = 1;
3328  rs6000_current_abi = ABI_DARWIN;
3329
3330  if (DEFAULT_ABI == ABI_DARWIN
3331      && TARGET_64BIT)
3332      darwin_one_byte_bool = 1;
3333
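  /* -m64 requires a 64-bit PowerPC architecture, so turn it on if it was
     not given explicitly.  */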
3334  if (TARGET_64BIT && ! TARGET_POWERPC64)
3335    {
3336      rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3337      warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3338    }
3339
  /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
     optimisation, and will not work with the most generic case (where the
     symbol is undefined external, but there is no symbol stub).  */
3343  if (TARGET_64BIT)
3344    rs6000_default_long_calls = 0;
3345
3346  /* ld_classic is (so far) still used for kernel (static) code, and supports
3347     the JBSR longcall / branch islands.  */
3348  if (flag_mkernel)
3349    {
3350      rs6000_default_long_calls = 1;
3351
3352      /* Allow a kext author to do -mkernel -mhard-float.  */
3353      if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3354        rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3355    }
3356
3357  /* Make -m64 imply -maltivec.  Darwin's 64-bit ABI includes
3358     Altivec.  */
3359  if (!flag_mkernel && !flag_apple_kext
3360      && TARGET_64BIT
3361      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3362    rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3363
  /* Unless the user (not the configurer) has explicitly overridden
     it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to G4
     unless targeting the kernel.  */
3367  if (!flag_mkernel
3368      && !flag_apple_kext
3369      && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3370      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3371      && ! OPTION_SET_P (rs6000_cpu_index))
3372    {
3373      rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3374    }
3375}
3376#endif
3377
3378/* If not otherwise specified by a target, make 'long double' equivalent to
3379   'double'.  */
3380
3381#ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3382#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3383#endif
3384
/* Return the builtin mask of the various options used that could affect which
   builtins are enabled.  In the past we used target_flags, but we've run out
   of bits, and some options are no longer in target_flags.  */
3388
3389HOST_WIDE_INT
3390rs6000_builtin_mask_calculate (void)
3391{
3392  return (((TARGET_ALTIVEC)		    ? RS6000_BTM_ALTIVEC   : 0)
3393	  | ((TARGET_CMPB)		    ? RS6000_BTM_CMPB	   : 0)
3394	  | ((TARGET_VSX)		    ? RS6000_BTM_VSX	   : 0)
3395	  | ((TARGET_FRE)		    ? RS6000_BTM_FRE	   : 0)
3396	  | ((TARGET_FRES)		    ? RS6000_BTM_FRES	   : 0)
3397	  | ((TARGET_FRSQRTE)		    ? RS6000_BTM_FRSQRTE   : 0)
3398	  | ((TARGET_FRSQRTES)		    ? RS6000_BTM_FRSQRTES  : 0)
3399	  | ((TARGET_POPCNTD)		    ? RS6000_BTM_POPCNTD   : 0)
3400	  | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL      : 0)
3401	  | ((TARGET_P8_VECTOR)		    ? RS6000_BTM_P8_VECTOR : 0)
3402	  | ((TARGET_P9_VECTOR)		    ? RS6000_BTM_P9_VECTOR : 0)
3403	  | ((TARGET_P9_MISC)		    ? RS6000_BTM_P9_MISC   : 0)
3404	  | ((TARGET_MODULO)		    ? RS6000_BTM_MODULO    : 0)
3405	  | ((TARGET_64BIT)		    ? RS6000_BTM_64BIT     : 0)
3406	  | ((TARGET_POWERPC64)		    ? RS6000_BTM_POWERPC64 : 0)
3407	  | ((TARGET_CRYPTO)		    ? RS6000_BTM_CRYPTO	   : 0)
3408	  | ((TARGET_HTM)		    ? RS6000_BTM_HTM	   : 0)
3409	  | ((TARGET_DFP)		    ? RS6000_BTM_DFP	   : 0)
3410	  | ((TARGET_HARD_FLOAT)	    ? RS6000_BTM_HARD_FLOAT : 0)
3411	  | ((TARGET_LONG_DOUBLE_128
3412	      && TARGET_HARD_FLOAT
3413	      && !TARGET_IEEEQUAD)	    ? RS6000_BTM_LDBL128   : 0)
3414	  | ((TARGET_FLOAT128_TYPE)	    ? RS6000_BTM_FLOAT128  : 0)
3415	  | ((TARGET_FLOAT128_HW)	    ? RS6000_BTM_FLOAT128_HW : 0)
3416	  | ((TARGET_MMA)		    ? RS6000_BTM_MMA	   : 0)
3417	  | ((TARGET_POWER10)               ? RS6000_BTM_P10       : 0));
3418}
3419
3420/* Implement TARGET_MD_ASM_ADJUST.  All asm statements are considered
3421   to clobber the XER[CA] bit because clobbering that bit without telling
3422   the compiler worked just fine with versions of GCC before GCC 5, and
3423   breaking a lot of older code in ways that are hard to track down is
3424   not such a great idea.  */
3425
3426static rtx_insn *
3427rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
3428		      vec<machine_mode> & /*input_modes*/,
3429		      vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
3430		      HARD_REG_SET &clobbered_regs, location_t /*loc*/)
3431{
3432  clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3433  SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3434  return NULL;
3435}
3436
/* This target function is similar to the hook TARGET_OPTION_OVERRIDE
   but is called when the optimize level is changed via an attribute or
   pragma or when it is reset at the end of the code affected by the
   attribute or pragma.  It is not called at the beginning of compilation
   when TARGET_OPTION_OVERRIDE is called, so if you want these actions
   performed then as well, you should have TARGET_OPTION_OVERRIDE call
   TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE.  */
3444
3445static void
3446rs6000_override_options_after_change (void)
3447{
3448  /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3449     turns -frename-registers on.  */
3450  if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
3451       || (OPTION_SET_P (flag_unroll_all_loops)
3452	   && flag_unroll_all_loops))
3453    {
3454      if (!OPTION_SET_P (unroll_only_small_loops))
3455	unroll_only_small_loops = 0;
3456      if (!OPTION_SET_P (flag_rename_registers))
3457	flag_rename_registers = 1;
3458      if (!OPTION_SET_P (flag_cunroll_grow_size))
3459	flag_cunroll_grow_size = 1;
3460    }
3461  else if (!OPTION_SET_P (flag_cunroll_grow_size))
3462    flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
3463
3464  /* If we are inserting ROP-protect instructions, disable shrink wrap.  */
3465  if (rs6000_rop_protect)
3466    flag_shrink_wrap = 0;
3467}
3468
3469#ifdef TARGET_USES_LINUX64_OPT
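/* Validate the ABI-related options for 64-bit GNU/Linux style targets: reject
   option combinations that are invalid for the selected word size, and choose
   the default code model and TOC settings.  */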
3470static void
3471rs6000_linux64_override_options ()
3472{
3473  if (!OPTION_SET_P (rs6000_alignment_flags))
3474    rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3475  if (rs6000_isa_flags & OPTION_MASK_64BIT)
3476    {
3477      if (DEFAULT_ABI != ABI_AIX)
3478	{
3479	  rs6000_current_abi = ABI_AIX;
3480	  error (INVALID_64BIT, "call");
3481	}
3482      dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
3483      if (ELFv2_ABI_CHECK)
3484	{
3485	  rs6000_current_abi = ABI_ELFv2;
3486	  if (dot_symbols)
3487	    error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3488	}
3489      if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
3490	{
3491	  rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
3492	  error (INVALID_64BIT, "relocatable");
3493	}
3494      if (rs6000_isa_flags & OPTION_MASK_EABI)
3495	{
3496	  rs6000_isa_flags &= ~OPTION_MASK_EABI;
3497	  error (INVALID_64BIT, "eabi");
3498	}
3499      if (TARGET_PROTOTYPE)
3500	{
3501	  target_prototype = 0;
3502	  error (INVALID_64BIT, "prototype");
3503	}
3504      if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
3505	{
3506	  rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3507	  error ("%<-m64%> requires a PowerPC64 cpu");
3508	}
3509      if (!OPTION_SET_P (rs6000_current_cmodel))
3510	SET_CMODEL (CMODEL_MEDIUM);
3511      if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
3512	{
3513	  if (OPTION_SET_P (rs6000_current_cmodel)
3514	      && rs6000_current_cmodel != CMODEL_SMALL)
3515	    error ("%<-mcmodel%> incompatible with other toc options");
3516	  if (TARGET_MINIMAL_TOC)
3517	    SET_CMODEL (CMODEL_SMALL);
3518	  else if (TARGET_PCREL
3519		   || (PCREL_SUPPORTED_BY_OS
3520		       && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
3521	    /* Ignore -mno-minimal-toc.  */
3522	    ;
3523	  else
3524	    SET_CMODEL (CMODEL_SMALL);
3525	}
3526      if (rs6000_current_cmodel != CMODEL_SMALL)
3527	{
3528	  if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC))
3529	    TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
3530	  if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC))
3531	    TARGET_NO_SUM_IN_TOC = 0;
3532	}
3533      if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
3534	{
3535	  if (OPTION_SET_P (rs6000_pltseq))
3536	    warning (0, "%qs unsupported for this ABI",
3537		     "-mpltseq");
3538	  rs6000_pltseq = false;
3539	}
3540    }
3541  else if (TARGET_64BIT)
3542    error (INVALID_32BIT, "32");
3543  else
3544    {
3545      if (TARGET_PROFILE_KERNEL)
3546	{
3547	  profile_kernel = 0;
3548	  error (INVALID_32BIT, "profile-kernel");
3549	}
3550      if (OPTION_SET_P (rs6000_current_cmodel))
3551	{
3552	  SET_CMODEL (CMODEL_SMALL);
3553	  error (INVALID_32BIT, "cmodel");
3554	}
3555    }
3556}
3557#endif
3558
3559/* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
3560   This support is only in little endian GLIBC 2.32 or newer.  */
3561static bool
3562glibc_supports_ieee_128bit (void)
3563{
3564#ifdef OPTION_GLIBC
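  /* Versions are encoded as major * 1000 + minor, so GLIBC 2.32 is 2032.  */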
3565  if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
3566      && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
3567    return true;
3568#endif /* OPTION_GLIBC.  */
3569
3570  return false;
3571}
3572
3573/* Override command line options.
3574
3575   Combine build-specific configuration information with options
3576   specified on the command line to set various state variables which
3577   influence code generation, optimization, and expansion of built-in
3578   functions.  Assure that command-line configuration preferences are
3579   compatible with each other and with the build configuration; issue
3580   warnings while adjusting configuration or error messages while
3581   rejecting configuration.
3582
3583   Upon entry to this function:
3584
3585     This function is called once at the beginning of
3586     compilation, and then again at the start and end of compiling
3587     each section of code that has a different configuration, as
3588     indicated, for example, by adding the
3589
3590       __attribute__((__target__("cpu=power9")))
3591
3592     qualifier to a function definition or, for example, by bracketing
3593     code between
3594
3595       #pragma GCC target("altivec")
3596
3597     and
3598
3599       #pragma GCC reset_options
3600
3601     directives.  Parameter global_init_p is true for the initial
3602     invocation, which initializes global variables, and false for all
3603     subsequent invocations.
3606     Various global state information is assumed to be valid.  This
3607     includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3608     default CPU specified at build configure time, TARGET_DEFAULT,
3609     representing the default set of option flags for the default
3610     target, and OPTION_SET_P (rs6000_isa_flags), representing
3611     which options were requested on the command line.
3612
3613   Upon return from this function:
3614
3615     rs6000_isa_flags_explicit has a non-zero bit for each flag that
3616     was set by name on the command line.  Additionally, if certain
3617     attributes are automatically enabled or disabled by this function
3618     in order to assure compatibility between options and
3619     configuration, the flags associated with those attributes are
3620     also set.  By setting these "explicit bits", we avoid the risk
3621     that other code might accidentally overwrite these particular
3622     attributes with "default values".
3623
3624     The various bits of rs6000_isa_flags are set to indicate the
3625     target options that have been selected for the most current
3626     compilation efforts.  This has the effect of also turning on the
3627     associated TARGET_XXX values since these are macros which are
3628     generally defined to test the corresponding bit of the
3629     rs6000_isa_flags variable.
3630
3631     The variable rs6000_builtin_mask is set to represent the target
3632     options for the most current compilation efforts, consistent with
3633     the current contents of rs6000_isa_flags.  This variable controls
3634     expansion of built-in functions.
3635
3636     Various other global variables and fields of global structures
3637     (over 50 in all) are initialized to reflect the desired options
3638     for the most current compilation efforts.  */
3639
3640static bool
3641rs6000_option_override_internal (bool global_init_p)
3642{
3643  bool ret = true;
3644
3645  HOST_WIDE_INT set_masks;
3646  HOST_WIDE_INT ignore_masks;
3647  int cpu_index = -1;
3648  int tune_index;
3649  struct cl_target_option *main_target_opt
3650    = ((global_init_p || target_option_default_node == NULL)
3651       ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3652
3653  /* Print defaults.  */
3654  if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3655    rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3656
3657  /* Remember the explicit arguments.  */
3658  if (global_init_p)
3659    rs6000_isa_flags_explicit = OPTION_SET_P (rs6000_isa_flags);
3660
3661  /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3662     library functions, so warn about it. The flag may be useful for
3663     performance studies from time to time though, so don't disable it
3664     entirely.  */
3665  if (OPTION_SET_P (rs6000_alignment_flags)
3666      && rs6000_alignment_flags == MASK_ALIGN_POWER
3667      && DEFAULT_ABI == ABI_DARWIN
3668      && TARGET_64BIT)
3669    warning (0, "%qs is not supported for 64-bit Darwin;"
3670	     " it is incompatible with the installed C and C++ libraries",
3671	     "-malign-power");
3672
  /* Numerous experiments show that IRA-based loop pressure
     calculation works better for RTL loop invariant motion on targets
     with enough (>= 32) registers.  It is an expensive optimization,
     so it is on only for peak performance.  */
3677  if (optimize >= 3 && global_init_p
3678      && !OPTION_SET_P (flag_ira_loop_pressure))
3679    flag_ira_loop_pressure = 1;
3680
  /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
     for tracebacks to be complete, but not if -fasynchronous-unwind-tables was
     already specified on the command line.  */
3684  if (flag_sanitize & SANITIZE_USER_ADDRESS
3685      && !OPTION_SET_P (flag_asynchronous_unwind_tables))
3686    flag_asynchronous_unwind_tables = 1;
3687
3688  /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3689     loop unroller is active.  It is only checked during unrolling, so
3690     we can just set it on by default.  */
3691  if (!OPTION_SET_P (flag_variable_expansion_in_unroller))
3692    flag_variable_expansion_in_unroller = 1;
3693
3694  /* Set the pointer size.  */
3695  if (TARGET_64BIT)
3696    {
3697      rs6000_pmode = DImode;
3698      rs6000_pointer_size = 64;
3699    }
3700  else
3701    {
3702      rs6000_pmode = SImode;
3703      rs6000_pointer_size = 32;
3704    }
3705
3706  /* Some OSs don't support saving the high part of 64-bit registers on context
3707     switch.  Other OSs don't support saving Altivec registers.  On those OSs,
3708     we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3709     if the user wants either, the user must explicitly specify them and we
3710     won't interfere with the user's specification.  */
3711
3712  set_masks = POWERPC_MASKS;
3713#ifdef OS_MISSING_POWERPC64
3714  if (OS_MISSING_POWERPC64)
3715    set_masks &= ~OPTION_MASK_POWERPC64;
3716#endif
3717#ifdef OS_MISSING_ALTIVEC
3718  if (OS_MISSING_ALTIVEC)
3719    set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3720		   | OTHER_VSX_VECTOR_MASKS);
3721#endif
3722
3723  /* Don't override by the processor default if given explicitly.  */
3724  set_masks &= ~rs6000_isa_flags_explicit;
3725
  /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments.  If the user changed
     the cpu in a target attribute or pragma, but did not specify a tuning
     option, use the cpu for the tuning option rather than the option specified
     with -mtune on the command line.  Process a '--with-cpu' configuration
     request as an implicit -mcpu.  */
3731  if (rs6000_cpu_index >= 0)
3732    cpu_index = rs6000_cpu_index;
3733  else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3734    cpu_index = main_target_opt->x_rs6000_cpu_index;
3735  else if (OPTION_TARGET_CPU_DEFAULT)
3736    cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3737
3738  /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3739     compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3740     with those from the cpu, except for options that were explicitly set.  If
3741     we don't have a cpu, do not override the target bits set in
3742     TARGET_DEFAULT.  */
3743  if (cpu_index >= 0)
3744    {
3745      rs6000_cpu_index = cpu_index;
3746      rs6000_isa_flags &= ~set_masks;
3747      rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3748			   & set_masks);
3749    }
3750  else
3751    {
      /* If no -mcpu=<xxx>, inherit any default options that were cleared via
	 POWERPC_MASKS.  Originally, TARGET_DEFAULT was used to initialize
	 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook.  Since we
	 switched to using rs6000_isa_flags, the initialization is done here.
3756
3757	 If there is a TARGET_DEFAULT, use that.  Otherwise fall back to using
3758	 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults.  */
3759      HOST_WIDE_INT flags;
3760      if (TARGET_DEFAULT)
3761	flags = TARGET_DEFAULT;
3762      else
3763	{
3764	  /* PowerPC 64-bit LE requires at least ISA 2.07.  */
3765	  const char *default_cpu = (!TARGET_POWERPC64
3766				     ? "powerpc"
3767				     : (BYTES_BIG_ENDIAN
3768					? "powerpc64"
3769					: "powerpc64le"));
3770	  int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3771	  flags = processor_target_table[default_cpu_index].target_enable;
3772	}
3773      rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3774    }
3775
3776  if (rs6000_tune_index >= 0)
3777    tune_index = rs6000_tune_index;
3778  else if (cpu_index >= 0)
3779    rs6000_tune_index = tune_index = cpu_index;
3780  else
3781    {
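      /* Neither -mtune nor -mcpu was given; look up the tuning entry for the
	 default processor in the target table.  */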
3782      size_t i;
3783      enum processor_type tune_proc
3784	= (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3785
3786      tune_index = -1;
3787      for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3788	if (processor_target_table[i].processor == tune_proc)
3789	  {
3790	    tune_index = i;
3791	    break;
3792	  }
3793    }
3794
3795  if (cpu_index >= 0)
3796    rs6000_cpu = processor_target_table[cpu_index].processor;
3797  else
3798    rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3799
3800  gcc_assert (tune_index >= 0);
3801  rs6000_tune = processor_target_table[tune_index].processor;
3802
3803  if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3804      || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3805      || rs6000_cpu == PROCESSOR_PPCE5500)
3806    {
3807      if (TARGET_ALTIVEC)
3808	error ("AltiVec not supported in this target");
3809    }
3810
3811  /* If we are optimizing big endian systems for space, use the load/store
3812     multiple instructions.  */
3813  if (BYTES_BIG_ENDIAN && optimize_size)
3814    rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3815
  /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
     because the hardware doesn't support the instructions used in little
     endian mode, and they cause an alignment trap.  The 750 does not cause an
     alignment trap (except when the target is unaligned).  */
3820
3821  if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3822    {
3823      rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3824      if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3825	warning (0, "%qs is not supported on little endian systems",
3826		 "-mmultiple");
3827    }
3828
3829  /* If little-endian, default to -mstrict-align on older processors.
3830     Testing for direct_move matches power8 and later.  */
3831  if (!BYTES_BIG_ENDIAN
3832      && !(processor_target_table[tune_index].target_enable
3833	   & OPTION_MASK_DIRECT_MOVE))
3834    rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3835
3836  /* Add some warnings for VSX.  */
3837  if (TARGET_VSX)
3838    {
3839      const char *msg = NULL;
3840      if (!TARGET_HARD_FLOAT)
3841	{
3842	  if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3843	    msg = N_("%<-mvsx%> requires hardware floating point");
3844	  else
3845	    {
3846	      rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3847	      rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3848	    }
3849	}
3850      else if (TARGET_AVOID_XFORM > 0)
3851	msg = N_("%<-mvsx%> needs indexed addressing");
3852      else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3853				   & OPTION_MASK_ALTIVEC))
3854        {
3855	  if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3856	    msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3857	  else
3858	    msg = N_("%<-mno-altivec%> disables vsx");
3859        }
3860
3861      if (msg)
3862	{
3863	  warning (0, msg);
3864	  rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3865	  rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3866	}
3867    }
3868
3869  /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3870     the -mcpu setting to enable options that conflict. */
3871  if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3872      && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3873				       | OPTION_MASK_ALTIVEC
3874				       | OPTION_MASK_VSX)) != 0)
3875    rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3876			   | OPTION_MASK_DIRECT_MOVE)
3877		         & ~rs6000_isa_flags_explicit);
3878
3879  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3880    rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3881
3882#ifdef XCOFF_DEBUGGING_INFO
3883  /* For AIX default to 64-bit DWARF.  */
3884  if (!OPTION_SET_P (dwarf_offset_size))
3885    dwarf_offset_size = POINTER_SIZE_UNITS;
3886#endif
3887
3888  /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3889     off all of the options that depend on those flags.  */
3890  ignore_masks = rs6000_disable_incompatible_switches ();
3891
  /* For the newer switches (vsx, dfp, etc.) set some of the older options,
     unless the user explicitly used -mno-<option> to disable the code.  */
3894  if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3895    rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3896  else if (TARGET_P9_MINMAX)
3897    {
3898      if (cpu_index >= 0)
3899	{
	  if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
	    {
	      /* Legacy behavior: allow -mcpu=power9 with certain
		 capabilities explicitly disabled.  */
3904	      rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3905	    }
3906	  else
3907	    error ("power9 target option is incompatible with %<%s=<xxx>%> "
3908		   "for <xxx> less than power9", "-mcpu");
3909	}
3910      else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3911	       != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3912		   & rs6000_isa_flags_explicit))
3913	/* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3914	   were explicitly cleared.  */
3915	error ("%qs incompatible with explicitly disabled options",
3916	       "-mpower9-minmax");
3917      else
3918	rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3919    }
3920  else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3921    rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3922  else if (TARGET_VSX)
3923    rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3924  else if (TARGET_POPCNTD)
3925    rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3926  else if (TARGET_DFP)
3927    rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3928  else if (TARGET_CMPB)
3929    rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3930  else if (TARGET_FPRND)
3931    rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3932  else if (TARGET_POPCNTB)
3933    rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3934  else if (TARGET_ALTIVEC)
3935    rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3936
3937  /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3938     target attribute or pragma which automatically enables both options,
3939     unless the altivec ABI was set.  This is set by default for 64-bit, but
3940     not for 32-bit.  Don't move this before the above code using ignore_masks,
3941     since it can reset the cleared VSX/ALTIVEC flag again.  */
3942  if (main_target_opt && !main_target_opt->x_rs6000_altivec_abi)
3943    rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3944			  & ~rs6000_isa_flags_explicit);
3945
3946  if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3947    {
3948      if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3949	error ("%qs requires %qs", "-mcrypto", "-maltivec");
3950      rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3951    }
3952
3953  if (!TARGET_FPRND && TARGET_VSX)
3954    {
3955      if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
	/* TARGET_VSX = 1 implies Power7 and newer.  */
3957	error ("%qs requires %qs", "-mvsx", "-mfprnd");
3958      rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3959    }
3960
3961  if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3962    {
3963      if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3964	error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3965      rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3966    }
3967
3968  if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3969    {
3970      if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3971	error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3972      rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3973    }
3974
3975  if (TARGET_P8_VECTOR && !TARGET_VSX)
3976    {
3977      if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3978	  && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3979	error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3980      else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3981	{
3982	  rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3983	  if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3984	    rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3985	}
3986      else
3987	{
3988	  /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3989	     not explicit.  */
3990	  rs6000_isa_flags |= OPTION_MASK_VSX;
3991	  rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3992	}
3993    }
3994
3995  if (TARGET_DFP && !TARGET_HARD_FLOAT)
3996    {
3997      if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3998	error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3999      rs6000_isa_flags &= ~OPTION_MASK_DFP;
4000    }
4001
  /* The quad memory instructions only work in 64-bit mode.  In 32-bit mode,
     silently turn off quad memory mode.  */
4004  if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4005    {
4006      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4007	warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
4008
4009      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4010	warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
4011
4012      rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4013			    | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4014    }
4015
4016  /* Non-atomic quad memory load/store are disabled for little endian, since
4017     the words are reversed, but atomic operations can still be done by
4018     swapping the words.  */
4019  if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4020    {
4021      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4022	warning (0, N_("%<-mquad-memory%> is not available in little endian "
4023		       "mode"));
4024
4025      rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4026    }
4027
  /* Assume if the user asked for normal quad memory instructions, they want
     the atomic versions as well, unless they explicitly told us not to use
     quad word atomic instructions.  */
4031  if (TARGET_QUAD_MEMORY
4032      && !TARGET_QUAD_MEMORY_ATOMIC
4033      && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4034    rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4035
4036  /* If we can shrink-wrap the TOC register save separately, then use
4037     -msave-toc-indirect unless explicitly disabled.  */
4038  if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4039      && flag_shrink_wrap_separate
4040      && optimize_function_for_speed_p (cfun))
4041    rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4042
4043  /* Enable power8 fusion if we are tuning for power8, even if we aren't
4044     generating power8 instructions.  Power9 does not optimize power8 fusion
4045     cases.  */
4046  if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4047    {
4048      if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
4049	rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4050      else
4051	rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4052    }
4053
4054  /* Setting additional fusion flags turns on base fusion.  */
4055  if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
4056    {
4057      if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4058	{
4059	  if (TARGET_P8_FUSION_SIGN)
4060	    error ("%qs requires %qs", "-mpower8-fusion-sign",
4061		   "-mpower8-fusion");
4062
4063	  rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4064	}
4065      else
4066	rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4067    }
4068
4069  /* Power8 does not fuse sign extended loads with the addis.  If we are
4070     optimizing at high levels for speed, convert a sign extended load into a
4071     zero extending load, and an explicit sign extension.  */
4072  if (TARGET_P8_FUSION
4073      && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4074      && optimize_function_for_speed_p (cfun)
4075      && optimize >= 3)
4076    rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4077
4078  /* ISA 3.0 vector instructions include ISA 2.07.  */
4079  if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4080    {
4081      /* We prefer to not mention undocumented options in
4082	 error messages.  However, if users have managed to select
4083	 power9-vector without selecting power8-vector, they
4084	 already know about undocumented flags.  */
      if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
	  && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4087	error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4088      else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4089	{
4090	  rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4091	  if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4092	    rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4093	}
4094      else
4095	{
4096	  /* OPTION_MASK_P9_VECTOR is explicit and
4097	     OPTION_MASK_P8_VECTOR is not explicit.  */
4098	  rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4099	  rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4100	}
4101    }
4102
  /* Set -mallow-movmisalign explicitly to on if we have full ISA 2.07
     support.  If we only have ISA 2.06 support, and the user did not specify
     the switch, leave it set to -1 so the movmisalign patterns are enabled,
     but we don't enable the full vectorization support.  */
4107  if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4108    TARGET_ALLOW_MOVMISALIGN = 1;
4109
4110  else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4111    {
4112      if (TARGET_ALLOW_MOVMISALIGN > 0
4113	  && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN))
4114	error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4115
4116      TARGET_ALLOW_MOVMISALIGN = 0;
4117    }
4118
4119  /* Determine when unaligned vector accesses are permitted, and when
4120     they are preferred over masked Altivec loads.  Note that if
4121     TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4122     TARGET_EFFICIENT_UNALIGNED_VSX must be as well.  The converse is
4123     not true.  */
4124  if (TARGET_EFFICIENT_UNALIGNED_VSX)
4125    {
4126      if (!TARGET_VSX)
4127	{
4128	  if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4129	    error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4130
4131	  rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4132	}
4133
4134      else if (!TARGET_ALLOW_MOVMISALIGN)
4135	{
4136	  if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
	    error ("%qs requires %qs", "-mefficient-unaligned-vsx",
4138		   "-mallow-movmisalign");
4139
4140	  rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4141	}
4142    }
4143
4144  if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
4145    {
4146      if (TARGET_EFFICIENT_UNALIGNED_VSX)
4147	rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4148      else
4149	rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4150    }
4151
4152  /* Use long double size to select the appropriate long double.  We use
4153     TYPE_PRECISION to differentiate the 3 different long double types.  We map
4154     128 into the precision used for TFmode.  */
4155  int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4156				  ? 64
4157				  : FLOAT_PRECISION_TFmode);
4158
4159  /* Set long double size before the IEEE 128-bit tests.  */
4160  if (!OPTION_SET_P (rs6000_long_double_type_size))
4161    {
4162      if (main_target_opt != NULL
4163	  && (main_target_opt->x_rs6000_long_double_type_size
4164	      != default_long_double_size))
4165	error ("target attribute or pragma changes %<long double%> size");
4166      else
4167	rs6000_long_double_type_size = default_long_double_size;
4168    }
4169  else if (rs6000_long_double_type_size == FLOAT_PRECISION_TFmode)
4170    ; /* The option value can be seen when cl_target_option_restore is called.  */
4171  else if (rs6000_long_double_type_size == 128)
4172    rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4173
4174  /* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
4175     systems will also set long double to be IEEE 128-bit.  AIX and Darwin
4176     explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4177     those systems will not pick up this default.  Warn if the user changes the
4178     default unless -Wno-psabi.  */
4179  if (!OPTION_SET_P (rs6000_ieeequad))
4180    rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4181
4182  else if (TARGET_LONG_DOUBLE_128)
4183    {
4184      if (global_options.x_rs6000_ieeequad
4185	  && (!TARGET_POPCNTD || !TARGET_VSX))
4186	error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4187
4188      if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT)
4189	{
4190	  /* Determine if the user can change the default long double type at
4191	     compilation time.  You need GLIBC 2.32 or newer to be able to
4192	     change the long double type.  Only issue one warning.  */
4193	  static bool warned_change_long_double;
4194
4195	  if (!warned_change_long_double && !glibc_supports_ieee_128bit ())
4196	    {
4197	      warned_change_long_double = true;
	      if (TARGET_IEEEQUAD)
		warning (OPT_Wpsabi, "using IEEE extended precision "
			 "%<long double%>");
	      else
		warning (OPT_Wpsabi, "using IBM extended precision "
			 "%<long double%>");
4204	    }
4205	}
4206    }
4207
4208  /* Enable the default support for IEEE 128-bit floating point on Linux VSX
     systems.  In GCC 7, we would enable the IEEE 128-bit floating point
4210     infrastructure (-mfloat128-type) but not enable the actual __float128 type
4211     unless the user used the explicit -mfloat128.  In GCC 8, we enable both
4212     the keyword as well as the type.  */
4213  TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4214
4215  /* IEEE 128-bit floating point requires VSX support.  */
4216  if (TARGET_FLOAT128_KEYWORD)
4217    {
4218      if (!TARGET_VSX)
4219	{
4220	  if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4221	    error ("%qs requires VSX support", "-mfloat128");
4222
4223	  TARGET_FLOAT128_TYPE = 0;
4224	  rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4225				| OPTION_MASK_FLOAT128_HW);
4226	}
4227      else if (!TARGET_FLOAT128_TYPE)
4228	{
4229	  TARGET_FLOAT128_TYPE = 1;
	  warning (0, "the %<-mfloat128%> option may not be fully supported");
4231	}
4232    }
4233
4234  /* Enable the __float128 keyword under Linux by default.  */
4235  if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4236      && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4237    rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4238
  /* If we are supporting the float128 type and have full ISA 3.0 support,
     enable -mfloat128-hardware by default.  However, don't enable the
     __float128 keyword if it was explicitly turned off.  64-bit mode is needed
     because sometimes the compiler wants to put things in an integer
     container, and if we don't have __int128 support, it is impossible.  */
4244  if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4245      && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4246      && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4247    rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4248
4249  if (TARGET_FLOAT128_HW
4250      && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4251    {
4252      if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4253	error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4254
4255      rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4256    }
4257
4258  if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4259    {
4260      if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4261	error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4262
4263      rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4264    }
4265
4266  /* Enable -mprefixed by default on power10 systems.  */
4267  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4268    rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4269
4270  /* -mprefixed requires -mcpu=power10 (or later).  */
4271  else if (TARGET_PREFIXED && !TARGET_POWER10)
4272    {
4273      if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4274	error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4275
4276      rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4277    }
4278
4279  /* -mpcrel requires prefixed load/store addressing.  */
4280  if (TARGET_PCREL && !TARGET_PREFIXED)
4281    {
4282      if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4283	error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4284
4285      rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4286    }
4287
4288  /* Print the options after updating the defaults.  */
4289  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4290    rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4291
4292  /* E500mc does "better" if we inline more aggressively.  Respect the
4293     user's opinion, though.  */
4294  if (rs6000_block_move_inline_limit == 0
4295      && (rs6000_tune == PROCESSOR_PPCE500MC
4296	  || rs6000_tune == PROCESSOR_PPCE500MC64
4297	  || rs6000_tune == PROCESSOR_PPCE5500
4298	  || rs6000_tune == PROCESSOR_PPCE6500))
4299    rs6000_block_move_inline_limit = 128;
4300
4301  /* store_one_arg depends on expand_block_move to handle at least the
4302     size of reg_parm_stack_space.  */
4303  if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4304    rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4305
4306  if (global_init_p)
4307    {
      /* If the appropriate debug option is enabled, replace the target hooks
	 with debug versions that call the real version and then print
	 debugging information.  */
4311      if (TARGET_DEBUG_COST)
4312	{
4313	  targetm.rtx_costs = rs6000_debug_rtx_costs;
4314	  targetm.address_cost = rs6000_debug_address_cost;
4315	  targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4316	}
4317
4318      if (TARGET_DEBUG_ADDR)
4319	{
4320	  targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4321	  targetm.legitimize_address = rs6000_debug_legitimize_address;
4322	  rs6000_secondary_reload_class_ptr
4323	    = rs6000_debug_secondary_reload_class;
4324	  targetm.secondary_memory_needed
4325	    = rs6000_debug_secondary_memory_needed;
4326	  targetm.can_change_mode_class
4327	    = rs6000_debug_can_change_mode_class;
4328	  rs6000_preferred_reload_class_ptr
4329	    = rs6000_debug_preferred_reload_class;
4330	  rs6000_mode_dependent_address_ptr
4331	    = rs6000_debug_mode_dependent_address;
4332	}
4333
4334      if (rs6000_veclibabi_name)
4335	{
4336	  if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4337	    rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4338	  else
4339	    {
4340	      error ("unknown vectorization library ABI type in "
4341		     "%<-mveclibabi=%s%>", rs6000_veclibabi_name);
4342	      ret = false;
4343	    }
4344	}
4345    }
4346
4347  /* Enable Altivec ABI for AIX -maltivec.  */
4348  if (TARGET_XCOFF
4349      && (TARGET_ALTIVEC || TARGET_VSX)
4350      && !OPTION_SET_P (rs6000_altivec_abi))
4351    {
4352      if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4353	error ("target attribute or pragma changes AltiVec ABI");
4354      else
4355	rs6000_altivec_abi = 1;
4356    }
4357
4358  /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux.  For
4359     PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI.  It can
4360     be explicitly overridden in either case.  */
4361  if (TARGET_ELF)
4362    {
4363      if (!OPTION_SET_P (rs6000_altivec_abi)
4364	  && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4365	{
	  if (main_target_opt != NULL
	      && !main_target_opt->x_rs6000_altivec_abi)
4368	    error ("target attribute or pragma changes AltiVec ABI");
4369	  else
4370	    rs6000_altivec_abi = 1;
4371	}
4372    }
4373
  /* Set the Darwin64 ABI as default for 64-bit Darwin.
     So far, the only darwin64 targets are also Mach-O.  */
4376  if (TARGET_MACHO
4377      && DEFAULT_ABI == ABI_DARWIN
4378      && TARGET_64BIT)
4379    {
4380      if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4381	error ("target attribute or pragma changes darwin64 ABI");
4382      else
4383	{
4384	  rs6000_darwin64_abi = 1;
4385	  /* Default to natural alignment, for better performance.  */
4386	  rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4387	}
4388    }
4389
4390  /* Place FP constants in the constant pool instead of TOC
4391     if section anchors enabled.  */
4392  if (flag_section_anchors
4393      && !OPTION_SET_P (TARGET_NO_FP_IN_TOC))
4394    TARGET_NO_FP_IN_TOC = 1;
4395
4396  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4397    rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4398
4399#ifdef SUBTARGET_OVERRIDE_OPTIONS
4400  SUBTARGET_OVERRIDE_OPTIONS;
4401#endif
4402#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4403  SUBSUBTARGET_OVERRIDE_OPTIONS;
4404#endif
4405#ifdef SUB3TARGET_OVERRIDE_OPTIONS
4406  SUB3TARGET_OVERRIDE_OPTIONS;
4407#endif
4408
4409  /* If the ABI has support for PC-relative relocations, enable it by default.
4410     This test depends on the sub-target tests above setting the code model to
4411     medium for ELF v2 systems.  */
4412  if (PCREL_SUPPORTED_BY_OS
4413      && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4414    rs6000_isa_flags |= OPTION_MASK_PCREL;
4415
4416  /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4417      after the subtarget override options are done.  */
4418  else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4419    {
4420      if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4421	error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4422
4423      rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4424    }
4425
4426  /* Enable -mmma by default on power10 systems.  */
4427  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
4428    rs6000_isa_flags |= OPTION_MASK_MMA;
4429
4430  if (TARGET_POWER10
4431      && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0)
4432    rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
4433
4434  /* Turn off vector pair/mma options on non-power10 systems.  */
4435  else if (!TARGET_POWER10 && TARGET_MMA)
4436    {
4437      if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4438	error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4439
4440      rs6000_isa_flags &= ~OPTION_MASK_MMA;
4441    }
4442
  /* ISA 3.1 requires SIMD support for MMA, and our implementation, for
     example the "*movoo" pattern, uses vector pair accesses, which need VSX
     registers.  So make MMA require VSX support here.  */
4446  if (TARGET_MMA && !TARGET_VSX)
4447    {
4448      if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4449	error ("%qs requires %qs", "-mmma", "-mvsx");
4450      rs6000_isa_flags &= ~OPTION_MASK_MMA;
4451    }
4452
4453  if (!TARGET_PCREL && TARGET_PCREL_OPT)
4454    rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
4455
4456  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4457    rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4458
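  /* Set the scheduling defaults derived from the processor being tuned for:
     whether branch hints should always be used, whether the processor
     dispatches instructions in groups, and whether branch targets should be
     aligned.  */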
4459  rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4460			&& rs6000_tune != PROCESSOR_POWER5
4461			&& rs6000_tune != PROCESSOR_POWER6
4462			&& rs6000_tune != PROCESSOR_POWER7
4463			&& rs6000_tune != PROCESSOR_POWER8
4464			&& rs6000_tune != PROCESSOR_POWER9
4465			&& rs6000_tune != PROCESSOR_POWER10
4466			&& rs6000_tune != PROCESSOR_PPCA2
4467			&& rs6000_tune != PROCESSOR_CELL
4468			&& rs6000_tune != PROCESSOR_PPC476);
4469  rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4470			 || rs6000_tune == PROCESSOR_POWER5
4471			 || rs6000_tune == PROCESSOR_POWER7
4472			 || rs6000_tune == PROCESSOR_POWER8);
4473  rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4474				 || rs6000_tune == PROCESSOR_POWER5
4475				 || rs6000_tune == PROCESSOR_POWER6
4476				 || rs6000_tune == PROCESSOR_POWER7
4477				 || rs6000_tune == PROCESSOR_POWER8
4478				 || rs6000_tune == PROCESSOR_POWER9
4479				 || rs6000_tune == PROCESSOR_POWER10
4480				 || rs6000_tune == PROCESSOR_PPCE500MC
4481				 || rs6000_tune == PROCESSOR_PPCE500MC64
4482				 || rs6000_tune == PROCESSOR_PPCE5500
4483				 || rs6000_tune == PROCESSOR_PPCE6500);
4484
4485  /* Allow debug switches to override the above settings.  These are set to -1
4486     in rs6000.opt to indicate the user hasn't directly set the switch.  */
4487  if (TARGET_ALWAYS_HINT >= 0)
4488    rs6000_always_hint = TARGET_ALWAYS_HINT;
4489
4490  if (TARGET_SCHED_GROUPS >= 0)
4491    rs6000_sched_groups = TARGET_SCHED_GROUPS;
4492
4493  if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4494    rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4495
4496  rs6000_sched_restricted_insns_priority
4497    = (rs6000_sched_groups ? 1 : 0);
4498
4499  /* Handle -msched-costly-dep option.  */
4500  rs6000_sched_costly_dep
4501    = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4502
4503  if (rs6000_sched_costly_dep_str)
4504    {
4505      if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4506	rs6000_sched_costly_dep = no_dep_costly;
4507      else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4508	rs6000_sched_costly_dep = all_deps_costly;
4509      else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4510	rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4511      else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4512	rs6000_sched_costly_dep = store_to_load_dep_costly;
4513      else
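	/* Otherwise assume the string is a numeric dependence cost.  */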
4514	rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4515				   atoi (rs6000_sched_costly_dep_str));
4516    }
4517
4518  /* Handle -minsert-sched-nops option.  */
4519  rs6000_sched_insert_nops
4520    = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4521
4522  if (rs6000_sched_insert_nops_str)
4523    {
4524      if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4525	rs6000_sched_insert_nops = sched_finish_none;
4526      else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4527	rs6000_sched_insert_nops = sched_finish_pad_groups;
4528      else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4529	rs6000_sched_insert_nops = sched_finish_regroup_exact;
4530      else
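	/* Otherwise assume the string is a numeric nop insertion scheme.  */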
4531	rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4532				    atoi (rs6000_sched_insert_nops_str));
4533    }
4534
  /* Handle the stack protector options.  */
4536  if (!OPTION_SET_P (rs6000_stack_protector_guard))
4537#ifdef TARGET_THREAD_SSP_OFFSET
4538    rs6000_stack_protector_guard = SSP_TLS;
4539#else
4540    rs6000_stack_protector_guard = SSP_GLOBAL;
4541#endif
4542
4543#ifdef TARGET_THREAD_SSP_OFFSET
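  /* With a TLS guard, the canary lives at a fixed offset from the thread
     pointer register: r13 for 64-bit and r2 for 32-bit.  */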
4544  rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4545  rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4546#endif
4547
4548  if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str))
4549    {
4550      char *endp;
4551      const char *str = rs6000_stack_protector_guard_offset_str;
4552
4553      errno = 0;
4554      long offset = strtol (str, &endp, 0);
4555      if (!*str || *endp || errno)
4556	error ("%qs is not a valid number in %qs", str,
4557	       "-mstack-protector-guard-offset=");
4558
4559      if (!IN_RANGE (offset, -0x8000, 0x7fff)
4560	  || (TARGET_64BIT && (offset & 3)))
4561	error ("%qs is not a valid offset in %qs", str,
4562	       "-mstack-protector-guard-offset=");
4563
4564      rs6000_stack_protector_guard_offset = offset;
4565    }
4566
4567  if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str))
4568    {
4569      const char *str = rs6000_stack_protector_guard_reg_str;
4570      int reg = decode_reg_name (str);
4571
4572      if (!IN_RANGE (reg, 1, 31))
4573	error ("%qs is not a valid base register in %qs", str,
4574	       "-mstack-protector-guard-reg=");
4575
4576      rs6000_stack_protector_guard_reg = reg;
4577    }
4578
4579  if (rs6000_stack_protector_guard == SSP_TLS
4580      && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4581    error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4582
4583  if (global_init_p)
4584    {
4585#ifdef TARGET_REGNAMES
4586      /* If the user desires alternate register names, copy in the
4587	 alternate names now.  */
4588      if (TARGET_REGNAMES)
4589	memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4590#endif
4591
4592      /* Set aix_struct_return last, after the ABI is determined.
4593	 If -maix-struct-return or -msvr4-struct-return was explicitly
4594	 used, don't override with the ABI default.  */
4595      if (!OPTION_SET_P (aix_struct_return))
4596	aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4597
4598#if 0
4599      /* IBM XL compiler defaults to unsigned bitfields.  */
4600      if (TARGET_XL_COMPAT)
4601	flag_signed_bitfields = 0;
4602#endif
4603
4604      if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4605	REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4606
4607      ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4608
4609      /* We can only guarantee the availability of DI pseudo-ops when
4610	 assembling for 64-bit targets.  */
4611      if (!TARGET_64BIT)
4612	{
4613	  targetm.asm_out.aligned_op.di = NULL;
4614	  targetm.asm_out.unaligned_op.di = NULL;
4615	}
4617
4618      /* Set branch target alignment, if not optimizing for size.  */
4619      if (!optimize_size)
4620	{
	  /* Cell wants branch targets aligned to 8 bytes for dual issue.
	     Titan wants 8-byte alignment to avoid mispredictions by the
	     branch predictor.  */
4623	  if (rs6000_tune == PROCESSOR_TITAN
4624	      || rs6000_tune == PROCESSOR_CELL)
4625	    {
4626	      if (flag_align_functions && !str_align_functions)
4627		str_align_functions = "8";
4628	      if (flag_align_jumps && !str_align_jumps)
4629		str_align_jumps = "8";
4630	      if (flag_align_loops && !str_align_loops)
4631		str_align_loops = "8";
4632	    }
4633	  if (rs6000_align_branch_targets)
4634	    {
4635	      if (flag_align_functions && !str_align_functions)
4636		str_align_functions = "16";
4637	      if (flag_align_jumps && !str_align_jumps)
4638		str_align_jumps = "16";
4639	      if (flag_align_loops && !str_align_loops)
4640		{
4641		  can_override_loop_align = 1;
4642		  str_align_loops = "16";
4643		}
4644	    }
4645	}
4646
4647      /* Arrange to save and restore machine status around nested functions.  */
4648      init_machine_status = rs6000_init_machine_status;
4649
4650      /* We should always be splitting complex arguments, but we can't break
4651	 Linux and Darwin ABIs at the moment.  For now, only AIX is fixed.  */
4652      if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4653	targetm.calls.split_complex_arg = NULL;
4654
4655      /* The AIX and ELFv1 ABIs define standard function descriptors.  */
4656      if (DEFAULT_ABI == ABI_AIX)
4657	targetm.calls.custom_function_descriptors = 0;
4658    }
4659
4660  /* Initialize rs6000_cost with the appropriate target costs.  */
4661  if (optimize_size)
4662    rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4663  else
4664    switch (rs6000_tune)
4665      {
4666      case PROCESSOR_RS64A:
4667	rs6000_cost = &rs64a_cost;
4668	break;
4669
4670      case PROCESSOR_MPCCORE:
4671	rs6000_cost = &mpccore_cost;
4672	break;
4673
4674      case PROCESSOR_PPC403:
4675	rs6000_cost = &ppc403_cost;
4676	break;
4677
4678      case PROCESSOR_PPC405:
4679	rs6000_cost = &ppc405_cost;
4680	break;
4681
4682      case PROCESSOR_PPC440:
4683	rs6000_cost = &ppc440_cost;
4684	break;
4685
4686      case PROCESSOR_PPC476:
4687	rs6000_cost = &ppc476_cost;
4688	break;
4689
4690      case PROCESSOR_PPC601:
4691	rs6000_cost = &ppc601_cost;
4692	break;
4693
4694      case PROCESSOR_PPC603:
4695	rs6000_cost = &ppc603_cost;
4696	break;
4697
4698      case PROCESSOR_PPC604:
4699	rs6000_cost = &ppc604_cost;
4700	break;
4701
4702      case PROCESSOR_PPC604e:
4703	rs6000_cost = &ppc604e_cost;
4704	break;
4705
4706      case PROCESSOR_PPC620:
4707	rs6000_cost = &ppc620_cost;
4708	break;
4709
4710      case PROCESSOR_PPC630:
4711	rs6000_cost = &ppc630_cost;
4712	break;
4713
4714      case PROCESSOR_CELL:
4715	rs6000_cost = &ppccell_cost;
4716	break;
4717
4718      case PROCESSOR_PPC750:
4719      case PROCESSOR_PPC7400:
4720	rs6000_cost = &ppc750_cost;
4721	break;
4722
4723      case PROCESSOR_PPC7450:
4724	rs6000_cost = &ppc7450_cost;
4725	break;
4726
4727      case PROCESSOR_PPC8540:
4728      case PROCESSOR_PPC8548:
4729	rs6000_cost = &ppc8540_cost;
4730	break;
4731
4732      case PROCESSOR_PPCE300C2:
4733      case PROCESSOR_PPCE300C3:
4734	rs6000_cost = &ppce300c2c3_cost;
4735	break;
4736
4737      case PROCESSOR_PPCE500MC:
4738	rs6000_cost = &ppce500mc_cost;
4739	break;
4740
4741      case PROCESSOR_PPCE500MC64:
4742	rs6000_cost = &ppce500mc64_cost;
4743	break;
4744
4745      case PROCESSOR_PPCE5500:
4746	rs6000_cost = &ppce5500_cost;
4747	break;
4748
4749      case PROCESSOR_PPCE6500:
4750	rs6000_cost = &ppce6500_cost;
4751	break;
4752
4753      case PROCESSOR_TITAN:
4754	rs6000_cost = &titan_cost;
4755	break;
4756
4757      case PROCESSOR_POWER4:
4758      case PROCESSOR_POWER5:
4759	rs6000_cost = &power4_cost;
4760	break;
4761
4762      case PROCESSOR_POWER6:
4763	rs6000_cost = &power6_cost;
4764	break;
4765
4766      case PROCESSOR_POWER7:
4767	rs6000_cost = &power7_cost;
4768	break;
4769
4770      case PROCESSOR_POWER8:
4771	rs6000_cost = &power8_cost;
4772	break;
4773
4774      case PROCESSOR_POWER9:
4775	rs6000_cost = &power9_cost;
4776	break;
4777
4778      case PROCESSOR_POWER10:
4779	rs6000_cost = &power10_cost;
4780	break;
4781
4782      case PROCESSOR_PPCA2:
4783	rs6000_cost = &ppca2_cost;
4784	break;
4785
4786      default:
4787	gcc_unreachable ();
4788      }
4789
4790  if (global_init_p)
4791    {
4792      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4793			   param_simultaneous_prefetches,
4794			   rs6000_cost->simultaneous_prefetches);
4795      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4796			   param_l1_cache_size,
4797			   rs6000_cost->l1_cache_size);
4798      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4799			   param_l1_cache_line_size,
4800			   rs6000_cost->cache_line_size);
4801      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4802			   param_l2_cache_size,
4803			   rs6000_cost->l2_cache_size);
4804
4805      /* Increase loop peeling limits based on performance analysis. */
4806      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4807			   param_max_peeled_insns, 400);
4808      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4809			   param_max_completely_peeled_insns, 400);
4810
4811      /* The lxvl/stxvl instructions don't perform well before Power10.  */
4812      if (TARGET_POWER10)
4813	SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4814			     param_vect_partial_vector_usage, 1);
4815      else
4816	SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4817			     param_vect_partial_vector_usage, 0);
4818
4819      /* Use the 'model' -fsched-pressure algorithm by default.  */
4820      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4821			   param_sched_pressure_algorithm,
4822			   SCHED_PRESSURE_MODEL);
4823
4824      /* If using typedef char *va_list, signal that
4825	 __builtin_va_start (&ap, 0) can be optimized to
4826	 ap = __builtin_next_arg (0).  */
4827      if (DEFAULT_ABI != ABI_V4)
4828	targetm.expand_builtin_va_start = NULL;
4829    }
4830
4831  rs6000_override_options_after_change ();
4832
  /* If not explicitly specified via option, decide whether to generate
     indexed load/store instructions.  A value of -1 indicates that the
     initial value of this variable has not been overwritten.  During
     compilation, TARGET_AVOID_XFORM is either 0 or 1.  */
4837  if (TARGET_AVOID_XFORM == -1)
4838    /* Avoid indexed addressing when targeting Power6 in order to avoid the
4839     DERAT mispredict penalty.  However the LVE and STVE altivec instructions
4840     need indexed accesses and the type used is the scalar type of the element
4841     being loaded or stored.  */
4842    TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4843			  && !TARGET_ALTIVEC);
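
  /* For illustration: an X-form (indexed) load such as
	 lwzx r9,r4,r5
     forms its address as r4+r5, while a D-form load such as
	 lwz r9,0(r7)
     uses a displacement from a single base register; setting
     TARGET_AVOID_XFORM steers code generation toward the latter.  */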
4844
4845  /* Set the -mrecip options.  */
4846  if (rs6000_recip_name)
4847    {
4848      char *p = ASTRDUP (rs6000_recip_name);
4849      char *q;
4850      unsigned int mask, i;
4851      bool invert;
4852
4853      while ((q = strtok (p, ",")) != NULL)
4854	{
4855	  p = NULL;
4856	  if (*q == '!')
4857	    {
4858	      invert = true;
4859	      q++;
4860	    }
4861	  else
4862	    invert = false;
4863
4864	  if (!strcmp (q, "default"))
4865	    mask = ((TARGET_RECIP_PRECISION)
4866		    ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4867	  else
4868	    {
4869	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4870		if (!strcmp (q, recip_options[i].string))
4871		  {
4872		    mask = recip_options[i].mask;
4873		    break;
4874		  }
4875
4876	      if (i == ARRAY_SIZE (recip_options))
4877		{
4878		  error ("unknown option for %<%s=%s%>", "-mrecip", q);
4879		  invert = false;
4880		  mask = 0;
4881		  ret = false;
4882		}
4883	    }
4884
4885	  if (invert)
4886	    rs6000_recip_control &= ~mask;
4887	  else
4888	    rs6000_recip_control |= mask;
4889	}
4890    }
4891
4892  /* Set the builtin mask of the various options used that could affect which
4893     builtins were used.  In the past we used target_flags, but we've run out
4894     of bits, and some options are no longer in target_flags.  */
4895  rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4896  if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4897    rs6000_print_builtin_options (stderr, 0, "builtin mask",
4898				  rs6000_builtin_mask);
4899
4900  /* Initialize all of the registers.  */
4901  rs6000_init_hard_regno_mode_ok (global_init_p);
4902
  /* Save the initial options in case the user uses function-specific
     options.  */
4904  if (global_init_p)
4905    target_option_default_node = target_option_current_node
4906      = build_target_option_node (&global_options, &global_options_set);
4907
4908  /* If not explicitly specified via option, decide whether to generate the
4909     extra blr's required to preserve the link stack on some cpus (eg, 476).  */
4910  if (TARGET_LINK_STACK == -1)
4911    SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4912
4913  /* Deprecate use of -mno-speculate-indirect-jumps.  */
4914  if (!rs6000_speculate_indirect_jumps)
4915    warning (0, "%qs is deprecated and not recommended in any circumstances",
4916	     "-mno-speculate-indirect-jumps");
4917
4918  return ret;
4919}
4920
4921/* Implement TARGET_OPTION_OVERRIDE.  On the RS/6000 this is used to
4922   define the target cpu type.  */
4923
4924static void
4925rs6000_option_override (void)
4926{
4927  (void) rs6000_option_override_internal (true);
4928}
4929
4930
/* Implement LOOP_ALIGN.  */
4932align_flags
4933rs6000_loop_align (rtx label)
4934{
4935  basic_block bb;
4936  int ninsns;
4937
  /* Don't override loop alignment if -falign-loops was specified.  */
4939  if (!can_override_loop_align)
4940    return align_loops;
4941
4942  bb = BLOCK_FOR_INSN (label);
  ninsns = num_loop_insns (bb->loop_father);
4944
  /* Align small loops to 32 bytes to fit in an icache sector; otherwise
     return the default.  */
4946  if (ninsns > 4 && ninsns <= 8
4947      && (rs6000_tune == PROCESSOR_POWER4
4948	  || rs6000_tune == PROCESSOR_POWER5
4949	  || rs6000_tune == PROCESSOR_POWER6
4950	  || rs6000_tune == PROCESSOR_POWER7
4951	  || rs6000_tune == PROCESSOR_POWER8))
4952    return align_flags (5);
4953  else
4954    return align_loops;
4955}
4956
/* Return true iff a data reference of TYPE can reach vector alignment (16)
   after applying N iterations.  This routine does not determine how many
   iterations are required to reach the desired alignment.  */
4960
4961static bool
4962rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4963{
4964  if (is_packed)
4965    return false;
4966
4967  if (TARGET_32BIT)
4968    {
4969      if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4970        return true;
4971
4972      if (rs6000_alignment_flags ==  MASK_ALIGN_POWER)
4973        return true;
4974
4975      return false;
4976    }
4977  else
4978    {
4979      if (TARGET_MACHO)
4980        return false;
4981
4982      /* Assuming that all other types are naturally aligned. CHECKME!  */
4983      return true;
4984    }
4985}
4986
4987/* Return true if the vector misalignment factor is supported by the
4988   target.  */
4989static bool
4990rs6000_builtin_support_vector_misalignment (machine_mode mode,
4991					    const_tree type,
4992					    int misalignment,
4993					    bool is_packed)
4994{
4995  if (TARGET_VSX)
4996    {
4997      if (TARGET_EFFICIENT_UNALIGNED_VSX)
4998	return true;
4999
5000      /* Return if movmisalign pattern is not supported for this mode.  */
5001      if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5002        return false;
5003
5004      if (misalignment == -1)
5005	{
5006	  /* Misalignment factor is unknown at compile time but we know
5007	     it's word aligned.  */
5008	  if (rs6000_vector_alignment_reachable (type, is_packed))
5009            {
5010              int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5011
5012              if (element_size == 64 || element_size == 32)
5013               return true;
5014            }
5015
5016	  return false;
5017	}
5018
      /* VSX supports word-aligned vectors.  */
5020      if (misalignment % 4 == 0)
5021	return true;
5022    }
5023  return false;
5024}
5025
5026/* Implement targetm.vectorize.builtin_vectorization_cost.  */
5027static int
5028rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5029                                   tree vectype, int misalign)
5030{
5031  unsigned elements;
5032  tree elem_type;
5033
5034  switch (type_of_cost)
5035    {
5036      case scalar_stmt:
5037      case scalar_store:
5038      case vector_stmt:
5039      case vector_store:
5040      case vec_to_scalar:
5041      case scalar_to_vec:
5042      case cond_branch_not_taken:
5043        return 1;
5044      case scalar_load:
5045      case vector_load:
5046	/* Like rs6000_insn_cost, make load insns cost a bit more.  */
	return 2;
5048
5049      case vec_perm:
5050	/* Power7 has only one permute unit, make it a bit expensive.  */
5051	if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5052	  return 3;
5053	else
5054	  return 1;
5055
5056      case vec_promote_demote:
5057	/* Power7 has only one permute/pack unit, make it a bit expensive.  */
5058	if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5059	  return 4;
5060	else
5061	  return 1;
5062
5063      case cond_branch_taken:
5064        return 3;
5065
5066      case unaligned_load:
5067      case vector_gather_load:
5068	/* Like rs6000_insn_cost, make load insns cost a bit more.  */
5069	if (TARGET_EFFICIENT_UNALIGNED_VSX)
5070	  return 2;
5071
5072	if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5073	  {
5074	    elements = TYPE_VECTOR_SUBPARTS (vectype);
5075	    /* See PR102767, consider V1TI to keep consistency.  */
5076	    if (elements == 2 || elements == 1)
5077	      /* Double word aligned.  */
5078	      return 4;
5079
5080	    if (elements == 4)
5081	      {
5082		switch (misalign)
5083		  {
5084		  case 8:
5085		    /* Double word aligned.  */
5086		    return 4;
5087
5088		  case -1:
5089		    /* Unknown misalignment.  */
5090		  case 4:
5091		  case 12:
5092		    /* Word aligned.  */
5093		    return 33;
5094
5095		  default:
5096		    gcc_unreachable ();
5097		  }
5098	      }
5099	  }
5100
5101	if (TARGET_ALTIVEC)
5102	  /* Misaligned loads are not supported.  */
5103	  gcc_unreachable ();
5104
5105	/* Like rs6000_insn_cost, make load insns cost a bit more.  */
5106	return 4;
5107
5108      case unaligned_store:
5109      case vector_scatter_store:
5110	if (TARGET_EFFICIENT_UNALIGNED_VSX)
5111	  return 1;
5112
5113	if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5114	  {
5115	    elements = TYPE_VECTOR_SUBPARTS (vectype);
5116	    /* See PR102767, consider V1TI to keep consistency.  */
5117	    if (elements == 2 || elements == 1)
5118	      /* Double word aligned.  */
5119	      return 2;
5120
5121	    if (elements == 4)
5122	      {
5123		switch (misalign)
5124		  {
5125		  case 8:
5126		    /* Double word aligned.  */
5127		    return 2;
5128
5129		  case -1:
5130		    /* Unknown misalignment.  */
5131		  case 4:
5132		  case 12:
5133		    /* Word aligned.  */
5134		    return 23;
5135
5136		  default:
5137		    gcc_unreachable ();
5138		  }
5139	      }
5140	  }
5141
5142	if (TARGET_ALTIVEC)
5143	  /* Misaligned stores are not supported.  */
5144	  gcc_unreachable ();
5145
5146	return 2;
5147
5148      case vec_construct:
5149	/* This is a rough approximation assuming non-constant elements
5150	   constructed into a vector via element insertion.  FIXME:
5151	   vec_construct is not granular enough for uniformly good
5152	   decisions.  If the initialization is a splat, this is
5153	   cheaper than we estimate.  Improve this someday.  */
5154	elem_type = TREE_TYPE (vectype);
5155	/* 32-bit vectors loaded into registers are stored as double
5156	   precision, so we need 2 permutes, 2 converts, and 1 merge
5157	   to construct a vector of short floats from them.  */
5158	if (SCALAR_FLOAT_TYPE_P (elem_type)
5159	    && TYPE_PRECISION (elem_type) == 32)
5160	  return 5;
5161	/* On POWER9, integer vector types are built up in GPRs and then
5162	   use a direct move (2 cycles).  For POWER8 this is even worse,
5163	   as we need two direct moves and a merge, and the direct moves
5164	   are five cycles.  */
5165	else if (INTEGRAL_TYPE_P (elem_type))
5166	  {
5167	    if (TARGET_P9_VECTOR)
5168	      return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5169	    else
5170	      return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5171	  }
5172	else
5173	  /* V2DFmode doesn't need a direct move.  */
5174	  return 2;
5175
5176      default:
5177        gcc_unreachable ();
5178    }
5179}
5180
5181/* Implement targetm.vectorize.preferred_simd_mode.  */
5182
5183static machine_mode
5184rs6000_preferred_simd_mode (scalar_mode mode)
5185{
5186  opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
5187
5188  if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
5189    return vmode.require ();
5190
5191  return word_mode;
5192}
5193
5194class rs6000_cost_data : public vector_costs
5195{
5196public:
5197  using vector_costs::vector_costs;
5198
5199  unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
5200			      stmt_vec_info stmt_info, slp_tree, tree vectype,
5201			      int misalign,
5202			      vect_cost_model_location where) override;
5203  void finish_cost (const vector_costs *) override;
5204
5205protected:
5206  void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info,
5207				    vect_cost_model_location, unsigned int);
5208  void density_test (loop_vec_info);
5209  void adjust_vect_cost_per_loop (loop_vec_info);
5210
5211  /* Total number of vectorized stmts (loop only).  */
5212  unsigned m_nstmts = 0;
5213  /* Total number of loads (loop only).  */
5214  unsigned m_nloads = 0;
5215  /* Possible extra penalized cost on vector construction (loop only).  */
5216  unsigned m_extra_ctor_cost = 0;
5217  /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5218     instruction is needed by the vectorization.  */
5219  bool m_vect_nonmem = false;
5220};
5221
5222/* Test for likely overcommitment of vector hardware resources.  If a
5223   loop iteration is relatively large, and too large a percentage of
5224   instructions in the loop are vectorized, the cost model may not
5225   adequately reflect delays from unavailable vector resources.
5226   Penalize the loop body cost for this case.  */
5227
5228void
5229rs6000_cost_data::density_test (loop_vec_info loop_vinfo)
5230{
5231  /* This density test only cares about the cost of vector version of the
5232     loop, so immediately return if we are passed costing for the scalar
5233     version (namely computing single scalar iteration cost).  */
5234  if (m_costing_for_scalar)
5235    return;
5236
5237  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5238  basic_block *bbs = get_loop_body (loop);
5239  int nbbs = loop->num_nodes;
5240  int vec_cost = m_costs[vect_body], not_vec_cost = 0;
5241
5242  for (int i = 0; i < nbbs; i++)
5243    {
5244      basic_block bb = bbs[i];
5245      gimple_stmt_iterator gsi;
5246
5247      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5248	{
5249	  gimple *stmt = gsi_stmt (gsi);
5250	  if (is_gimple_debug (stmt))
5251	    continue;
5252
5253	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5254
5255	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
5256	      && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5257	    not_vec_cost++;
5258	}
5259    }
5260
5261  free (bbs);
5262  int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5263
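  /* For example, with a percentage threshold of 85, vec_cost == 90 and
     not_vec_cost == 10 give density_pct == 90, which triggers the penalty
     below provided the total cost of 100 also exceeds the size threshold
     (both thresholds are tunable).  */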
5264  if (density_pct > rs6000_density_pct_threshold
5265      && vec_cost + not_vec_cost > rs6000_density_size_threshold)
5266    {
5267      m_costs[vect_body] = vec_cost * (100 + rs6000_density_penalty) / 100;
5268      if (dump_enabled_p ())
5269	dump_printf_loc (MSG_NOTE, vect_location,
5270			 "density %d%%, cost %d exceeds threshold, penalizing "
5271			 "loop body cost by %u%%\n", density_pct,
5272			 vec_cost + not_vec_cost, rs6000_density_penalty);
5273    }
5274
5275  /* Check whether we need to penalize the body cost to account
5276     for excess strided or elementwise loads.  */
5277  if (m_extra_ctor_cost > 0)
5278    {
5279      gcc_assert (m_nloads <= m_nstmts);
5280      unsigned int load_pct = (m_nloads * 100) / m_nstmts;
5281
      /* The loop body is likely to be bound by latency and execution
	 resources when many scalar loads (strided or elementwise loads
	 into a vector) are present, if both conditions below hold:
	   1. there are many loads, so it's easy to end up with a long
	      wait for load units;
	   2. loads make up a big proportion of all vectorized statements,
	      so it's not easy to schedule other statements to spread
	      among the loads.
	 One typical case is the innermost loop of the hotspot of SPEC2017
	 503.bwaves_r without loop interchange.  */
5292      if (m_nloads > (unsigned int) rs6000_density_load_num_threshold
5293	  && load_pct > (unsigned int) rs6000_density_load_pct_threshold)
5294	{
5295	  m_costs[vect_body] += m_extra_ctor_cost;
5296	  if (dump_enabled_p ())
5297	    dump_printf_loc (MSG_NOTE, vect_location,
5298			     "Found %u loads and "
5299			     "load pct. %u%% exceed "
5300			     "the threshold, "
5301			     "penalizing loop body "
5302			     "cost by extra cost %u "
5303			     "for ctor.\n",
5304			     m_nloads, load_pct,
5305			     m_extra_ctor_cost);
5306	}
5307    }
5308}
5309
5310/* Implement targetm.vectorize.create_costs.  */
5311
5312static vector_costs *
5313rs6000_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
5314{
5315  return new rs6000_cost_data (vinfo, costing_for_scalar);
5316}
5317
/* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
   For some statements, we would like to further fine-tune the cost on top of
   the rs6000_builtin_vectorization_cost handling, which doesn't have any
   information on statement operation codes etc.  One typical case here is
   COND_EXPR: it has the same cost as a simple FXU instruction when evaluated
   for scalar cost, but it should be priced higher whether it is transformed
   into compare + branch or compare + isel instructions.  */
5325
5326static unsigned
5327rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
5328				  struct _stmt_vec_info *stmt_info)
5329{
5330  if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5331      && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5332    {
5333      tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5334      if (subcode == COND_EXPR)
5335	return 2;
5336    }
5337
5338  return 0;
5339}
5340
5341/* Helper function for add_stmt_cost.  Check each statement cost
5342   entry, gather information and update the target_cost fields
5343   accordingly.  */
5344void
5345rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
5346					       stmt_vec_info stmt_info,
5347					       vect_cost_model_location where,
5348					       unsigned int orig_count)
5349{
5351  /* Check whether we're doing something other than just a copy loop.
5352     Not all such loops may be profitably vectorized; see
5353     rs6000_finish_cost.  */
5354  if (kind == vec_to_scalar
5355      || kind == vec_perm
5356      || kind == vec_promote_demote
5357      || kind == vec_construct
5358      || kind == scalar_to_vec
5359      || (where == vect_body && kind == vector_stmt))
5360    m_vect_nonmem = true;
5361
5362  /* Gather some information when we are costing the vectorized instruction
5363     for the statements located in a loop body.  */
5364  if (!m_costing_for_scalar
5365      && is_a<loop_vec_info> (m_vinfo)
5366      && where == vect_body)
5367    {
5368      m_nstmts += orig_count;
5369
5370      if (kind == scalar_load || kind == vector_load
5371	  || kind == unaligned_load || kind == vector_gather_load)
5372	m_nloads += orig_count;
5373
5374      /* Power processors do not currently have instructions for strided
5375	 and elementwise loads, and instead we must generate multiple
5376	 scalar loads.  This leads to undercounting of the cost.  We
5377	 account for this by scaling the construction cost by the number
5378	 of elements involved, and saving this as extra cost that we may
5379	 or may not need to apply.  When finalizing the cost of the loop,
5380	 the extra penalty is applied when the load density heuristics
5381	 are satisfied.  */
5382      if (kind == vec_construct && stmt_info
5383	  && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
5384	  && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
5385	      || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP))
5386	{
5387	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5388	  unsigned int nunits = vect_nunits_for_cost (vectype);
	  /* As PR103702 shows, it's possible that the vectorizer wants to
	     do costing for only one unit here; there is no need to do any
	     penalization for it, so simply return early.  */
5392	  if (nunits == 1)
5393	    return;
	  /* The i386 port adopts nunits * stmt_cost as the penalized cost
	     for this kind of penalization.  We used to follow it but found
	     it could result in an unreliable body cost, especially for
	     V16QI/V8HI modes.  To do better, we choose this new heuristic:
	     for each scalar load, use 2 as the penalized cost for the case
	     with 2 nunits and 1 for the other cases.  It's without much
	     supporting theory, mainly concluded from broad performance
	     evaluations on Power8, Power9 and Power10.  One possibly
	     related point is that vector construction from more units uses
	     more insns, so there are more chances to schedule them well
	     (even run them in parallel when enough units are available at
	     the time), so it seems reasonable not to penalize them as
	     much.  */
5407	  unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
5408	  unsigned int extra_cost = nunits * adjusted_cost;
5409	  m_extra_ctor_cost += extra_cost;
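	  /* For example, a strided V2DImode load (nunits == 2) accrues
	     2 * 2 = 4 extra units, while a V16QImode one (nunits == 16)
	     accrues 16 * 1 = 16; density_test only applies this extra
	     cost when the load density heuristics are satisfied.  */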
5410	}
5411    }
5412}
5413
5414unsigned
5415rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
5416				 stmt_vec_info stmt_info, slp_tree,
5417				 tree vectype, int misalign,
5418				 vect_cost_model_location where)
5419{
5420  unsigned retval = 0;
5421
5422  if (flag_vect_cost_model)
5423    {
5424      int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5425							 misalign);
5426      stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
5427      /* Statements in an inner loop relative to the loop being
5428	 vectorized are weighted more heavily.  The value here is
5429	 arbitrary and could potentially be improved with analysis.  */
5430      unsigned int orig_count = count;
5431      retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
5432      m_costs[where] += retval;
5433
5434      update_target_cost_per_stmt (kind, stmt_info, where, orig_count);
5435    }
5436
5437  return retval;
5438}
5439
/* For some target-specific vectorization costs which can't be handled per
   stmt, we check the requisite conditions and adjust the vectorization cost
   accordingly if they are satisfied.  One typical example is to model the
   shift cost for vectors with length by counting the number of required
   lengths under condition LOOP_VINFO_FULLY_WITH_LENGTH_P.  */
5445
5446void
5447rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo)
5448{
5449  if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
5450    {
5451      rgroup_controls *rgc;
5452      unsigned int num_vectors_m1;
5453      unsigned int shift_cnt = 0;
5454      FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
5455	if (rgc->type)
5456	  /* Each length needs one shift to fill into bits 0-7.  */
5457	  shift_cnt += num_vectors_m1 + 1;
5458
5459      add_stmt_cost (shift_cnt, scalar_stmt, NULL, NULL,
5460		     NULL_TREE, 0, vect_body);
5461    }
5462}
5463
5464void
5465rs6000_cost_data::finish_cost (const vector_costs *scalar_costs)
5466{
5467  if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
5468    {
5469      adjust_vect_cost_per_loop (loop_vinfo);
5470      density_test (loop_vinfo);
5471
5472      /* Don't vectorize minimum-vectorization-factor, simple copy loops
5473	 that require versioning for any reason.  The vectorization is at
5474	 best a wash inside the loop, and the versioning checks make
5475	 profitability highly unlikely and potentially quite harmful.  */
5476      if (!m_vect_nonmem
5477	  && LOOP_VINFO_VECT_FACTOR (loop_vinfo) == 2
5478	  && LOOP_REQUIRES_VERSIONING (loop_vinfo))
5479	m_costs[vect_body] += 10000;
5480    }
5481
5482  vector_costs::finish_cost (scalar_costs);
5483}
5484
5485/* Implement targetm.loop_unroll_adjust.  */
5486
5487static unsigned
5488rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5489{
  if (unroll_only_small_loops)
5491    {
5492      /* TODO: These are hardcoded values right now.  We probably should use
5493	 a PARAM here.  */
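      /* For example, with nunroll == 8 a 6-insn loop body is unrolled at
	 most 4 times, a 10-insn body at most twice, and anything larger is
	 not unrolled at all.  */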
5494      if (loop->ninsns <= 6)
5495	return MIN (4, nunroll);
5496      if (loop->ninsns <= 10)
5497	return MIN (2, nunroll);
5498
5499      return 0;
5500    }
5501
5502  return nunroll;
5503}
5504
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and result vector type TYPE_OUT, or
   NULL_TREE if it is not available.
5508
5509   Implement targetm.vectorize.builtin_vectorized_function.  */
5510
5511static tree
5512rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5513				    tree type_in)
5514{
5515  machine_mode in_mode, out_mode;
5516  int in_n, out_n;
5517
5518  if (TARGET_DEBUG_BUILTIN)
5519    fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5520	     combined_fn_name (combined_fn (fn)),
5521	     GET_MODE_NAME (TYPE_MODE (type_out)),
5522	     GET_MODE_NAME (TYPE_MODE (type_in)));
5523
5524  /* TODO: Should this be gcc_assert?  */
5525  if (TREE_CODE (type_out) != VECTOR_TYPE
5526      || TREE_CODE (type_in) != VECTOR_TYPE)
5527    return NULL_TREE;
5528
5529  out_mode = TYPE_MODE (TREE_TYPE (type_out));
5530  out_n = TYPE_VECTOR_SUBPARTS (type_out);
5531  in_mode = TYPE_MODE (TREE_TYPE (type_in));
5532  in_n = TYPE_VECTOR_SUBPARTS (type_in);
5533
5534  switch (fn)
5535    {
5536    CASE_CFN_COPYSIGN:
5537      if (VECTOR_UNIT_VSX_P (V2DFmode)
5538	  && out_mode == DFmode && out_n == 2
5539	  && in_mode == DFmode && in_n == 2)
5540	return rs6000_builtin_decls[RS6000_BIF_CPSGNDP];
5541      if (VECTOR_UNIT_VSX_P (V4SFmode)
5542	  && out_mode == SFmode && out_n == 4
5543	  && in_mode == SFmode && in_n == 4)
5544	return rs6000_builtin_decls[RS6000_BIF_CPSGNSP];
5545      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5546	  && out_mode == SFmode && out_n == 4
5547	  && in_mode == SFmode && in_n == 4)
5548	return rs6000_builtin_decls[RS6000_BIF_COPYSIGN_V4SF];
5549      break;
5550    CASE_CFN_CEIL:
5551      if (VECTOR_UNIT_VSX_P (V2DFmode)
5552	  && out_mode == DFmode && out_n == 2
5553	  && in_mode == DFmode && in_n == 2)
5554	return rs6000_builtin_decls[RS6000_BIF_XVRDPIP];
5555      if (VECTOR_UNIT_VSX_P (V4SFmode)
5556	  && out_mode == SFmode && out_n == 4
5557	  && in_mode == SFmode && in_n == 4)
5558	return rs6000_builtin_decls[RS6000_BIF_XVRSPIP];
5559      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5560	  && out_mode == SFmode && out_n == 4
5561	  && in_mode == SFmode && in_n == 4)
5562	return rs6000_builtin_decls[RS6000_BIF_VRFIP];
5563      break;
5564    CASE_CFN_FLOOR:
5565      if (VECTOR_UNIT_VSX_P (V2DFmode)
5566	  && out_mode == DFmode && out_n == 2
5567	  && in_mode == DFmode && in_n == 2)
5568	return rs6000_builtin_decls[RS6000_BIF_XVRDPIM];
5569      if (VECTOR_UNIT_VSX_P (V4SFmode)
5570	  && out_mode == SFmode && out_n == 4
5571	  && in_mode == SFmode && in_n == 4)
5572	return rs6000_builtin_decls[RS6000_BIF_XVRSPIM];
5573      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5574	  && out_mode == SFmode && out_n == 4
5575	  && in_mode == SFmode && in_n == 4)
5576	return rs6000_builtin_decls[RS6000_BIF_VRFIM];
5577      break;
5578    CASE_CFN_FMA:
5579      if (VECTOR_UNIT_VSX_P (V2DFmode)
5580	  && out_mode == DFmode && out_n == 2
5581	  && in_mode == DFmode && in_n == 2)
5582	return rs6000_builtin_decls[RS6000_BIF_XVMADDDP];
5583      if (VECTOR_UNIT_VSX_P (V4SFmode)
5584	  && out_mode == SFmode && out_n == 4
5585	  && in_mode == SFmode && in_n == 4)
5586	return rs6000_builtin_decls[RS6000_BIF_XVMADDSP];
5587      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5588	  && out_mode == SFmode && out_n == 4
5589	  && in_mode == SFmode && in_n == 4)
5590	return rs6000_builtin_decls[RS6000_BIF_VMADDFP];
5591      break;
5592    CASE_CFN_TRUNC:
5593      if (VECTOR_UNIT_VSX_P (V2DFmode)
5594	  && out_mode == DFmode && out_n == 2
5595	  && in_mode == DFmode && in_n == 2)
5596	return rs6000_builtin_decls[RS6000_BIF_XVRDPIZ];
5597      if (VECTOR_UNIT_VSX_P (V4SFmode)
5598	  && out_mode == SFmode && out_n == 4
5599	  && in_mode == SFmode && in_n == 4)
5600	return rs6000_builtin_decls[RS6000_BIF_XVRSPIZ];
5601      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5602	  && out_mode == SFmode && out_n == 4
5603	  && in_mode == SFmode && in_n == 4)
5604	return rs6000_builtin_decls[RS6000_BIF_VRFIZ];
5605      break;
5606    CASE_CFN_NEARBYINT:
5607      if (VECTOR_UNIT_VSX_P (V2DFmode)
5608	  && flag_unsafe_math_optimizations
5609	  && out_mode == DFmode && out_n == 2
5610	  && in_mode == DFmode && in_n == 2)
5611	return rs6000_builtin_decls[RS6000_BIF_XVRDPI];
5612      if (VECTOR_UNIT_VSX_P (V4SFmode)
5613	  && flag_unsafe_math_optimizations
5614	  && out_mode == SFmode && out_n == 4
5615	  && in_mode == SFmode && in_n == 4)
5616	return rs6000_builtin_decls[RS6000_BIF_XVRSPI];
5617      break;
5618    CASE_CFN_RINT:
5619      if (VECTOR_UNIT_VSX_P (V2DFmode)
5620	  && !flag_trapping_math
5621	  && out_mode == DFmode && out_n == 2
5622	  && in_mode == DFmode && in_n == 2)
5623	return rs6000_builtin_decls[RS6000_BIF_XVRDPIC];
5624      if (VECTOR_UNIT_VSX_P (V4SFmode)
5625	  && !flag_trapping_math
5626	  && out_mode == SFmode && out_n == 4
5627	  && in_mode == SFmode && in_n == 4)
5628	return rs6000_builtin_decls[RS6000_BIF_XVRSPIC];
5629      break;
5630    default:
5631      break;
5632    }
5633
5634  /* Generate calls to libmass if appropriate.  */
5635  if (rs6000_veclib_handler)
5636    return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5637
5638  return NULL_TREE;
5639}
5640
5641/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5642   library with vectorized intrinsics.  */
5643
5644static tree
5645rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5646				   tree type_in)
5647{
5648  char name[32];
5649  const char *suffix = NULL;
5650  tree fntype, new_fndecl, bdecl = NULL_TREE;
5651  int n_args = 1;
5652  const char *bname;
5653  machine_mode el_mode, in_mode;
5654  int n, in_n;
5655
5656  /* Libmass is suitable for unsafe math only as it does not correctly support
5657     parts of IEEE with the required precision such as denormals.  Only support
5658     it if we have VSX to use the simd d2 or f4 functions.
5659     XXX: Add variable length support.  */
5660  if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5661    return NULL_TREE;
5662
5663  el_mode = TYPE_MODE (TREE_TYPE (type_out));
5664  n = TYPE_VECTOR_SUBPARTS (type_out);
5665  in_mode = TYPE_MODE (TREE_TYPE (type_in));
5666  in_n = TYPE_VECTOR_SUBPARTS (type_in);
5667  if (el_mode != in_mode
5668      || n != in_n)
5669    return NULL_TREE;
5670
5671  switch (fn)
5672    {
5673    CASE_CFN_ATAN2:
5674    CASE_CFN_HYPOT:
5675    CASE_CFN_POW:
5676      n_args = 2;
5677      gcc_fallthrough ();
5678
5679    CASE_CFN_ACOS:
5680    CASE_CFN_ACOSH:
5681    CASE_CFN_ASIN:
5682    CASE_CFN_ASINH:
5683    CASE_CFN_ATAN:
5684    CASE_CFN_ATANH:
5685    CASE_CFN_CBRT:
5686    CASE_CFN_COS:
5687    CASE_CFN_COSH:
5688    CASE_CFN_ERF:
5689    CASE_CFN_ERFC:
5690    CASE_CFN_EXP2:
5691    CASE_CFN_EXP:
5692    CASE_CFN_EXPM1:
5693    CASE_CFN_LGAMMA:
5694    CASE_CFN_LOG10:
5695    CASE_CFN_LOG1P:
5696    CASE_CFN_LOG2:
5697    CASE_CFN_LOG:
5698    CASE_CFN_SIN:
5699    CASE_CFN_SINH:
5700    CASE_CFN_SQRT:
5701    CASE_CFN_TAN:
5702    CASE_CFN_TANH:
5703      if (el_mode == DFmode && n == 2)
5704	{
5705	  bdecl = mathfn_built_in (double_type_node, fn);
5706	  suffix = "d2";				/* pow -> powd2 */
5707	}
5708      else if (el_mode == SFmode && n == 4)
5709	{
5710	  bdecl = mathfn_built_in (float_type_node, fn);
5711	  suffix = "4";					/* powf -> powf4 */
5712	}
5713      else
5714	return NULL_TREE;
5715      if (!bdecl)
5716	return NULL_TREE;
5717      break;
5718
5719    default:
5720      return NULL_TREE;
5721    }
5722
5723  gcc_assert (suffix != NULL);
5724  bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5725  if (!bname)
5726    return NULL_TREE;
5727
5728  strcpy (name, bname + strlen ("__builtin_"));
5729  strcat (name, suffix);
5730
5731  if (n_args == 1)
5732    fntype = build_function_type_list (type_out, type_in, NULL);
5733  else if (n_args == 2)
5734    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5735  else
5736    gcc_unreachable ();
5737
5738  /* Build a function declaration for the vectorized function.  */
5739  new_fndecl = build_decl (BUILTINS_LOCATION,
5740			   FUNCTION_DECL, get_identifier (name), fntype);
5741  TREE_PUBLIC (new_fndecl) = 1;
5742  DECL_EXTERNAL (new_fndecl) = 1;
5743  DECL_IS_NOVOPS (new_fndecl) = 1;
5744  TREE_READONLY (new_fndecl) = 1;
5745
5746  return new_fndecl;
5747}
5748
5749
5750/* Default CPU string for rs6000*_file_start functions.  */
5751static const char *rs6000_default_cpu;
5752
5753#ifdef USING_ELFOS_H
5754const char *rs6000_machine;
5755
5756const char *
5757rs6000_machine_from_flags (void)
5758{
5759  /* e300 and e500 */
5760  if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3)
5761    return "e300";
5762  if (rs6000_cpu == PROCESSOR_PPC8540 || rs6000_cpu == PROCESSOR_PPC8548)
5763    return "e500";
5764  if (rs6000_cpu == PROCESSOR_PPCE500MC)
5765    return "e500mc";
5766  if (rs6000_cpu == PROCESSOR_PPCE500MC64)
5767    return "e500mc64";
5768  if (rs6000_cpu == PROCESSOR_PPCE5500)
5769    return "e5500";
5770  if (rs6000_cpu == PROCESSOR_PPCE6500)
5771    return "e6500";
5772
5773  /* 400 series */
5774  if (rs6000_cpu == PROCESSOR_PPC403)
5775    return "\"403\"";
5776  if (rs6000_cpu == PROCESSOR_PPC405)
5777    return "\"405\"";
5778  if (rs6000_cpu == PROCESSOR_PPC440)
5779    return "\"440\"";
5780  if (rs6000_cpu == PROCESSOR_PPC476)
5781    return "\"476\"";
5782
5783  /* A2 */
5784  if (rs6000_cpu == PROCESSOR_PPCA2)
5785    return "a2";
5786
5787  /* Cell BE */
5788  if (rs6000_cpu == PROCESSOR_CELL)
5789    return "cell";
5790
5791  /* Titan */
5792  if (rs6000_cpu == PROCESSOR_TITAN)
5793    return "titan";
5794
5795  /* 500 series and 800 series */
5796  if (rs6000_cpu == PROCESSOR_MPCCORE)
5797    return "\"821\"";
5798
5799#if 0
5800  /* This (and ppc64 below) are disabled here (for now at least) because
5801     PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON
5802     are #define'd as some of these.  Untangling that is a job for later.  */
5803
5804  /* 600 series and 700 series, "classic" */
5805  if (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603
5806      || rs6000_cpu == PROCESSOR_PPC604 || rs6000_cpu == PROCESSOR_PPC604e
5807      || rs6000_cpu == PROCESSOR_PPC750)
5808    return "ppc";
5809#endif
5810
5811  /* Classic with AltiVec, "G4" */
5812  if (rs6000_cpu == PROCESSOR_PPC7400 || rs6000_cpu == PROCESSOR_PPC7450)
5813    return "\"7450\"";
5814
5815#if 0
5816  /* The older 64-bit CPUs */
5817  if (rs6000_cpu == PROCESSOR_PPC620 || rs6000_cpu == PROCESSOR_PPC630
5818      || rs6000_cpu == PROCESSOR_RS64A)
5819    return "ppc64";
5820#endif
5821
5822  HOST_WIDE_INT flags = rs6000_isa_flags;
5823
5824  /* Disable the flags that should never influence the .machine selection.  */
5825  flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL);
5826
5827  if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5828    return "power10";
5829  if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5830    return "power9";
5831  if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5832    return "power8";
5833  if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5834    return "power7";
5835  if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5836    return "power6";
5837  if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5838    return "power5";
5839  if ((flags & ISA_2_1_MASKS) != 0)
5840    return "power4";
5841  if ((flags & OPTION_MASK_POWERPC64) != 0)
5842    return "ppc64";
5843  return "ppc";
5844}
5845
5846void
5847emit_asm_machine (void)
5848{
5849  fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5850}
5851#endif
5852
5853/* Do anything needed at the start of the asm file.  */
5854
5855static void
5856rs6000_file_start (void)
5857{
5858  char buffer[80];
5859  const char *start = buffer;
5860  FILE *file = asm_out_file;
5861
5862  rs6000_default_cpu = TARGET_CPU_DEFAULT;
5863
5864  default_file_start ();
5865
5866  if (flag_verbose_asm)
5867    {
5868      sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5869
5870      if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5871	{
5872	  fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5873	  start = "";
5874	}
5875
5876      if (OPTION_SET_P (rs6000_cpu_index))
5877	{
5878	  fprintf (file, "%s -mcpu=%s", start,
5879		   processor_target_table[rs6000_cpu_index].name);
5880	  start = "";
5881	}
5882
5883      if (OPTION_SET_P (rs6000_tune_index))
5884	{
5885	  fprintf (file, "%s -mtune=%s", start,
5886		   processor_target_table[rs6000_tune_index].name);
5887	  start = "";
5888	}
5889
5890      if (PPC405_ERRATUM77)
5891	{
5892	  fprintf (file, "%s PPC405CR_ERRATUM77", start);
5893	  start = "";
5894	}
5895
5896#ifdef USING_ELFOS_H
5897      switch (rs6000_sdata)
5898	{
5899	case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5900	case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5901	case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5902	case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5903	}
5904
5905      if (rs6000_sdata && g_switch_value)
5906	{
5907	  fprintf (file, "%s -G %d", start,
5908		   g_switch_value);
5909	  start = "";
5910	}
5911#endif
5912
5913      if (*start == '\0')
5914	putc ('\n', file);
5915    }
5916
5917#ifdef USING_ELFOS_H
5918  rs6000_machine = rs6000_machine_from_flags ();
5919  emit_asm_machine ();
5920#endif
5921
5922  if (DEFAULT_ABI == ABI_ELFv2)
5923    fprintf (file, "\t.abiversion 2\n");
5924}
5925
5926
5927/* Return nonzero if this function is known to have a null epilogue.  */
5928
5929int
5930direct_return (void)
5931{
5932  if (reload_completed)
5933    {
5934      rs6000_stack_t *info = rs6000_stack_info ();
5935
5936      if (info->first_gp_reg_save == 32
5937	  && info->first_fp_reg_save == 64
5938	  && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5939	  && ! info->lr_save_p
5940	  && ! info->cr_save_p
5941	  && info->vrsave_size == 0
5942	  && ! info->push_p)
5943	return 1;
5944    }
5945
5946  return 0;
5947}
5948
/* Helper for num_insns_constant.  Calculate the number of instructions
   needed to load VALUE into a single gpr using combinations of addi,
   addis, ori, oris, sldi and rldimi instructions.  */
5952
5953static int
5954num_insns_constant_gpr (HOST_WIDE_INT value)
5955{
5956  /* signed constant loadable with addi */
5957  if (SIGNED_INTEGER_16BIT_P (value))
5958    return 1;
5959
5960  /* constant loadable with addis */
5961  else if ((value & 0xffff) == 0
5962	   && (value >> 31 == -1 || value >> 31 == 0))
5963    return 1;
5964
  /* PADDI can support up to 34-bit signed integers.  */
5966  else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
5967    return 1;
5968
5969  else if (TARGET_POWERPC64)
5970    {
5971      HOST_WIDE_INT low  = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5972      HOST_WIDE_INT high = value >> 31;
5973
5974      if (high == 0 || high == -1)
5975	return 2;
5976
5977      high >>= 1;
5978
5979      if (low == 0 || low == high)
5980	return num_insns_constant_gpr (high) + 1;
5981      else if (high == 0)
5982	return num_insns_constant_gpr (low) + 1;
5983      else
5984	return (num_insns_constant_gpr (high)
5985		+ num_insns_constant_gpr (low) + 1);
5986    }
5987
5988  else
5989    return 2;
5990}
5991
5992/* Helper for num_insns_constant.  Allow constants formed by the
5993   num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5994   and handle modes that require multiple gprs.  */
5995
5996static int
5997num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5998{
5999  int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6000  int total = 0;
6001  while (nregs-- > 0)
6002    {
6003      HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
6004      int insns = num_insns_constant_gpr (low);
6005      if (insns > 2
6006	  /* We won't get more than 2 from num_insns_constant_gpr
6007	     except when TARGET_POWERPC64 and mode is DImode or
6008	     wider, so the register mode must be DImode.  */
6009	  && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
6010	insns = 2;
6011      total += insns;
      /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, shifting
	 by the full width at once would be undefined behavior.  */
6014      value >>= (BITS_PER_WORD - 1);
6015      value >>= 1;
6016    }
6017  return total;
6018}
6019
/* Return the number of instructions it takes to form a constant in as
   many gprs as are needed for MODE.  */
6022
6023int
6024num_insns_constant (rtx op, machine_mode mode)
6025{
6026  HOST_WIDE_INT val;
6027
6028  switch (GET_CODE (op))
6029    {
6030    case CONST_INT:
6031      val = INTVAL (op);
6032      break;
6033
6034    case CONST_WIDE_INT:
6035      {
6036	int insns = 0;
6037	for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6038	  insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
6039					     DImode);
6040	return insns;
6041      }
6042
6043    case CONST_DOUBLE:
6044      {
6045	const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
6046
6047	if (mode == SFmode || mode == SDmode)
6048	  {
6049	    long l;
6050
6051	    if (mode == SDmode)
6052	      REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
6053	    else
6054	      REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
6055	    /* See the first define_split in rs6000.md handling a
6056	       const_double_operand.  */
6057	    val = l;
6058	    mode = SImode;
6059	  }
6060	else if (mode == DFmode || mode == DDmode)
6061	  {
6062	    long l[2];
6063
6064	    if (mode == DDmode)
6065	      REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
6066	    else
6067	      REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
6068
6069	    /* See the second (32-bit) and third (64-bit) define_split
6070	       in rs6000.md handling a const_double_operand.  */
6071	    val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
6072	    val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
6073	    mode = DImode;
6074	  }
6075	else if (mode == TFmode || mode == TDmode
6076		 || mode == KFmode || mode == IFmode)
6077	  {
6078	    long l[4];
6079	    int insns;
6080
6081	    if (mode == TDmode)
6082	      REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
6083	    else
6084	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
6085
6086	    val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
6087	    val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
6088	    insns = num_insns_constant_multi (val, DImode);
6089	    val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
6090	    val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
6091	    insns += num_insns_constant_multi (val, DImode);
6092	    return insns;
6093	  }
6094	else
6095	  gcc_unreachable ();
6096      }
6097      break;
6098
6099    default:
6100      gcc_unreachable ();
6101    }
6102
6103  return num_insns_constant_multi (val, mode);
6104}
6105
6106/* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6107   If the mode of OP is MODE_VECTOR_INT, this simply returns the
6108   corresponding element of the vector, but for V4SFmode, the
6109   corresponding "float" is interpreted as an SImode integer.  */
6110
6111HOST_WIDE_INT
6112const_vector_elt_as_int (rtx op, unsigned int elt)
6113{
6114  rtx tmp;
6115
6116  /* We can't handle V2DImode and V2DFmode vector constants here yet.  */
6117  gcc_assert (GET_MODE (op) != V2DImode
6118	      && GET_MODE (op) != V2DFmode);
6119
6120  tmp = CONST_VECTOR_ELT (op, elt);
6121  if (GET_MODE (op) == V4SFmode)
6122    tmp = gen_lowpart (SImode, tmp);
6123  return INTVAL (tmp);
6124}
6125
6126/* Return true if OP can be synthesized with a particular vspltisb, vspltish
6127   or vspltisw instruction.  OP is a CONST_VECTOR.  Which instruction is used
6128   depends on STEP and COPIES, one of which will be 1.  If COPIES > 1,
6129   all items are set to the same value and contain COPIES replicas of the
6130   vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6131   operand and the others are set to the value of the operand's msb.  */
6132
6133static bool
6134vspltis_constant (rtx op, unsigned step, unsigned copies)
6135{
6136  machine_mode mode = GET_MODE (op);
6137  machine_mode inner = GET_MODE_INNER (mode);
6138
6139  unsigned i;
6140  unsigned nunits;
6141  unsigned bitsize;
6142  unsigned mask;
6143
6144  HOST_WIDE_INT val;
6145  HOST_WIDE_INT splat_val;
6146  HOST_WIDE_INT msb_val;
6147
6148  if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6149    return false;
6150
6151  nunits = GET_MODE_NUNITS (mode);
6152  bitsize = GET_MODE_BITSIZE (inner);
6153  mask = GET_MODE_MASK (inner);
6154
6155  val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6156  splat_val = val;
6157  msb_val = val >= 0 ? 0 : -1;
6158
6159  if (val == 0 && step > 1)
6160    {
      /* Special case for loading the most significant bit with step > 1.
	 In that case, match 0s in all elements except every step-1'th one,
	 which must match EASY_VECTOR_MSB.  */
6164      for (i = 1; i < nunits; ++i)
6165	{
6166	  unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6167	  HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6168	  if ((i & (step - 1)) == step - 1)
6169	    {
6170	      if (!EASY_VECTOR_MSB (elt_val, inner))
6171		break;
6172	    }
6173	  else if (elt_val)
6174	    break;
6175	}
6176      if (i == nunits)
6177	return true;
6178    }
6179
6180  /* Construct the value to be splatted, if possible.  If not, return 0.  */
6181  for (i = 2; i <= copies; i *= 2)
6182    {
6183      HOST_WIDE_INT small_val;
6184      bitsize /= 2;
6185      small_val = splat_val >> bitsize;
6186      mask >>= bitsize;
6187      if (splat_val != ((HOST_WIDE_INT)
6188          ((unsigned HOST_WIDE_INT) small_val << bitsize)
6189          | (small_val & mask)))
6190	return false;
6191      splat_val = small_val;
6192      inner = smallest_int_mode_for_size (bitsize);
6193    }
6194
6195  /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw].  */
6196  if (EASY_VECTOR_15 (splat_val))
6197    ;
6198
  /* Also check if we can splat, and then add the result to itself.  Do so
     if the value is positive, or if the splat instruction is using OP's
     mode; for splat_val < 0, the splat and the add should use the same
     mode.  */
6202  else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6203           && (splat_val >= 0 || (step == 1 && copies == 1)))
6204    ;
6205
  /* Also check if we are loading up the most significant bit, which can be
     done by loading up -1 and shifting the value left by -1.  Only do this
     for step 1 here; for larger steps it is done earlier.  */
6209  else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1)
6210    ;
6211
6212  else
6213    return false;
6214
6215  /* Check if VAL is present in every STEP-th element, and the
6216     other elements are filled with its most significant bit.  */
6217  for (i = 1; i < nunits; ++i)
6218    {
6219      HOST_WIDE_INT desired_val;
6220      unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6221      if ((i & (step - 1)) == 0)
6222	desired_val = val;
6223      else
6224	desired_val = msb_val;
6225
6226      if (desired_val != const_vector_elt_as_int (op, elt))
6227	return false;
6228    }
6229
6230  return true;
6231}
6232
/* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6234   instruction, filling in the bottom elements with 0 or -1.
6235
6236   Return 0 if the constant cannot be generated with VSLDOI.  Return positive
6237   for the number of zeroes to shift in, or negative for the number of 0xff
6238   bytes to shift in.
6239
6240   OP is a CONST_VECTOR.  */
6241
6242int
6243vspltis_shifted (rtx op)
6244{
6245  machine_mode mode = GET_MODE (op);
6246  machine_mode inner = GET_MODE_INNER (mode);
6247
6248  unsigned i, j;
6249  unsigned nunits;
6250  unsigned mask;
6251
6252  HOST_WIDE_INT val;
6253
6254  if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
    return 0;
6256
6257  /* We need to create pseudo registers to do the shift, so don't recognize
6258     shift vector constants after reload.  Don't match it even before RA
6259     after split1 is done, because there won't be further splitting pass
6260     before RA to do the splitting.  */
6261  if (!can_create_pseudo_p ()
6262      || (cfun->curr_properties & PROP_rtl_split_insns))
    return 0;
6264
6265  nunits = GET_MODE_NUNITS (mode);
6266  mask = GET_MODE_MASK (inner);
6267
6268  val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6269
6270  /* Check if the value can really be the operand of a vspltis[bhw].  */
6271  if (EASY_VECTOR_15 (val))
6272    ;
6273
6274  /* Also check if we are loading up the most significant bit which can be done
6275     by loading up -1 and shifting the value left by -1.  */
6276  else if (EASY_VECTOR_MSB (val, inner))
6277    ;
6278
6279  else
6280    return 0;
6281
  /* Check if VAL is present in every element until we find elements that
     are 0 or all 1 bits.  */
6284  for (i = 1; i < nunits; ++i)
6285    {
6286      unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6287      HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6288
6289      /* If the value isn't the splat value, check for the remaining elements
6290	 being 0/-1.  */
6291      if (val != elt_val)
6292	{
6293	  if (elt_val == 0)
6294	    {
6295	      for (j = i+1; j < nunits; ++j)
6296		{
6297		  unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6298		  if (const_vector_elt_as_int (op, elt2) != 0)
6299		    return 0;
6300		}
6301
6302	      return (nunits - i) * GET_MODE_SIZE (inner);
6303	    }
6304
6305	  else if ((elt_val & mask) == mask)
6306	    {
6307	      for (j = i+1; j < nunits; ++j)
6308		{
6309		  unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6310		  if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6311		    return 0;
6312		}
6313
6314	      return -((nunits - i) * GET_MODE_SIZE (inner));
6315	    }
6316
6317	  else
6318	    return 0;
6319	}
6320    }
6321
6322  /* If all elements are equal, we don't need to do VSLDOI.  */
6323  return 0;
6324}
6325
6326
6327/* Return non-zero (element mode byte size) if OP is of the given MODE
6328   and can be synthesized with a vspltisb, vspltish or vspltisw.  */
6329
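/* For example, a V8HImode vector with all elements equal to 3 is not a
   vspltisw splat (the intervening halfwords would have to be the sign fill
   value 0), but it is a vspltish splat, so this function returns 2.  */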
6330int
6331easy_altivec_constant (rtx op, machine_mode mode)
6332{
6333  unsigned step, copies;
6334
6335  if (mode == VOIDmode)
6336    mode = GET_MODE (op);
6337  else if (mode != GET_MODE (op))
6338    return 0;
6339
6340  /* V2DI/V2DF was added with VSX.  Only allow 0 and all 1's as easy
6341     constants.  */
6342  if (mode == V2DFmode)
6343    return zero_constant (op, mode) ? 8 : 0;
6344
6345  else if (mode == V2DImode)
6346    {
6347      if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6348	  || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6349	return 0;
6350
6351      if (zero_constant (op, mode))
6352	return 8;
6353
6354      if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6355	  && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6356	return 8;
6357
6358      return 0;
6359    }
6360
6361  /* V1TImode is a special container for TImode.  Ignore for now.  */
6362  else if (mode == V1TImode)
6363    return 0;
6364
6365  /* Start with a vspltisw.  */
6366  step = GET_MODE_NUNITS (mode) / 4;
6367  copies = 1;
6368
6369  if (vspltis_constant (op, step, copies))
6370    return 4;
6371
6372  /* Then try with a vspltish.  */
6373  if (step == 1)
6374    copies <<= 1;
6375  else
6376    step >>= 1;
6377
6378  if (vspltis_constant (op, step, copies))
6379    return 2;
6380
6381  /* And finally a vspltisb.  */
6382  if (step == 1)
6383    copies <<= 1;
6384  else
6385    step >>= 1;
6386
6387  if (vspltis_constant (op, step, copies))
6388    return 1;
6389
6390  if (vspltis_shifted (op) != 0)
6391    return GET_MODE_SIZE (GET_MODE_INNER (mode));
6392
6393  return 0;
6394}
6395
6396/* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6397   result is OP.  Abort if it is not possible.  */
6398
6399rtx
6400gen_easy_altivec_constant (rtx op)
6401{
6402  machine_mode mode = GET_MODE (op);
6403  int nunits = GET_MODE_NUNITS (mode);
6404  rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6405  unsigned step = nunits / 4;
6406  unsigned copies = 1;
6407
6408  /* Start with a vspltisw.  */
6409  if (vspltis_constant (op, step, copies))
6410    return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6411
6412  /* Then try with a vspltish.  */
6413  if (step == 1)
6414    copies <<= 1;
6415  else
6416    step >>= 1;
6417
6418  if (vspltis_constant (op, step, copies))
6419    return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6420
6421  /* And finally a vspltisb.  */
6422  if (step == 1)
6423    copies <<= 1;
6424  else
6425    step >>= 1;
6426
6427  if (vspltis_constant (op, step, copies))
6428    return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6429
6430  gcc_unreachable ();
6431}
6432
/* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
   instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).

   Return the number of instructions needed (1 or 2) in the location pointed
   to by NUM_INSNS_PTR.
6438
6439   Return the constant that is being split via CONSTANT_PTR.  */
6440
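/* For example, on a power9 target a V16QImode splat of 42 needs a single
   XXSPLTIB, while a V4SImode splat of 42 needs an XXSPLTIB followed by a
   VEXTSB2W sign extension; *NUM_INSNS_PTR is set to 1 or 2 accordingly, and
   *CONSTANT_PTR is set to 42 in both cases.  */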
6441bool
6442xxspltib_constant_p (rtx op,
6443		     machine_mode mode,
6444		     int *num_insns_ptr,
6445		     int *constant_ptr)
6446{
6447  size_t nunits = GET_MODE_NUNITS (mode);
6448  size_t i;
6449  HOST_WIDE_INT value;
6450  rtx element;
6451
  /* Set the returned values to out-of-range values.  */
6453  *num_insns_ptr = -1;
6454  *constant_ptr = 256;
6455
6456  if (!TARGET_P9_VECTOR)
6457    return false;
6458
6459  if (mode == VOIDmode)
6460    mode = GET_MODE (op);
6461
6462  else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6463    return false;
6464
6465  /* Handle (vec_duplicate <constant>).  */
6466  if (GET_CODE (op) == VEC_DUPLICATE)
6467    {
6468      if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6469	  && mode != V2DImode)
6470	return false;
6471
6472      element = XEXP (op, 0);
6473      if (!CONST_INT_P (element))
6474	return false;
6475
6476      value = INTVAL (element);
6477      if (!IN_RANGE (value, -128, 127))
6478	return false;
6479    }
6480
6481  /* Handle (const_vector [...]).  */
6482  else if (GET_CODE (op) == CONST_VECTOR)
6483    {
6484      if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6485	  && mode != V2DImode)
6486	return false;
6487
6488      element = CONST_VECTOR_ELT (op, 0);
6489      if (!CONST_INT_P (element))
6490	return false;
6491
6492      value = INTVAL (element);
6493      if (!IN_RANGE (value, -128, 127))
6494	return false;
6495
6496      for (i = 1; i < nunits; i++)
6497	{
6498	  element = CONST_VECTOR_ELT (op, i);
6499	  if (!CONST_INT_P (element))
6500	    return false;
6501
6502	  if (value != INTVAL (element))
6503	    return false;
6504	}
6505    }
6506
6507  /* Handle integer constants being loaded into the upper part of the VSX
6508     register as a scalar.  If the value isn't 0/-1, only allow it if the mode
     can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB.  */
6510  else if (CONST_INT_P (op))
6511    {
6512      if (!SCALAR_INT_MODE_P (mode))
6513	return false;
6514
6515      value = INTVAL (op);
6516      if (!IN_RANGE (value, -128, 127))
6517	return false;
6518
6519      if (!IN_RANGE (value, -1, 0))
6520	{
6521	  if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6522	    return false;
6523
6524	  if (EASY_VECTOR_15 (value))
6525	    return false;
6526	}
6527    }
6528
6529  else
6530    return false;
6531
6532  /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6533     sign extend.  Special case 0/-1 to allow getting any VSX register instead
6534     of an Altivec register.  */
6535  if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6536      && EASY_VECTOR_15 (value))
6537    return false;
6538
6539  /* Return # of instructions and the constant byte for XXSPLTIB.  */
6540  if (mode == V16QImode)
6541    *num_insns_ptr = 1;
6542
6543  else if (IN_RANGE (value, -1, 0))
6544    *num_insns_ptr = 1;
6545
6546  /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6547     single XXSPLTIW or XXSPLTIDP instruction.  */
6548  else if (vsx_prefixed_constant (op, mode))
6549    return false;
6550
  /* Return XXSPLTIB followed by a sign extend operation to convert the
     constant to V8HImode or V4SImode.  */
6553  else
6554    *num_insns_ptr = 2;
6555
6556  *constant_ptr = (int) value;
6557  return true;
6558}
6559
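/* Output the assembly code to move the vector constant in operand 1 into
   the vector register that is operand 0, returning the instruction template
   to use, or "#" if the move must be split into multiple instructions.  */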
6560const char *
6561output_vec_const_move (rtx *operands)
6562{
6563  int shift;
6564  machine_mode mode;
6565  rtx dest, vec;
6566
6567  dest = operands[0];
6568  vec = operands[1];
6569  mode = GET_MODE (dest);
6570
6571  if (TARGET_VSX)
6572    {
6573      bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6574      int xxspltib_value = 256;
6575      int num_insns = -1;
6576
6577      if (zero_constant (vec, mode))
6578	{
6579	  if (TARGET_P9_VECTOR)
6580	    return "xxspltib %x0,0";
6581
6582	  else if (dest_vmx_p)
6583	    return "vspltisw %0,0";
6584
6585	  else
6586	    return "xxlxor %x0,%x0,%x0";
6587	}
6588
6589      if (all_ones_constant (vec, mode))
6590	{
6591	  if (TARGET_P9_VECTOR)
6592	    return "xxspltib %x0,255";
6593
6594	  else if (dest_vmx_p)
6595	    return "vspltisw %0,-1";
6596
6597	  else if (TARGET_P8_VECTOR)
6598	    return "xxlorc %x0,%x0,%x0";
6599
6600	  else
6601	    gcc_unreachable ();
6602	}
6603
6604      vec_const_128bit_type vsx_const;
6605      if (TARGET_POWER10 && vec_const_128bit_to_bytes (vec, mode, &vsx_const))
6606	{
6607	  unsigned imm = constant_generates_lxvkq (&vsx_const);
6608	  if (imm)
6609	    {
6610	      operands[2] = GEN_INT (imm);
6611	      return "lxvkq %x0,%2";
6612	    }
6613
6614	  imm = constant_generates_xxspltiw (&vsx_const);
6615	  if (imm)
6616	    {
6617	      operands[2] = GEN_INT (imm);
6618	      return "xxspltiw %x0,%2";
6619	    }
6620
6621	  imm = constant_generates_xxspltidp (&vsx_const);
6622	  if (imm)
6623	    {
6624	      operands[2] = GEN_INT (imm);
6625	      return "xxspltidp %x0,%2";
6626	    }
6627	}
6628
6629      if (TARGET_P9_VECTOR
6630	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6631	{
6632	  if (num_insns == 1)
6633	    {
6634	      operands[2] = GEN_INT (xxspltib_value & 0xff);
6635	      return "xxspltib %x0,%2";
6636	    }
6637
6638	  return "#";
6639	}
6640    }
6641
6642  if (TARGET_ALTIVEC)
6643    {
6644      rtx splat_vec;
6645
6646      gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6647      if (zero_constant (vec, mode))
6648	return "vspltisw %0,0";
6649
6650      if (all_ones_constant (vec, mode))
6651	return "vspltisw %0,-1";
6652
6653      /* Do we need to construct a value using VSLDOI?  */
6654      shift = vspltis_shifted (vec);
6655      if (shift != 0)
6656	return "#";
6657
6658      splat_vec = gen_easy_altivec_constant (vec);
6659      gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6660      operands[1] = XEXP (splat_vec, 0);
6661      if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6662	return "#";
6663
6664      switch (GET_MODE (splat_vec))
6665	{
6666	case E_V4SImode:
6667	  return "vspltisw %0,%1";
6668
6669	case E_V8HImode:
6670	  return "vspltish %0,%1";
6671
6672	case E_V16QImode:
6673	  return "vspltisb %0,%1";
6674
6675	default:
6676	  gcc_unreachable ();
6677	}
6678    }
6679
6680  gcc_unreachable ();
6681}
6682
6683/* Initialize vector TARGET to VALS.  */
6684
6685void
6686rs6000_expand_vector_init (rtx target, rtx vals)
6687{
6688  machine_mode mode = GET_MODE (target);
6689  machine_mode inner_mode = GET_MODE_INNER (mode);
6690  unsigned int n_elts = GET_MODE_NUNITS (mode);
6691  int n_var = 0, one_var = -1;
6692  bool all_same = true, all_const_zero = true;
6693  rtx x, mem;
6694  unsigned int i;
6695
6696  for (i = 0; i < n_elts; ++i)
6697    {
6698      x = XVECEXP (vals, 0, i);
6699      if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6700	++n_var, one_var = i;
6701      else if (x != CONST0_RTX (inner_mode))
6702	all_const_zero = false;
6703
6704      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6705	all_same = false;
6706    }
6707
6708  if (n_var == 0)
6709    {
6710      rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6711      bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6712      if ((int_vector_p || TARGET_VSX) && all_const_zero)
6713	{
6714	  /* Zero register.  */
6715	  emit_move_insn (target, CONST0_RTX (mode));
6716	  return;
6717	}
6718      else if (int_vector_p && easy_vector_constant (const_vec, mode))
6719	{
6720	  /* Splat immediate.  */
6721	  emit_insn (gen_rtx_SET (target, const_vec));
6722	  return;
6723	}
6724      else
6725	{
6726	  /* Load from constant pool.  */
6727	  emit_move_insn (target, const_vec);
6728	  return;
6729	}
6730    }
6731
6732  /* Double word values on VSX can use xxpermdi or lxvdsx.  */
6733  if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6734    {
6735      rtx op[2];
6736      size_t i;
6737      size_t num_elements = all_same ? 1 : 2;
6738      for (i = 0; i < num_elements; i++)
6739	{
6740	  op[i] = XVECEXP (vals, 0, i);
6741	  /* Just in case there is a SUBREG with a smaller mode, do a
6742	     conversion.  */
6743	  if (GET_MODE (op[i]) != inner_mode)
6744	    {
6745	      rtx tmp = gen_reg_rtx (inner_mode);
6746	      convert_move (tmp, op[i], 0);
6747	      op[i] = tmp;
6748	    }
6749	  /* Allow load with splat double word.  */
6750	  else if (MEM_P (op[i]))
6751	    {
6752	      if (!all_same)
6753		op[i] = force_reg (inner_mode, op[i]);
6754	    }
6755	  else if (!REG_P (op[i]))
6756	    op[i] = force_reg (inner_mode, op[i]);
6757	}
6758
6759      if (all_same)
6760	{
6761	  if (mode == V2DFmode)
6762	    emit_insn (gen_vsx_splat_v2df (target, op[0]));
6763	  else
6764	    emit_insn (gen_vsx_splat_v2di (target, op[0]));
6765	}
6766      else
6767	{
6768	  if (mode == V2DFmode)
6769	    emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6770	  else
6771	    emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6772	}
6773      return;
6774    }
6775
6776  /* Special case initializing vector int if we are on 64-bit systems with
6777     direct move or we have the ISA 3.0 instructions.  */
6778  if (mode == V4SImode  && VECTOR_MEM_VSX_P (V4SImode)
6779      && TARGET_DIRECT_MOVE_64BIT)
6780    {
6781      if (all_same)
6782	{
6783	  rtx element0 = XVECEXP (vals, 0, 0);
6784	  if (MEM_P (element0))
6785	    element0 = rs6000_force_indexed_or_indirect_mem (element0);
6786	  else
6787	    element0 = force_reg (SImode, element0);
6788
6789	  if (TARGET_P9_VECTOR)
6790	    emit_insn (gen_vsx_splat_v4si (target, element0));
6791	  else
6792	    {
6793	      rtx tmp = gen_reg_rtx (DImode);
6794	      emit_insn (gen_zero_extendsidi2 (tmp, element0));
6795	      emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6796	    }
6797	  return;
6798	}
6799      else
6800	{
6801	  rtx elements[4];
6802	  size_t i;
6803
6804	  for (i = 0; i < 4; i++)
6805	    elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6806
6807	  emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6808					elements[2], elements[3]));
6809	  return;
6810	}
6811    }
6812
  /* With single precision floating point on VSX, note that it is internally
     represented as a double.  Either make two V2DF vectors and convert those
     vectors to single precision, or do one conversion and splat the result
     to the other elements.  */
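  /* For example, without power8 64-bit direct moves, four distinct SFmode
     values are concatenated pairwise into two V2DF vectors, converted to
     single precision with XVCVDPSP, and interleaved with an extract-even
     permute.  */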
6817  if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6818    {
6819      if (all_same)
6820	{
6821	  rtx element0 = XVECEXP (vals, 0, 0);
6822
6823	  if (TARGET_P9_VECTOR)
6824	    {
6825	      if (MEM_P (element0))
6826		element0 = rs6000_force_indexed_or_indirect_mem (element0);
6827
6828	      emit_insn (gen_vsx_splat_v4sf (target, element0));
6829	    }
6830
6831	  else
6832	    {
6833	      rtx freg = gen_reg_rtx (V4SFmode);
6834	      rtx sreg = force_reg (SFmode, element0);
6835	      rtx cvt  = (TARGET_XSCVDPSPN
6836			  ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6837			  : gen_vsx_xscvdpsp_scalar (freg, sreg));
6838
6839	      emit_insn (cvt);
6840	      emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6841						      const0_rtx));
6842	    }
6843	}
6844      else
6845	{
6846	  if (TARGET_P8_VECTOR && TARGET_POWERPC64)
6847	    {
6848	      rtx tmp_sf[4];
6849	      rtx tmp_si[4];
6850	      rtx tmp_di[4];
6851	      rtx mrg_di[4];
6852	      for (i = 0; i < 4; i++)
6853		{
6854		  tmp_si[i] = gen_reg_rtx (SImode);
6855		  tmp_di[i] = gen_reg_rtx (DImode);
6856		  mrg_di[i] = gen_reg_rtx (DImode);
6857		  tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
6858		  emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
6859		  emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
6860		}
6861
6862	      if (!BYTES_BIG_ENDIAN)
6863		{
6864		  std::swap (tmp_di[0], tmp_di[1]);
6865		  std::swap (tmp_di[2], tmp_di[3]);
6866		}
6867
6868	      emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
6869	      emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
6870	      emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
6871	      emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));
6872
6873	      rtx tmp_v2di = gen_reg_rtx (V2DImode);
6874	      emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
6875	      emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
6876	    }
6877	  else
6878	    {
6879	      rtx dbl_even = gen_reg_rtx (V2DFmode);
6880	      rtx dbl_odd  = gen_reg_rtx (V2DFmode);
6881	      rtx flt_even = gen_reg_rtx (V4SFmode);
6882	      rtx flt_odd  = gen_reg_rtx (V4SFmode);
6883	      rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6884	      rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6885	      rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6886	      rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6887
6888	      emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6889	      emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6890	      emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6891	      emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6892	      rs6000_expand_extract_even (target, flt_even, flt_odd);
6893	    }
6894	}
6895      return;
6896    }
6897
6898  /* Special case initializing vector short/char that are splats if we are on
6899     64-bit systems with direct move.  */
6900  if (all_same && TARGET_DIRECT_MOVE_64BIT
6901      && (mode == V16QImode || mode == V8HImode))
6902    {
6903      rtx op0 = XVECEXP (vals, 0, 0);
6904      rtx di_tmp = gen_reg_rtx (DImode);
6905
6906      if (!REG_P (op0))
6907	op0 = force_reg (GET_MODE_INNER (mode), op0);
6908
6909      if (mode == V16QImode)
6910	{
6911	  emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6912	  emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6913	  return;
6914	}
6915
6916      if (mode == V8HImode)
6917	{
6918	  emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6919	  emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6920	  return;
6921	}
6922    }
6923
6924  /* Store value to stack temp.  Load vector element.  Splat.  However, splat
6925     of 64-bit items is not supported on Altivec.  */
6926  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6927    {
6928      mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6929      emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6930		      XVECEXP (vals, 0, 0));
6931      x = gen_rtx_UNSPEC (VOIDmode,
6932			  gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6933      emit_insn (gen_rtx_PARALLEL (VOIDmode,
6934				   gen_rtvec (2,
6935					      gen_rtx_SET (target, mem),
6936					      x)));
6937      x = gen_rtx_VEC_SELECT (inner_mode, target,
6938			      gen_rtx_PARALLEL (VOIDmode,
6939						gen_rtvec (1, const0_rtx)));
6940      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6941      return;
6942    }
6943
6944  /* One field is non-constant.  Load constant then overwrite
6945     varying field.  */
6946  if (n_var == 1)
6947    {
6948      rtx copy = copy_rtx (vals);
6949
6950      /* Load constant part of vector, substitute neighboring value for
6951	 varying element.  */
6952      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6953      rs6000_expand_vector_init (target, copy);
6954
6955      /* Insert variable.  */
6956      rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
6957				GEN_INT (one_var));
6958      return;
6959    }
6960
6961  if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
6962    {
6963      rtx op[16];
6964      /* Force the values into word_mode registers.  */
6965      for (i = 0; i < n_elts; i++)
6966	{
6967	  rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i));
6968	  machine_mode tmode = TARGET_POWERPC64 ? DImode : SImode;
6969	  op[i] = simplify_gen_subreg (tmode, tmp, inner_mode, 0);
6970	}
6971
      /* As an example for the construction below, take unsigned char on a
	 64-bit big endian system; the input values are: A, B, C, D, ..., O,
	 P.  */
6974
6975      if (TARGET_DIRECT_MOVE_128)
6976	{
6977	  /* Move to VSX register with vec_concat, each has 2 values.
6978	     eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
6979		 vr1[1] = { xxxxxxxC, xxxxxxxD };
6980		 ...
6981		 vr1[7] = { xxxxxxxO, xxxxxxxP };  */
6982	  rtx vr1[8];
6983	  for (i = 0; i < n_elts / 2; i++)
6984	    {
6985	      vr1[i] = gen_reg_rtx (V2DImode);
6986	      emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
6987					      op[i * 2 + 1]));
6988	    }
6989
6990	  /* Pack vectors with 2 values into vectors with 4 values.
6991	     eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
6992		 vr2[1] = { xxxExxxF, xxxGxxxH };
		 vr2[2] = { xxxIxxxJ, xxxKxxxL };
6994		 vr2[3] = { xxxMxxxN, xxxOxxxP };  */
6995	  rtx vr2[4];
6996	  for (i = 0; i < n_elts / 4; i++)
6997	    {
6998	      vr2[i] = gen_reg_rtx (V4SImode);
6999	      emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
7000					      vr1[i * 2 + 1]));
7001	    }
7002
7003	  /* Pack vectors with 4 values into vectors with 8 values.
7004	     eg: vr3[0] = { xAxBxCxD, xExFxGxH };
7005		 vr3[1] = { xIxJxKxL, xMxNxOxP };  */
7006	  rtx vr3[2];
7007	  for (i = 0; i < n_elts / 8; i++)
7008	    {
7009	      vr3[i] = gen_reg_rtx (V8HImode);
7010	      emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
7011					      vr2[i * 2 + 1]));
7012	    }
7013
	  /* If the mode is V8HImode, we are done; emit the result.  */
7015	  if (mode == V8HImode)
7016	    {
7017	      emit_insn (gen_rtx_SET (target, vr3[0]));
7018	      return;
7019	    }
7020
7021	  /* Pack vectors with 8 values into 16 values.  */
7022	  rtx res = gen_reg_rtx (V16QImode);
7023	  emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
7024	  emit_insn (gen_rtx_SET (target, res));
7025	}
7026      else
7027	{
7028	  rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
7029	  rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
7030	  rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
7031	  rtx perm_idx;
7032
7033	  /* Set up some common gen routines and values.  */
7034	  if (BYTES_BIG_ENDIAN)
7035	    {
7036	      if (mode == V16QImode)
7037		{
7038		  merge_v16qi = gen_altivec_vmrghb;
7039		  merge_v8hi = gen_altivec_vmrglh;
7040		}
7041	      else
7042		merge_v8hi = gen_altivec_vmrghh;
7043
7044	      merge_v4si = gen_altivec_vmrglw;
7045	      perm_idx = GEN_INT (3);
7046	    }
7047	  else
7048	    {
7049	      if (mode == V16QImode)
7050		{
7051		  merge_v16qi = gen_altivec_vmrglb;
7052		  merge_v8hi = gen_altivec_vmrghh;
7053		}
7054	      else
7055		merge_v8hi = gen_altivec_vmrglh;
7056
7057	      merge_v4si = gen_altivec_vmrghw;
7058	      perm_idx = GEN_INT (0);
7059	    }
7060
7061	  /* Move to VSX register with direct move.
7062	     eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7063		 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7064		 ...
7065		 vr_qi[15] = { xxxxxxxP, xxxxxxxx };  */
7066	  rtx vr_qi[16];
7067	  for (i = 0; i < n_elts; i++)
7068	    {
7069	      vr_qi[i] = gen_reg_rtx (V16QImode);
7070	      if (TARGET_POWERPC64)
7071		emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
7072	      else
7073		emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
7074	    }
7075
7076	  /* Merge/move to vector short.
7077	     eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7078		 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7079		 ...
7080		 vr_hi[7] = { xxxxxxxx, xxxxxxOP };  */
7081	  rtx vr_hi[8];
7082	  for (i = 0; i < 8; i++)
7083	    {
7084	      rtx tmp = vr_qi[i];
7085	      if (mode == V16QImode)
7086		{
7087		  tmp = gen_reg_rtx (V16QImode);
7088		  emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
7089		}
7090	      vr_hi[i] = gen_reg_rtx (V8HImode);
7091	      emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
7092	    }
7093
7094	  /* Merge vector short to vector int.
7095	     eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7096		 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7097		 ...
7098		 vr_si[3] = { xxxxxxxx, xxxxMNOP };  */
7099	  rtx vr_si[4];
7100	  for (i = 0; i < 4; i++)
7101	    {
7102	      rtx tmp = gen_reg_rtx (V8HImode);
7103	      emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
7104	      vr_si[i] = gen_reg_rtx (V4SImode);
7105	      emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
7106	    }
7107
7108	  /* Merge vector int to vector long.
7109	     eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7110		 vr_di[1] = { xxxxxxxx, IJKLMNOP };  */
7111	  rtx vr_di[2];
7112	  for (i = 0; i < 2; i++)
7113	    {
7114	      rtx tmp = gen_reg_rtx (V4SImode);
7115	      emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
7116	      vr_di[i] = gen_reg_rtx (V2DImode);
7117	      emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
7118	    }
7119
7120	  rtx res = gen_reg_rtx (V2DImode);
7121	  emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx));
7122	  emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
7123	}
7124
7125      return;
7126    }
7127
7128  /* Construct the vector in memory one field at a time
7129     and load the whole vector.  */
7130  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7131  for (i = 0; i < n_elts; i++)
7132    emit_move_insn (adjust_address_nv (mem, inner_mode,
7133				    i * GET_MODE_SIZE (inner_mode)),
7134		    XVECEXP (vals, 0, i));
7135  emit_move_insn (target, mem);
7136}
7137
/* Insert VAL into element IDX of TARGET, where VAL's size equals the vector
   element size and IDX is a variable counting in vector-element units.  This
   version is for ISA 3.0 (power9) and above.  */
7140
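/* A sketch of the sequence generated for a V4SImode insert at variable IDX,
   assuming a little endian power9:

	sldi  idx,idx,2		# convert element index to byte offset
	lvsr  vp,0,idx		# permutation rotating element IDX to slot 0
	vperm vt,vt,vt,vp
	 ...			# insert VAL into element 0 of vt
	lvsl  vp2,0,idx		# the inverse permutation
	vperm vt,vt,vt,vp2  */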
7141static void
7142rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
7143{
7144  machine_mode mode = GET_MODE (target);
7145
7146  gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7147
7148  machine_mode inner_mode = GET_MODE (val);
7149
7150  int width = GET_MODE_SIZE (inner_mode);
7151
7152  gcc_assert (width >= 1 && width <= 8);
7153
7154  int shift = exact_log2 (width);
7155
7156  machine_mode idx_mode = GET_MODE (idx);
7157
7158  machine_mode shift_mode;
7159  /* Gen function pointers for shifting left and generation of permutation
7160     control vectors.  */
7161  rtx (*gen_ashl) (rtx, rtx, rtx);
7162  rtx (*gen_pcvr1) (rtx, rtx);
7163  rtx (*gen_pcvr2) (rtx, rtx);
7164
7165  if (TARGET_POWERPC64)
7166    {
7167      shift_mode = DImode;
7168      gen_ashl = gen_ashldi3;
7169      gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_di
7170				   : gen_altivec_lvsr_reg_di;
7171      gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_di
7172				   : gen_altivec_lvsl_reg_di;
7173    }
7174  else
7175    {
7176      shift_mode = SImode;
7177      gen_ashl = gen_ashlsi3;
7178      gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_si
7179				   : gen_altivec_lvsr_reg_si;
7180      gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_si
7181				   : gen_altivec_lvsl_reg_si;
7182    }
7183  /* Generate the IDX for permute shift, width is the vector element size.
7184     idx = idx * width.  */
7185  rtx tmp = gen_reg_rtx (shift_mode);
7186  idx = convert_modes (shift_mode, idx_mode, idx, 1);
7187
7188  emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7189
  /* Generate one permutation control vector used for rotating the element
     at the to-insert position to element zero in the target vector.  lvsl is
     used for big endianness, while lvsr is used for little endianness:
     lvs[lr]    v1,0,idx.  */
7194  rtx pcvr1 = gen_reg_rtx (V16QImode);
7195  emit_insn (gen_pcvr1 (pcvr1, tmp));
7196
7197  rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0);
7198  rtx perm1 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
7199					   pcvr1);
7200  emit_insn (perm1);
7201
7202  /* Insert val into element 0 of target vector.  */
7203  rs6000_expand_vector_set (target, val, const0_rtx);
7204
7205  /* Rotate back with a reversed permutation control vector generated from:
7206     lvs[rl]   v2,0,idx.  */
7207  rtx pcvr2 = gen_reg_rtx (V16QImode);
7208  emit_insn (gen_pcvr2 (pcvr2, tmp));
7209
7210  rtx perm2 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
7211					   pcvr2);
7212  emit_insn (perm2);
7213}
7214
/* Insert VAL into element IDX of TARGET, where VAL's size equals the vector
   element size and IDX is a variable counting in vector-element units.  This
   version is for power7 and power8.  */
7217
7218static void
7219rs6000_expand_vector_set_var_p7 (rtx target, rtx val, rtx idx)
7220{
7221  machine_mode mode = GET_MODE (target);
7222
7223  gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7224
7225  machine_mode inner_mode = GET_MODE (val);
7226  HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
7227
7228  int width = GET_MODE_SIZE (inner_mode);
7229  gcc_assert (width >= 1 && width <= 4);
7230
7231  int shift = exact_log2 (width);
7232
7233  machine_mode idx_mode = GET_MODE (idx);
7234
7235  machine_mode shift_mode;
7236  rtx (*gen_ashl)(rtx, rtx, rtx);
7237  rtx (*gen_add)(rtx, rtx, rtx);
7238  rtx (*gen_sub)(rtx, rtx, rtx);
7239  rtx (*gen_lvsl)(rtx, rtx);
7240
7241  if (TARGET_POWERPC64)
7242    {
7243      shift_mode = DImode;
7244      gen_ashl = gen_ashldi3;
7245      gen_add = gen_adddi3;
7246      gen_sub = gen_subdi3;
7247      gen_lvsl = gen_altivec_lvsl_reg_di;
7248    }
7249  else
7250    {
7251      shift_mode = SImode;
7252      gen_ashl = gen_ashlsi3;
7253      gen_add = gen_addsi3;
7254      gen_sub = gen_subsi3;
7255      gen_lvsl = gen_altivec_lvsl_reg_si;
7256    }
7257
7258  /*  idx = idx * width.  */
7259  rtx tmp = gen_reg_rtx (shift_mode);
7260  idx = convert_modes (shift_mode, idx_mode, idx, 1);
7261
7262  emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7263
7264  /*  For LE:  idx = idx + 8.  */
7265  if (!BYTES_BIG_ENDIAN)
7266    emit_insn (gen_add (tmp, tmp, GEN_INT (8)));
7267  else
7268    emit_insn (gen_sub (tmp, GEN_INT (24 - width), tmp));
7269
7270  /*  lxv vs33, mask.
7271      DImode: 0xffffffffffffffff0000000000000000
7272      SImode: 0x00000000ffffffff0000000000000000
7273      HImode: 0x000000000000ffff0000000000000000.
7274      QImode: 0x00000000000000ff0000000000000000.  */
7275  rtx mask = gen_reg_rtx (V16QImode);
7276  rtx mask_v2di = gen_reg_rtx (V2DImode);
7277  rtvec v = rtvec_alloc (2);
7278  if (!BYTES_BIG_ENDIAN)
7279    {
7280      RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
7281      RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
7282    }
7283  else
7284    {
7285      RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
7286      RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
7287    }
7288  emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
7289  rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
7290  emit_insn (gen_rtx_SET (mask, sub_mask));
7291
7292  /*  mtvsrd[wz] f0,tmp_val.  */
7293  rtx tmp_val = gen_reg_rtx (SImode);
  if (inner_mode == E_SFmode)
    {
      if (TARGET_DIRECT_MOVE_64BIT)
	emit_insn (gen_movsi_from_sf (tmp_val, val));
      else
	{
	  rtx stack = rs6000_allocate_stack_temp (SFmode, false, true);
	  emit_insn (gen_movsf_hardfloat (stack, val));
	  rtx stack2 = copy_rtx (stack);
	  PUT_MODE (stack2, SImode);
	  emit_move_insn (tmp_val, stack2);
	}
    }
  else
    tmp_val = force_reg (SImode, val);
7307
7308  rtx val_v16qi = gen_reg_rtx (V16QImode);
7309  rtx val_v2di = gen_reg_rtx (V2DImode);
7310  rtvec vec_val = rtvec_alloc (2);
  if (!BYTES_BIG_ENDIAN)
    {
      RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
      RTVEC_ELT (vec_val, 1) = tmp_val;
    }
  else
    {
      RTVEC_ELT (vec_val, 0) = tmp_val;
      RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
    }
7321  emit_insn (
7322    gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val)));
7323  rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
7324  emit_insn (gen_rtx_SET (val_v16qi, sub_val));
7325
7326  /*  lvsl    13,0,idx.  */
7327  rtx pcv = gen_reg_rtx (V16QImode);
7328  emit_insn (gen_lvsl (pcv, tmp));
7329
7330  /*  vperm 1,1,1,13.  */
7331  /*  vperm 0,0,0,13.  */
7332  rtx val_perm = gen_reg_rtx (V16QImode);
7333  rtx mask_perm = gen_reg_rtx (V16QImode);
7334  emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
7335  emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
7336
7337  rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
7338
7339  /*  xxsel 34,34,32,33.  */
7340  emit_insn (
7341    gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm));
7342}
7343
7344/* Set field ELT_RTX of TARGET to VAL.  */
7345
7346void
7347rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
7348{
7349  machine_mode mode = GET_MODE (target);
7350  machine_mode inner_mode = GET_MODE_INNER (mode);
7351  rtx reg = gen_reg_rtx (mode);
7352  rtx mask, mem, x;
7353  int width = GET_MODE_SIZE (inner_mode);
7354  int i;
7355
7356  val = force_reg (GET_MODE (val), val);
7357
7358  if (VECTOR_MEM_VSX_P (mode))
7359    {
7360      if (!CONST_INT_P (elt_rtx))
7361	{
	  /* For V2DI/V2DF, we can leverage the P9 version to generate
	     xxpermdi when elt_rtx is variable.  */
7364	  if ((TARGET_P9_VECTOR && TARGET_POWERPC64) || width == 8)
7365	    {
7366	      rs6000_expand_vector_set_var_p9 (target, val, elt_rtx);
7367	      return;
7368	    }
7369	  else if (TARGET_VSX)
7370	    {
7371	      rs6000_expand_vector_set_var_p7 (target, val, elt_rtx);
7372	      return;
7373	    }
7374	  else
7375	    gcc_assert (CONST_INT_P (elt_rtx));
7376	}
7377
7378      rtx insn = NULL_RTX;
7379
7380      if (mode == V2DFmode)
7381	insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7382
7383      else if (mode == V2DImode)
7384	insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7385
7386      else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7387	{
7388	  if (mode == V4SImode)
7389	    insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7390	  else if (mode == V8HImode)
7391	    insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7392	  else if (mode == V16QImode)
7393	    insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7394	  else if (mode == V4SFmode)
7395	    insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7396	}
7397
7398      if (insn)
7399	{
7400	  emit_insn (insn);
7401	  return;
7402	}
7403    }
7404
7405  /* Simplify setting single element vectors like V1TImode.  */
7406  if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
7407      && INTVAL (elt_rtx) == 0)
7408    {
7409      emit_move_insn (target, gen_lowpart (mode, val));
7410      return;
7411    }
7412
7413  /* Load single variable value.  */
7414  mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7415  emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7416  x = gen_rtx_UNSPEC (VOIDmode,
7417		      gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7418  emit_insn (gen_rtx_PARALLEL (VOIDmode,
7419			       gen_rtvec (2,
7420					  gen_rtx_SET (reg, mem),
7421					  x)));
7422
7423  /* Linear sequence.  */
7424  mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7425  for (i = 0; i < 16; ++i)
7426    XVECEXP (mask, 0, i) = GEN_INT (i);
7427
7428  /* Set permute mask to insert element into target.  */
7429  for (i = 0; i < width; ++i)
7430    XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
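  /* For example, inserting a word into element 1 of a V4SImode vector on a
     big endian system produces the selector { 0, 1, 2, 3, 16, 17, 18, 19,
     8, 9, 10, 11, 12, 13, 14, 15 }, where byte values 16-31 select bytes of
     the vector register holding the new value.  */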
7431  x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7432
7433  if (BYTES_BIG_ENDIAN)
7434    x = gen_rtx_UNSPEC (mode,
7435			gen_rtvec (3, target, reg,
7436				   force_reg (V16QImode, x)),
7437			UNSPEC_VPERM);
7438  else
7439    {
7440      if (TARGET_P9_VECTOR)
7441	x = gen_rtx_UNSPEC (mode,
7442			    gen_rtvec (3, reg, target,
7443				       force_reg (V16QImode, x)),
7444			    UNSPEC_VPERMR);
7445      else
7446	{
7447	  /* Invert selector.  We prefer to generate VNAND on P8 so
7448	     that future fusion opportunities can kick in, but must
7449	     generate VNOR elsewhere.  */
7450	  rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7451	  rtx iorx = (TARGET_P8_VECTOR
7452		      ? gen_rtx_IOR (V16QImode, notx, notx)
7453		      : gen_rtx_AND (V16QImode, notx, notx));
7454	  rtx tmp = gen_reg_rtx (V16QImode);
7455	  emit_insn (gen_rtx_SET (tmp, iorx));
7456
7457	  /* Permute with operands reversed and adjusted selector.  */
7458	  x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7459			      UNSPEC_VPERM);
7460	}
7461    }
7462
7463  emit_insn (gen_rtx_SET (target, x));
7464}
7465
7466/* Extract field ELT from VEC into TARGET.  */
7467
7468void
7469rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7470{
7471  machine_mode mode = GET_MODE (vec);
7472  machine_mode inner_mode = GET_MODE_INNER (mode);
7473  rtx mem;
7474
7475  if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7476    {
7477      switch (mode)
7478	{
7479	default:
7480	  break;
7481	case E_V1TImode:
7482	  emit_move_insn (target, gen_lowpart (TImode, vec));
7483	  break;
7484	case E_V2DFmode:
7485	  emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7486	  return;
7487	case E_V2DImode:
7488	  emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7489	  return;
7490	case E_V4SFmode:
7491	  emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7492	  return;
7493	case E_V16QImode:
7494	  if (TARGET_DIRECT_MOVE_64BIT)
7495	    {
7496	      emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7497	      return;
7498	    }
7499	  else
7500	    break;
7501	case E_V8HImode:
7502	  if (TARGET_DIRECT_MOVE_64BIT)
7503	    {
7504	      emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7505	      return;
7506	    }
7507	  else
7508	    break;
7509	case E_V4SImode:
7510	  if (TARGET_DIRECT_MOVE_64BIT)
7511	    {
7512	      emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7513	      return;
7514	    }
7515	  break;
7516	}
7517    }
7518  else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7519	   && TARGET_DIRECT_MOVE_64BIT)
7520    {
7521      if (GET_MODE (elt) != DImode)
7522	{
7523	  rtx tmp = gen_reg_rtx (DImode);
7524	  convert_move (tmp, elt, 0);
7525	  elt = tmp;
7526	}
7527      else if (!REG_P (elt))
7528	elt = force_reg (DImode, elt);
7529
7530      switch (mode)
7531	{
7532	case E_V1TImode:
7533	  emit_move_insn (target, gen_lowpart (TImode, vec));
7534	  return;
7535
7536	case E_V2DFmode:
7537	  emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7538	  return;
7539
7540	case E_V2DImode:
7541	  emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7542	  return;
7543
7544	case E_V4SFmode:
7545	  emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7546	  return;
7547
7548	case E_V4SImode:
7549	  emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7550	  return;
7551
7552	case E_V8HImode:
7553	  emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7554	  return;
7555
7556	case E_V16QImode:
7557	  emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7558	  return;
7559
7560	default:
7561	  gcc_unreachable ();
7562	}
7563    }
7564
7565  /* Allocate mode-sized buffer.  */
7566  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7567
7568  emit_move_insn (mem, vec);
7569  if (CONST_INT_P (elt))
7570    {
7571      int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
7572
7573      /* Add offset to field within buffer matching vector element.  */
7574      mem = adjust_address_nv (mem, inner_mode,
7575			       modulo_elt * GET_MODE_SIZE (inner_mode));
7576      emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7577    }
7578  else
7579    {
7580      unsigned int ele_size = GET_MODE_SIZE (inner_mode);
7581      rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7582      rtx new_addr = gen_reg_rtx (Pmode);
7583
7584      elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
7585      if (ele_size > 1)
7586	elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
7587      new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
7588      new_addr = change_address (mem, inner_mode, new_addr);
7589      emit_move_insn (target, new_addr);
7590    }
7591}
7592
7593/* Return the offset within a memory object (MEM) of a vector type to a given
7594   element within the vector (ELEMENT) with an element size (SCALAR_SIZE).  If
7595   the element is constant, we return a constant integer.
7596
7597   Otherwise, we use a base register temporary to calculate the offset after
7598   masking it to fit within the bounds of the vector and scaling it.  The
7599   masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7600   built-in function.  */
7601
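/* For example, for element 3 of a V4SImode vector this returns the constant
   12, while for a variable element it emits an AND with 3 and a left shift
   by 2 into BASE_TMP, and returns BASE_TMP.  */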
7602static rtx
7603get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
7604{
7605  if (CONST_INT_P (element))
7606    return GEN_INT (INTVAL (element) * scalar_size);
7607
7608  /* All insns should use the 'Q' constraint (address is a single register) if
7609     the element number is not a constant.  */
7610  gcc_assert (satisfies_constraint_Q (mem));
7611
7612  /* Mask the element to make sure the element number is between 0 and the
7613     maximum number of elements - 1 so that we don't generate an address
7614     outside the vector.  */
7615  rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
7616  rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
7617  emit_insn (gen_rtx_SET (base_tmp, and_op));
7618
7619  /* Shift the element to get the byte offset from the element number.  */
7620  int shift = exact_log2 (scalar_size);
7621  gcc_assert (shift >= 0);
7622
7623  if (shift > 0)
7624    {
7625      rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
7626      emit_insn (gen_rtx_SET (base_tmp, shift_op));
7627    }
7628
7629  return base_tmp;
7630}
7631
/* Helper function to update a PC-relative address when we are adjusting a
   memory address (ADDR) of a vector to point to a scalar field within the
   vector with a constant offset (ELEMENT_OFFSET).  If the address is not
   valid, we can use the base register temporary (BASE_TMP) to form the
   address.  */
7636
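/* For example, adjusting the address (plus (symbol_ref "x") (const_int 16))
   by an element offset of 8 yields (const (plus (symbol_ref "x")
   (const_int 24))).  */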
7637static rtx
7638adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
7639{
7640  rtx new_addr = NULL;
7641
7642  gcc_assert (CONST_INT_P (element_offset));
7643
7644  if (GET_CODE (addr) == CONST)
7645    addr = XEXP (addr, 0);
7646
7647  if (GET_CODE (addr) == PLUS)
7648    {
7649      rtx op0 = XEXP (addr, 0);
7650      rtx op1 = XEXP (addr, 1);
7651
7652      if (CONST_INT_P (op1))
7653	{
7654	  HOST_WIDE_INT offset
7655	    = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
7656
7657	  if (offset == 0)
7658	    new_addr = op0;
7659
7660	  else
7661	    {
7662	      rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
7663	      new_addr = gen_rtx_CONST (Pmode, plus);
7664	    }
7665	}
7666
7667      else
7668	{
7669	  emit_move_insn (base_tmp, addr);
7670	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7671	}
7672    }
7673
7674  else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
7675    {
7676      rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
7677      new_addr = gen_rtx_CONST (Pmode, plus);
7678    }
7679
7680  else
7681    gcc_unreachable ();
7682
7683  return new_addr;
7684}
7685
7686/* Adjust a memory address (MEM) of a vector type to point to a scalar field
7687   within the vector (ELEMENT) with a mode (SCALAR_MODE).  Use a base register
   temporary (BASE_TMP) to fix up the address.  Return the new memory address
7689   that is valid for reads or writes to a given register (SCALAR_REG).
7690
7691   This function is expected to be called after reload is completed when we are
7692   splitting insns.  The temporary BASE_TMP might be set multiple times with
7693   this code.  */
7694
7695rtx
7696rs6000_adjust_vec_address (rtx scalar_reg,
7697			   rtx mem,
7698			   rtx element,
7699			   rtx base_tmp,
7700			   machine_mode scalar_mode)
7701{
7702  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7703  rtx addr = XEXP (mem, 0);
7704  rtx new_addr;
7705
7706  gcc_assert (!reg_mentioned_p (base_tmp, addr));
7707  gcc_assert (!reg_mentioned_p (base_tmp, element));
7708
7709  /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
7710  gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7711
7712  /* Calculate what we need to add to the address to get the element
7713     address.  */
7714  rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
7715
7716  /* Create the new address pointing to the element within the vector.  If we
7717     are adding 0, we don't have to change the address.  */
7718  if (element_offset == const0_rtx)
7719    new_addr = addr;
7720
7721  /* A simple indirect address can be converted into a reg + offset
7722     address.  */
7723  else if (REG_P (addr) || SUBREG_P (addr))
7724    new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7725
7726  /* For references to local static variables, fold a constant offset into the
7727     address.  */
7728  else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
7729    new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
7730
7731  /* Optimize D-FORM addresses with constant offset with a constant element, to
7732     include the element offset in the address directly.  */
7733  else if (GET_CODE (addr) == PLUS)
7734    {
7735      rtx op0 = XEXP (addr, 0);
7736      rtx op1 = XEXP (addr, 1);
7737
7738      gcc_assert (REG_P (op0) || SUBREG_P (op0));
7739      if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7740	{
7741	  /* op0 should never be r0, because r0+offset is not valid.  But it
7742	     doesn't hurt to make sure it is not r0.  */
7743	  gcc_assert (reg_or_subregno (op0) != 0);
7744
7745	  /* D-FORM address with constant element number.  */
7746	  HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7747	  rtx offset_rtx = GEN_INT (offset);
7748	  new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7749	}
7750      else
7751	{
7752	  /* If we don't have a D-FORM address with a constant element number,
7753	     add the two elements in the current address.  Then add the offset.
7754
7755	     Previously, we tried to add the offset to OP1 and change the
7756	     address to an X-FORM format adding OP0 and BASE_TMP, but it became
7757	     complicated because we had to verify that op1 was not GPR0 and we
7758	     had a constant element offset (due to the way ADDI is defined).
7759	     By doing the add of OP0 and OP1 first, and then adding in the
7760	     offset, it has the benefit that if D-FORM instructions are
7761	     allowed, the offset is part of the memory access to the vector
7762	     element. */
7763	  emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
7764	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7765	}
7766    }
7767
7768  else
7769    {
7770      emit_move_insn (base_tmp, addr);
7771      new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7772    }
7773
  /* If the address isn't valid, move the address into the temporary base
     register.  Some reasons it could not be valid include:

     The address offset overflowed the 16 or 34 bit offset size;
     We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
     We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
     Only X-FORM loads can be done, and the address is D-FORM.  */
7781
7782  enum insn_form iform
7783    = address_to_insn_form (new_addr, scalar_mode,
7784			    reg_to_non_prefixed (scalar_reg, scalar_mode));
7785
7786  if (iform == INSN_FORM_BAD)
7787    {
7788      emit_move_insn (base_tmp, new_addr);
7789      new_addr = base_tmp;
7790    }
7791
7792  return change_address (mem, scalar_mode, new_addr);
7793}
7794
7795/* Split a variable vec_extract operation into the component instructions.  */
7796
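/* A sketch of the register path below: the variable element number is
   scaled to a shift count, splatted into TMP_ALTIVEC, and a VSLO then shifts
   the selected element into the most significant position, from which it is
   moved or converted into DEST.  */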
7797void
7798rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7799			      rtx tmp_altivec)
7800{
7801  machine_mode mode = GET_MODE (src);
7802  machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
7803  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7804  int byte_shift = exact_log2 (scalar_size);
7805
7806  gcc_assert (byte_shift >= 0);
7807
7808  /* If we are given a memory address, optimize to load just the element.  We
7809     don't have to adjust the vector element number on little endian
7810     systems.  */
7811  if (MEM_P (src))
7812    {
7813      emit_move_insn (dest,
7814		      rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
7815						 scalar_mode));
7816      return;
7817    }
7818
7819  else if (REG_P (src) || SUBREG_P (src))
7820    {
7821      int num_elements = GET_MODE_NUNITS (mode);
7822      int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7823      int bit_shift = 7 - exact_log2 (num_elements);
7824      rtx element2;
7825      unsigned int dest_regno = reg_or_subregno (dest);
7826      unsigned int src_regno = reg_or_subregno (src);
7827      unsigned int element_regno = reg_or_subregno (element);
7828
7829      gcc_assert (REG_P (tmp_gpr));
7830
7831      /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7832	 a general purpose register.  */
7833      if (TARGET_P9_VECTOR
7834	  && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7835	  && INT_REGNO_P (dest_regno)
7836	  && ALTIVEC_REGNO_P (src_regno)
7837	  && INT_REGNO_P (element_regno))
7838	{
7839	  rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7840	  rtx element_si = gen_rtx_REG (SImode, element_regno);
7841
7842	  if (mode == V16QImode)
7843	    emit_insn (BYTES_BIG_ENDIAN
7844		       ? gen_vextublx (dest_si, element_si, src)
7845		       : gen_vextubrx (dest_si, element_si, src));
7846
7847	  else if (mode == V8HImode)
7848	    {
7849	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7850	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7851	      emit_insn (BYTES_BIG_ENDIAN
7852			 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7853			 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7854	    }
7855
7857	  else
7858	    {
7859	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7860	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7861	      emit_insn (BYTES_BIG_ENDIAN
7862			 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7863			 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7864	    }
7865
7866	  return;
7867	}
7868
7870      gcc_assert (REG_P (tmp_altivec));
7871
7872      /* For little endian, adjust element ordering.  For V2DI/V2DF, we can use
7873	 an XOR, otherwise we need to subtract.  The shift amount is so VSLO
7874	 will shift the element into the upper position (adding 3 to convert a
7875	 byte shift into a bit shift).  */
7876      if (scalar_size == 8)
7877	{
7878	  if (!BYTES_BIG_ENDIAN)
7879	    {
7880	      emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7881	      element2 = tmp_gpr;
7882	    }
7883	  else
7884	    element2 = element;
7885
7886	  /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7887	     bit.  */
7888	  emit_insn (gen_rtx_SET (tmp_gpr,
7889				  gen_rtx_AND (DImode,
7890					       gen_rtx_ASHIFT (DImode,
7891							       element2,
7892							       GEN_INT (6)),
7893					       GEN_INT (64))));
7894	}
7895      else
7896	{
7897	  if (!BYTES_BIG_ENDIAN)
7898	    {
7899	      rtx num_ele_m1 = GEN_INT (num_elements - 1);
7900
7901	      emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7902	      emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7903	      element2 = tmp_gpr;
7904	    }
7905	  else
7906	    element2 = element;
7907
7908	  emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7909	}
7910
7911      /* Get the value into the lower byte of the Altivec register where VSLO
7912	 expects it.  */
7913      if (TARGET_P9_VECTOR)
7914	emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7915      else if (can_create_pseudo_p ())
7916	emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7917      else
7918	{
7919	  rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7920	  emit_move_insn (tmp_di, tmp_gpr);
7921	  emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7922	}
7923
7924      /* Do the VSLO to get the value into the final location.  */
7925      switch (mode)
7926	{
7927	case E_V2DFmode:
7928	  emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7929	  return;
7930
7931	case E_V2DImode:
7932	  emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7933	  return;
7934
7935	case E_V4SFmode:
7936	  {
7937	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7938	    rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7939	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7940	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7941					  tmp_altivec));
7942
7943	    emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7944	    return;
7945	  }
7946
7947	case E_V4SImode:
7948	case E_V8HImode:
7949	case E_V16QImode:
7950	  {
7951	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7952	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7953	    rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7954	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7955					  tmp_altivec));
7956	    emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7957	    emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7958				    GEN_INT (64 - bits_in_element)));
7959	    return;
7960	  }
7961
7962	default:
7963	  gcc_unreachable ();
7964	}
7965
7966      return;
7967    }
7968  else
7969    gcc_unreachable ();
}
7971
/* Return the alignment of TYPE.  The existing alignment is ALIGN.  HOW
   selects whether the alignment is ABI-mandated, optional, or both
   ABI-mandated and optional alignment.  */
7975
7976unsigned int
7977rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7978{
7979  if (how != align_opt)
7980    {
7981      if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7982	align = 128;
7983    }
7984
7985  if (how != align_abi)
7986    {
7987      if (TREE_CODE (type) == ARRAY_TYPE
7988	  && TYPE_MODE (TREE_TYPE (type)) == QImode)
7989	{
7990	  if (align < BITS_PER_WORD)
7991	    align = BITS_PER_WORD;
7992	}
7993    }
7994
7995  return align;
7996}
7997
7998/* Implement TARGET_SLOW_UNALIGNED_ACCESS.  Altivec vector memory
7999   instructions simply ignore the low bits; VSX memory instructions
8000   are aligned to 4 or 8 bytes.  */
8001
8002static bool
8003rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
8004{
8005  return (STRICT_ALIGNMENT
8006	  || (!TARGET_EFFICIENT_UNALIGNED_VSX
8007	      && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
8008		  || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
8009		      && (int) align < VECTOR_ALIGN (mode)))));
8010}
8011
8012/* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints.  */
8013
8014unsigned int
8015rs6000_special_adjust_field_align (tree type, unsigned int computed)
8016{
8017  if (computed <= 32 || TYPE_PACKED (type))
8018    return computed;
8019
8020  /* Strip initial arrays.  */
8021  while (TREE_CODE (type) == ARRAY_TYPE)
8022    type = TREE_TYPE (type);
8023
8024  /* If RECORD or UNION, recursively find the first field. */
8025  while (AGGREGATE_TYPE_P (type))
8026    {
8027      tree field = TYPE_FIELDS (type);
8028
      /* Skip all non-field decls.  */
8030      while (field != NULL
8031	     && (TREE_CODE (field) != FIELD_DECL
8032		 || DECL_FIELD_ABI_IGNORED (field)))
8033	field = DECL_CHAIN (field);
8034
8035      if (! field)
8036	break;
8037
8038      /* A packed field does not contribute any extra alignment.  */
8039      if (DECL_PACKED (field))
8040	return computed;
8041
8042      type = TREE_TYPE (field);
8043
8044      /* Strip arrays.  */
8045      while (TREE_CODE (type) == ARRAY_TYPE)
8046	type = TREE_TYPE (type);
8047    }
8048
8049  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8050      && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8051    computed = MIN (computed, 32);
8052
8053  return computed;
8054}

/* AIX increases natural record alignment to doubleword if the innermost first
   field is an FP double, while the FP fields remain word aligned.
   Only called if TYPE initially is a RECORD or UNION.  */

unsigned int
rs6000_special_round_type_align (tree type, unsigned int computed,
				 unsigned int specified)
{
  unsigned int align = MAX (computed, specified);

  if (TYPE_PACKED (type) || align >= 64)
    return align;

  /* If RECORD or UNION, recursively find the first field.  */
  do
    {
      tree field = TYPE_FIELDS (type);

      /* Skip all non-field decls.  */
      while (field != NULL
	     && (TREE_CODE (field) != FIELD_DECL
		 || DECL_FIELD_ABI_IGNORED (field)))
	field = DECL_CHAIN (field);

      if (! field)
	break;

      /* A packed field does not contribute any extra alignment.  */
      if (DECL_PACKED (field))
	return align;

      type = TREE_TYPE (field);

      /* Strip arrays.  */
      while (TREE_CODE (type) == ARRAY_TYPE)
	type = TREE_TYPE (type);
    } while (AGGREGATE_TYPE_P (type));

  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
      && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
    align = MAX (align, 64);

  return align;
}

/* Darwin increases record alignment to the natural alignment of
   the first field.  */

unsigned int
darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
					unsigned int specified)
{
  unsigned int align = MAX (computed, specified);

  if (TYPE_PACKED (type))
    return align;

  /* Find the first field, looking down into aggregates.  */
  do {
    tree field = TYPE_FIELDS (type);
    /* Skip all non-field decls.  */
    while (field != NULL
	   && (TREE_CODE (field) != FIELD_DECL
	       || DECL_FIELD_ABI_IGNORED (field)))
      field = DECL_CHAIN (field);
    if (! field)
      break;
    /* A packed field does not contribute any extra alignment.  */
    if (DECL_PACKED (field))
      return align;
    type = TREE_TYPE (field);
    while (TREE_CODE (type) == ARRAY_TYPE)
      type = TREE_TYPE (type);
  } while (AGGREGATE_TYPE_P (type));

  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
    align = MAX (align, TYPE_ALIGN (type));

  return align;
}

/* Return 1 for an operand in small memory on V.4/eabi.  */

int
small_data_operand (rtx op ATTRIBUTE_UNUSED,
		    machine_mode mode ATTRIBUTE_UNUSED)
{
#if TARGET_ELF
  rtx sym_ref;

  if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
    return 0;

  if (DEFAULT_ABI != ABI_V4)
    return 0;

  if (SYMBOL_REF_P (op))
    sym_ref = op;

  else if (GET_CODE (op) != CONST
	   || GET_CODE (XEXP (op, 0)) != PLUS
	   || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
	   || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
    return 0;

  else
    {
      rtx sum = XEXP (op, 0);
      HOST_WIDE_INT summand;

      /* We have to be careful here, because it is the referenced address
	 that must be 32k from _SDA_BASE_, not just the symbol.  */
      summand = INTVAL (XEXP (sum, 1));
      if (summand < 0 || summand > g_switch_value)
	return 0;

      sym_ref = XEXP (sum, 0);
    }

  return SYMBOL_REF_SMALL_P (sym_ref);
#else
  return 0;
#endif
}

/* Return true if either operand is a general purpose register.  */

bool
gpr_or_gpr_p (rtx op0, rtx op1)
{
  return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
	  || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
}

/* Return true if this is a move direct operation between GPR registers and
   floating point/VSX registers.  */

bool
direct_move_p (rtx op0, rtx op1)
{
  if (!REG_P (op0) || !REG_P (op1))
    return false;

  if (!TARGET_DIRECT_MOVE)
    return false;

  int regno0 = REGNO (op0);
  int regno1 = REGNO (op1);
  if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
    return false;

  if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
    return true;

  if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
    return true;

  return false;
}

/* Return true if ADDR is an acceptable address for a quad memory
   operation of mode MODE (either LQ/STQ for general purpose registers,
   or LXV/STXV for vector registers under ISA 3.0).  STRICT requests
   strict checking of base registers, as after reload.  */

bool
quad_address_p (rtx addr, machine_mode mode, bool strict)
{
  rtx op0, op1;

  if (GET_MODE_SIZE (mode) < 16)
    return false;

  if (legitimate_indirect_address_p (addr, strict))
    return true;

  if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
    return false;

  /* Is this a valid prefixed address?  If the bottom four bits of the offset
     are non-zero, we could use a prefixed instruction (which does not have the
     DQ-form constraint that the traditional instruction had) instead of
     forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
    return true;

  if (GET_CODE (addr) != PLUS)
    return false;

  op0 = XEXP (addr, 0);
  if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
    return false;

  op1 = XEXP (addr, 1);
  if (!CONST_INT_P (op1))
    return false;

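  /* The DQ-form displacement is 16 bits and must be a multiple of 16;
     quad_address_offset_p checks both conditions.  */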
  return quad_address_offset_p (INTVAL (op1));
}

/* Return true if this is a load or store quad operation.  This function does
   not handle the atomic quad memory instructions.  */

bool
quad_load_store_p (rtx op0, rtx op1)
{
  bool ret;

  if (!TARGET_QUAD_MEMORY)
    ret = false;

  else if (REG_P (op0) && MEM_P (op1))
    ret = (quad_int_reg_operand (op0, GET_MODE (op0))
	   && quad_memory_operand (op1, GET_MODE (op1))
	   && !reg_overlap_mentioned_p (op0, op1));

  else if (MEM_P (op0) && REG_P (op1))
    ret = (quad_memory_operand (op0, GET_MODE (op0))
	   && quad_int_reg_operand (op1, GET_MODE (op1)));

  else
    ret = false;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n========== quad_load_store, return %s\n",
	       ret ? "true" : "false");
      debug_rtx (gen_rtx_SET (op0, op1));
    }

  return ret;
}

/* Given an address, return a constant offset term if one exists.  */
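/* For example, (plus (reg) (const_int 16)) yields (const_int 16), and
   (lo_sum (reg) (const (plus (symbol_ref) (const_int 8)))) yields
   (const_int 8); an address with no constant term yields NULL_RTX.  */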

static rtx
address_offset (rtx op)
{
  if (GET_CODE (op) == PRE_INC
      || GET_CODE (op) == PRE_DEC)
    op = XEXP (op, 0);
  else if (GET_CODE (op) == PRE_MODIFY
	   || GET_CODE (op) == LO_SUM)
    op = XEXP (op, 1);

  if (GET_CODE (op) == CONST)
    op = XEXP (op, 0);

  if (GET_CODE (op) == PLUS)
    op = XEXP (op, 1);

  if (CONST_INT_P (op))
    return op;

  return NULL_RTX;
}

/* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
   the mode.  If we can't find (or don't know) the alignment of the symbol
   we assume (optimistically) that it's sufficiently aligned [??? maybe we
   should be pessimistic].  Offsets are validated in the same way as for
   reg + offset.  */
static bool
darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
{
  /* We should not get here with this.  */
  gcc_checking_assert (! mode_supports_dq_form (mode));

  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* If we are building PIC code, then any symbol must be wrapped in an
     UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted.  */
  bool machopic_offs_p = false;
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
    {
      x = XVECEXP (x, 0, 0);
      machopic_offs_p = true;
    }

  rtx sym = NULL_RTX;
  unsigned HOST_WIDE_INT offset = 0;

  if (GET_CODE (x) == PLUS)
    {
      sym = XEXP (x, 0);
      if (! SYMBOL_REF_P (sym))
	return false;
      if (!CONST_INT_P (XEXP (x, 1)))
	return false;
      offset = INTVAL (XEXP (x, 1));
    }
  else if (SYMBOL_REF_P (x))
    sym = x;
  else if (CONST_INT_P (x))
    offset = INTVAL (x);
  else if (GET_CODE (x) == LABEL_REF)
    offset = 0; // We assume code labels are Pmode aligned.
  else
    return false; // Not sure what we have here.

  /* If we don't know the alignment of the thing to which the symbol refers,
     we assume optimistically it is "enough".
     ??? Maybe we should be pessimistic instead.  */
  unsigned align = 0;

  if (sym)
    {
      tree decl = SYMBOL_REF_DECL (sym);
      /* As noted above, PIC code cannot use a bare SYMBOL_REF.  */
      if (TARGET_MACHO && flag_pic && !machopic_offs_p)
	return false;
#if TARGET_MACHO
      if (MACHO_SYMBOL_INDIRECTION_P (sym))
      /* The decl in an indirection symbol is the original one, which might
	 be less aligned than the indirection.  Our indirections are always
	 pointer-aligned.  */
	;
      else
#endif
      if (decl && DECL_ALIGN (decl))
	align = DECL_ALIGN_UNIT (decl);
    }

  unsigned int extra = 0;
  switch (mode)
    {
    case E_DFmode:
    case E_DDmode:
    case E_DImode:
      /* If we are using VSX scalar loads, restrict ourselves to reg+reg
	 addressing.  */
      if (VECTOR_MEM_VSX_P (mode))
	return false;

      if (!TARGET_POWERPC64)
	extra = 4;
      else if ((offset & 3) || (align & 3))
	return false;
      break;

    case E_TFmode:
    case E_IFmode:
    case E_KFmode:
    case E_TDmode:
    case E_TImode:
    case E_PTImode:
      extra = 8;
      if (!TARGET_POWERPC64)
	extra = 12;
      else if ((offset & 3) || (align & 3))
	return false;
      break;

    default:
      break;
    }

  /* We only care if the access(es) would cause a change to the high part.  */
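  /* The XOR/subtract pair below sign-extends the low 16 bits of the offset:
     e.g. a low half of 0x9000 becomes 0x1000 - 0x8000 = -0x7000, which is
     exactly what the 16-bit displacement will contribute in hardware.  */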
  offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}

/* Return true if the MEM operand is a memory operand suitable for use
   with a (full width, possibly multiple) gpr load/store.  On
   powerpc64 this means the offset must be divisible by 4.
   Implements 'Y' constraint.

   Accept direct, indexed, offset, lo_sum and tocref.  Since this is
   a constraint function we know the operand has satisfied a suitable
   memory predicate.

   Offsetting a lo_sum should not be allowed, except where we know by
   alignment that a 32k boundary is not crossed.  Note that by
   "offsetting" here we mean a further offset to access parts of the
   MEM.  It's fine to have a lo_sum where the inner address is offset
   from a sym, since the same sym+offset will appear in the high part
   of the address calculation.  */

bool
mem_operand_gpr (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  /* PR85755: Allow PRE_INC and PRE_DEC addresses.  */
  if (TARGET_UPDATE
      && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
      && mode_supports_pre_incdec_p (mode)
      && legitimate_indirect_address_p (XEXP (addr, 0), false))
    return true;

  /* Allow prefixed instructions if supported.  If the bottom two bits of the
     offset are non-zero, we could use a prefixed instruction (which does not
     have the DS-form constraint that the traditional instruction had) instead
     of forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
    return true;

  /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
     really OK.  Doing this early avoids teaching all the other machinery
     about them.  */
  if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
    return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);

  /* Only allow offsettable addresses.  See PRs 83969 and 84279.  */
  if (!rs6000_offsettable_memref_p (op, mode, false))
    return false;

  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  offset = INTVAL (op);
  if (TARGET_POWERPC64 && (offset & 3) != 0)
    return false;

  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;
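  /* EXTRA is the number of bytes of the access beyond the first word:
     e.g. an 8-byte value on a 32-bit target needs both OFFSET and
     OFFSET + 4 to be valid 16-bit displacements.  */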

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;

  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}

/* As above, but for DS-FORM VSX insns.  Unlike mem_operand_gpr,
   enforce an offset divisible by 4 even for 32-bit.  */

bool
mem_operand_ds_form (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  /* Allow prefixed instructions if supported.  If the bottom two bits of the
     offset are non-zero, we could use a prefixed instruction (which does not
     have the DS-form constraint that the traditional instruction had) instead
     of forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
    return true;

  if (!offsettable_address_p (false, mode, addr))
    return false;

  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  offset = INTVAL (op);
  if ((offset & 3) != 0)
    return false;

  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;

  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}

/* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p.  */

static bool
reg_offset_addressing_ok_p (machine_mode mode)
{
  switch (mode)
    {
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SFmode:
    case E_V4SImode:
    case E_V2DFmode:
    case E_V2DImode:
    case E_V1TImode:
    case E_TImode:
    case E_TFmode:
    case E_KFmode:
      /* AltiVec/VSX vector modes.  Only reg+reg addressing was valid until the
	 ISA 3.0 vector d-form addressing mode was added.  While TImode is not
	 a vector mode, if we want to use the VSX registers to move it around,
	 we need to restrict ourselves to reg+reg addressing.  Similarly for
	 IEEE 128-bit floating point that is passed in a single vector
	 register.  */
      if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
	return mode_supports_dq_form (mode);
      break;

      /* The vector pair/quad types support offset addressing if the
	 underlying vectors support offset addressing.  */
    case E_OOmode:
    case E_XOmode:
      return TARGET_MMA;

    case E_SDmode:
      /* If we can do direct load/stores of SDmode, restrict it to reg+reg
	 addressing for the LFIWZX and STFIWX instructions.  */
      if (TARGET_NO_SDMODE_STACK)
	return false;
      break;

    default:
      break;
    }

  return true;
}

static bool
virtual_stack_registers_memory_p (rtx op)
{
  int regnum;

  if (REG_P (op))
    regnum = REGNO (op);

  else if (GET_CODE (op) == PLUS
	   && REG_P (XEXP (op, 0))
	   && CONST_INT_P (XEXP (op, 1)))
    regnum = REGNO (XEXP (op, 0));

  else
    return false;

  return (regnum >= FIRST_VIRTUAL_REGISTER
	  && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
}

/* Return true if a MODE-sized memory access to OP plus OFFSET
   is known to not straddle a 32k boundary.  This function is used
   to determine whether -mcmodel=medium code can use TOC pointer
   relative addressing for OP.  This means the alignment of the TOC
   pointer must also be taken into account, and unfortunately that is
   only 8 bytes.  */

#ifndef POWERPC64_TOC_POINTER_ALIGNMENT
#define POWERPC64_TOC_POINTER_ALIGNMENT 8
#endif

static bool
offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
			     machine_mode mode)
{
  tree decl;
  unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;

  if (!SYMBOL_REF_P (op))
    return false;

  /* ISA 3.0 vector d-form addressing is restricted, don't allow
     SYMBOL_REF.  */
  if (mode_supports_dq_form (mode))
    return false;

  dsize = GET_MODE_SIZE (mode);
  decl = SYMBOL_REF_DECL (op);
  if (!decl)
    {
      if (dsize == 0)
	return false;

      /* -fsection-anchors loses the original SYMBOL_REF_DECL when
	 replacing memory addresses with an anchor plus offset.  We
	 could find the decl by rummaging around in the block->objects
	 VEC for the given offset but that seems like too much work.  */
      dalign = BITS_PER_UNIT;
      if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
	  && SYMBOL_REF_ANCHOR_P (op)
	  && SYMBOL_REF_BLOCK (op) != NULL)
	{
	  struct object_block *block = SYMBOL_REF_BLOCK (op);

	  dalign = block->alignment;
	  offset += SYMBOL_REF_BLOCK_OFFSET (op);
	}
      else if (CONSTANT_POOL_ADDRESS_P (op))
	{
	  /* It would be nice to have get_pool_align()...  */
	  machine_mode cmode = get_pool_mode (op);

	  dalign = GET_MODE_ALIGNMENT (cmode);
	}
    }
  else if (DECL_P (decl))
    {
      dalign = DECL_ALIGN (decl);

      if (dsize == 0)
	{
	  /* Allow BLKmode when the entire object is known to not
	     cross a 32k boundary.  */
	  if (!DECL_SIZE_UNIT (decl))
	    return false;

	  if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
	    return false;

	  dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
	  if (dsize > 32768)
	    return false;

	  dalign /= BITS_PER_UNIT;
	  if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
	    dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
	  return dalign >= dsize;
	}
    }
  else
    gcc_unreachable ();

  /* Find how many bits of the alignment we know for this access.  */
  dalign /= BITS_PER_UNIT;
  if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
    dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
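  /* Combine the declared alignment with the alignment implied by the
     offset's lowest set bit: e.g. dalign == 8 with offset == 4 leaves
     only 4 bytes of known alignment, so only accesses of size <= 4 are
     guaranteed not to cross a 32k boundary.  */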
  mask = dalign - 1;
  lsb = offset & -offset;
  mask &= lsb - 1;
  dalign = mask + 1;

  return dalign >= dsize;
}

static bool
constant_pool_expr_p (rtx op)
{
  rtx base, offset;

  split_const (op, &base, &offset);
  return (SYMBOL_REF_P (base)
	  && CONSTANT_POOL_ADDRESS_P (base)
	  && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
}

/* Create a TOC reference for symbol_ref SYMBOL.  If LARGETOC_REG is non-null,
   use that as the register to put the HIGH value into if register allocation
   is already done.  */

rtx
create_TOC_reference (rtx symbol, rtx largetoc_reg)
{
  rtx tocrel, tocreg, hi;

  gcc_assert (TARGET_TOC);

  if (TARGET_DEBUG_ADDR)
    {
      if (SYMBOL_REF_P (symbol))
	fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
		 XSTR (symbol, 0));
      else
	{
	  fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
		   GET_RTX_NAME (GET_CODE (symbol)));
	  debug_rtx (symbol);
	}
    }

  if (!can_create_pseudo_p ())
    df_set_regs_ever_live (TOC_REGISTER, true);

  tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
  tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
  if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
    return tocrel;

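  /* For -mcmodel=medium/large after register allocation, split the
     reference into a HIGH/LO_SUM pair, roughly
     (lo_sum (high (unspec [sym, toc] tocrel)) (unspec [sym, toc] tocrel)),
     with the HIGH part placed in LARGETOC_REG when one is supplied.  */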
  hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
  if (largetoc_reg != NULL)
    {
      emit_move_insn (largetoc_reg, hi);
      hi = largetoc_reg;
    }
  return gen_rtx_LO_SUM (Pmode, hi, tocrel);
}

/* These are only used to pass through from print_operand/print_operand_address
   to rs6000_output_addr_const_extra over the intervening function
   output_addr_const which is not target code.  */
static const_rtx tocrel_base_oac, tocrel_offset_oac;

/* Return true if OP is a toc pointer relative address (the output
   of create_TOC_reference).  If STRICT, do not match non-split
   -mcmodel=large/medium toc pointer relative addresses.  If the pointers
   are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
   TOCREL_OFFSET_RET respectively.  */

bool
toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
		     const_rtx *tocrel_offset_ret)
{
  if (!TARGET_TOC)
    return false;

  if (TARGET_CMODEL != CMODEL_SMALL)
    {
      /* When strict ensure we have everything tidy.  */
      if (strict
	  && !(GET_CODE (op) == LO_SUM
	       && REG_P (XEXP (op, 0))
	       && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
	return false;

      /* When not strict, allow non-split TOC addresses and also allow
	 (lo_sum (high ..)) TOC addresses created during reload.  */
      if (GET_CODE (op) == LO_SUM)
	op = XEXP (op, 1);
    }

  const_rtx tocrel_base = op;
  const_rtx tocrel_offset = const0_rtx;

  if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
    {
      tocrel_base = XEXP (op, 0);
      tocrel_offset = XEXP (op, 1);
    }

  if (tocrel_base_ret)
    *tocrel_base_ret = tocrel_base;
  if (tocrel_offset_ret)
    *tocrel_offset_ret = tocrel_offset;

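  /* A well-formed base is the (unspec [sym, reg] UNSPEC_TOCREL) built by
     create_TOC_reference, whose second vector element must still be the
     TOC register.  */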
  return (GET_CODE (tocrel_base) == UNSPEC
	  && XINT (tocrel_base, 1) == UNSPEC_TOCREL
	  && REG_P (XVECEXP (tocrel_base, 0, 1))
	  && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
}

/* Return true if X is a constant pool address, and also for cmodel=medium
   if X is a toc-relative address known to be offsettable within MODE.  */

bool
legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
				    bool strict)
{
  const_rtx tocrel_base, tocrel_offset;
  return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
	  && (TARGET_CMODEL != CMODEL_MEDIUM
	      || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
	      || mode == QImode
	      || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
					      INTVAL (tocrel_offset), mode)));
}

static bool
legitimate_small_data_p (machine_mode mode, rtx x)
{
  return (DEFAULT_ABI == ABI_V4
	  && !flag_pic && !TARGET_TOC
	  && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
	  && small_data_operand (x, mode));
}

bool
rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
				    bool strict, bool worst_case)
{
  unsigned HOST_WIDE_INT offset;
  unsigned int extra;

  if (GET_CODE (x) != PLUS)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
    return false;
  if (mode_supports_dq_form (mode))
    return quad_address_p (x, mode, strict);
  if (!reg_offset_addressing_ok_p (mode))
    return virtual_stack_registers_memory_p (x);
  if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
    return true;
  if (!CONST_INT_P (XEXP (x, 1)))
    return false;

  offset = INTVAL (XEXP (x, 1));
  extra = 0;
  switch (mode)
    {
    case E_DFmode:
    case E_DDmode:
    case E_DImode:
      /* If we are using VSX scalar loads, restrict ourselves to reg+reg
	 addressing.  */
      if (VECTOR_MEM_VSX_P (mode))
	return false;

      if (!worst_case)
	break;
      if (!TARGET_POWERPC64)
	extra = 4;
      else if (offset & 3)
	return false;
      break;

    case E_TFmode:
    case E_IFmode:
    case E_KFmode:
    case E_TDmode:
    case E_TImode:
    case E_PTImode:
      extra = 8;
      if (!worst_case)
	break;
      if (!TARGET_POWERPC64)
	extra = 12;
      else if (offset & 3)
	return false;
      break;

    default:
      break;
    }

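  /* Prefixed load/store instructions carry a 34-bit signed displacement,
     so a much larger offset range is acceptable when they are available.  */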
  if (TARGET_PREFIXED)
    return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
  else
    return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}

bool
legitimate_indexed_address_p (rtx x, int strict)
{
  rtx op0, op1;

  if (GET_CODE (x) != PLUS)
    return false;

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  return (REG_P (op0) && REG_P (op1)
	  && ((INT_REG_OK_FOR_BASE_P (op0, strict)
	       && INT_REG_OK_FOR_INDEX_P (op1, strict))
	      || (INT_REG_OK_FOR_BASE_P (op1, strict)
		  && INT_REG_OK_FOR_INDEX_P (op0, strict))));
}

bool
avoiding_indexed_address_p (machine_mode mode)
{
  unsigned int msize = GET_MODE_SIZE (mode);

  /* Avoid indexed addressing for modes that have non-indexed load/store
     instruction forms.  On power10, vector pairs have an indexed
     form, but vector quads don't.  */
  if (msize > 16)
    return msize != 32;

  return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
}

bool
legitimate_indirect_address_p (rtx x, int strict)
{
  return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
}

bool
macho_lo_sum_memory_operand (rtx x, machine_mode mode)
{
  if (!TARGET_MACHO || !flag_pic
      || mode != SImode || !MEM_P (x))
    return false;
  x = XEXP (x, 0);

  if (GET_CODE (x) != LO_SUM)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
    return false;
  x = XEXP (x, 1);

  return CONSTANT_P (x);
}

static bool
legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
{
  if (GET_CODE (x) != LO_SUM)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
    return false;
  /* Quad word addresses are restricted, and we can't use LO_SUM.  */
  if (mode_supports_dq_form (mode))
    return false;
  x = XEXP (x, 1);

  if (TARGET_ELF)
    {
      bool large_toc_ok;

      if (DEFAULT_ABI == ABI_V4 && flag_pic)
	return false;
      /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
	 push_reload from reload pass code.  LEGITIMIZE_RELOAD_ADDRESS
	 recognizes some LO_SUM addresses as valid although this
	 function says the opposite.  In most cases, LRA through different
	 transformations can generate correct code for address reloads.
	 It cannot manage only some LO_SUM cases.  So we need to add
	 code here saying that some addresses are still valid.  */
      large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
		      && small_toc_ref (x, VOIDmode));
      if (TARGET_TOC && ! large_toc_ok)
	return false;
      if (GET_MODE_NUNITS (mode) != 1)
	return false;
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
	  && !(/* ??? Assume floating point reg based on mode?  */
	       TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
	return false;

      return CONSTANT_P (x) || large_toc_ok;
    }
  else if (TARGET_MACHO)
    {
      if (GET_MODE_NUNITS (mode) != 1)
	return false;
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
	  && !(/* see above  */
	       TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
	return false;
#if TARGET_MACHO
      if (MACHO_DYNAMIC_NO_PIC_P || !flag_pic)
	return CONSTANT_P (x);
#endif
      /* Mach-O PIC code from here.  */
      if (GET_CODE (x) == CONST)
	x = XEXP (x, 0);

      /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET.  */
      if (SYMBOL_REF_P (x))
	return false;

      /* So this is OK if the wrapped object is const.  */
      if (GET_CODE (x) == UNSPEC
	  && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
	return CONSTANT_P (XVECEXP (x, 0, 0));
      return CONSTANT_P (x);
    }
  return false;
}


/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This is used from only one place: `memory_address' in explow.cc.

   OLDX is the address as it was before break_out_memory_refs was
   called.  In some cases it is useful to look at this to decide what
   needs to be done.

   It is always safe for this function to do nothing.  It exists to
   recognize opportunities to optimize the output.

   On RS/6000, first check for the sum of a register with a constant
   integer that is out of range.  If so, generate code to add the
   constant with the low-order 16 bits masked to the register and force
   this result into another register (this can be done with `cau').
   Then generate an address of REG+(CONST&0xffff), allowing for the
   possibility of bit 16 being a one.

   Then check for the sum of a register and something not constant, try to
   load the other things into a register and return the sum.  */

static rtx
rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			   machine_mode mode)
{
  unsigned int extra;

  if (!reg_offset_addressing_ok_p (mode)
      || mode_supports_dq_form (mode))
    {
      if (virtual_stack_registers_memory_p (x))
	return x;

      /* In theory we should not be seeing addresses of the form reg+0,
	 but just in case one is generated, optimize it away.  */
      if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
	return force_reg (Pmode, XEXP (x, 0));

      /* For TImode with load/store quad, restrict addresses to just a single
	 pointer, so it works with both GPRs and VSX registers.  */
      /* Make sure both operands are registers.  */
      else if (GET_CODE (x) == PLUS
	       && (mode != TImode || !TARGET_VSX))
	return gen_rtx_PLUS (Pmode,
			     force_reg (Pmode, XEXP (x, 0)),
			     force_reg (Pmode, XEXP (x, 1)));
      else
	return force_reg (Pmode, x);
    }
  if (SYMBOL_REF_P (x) && !TARGET_MACHO)
    {
      enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
      if (model != 0)
	return rs6000_legitimize_tls_address (x, model);
    }

  extra = 0;
  switch (mode)
    {
    case E_TFmode:
    case E_TDmode:
    case E_TImode:
    case E_PTImode:
    case E_IFmode:
    case E_KFmode:
      /* As in legitimate_offset_address_p we do not assume
	 worst-case.  The mode here is just a hint as to the registers
	 used.  A TImode is usually in gprs, but may actually be in
	 fprs.  Leave worst-case scenario for reload to handle via
	 insn constraints.  PTImode is only GPRs.  */
      extra = 8;
      break;
    default:
      break;
    }

  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
	  >= 0x10000 - extra))
    {
      HOST_WIDE_INT high_int, low_int;
      rtx sum;
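      /* Split the constant into a sign-extended low 16-bit part and a
	 high part reachable with addis.  For example, reg + 0x12345
	 becomes (reg + 0x10000) + 0x2345.  When the low part would
	 collide with the EXTRA slop needed for multi-word accesses, the
	 whole constant goes into the high-part register instead.  */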
      low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
      if (low_int >= 0x8000 - extra)
	low_int = 0;
      high_int = INTVAL (XEXP (x, 1)) - low_int;
      sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
					 gen_int_mode (high_int, Pmode)), 0);
      return plus_constant (Pmode, sum, low_int);
    }
  else if (GET_CODE (x) == PLUS
	   && REG_P (XEXP (x, 0))
	   && !CONST_INT_P (XEXP (x, 1))
	   && GET_MODE_NUNITS (mode) == 1
	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	       || (/* ??? Assume floating point reg based on mode?  */
		   TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
	   && !avoiding_indexed_address_p (mode))
    {
      return gen_rtx_PLUS (Pmode, XEXP (x, 0),
			   force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
    }
  else if ((TARGET_ELF
#if TARGET_MACHO
	    || !MACHO_DYNAMIC_NO_PIC_P
#endif
	    )
	   && TARGET_32BIT
	   && TARGET_NO_TOC_OR_PCREL
	   && !flag_pic
	   && !CONST_INT_P (x)
	   && !CONST_WIDE_INT_P (x)
	   && !CONST_DOUBLE_P (x)
	   && CONSTANT_P (x)
	   && GET_MODE_NUNITS (mode) == 1
	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	       || (/* ??? Assume floating point reg based on mode?  */
		   TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
    {
      rtx reg = gen_reg_rtx (Pmode);
      if (TARGET_ELF)
	emit_insn (gen_elf_high (reg, x));
      else
	emit_insn (gen_macho_high (Pmode, reg, x));
      return gen_rtx_LO_SUM (Pmode, reg, x);
    }
  else if (TARGET_TOC
	   && SYMBOL_REF_P (x)
	   && constant_pool_expr_p (x)
	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
    return create_TOC_reference (x, NULL_RTX);
  else
    return x;
}

/* Debug version of rs6000_legitimize_address.  */
static rtx
rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
{
  rtx ret;
  rtx_insn *insns;

  start_sequence ();
  ret = rs6000_legitimize_address (x, oldx, mode);
  insns = get_insns ();
  end_sequence ();

  if (ret != x)
    {
      fprintf (stderr,
	       "\nrs6000_legitimize_address: mode %s, old code %s, "
	       "new code %s, modified\n",
	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
	       GET_RTX_NAME (GET_CODE (ret)));

      fprintf (stderr, "Original address:\n");
      debug_rtx (x);

      fprintf (stderr, "oldx:\n");
      debug_rtx (oldx);

      fprintf (stderr, "New address:\n");
      debug_rtx (ret);

      if (insns)
	{
	  fprintf (stderr, "Insns added:\n");
	  debug_rtx_list (insns, 20);
	}
    }
  else
    {
      fprintf (stderr,
	       "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));

      debug_rtx (x);
    }

  if (insns)
    emit_insn (insns);

  return ret;
}

/* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void
rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  switch (size)
    {
    case 4:
      fputs ("\t.long\t", file);
      break;
    case 8:
      fputs (DOUBLE_INT_ASM_OP, file);
      break;
    default:
      gcc_unreachable ();
    }
  output_addr_const (file, x);
  if (TARGET_ELF)
    fputs ("@dtprel+0x8000", file);
}

/* Return true if X is a symbol that refers to real (rather than emulated)
   TLS.  */

static bool
rs6000_real_tls_symbol_ref_p (rtx x)
{
  return (SYMBOL_REF_P (x)
	  && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
}

/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize various UNSPEC sequences
   and turn them back into a direct symbol reference.  */

static rtx
rs6000_delegitimize_address (rtx orig_x)
{
  rtx x, y, offset;

  /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion.  It
     encodes loading up the high part of the address of a TOC reference along
     with a load of a GPR using the same base register used for the load.  We
     return the original SYMBOL_REF.

	(set (reg:INT1 <reg>)
	     (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR))

     UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass.  These
     UNSPECs include the external SYMBOL_REF along with the value being loaded.
     We return the original SYMBOL_REF.

	(parallel [(set (reg:DI <base-reg>)
			(unspec:DI [(symbol_ref <symbol>)
				    (const_int <marker>)]
				   UNSPEC_PCREL_OPT_LD_ADDR))
		   (set (reg:DI <load-reg>)
			(unspec:DI [(const_int 0)]
				   UNSPEC_PCREL_OPT_LD_DATA))])

     UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
     GPR being loaded is the same as the GPR used to hold the external address.

	(set (reg:DI <base-reg>)
	     (unspec:DI [(symbol_ref <symbol>)
			 (const_int <marker>)]
			UNSPEC_PCREL_OPT_LD_SAME_REG))

     UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass.  This
     UNSPEC includes the external SYMBOL_REF along with the value being stored.
     We return the original SYMBOL_REF.

	(parallel [(set (reg:DI <base-reg>)
			(unspec:DI [(symbol_ref <symbol>)
				    (const_int <marker>)]
				   UNSPEC_PCREL_OPT_ST_ADDR))
		   (use (reg <store-reg>))])  */

  if (GET_CODE (orig_x) == UNSPEC)
    switch (XINT (orig_x, 1))
      {
      case UNSPEC_FUSION_GPR:
      case UNSPEC_PCREL_OPT_LD_ADDR:
      case UNSPEC_PCREL_OPT_LD_SAME_REG:
      case UNSPEC_PCREL_OPT_ST_ADDR:
	orig_x = XVECEXP (orig_x, 0, 0);
	break;

      default:
	break;
      }

  orig_x = delegitimize_mem_from_attrs (orig_x);

  x = orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);

  y = x;
  if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
    y = XEXP (y, 1);

  offset = NULL_RTX;
  if (GET_CODE (y) == PLUS
      && GET_MODE (y) == Pmode
      && CONST_INT_P (XEXP (y, 1)))
    {
      offset = XEXP (y, 1);
      y = XEXP (y, 0);
    }

  if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
    {
      y = XVECEXP (y, 0, 0);

#ifdef HAVE_AS_TLS
      /* Do not associate thread-local symbols with the original
	 constant pool symbol.  */
      if (TARGET_XCOFF
	  && SYMBOL_REF_P (y)
	  && CONSTANT_POOL_ADDRESS_P (y)
	  && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
	return orig_x;
#endif

      if (offset != NULL_RTX)
	y = gen_rtx_PLUS (Pmode, y, offset);
      if (!MEM_P (orig_x))
	return y;
      else
	return replace_equiv_address_nv (orig_x, y);
    }

  if (TARGET_MACHO
      && GET_CODE (orig_x) == LO_SUM
      && GET_CODE (XEXP (orig_x, 1)) == CONST)
    {
      y = XEXP (XEXP (orig_x, 1), 0);
      if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
	return XVECEXP (y, 0, 0);
    }

  return orig_x;
}

/* Return true if X shouldn't be emitted into the debug info.
   The linker doesn't like .toc section references from
   .debug_* sections, so reject .toc section symbols.  */

static bool
rs6000_const_not_ok_for_debug_p (rtx x)
{
  if (GET_CODE (x) == UNSPEC)
    return true;
  if (SYMBOL_REF_P (x)
      && CONSTANT_POOL_ADDRESS_P (x))
    {
      rtx c = get_pool_constant (x);
      machine_mode cmode = get_pool_mode (x);
      if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
	return true;
    }

  return false;
}

/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */

static bool
rs6000_legitimate_combined_insn (rtx_insn *insn)
{
  int icode = INSN_CODE (insn);

  /* Reject creating doloop insns.  Combine should not be allowed
     to create these for a number of reasons:
     1) In a nested loop, if combine creates one of these in an
     outer loop and the register allocator happens to allocate ctr
     to the outer loop insn, then the inner loop can't use ctr.
     Inner loops ought to be more highly optimized.
     2) Combine often wants to create one of these from what was
     originally a three insn sequence, first combining the three
     insns to two, then to ctrsi/ctrdi.  When ctrsi/ctrdi is not
     allocated ctr, the splitter takes us back to the three insn
     sequence.  It's better to stop combine at the two insn
     sequence.
     3) Faced with not being able to allocate ctr for ctrsi/ctrdi
     insns, the register allocator sometimes uses floating point
     or vector registers for the pseudo.  Since ctrsi/ctrdi is a
     jump insn and output reloads are not implemented for jumps,
     the ctrsi/ctrdi splitters need to handle all possible cases.
     That's a pain, and it gets to be seriously difficult when a
     splitter that runs after reload needs memory to transfer from
     a gpr to fpr.  See PR70098 and PR71763 which are not fixed
     for the difficult case.  It's better to not create problems
     in the first place.  */
  if (icode != CODE_FOR_nothing
      && (icode == CODE_FOR_bdz_si
	  || icode == CODE_FOR_bdz_di
	  || icode == CODE_FOR_bdnz_si
	  || icode == CODE_FOR_bdnz_di
	  || icode == CODE_FOR_bdztf_si
	  || icode == CODE_FOR_bdztf_di
	  || icode == CODE_FOR_bdnztf_si
	  || icode == CODE_FOR_bdnztf_di))
    return false;

  return true;
}

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx rs6000_tls_symbol;
static rtx
rs6000_tls_get_addr (void)
{
  if (!rs6000_tls_symbol)
    rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");

  return rs6000_tls_symbol;
}

/* Construct the SYMBOL_REF for TLS GOT references.  */

static GTY(()) rtx rs6000_got_symbol;
rtx
rs6000_got_sym (void)
{
  if (!rs6000_got_symbol)
    {
      rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
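      /* _GLOBAL_OFFSET_TABLE_ is supplied by the linker yet resolves
	 within the module, so it is marked both local and external.  */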
      SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
      SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
    }

  return rs6000_got_symbol;
}

/* AIX Thread-Local Address support.  */

static rtx
rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
{
  rtx sym, mem, tocref, tlsreg, tmpreg, dest;
  const char *name;
  char *tlsname;

  /* Place addr into TOC constant pool.  */
  sym = force_const_mem (GET_MODE (addr), addr);

  /* Output the TOC entry and create the MEM referencing the value.  */
  if (constant_pool_expr_p (XEXP (sym, 0))
      && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
    {
      tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
      mem = gen_const_mem (Pmode, tocref);
      set_mem_alias_set (mem, get_TOC_alias_set ());
    }
  else
    return sym;

  /* Use global-dynamic for local-dynamic.  */
  if (model == TLS_MODEL_GLOBAL_DYNAMIC
      || model == TLS_MODEL_LOCAL_DYNAMIC)
    {
      /* Create new TOC reference for @m symbol.  */
      name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
      tlsname = XALLOCAVEC (char, strlen (name) + 1);
      strcpy (tlsname, "*LCM");
      strcat (tlsname, name + 3);
      rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
      SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
      tocref = create_TOC_reference (modaddr, NULL_RTX);
      rtx modmem = gen_const_mem (Pmode, tocref);
      set_mem_alias_set (modmem, get_TOC_alias_set ());

      rtx modreg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (modreg, modmem));

      tmpreg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (tmpreg, mem));

      dest = gen_reg_rtx (Pmode);
      if (TARGET_32BIT)
	emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
      else
	emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
      return dest;
    }
  /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13.  */
  else if (TARGET_32BIT)
    {
      tlsreg = gen_reg_rtx (SImode);
      emit_insn (gen_tls_get_tpointer (tlsreg));
    }
  else
    {
      tlsreg = gen_rtx_REG (DImode, 13);
      xcoff_tls_exec_model_detected = true;
    }

  /* Load the TOC value into temporary register.  */
  tmpreg = gen_reg_rtx (Pmode);
  emit_insn (gen_rtx_SET (tmpreg, mem));
  set_unique_reg_note (get_last_insn (), REG_EQUAL,
		       gen_rtx_MINUS (Pmode, addr, tlsreg));

  /* Add TOC symbol value to TLS pointer.  */
  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));

  return dest;
}

/* Passes the TLS arg value for the global-dynamic and local-dynamic
   emit_library_call_value calls in rs6000_legitimize_tls_address through
   to rs6000_call_aix and rs6000_call_sysv.  This is used to emit the
   marker relocs put on __tls_get_addr calls.  */
static rtx global_tlsarg;

/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
   this (thread-local) address.  */

static rtx
rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
{
  rtx dest, insn;

  if (TARGET_XCOFF)
    return rs6000_legitimize_tls_address_aix (addr, model);

  dest = gen_reg_rtx (Pmode);
  if (model == TLS_MODEL_LOCAL_EXEC
      && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
    {
      rtx tlsreg;

      if (TARGET_64BIT)
	{
	  tlsreg = gen_rtx_REG (Pmode, 13);
	  insn = gen_tls_tprel_64 (dest, tlsreg, addr);
	}
      else
	{
	  tlsreg = gen_rtx_REG (Pmode, 2);
	  insn = gen_tls_tprel_32 (dest, tlsreg, addr);
	}
      emit_insn (insn);
    }
  else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
    {
      rtx tlsreg, tmp;

      tmp = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  tlsreg = gen_rtx_REG (Pmode, 13);
	  insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
	}
      else
	{
	  tlsreg = gen_rtx_REG (Pmode, 2);
	  insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
	}
      emit_insn (insn);
      if (TARGET_64BIT)
	insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
      else
	insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
      emit_insn (insn);
    }
  else
    {
      rtx got, tga, tmp1, tmp2;

      /* We currently use relocations like @got@tlsgd for tls, which
	 means the linker will handle allocation of tls entries, placing
	 them in the .got section.  So use a pointer to the .got section,
	 not one to secondary TOC sections used by 64-bit -mminimal-toc,
	 or to secondary GOT sections used by 32-bit -fPIC.  */
      if (rs6000_pcrel_p ())
	got = const0_rtx;
      else if (TARGET_64BIT)
	got = gen_rtx_REG (Pmode, 2);
      else
	{
	  if (flag_pic == 1)
	    got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
	  else
	    {
	      rtx gsym = rs6000_got_sym ();
	      got = gen_reg_rtx (Pmode);
	      if (flag_pic == 0)
		rs6000_emit_move (got, gsym, Pmode);
	      else
		{
		  rtx mem, lab;

		  tmp1 = gen_reg_rtx (Pmode);
		  tmp2 = gen_reg_rtx (Pmode);
		  mem = gen_const_mem (Pmode, tmp1);
		  lab = gen_label_rtx ();
		  emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
		  emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
		  if (TARGET_LINK_STACK)
		    emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
		  emit_move_insn (tmp2, mem);
		  rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
		  set_unique_reg_note (last, REG_EQUAL, gsym);
		}
	    }
	}

      if (model == TLS_MODEL_GLOBAL_DYNAMIC)
	{
	  rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
				    UNSPEC_TLSGD);
	  tga = rs6000_tls_get_addr ();
	  rtx argreg = gen_rtx_REG (Pmode, 3);
	  emit_insn (gen_rtx_SET (argreg, arg));
	  global_tlsarg = arg;
	  emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
	  global_tlsarg = NULL_RTX;

	  /* Make a note so that the result of this call can be CSEd.  */
	  rtvec vec = gen_rtvec (1, copy_rtx (arg));
	  rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
	  set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
	}
      else if (model == TLS_MODEL_LOCAL_DYNAMIC)
	{
	  rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
	  tga = rs6000_tls_get_addr ();
	  tmp1 = gen_reg_rtx (Pmode);
	  rtx argreg = gen_rtx_REG (Pmode, 3);
	  emit_insn (gen_rtx_SET (argreg, arg));
	  global_tlsarg = arg;
	  emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
	  global_tlsarg = NULL_RTX;

	  /* Make a note so that the result of this call can be CSEd.  */
	  rtvec vec = gen_rtvec (1, copy_rtx (arg));
	  rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
	  set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);

	  if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
	    {
	      if (TARGET_64BIT)
		insn = gen_tls_dtprel_64 (dest, tmp1, addr);
	      else
		insn = gen_tls_dtprel_32 (dest, tmp1, addr);
	    }
	  else if (rs6000_tls_size == 32)
	    {
	      tmp2 = gen_reg_rtx (Pmode);
	      if (TARGET_64BIT)
		insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
	      else
		insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
	      emit_insn (insn);
	      if (TARGET_64BIT)
		insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
	      else
		insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
	    }
	  else
	    {
	      tmp2 = gen_reg_rtx (Pmode);
	      if (TARGET_64BIT)
		insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
	      else
		insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
	      emit_insn (insn);
	      insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
	    }
	  emit_insn (insn);
	}
      else
	{
	  /* IE, or 64-bit offset LE.  */
	  tmp2 = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    insn = gen_tls_got_tprel_64 (tmp2, got, addr);
	  else
	    insn = gen_tls_got_tprel_32 (tmp2, got, addr);
	  emit_insn (insn);
	  if (rs6000_pcrel_p ())
	    {
	      if (TARGET_64BIT)
		insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
	      else
		insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
	    }
	  else if (TARGET_64BIT)
	    insn = gen_tls_tls_64 (dest, tmp2, addr);
	  else
	    insn = gen_tls_tls_32 (dest, tmp2, addr);
	  emit_insn (insn);
	}
    }

  return dest;
}

/* Only create the global variable for the stack protect guard if we are using
   the global flavor of that guard.  */
static tree
rs6000_init_stack_protect_guard (void)
{
  if (rs6000_stack_protector_guard == SSP_GLOBAL)
    return default_stack_protect_guard ();

  return NULL_TREE;
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  if (GET_CODE (x) == HIGH
      && GET_CODE (XEXP (x, 0)) == UNSPEC)
    return true;

  /* A TLS symbol in the TOC cannot contain a sum.  */
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
      && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
    return true;

  /* Allow AIX TOC TLS symbols in the constant pool,
     but not ELF TLS symbols.  */
  return TARGET_ELF && tls_referenced_p (x);
}

/* Return true iff the given SYMBOL_REF refers to a constant pool entry
   that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
   can be addressed relative to the toc pointer.  */

static bool
use_toc_relative_ref (rtx sym, machine_mode mode)
{
  return ((constant_pool_expr_p (sym)
	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
					       get_pool_mode (sym)))
	  || (TARGET_CMODEL == CMODEL_MEDIUM
	      && SYMBOL_REF_LOCAL_P (sym)
	      && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
}

/* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
   that is a valid memory address for an instruction.
   The MODE argument is the machine mode for the MEM expression
   that wants to use this address.

   On the RS/6000, there are four valid addresses: a SYMBOL_REF that
   refers to a constant pool entry of an address (or the sum of it
   plus a constant), a short (16-bit signed) constant plus a register,
   the sum of two registers, or a register indirect, possibly with an
   auto-increment.  For DFmode, DDmode and DImode with a constant plus
   register, we must ensure that both words are addressable or PowerPC64
   with offset word aligned.

   For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
   32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
   because adjacent memory cells are accessed by adding word-sized offsets
   during assembly output.  */
static bool
rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
{
  bool reg_offset_p = reg_offset_addressing_ok_p (mode);
  bool quad_offset_p = mode_supports_dq_form (mode);

  if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
    return 0;

  /* Handle unaligned altivec lvx/stvx type addresses.  */
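  /* lvx/stvx ignore the low four bits of the address, so these addresses
     are canonically expressed as (and (address) (const_int -16)); validate
     the inner address here.  */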
9777  if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
9778      && GET_CODE (x) == AND
9779      && CONST_INT_P (XEXP (x, 1))
9780      && INTVAL (XEXP (x, 1)) == -16)
9781    {
9782      x = XEXP (x, 0);
9783      return (legitimate_indirect_address_p (x, reg_ok_strict)
9784	      || legitimate_indexed_address_p (x, reg_ok_strict)
9785	      || virtual_stack_registers_memory_p (x));
9786    }
9787
9788  if (legitimate_indirect_address_p (x, reg_ok_strict))
9789    return 1;
9790  if (TARGET_UPDATE
9791      && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9792      && mode_supports_pre_incdec_p (mode)
9793      && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9794    return 1;
9795
9796  /* Handle prefixed addresses (PC-relative or 34-bit offset).  */
9797  if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
9798    return 1;
9799
9800  /* Handle restricted vector d-form offsets in ISA 3.0.  */
9801  if (quad_offset_p)
9802    {
9803      if (quad_address_p (x, mode, reg_ok_strict))
9804	return 1;
9805    }
9806  else if (virtual_stack_registers_memory_p (x))
9807    return 1;
9808
9809  else if (reg_offset_p)
9810    {
9811      if (legitimate_small_data_p (mode, x))
9812	return 1;
9813      if (legitimate_constant_pool_address_p (x, mode,
9814					     reg_ok_strict || lra_in_progress))
9815	return 1;
9816    }
9817
9818  /* For TImode, if we have TImode in VSX registers, only allow register
9819     indirect addresses.  This will allow the values to go in either GPRs
9820     or VSX registers without reloading.  The vector types would tend to
9821     go into VSX registers, so we allow REG+REG, while TImode seems
9822     somewhat split, in that some uses are GPR based, and some VSX based.  */
9823  /* FIXME: We could loosen this by changing the following to
9824       if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9825     but currently we cannot allow REG+REG addressing for TImode.  See
9826     PR72827 for complete details on how this ends up hoodwinking DSE.  */
9827  if (mode == TImode && TARGET_VSX)
9828    return 0;
  /* If not REG_OK_STRICT (i.e. before reload), let any stack offset pass.  */
9830  if (! reg_ok_strict
9831      && reg_offset_p
9832      && GET_CODE (x) == PLUS
9833      && REG_P (XEXP (x, 0))
9834      && (XEXP (x, 0) == virtual_stack_vars_rtx
9835	  || XEXP (x, 0) == arg_pointer_rtx)
9836      && CONST_INT_P (XEXP (x, 1)))
9837    return 1;
9838  if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9839    return 1;
9840  if (!FLOAT128_2REG_P (mode)
9841      && (TARGET_HARD_FLOAT
9842	  || TARGET_POWERPC64
9843	  || (mode != DFmode && mode != DDmode))
9844      && (TARGET_POWERPC64 || mode != DImode)
9845      && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9846      && mode != PTImode
9847      && !avoiding_indexed_address_p (mode)
9848      && legitimate_indexed_address_p (x, reg_ok_strict))
9849    return 1;
9850  if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9851      && mode_supports_pre_modify_p (mode)
9852      && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9853      && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9854					      reg_ok_strict, false)
9855	  || (!avoiding_indexed_address_p (mode)
9856	      && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9857      && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9858    {
9859      /* There is no prefixed version of the load/store with update.  */
9860      rtx addr = XEXP (x, 1);
9861      return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
9862    }
9863  if (reg_offset_p && !quad_offset_p
9864      && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9865    return 1;
9866  return 0;
9867}
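
/* Illustrative (schematic, not compiled) examples of forms the function
   above accepts for DImode on a 64-bit target:

     (reg 9)                          ; register indirect
     (plus (reg 9) (const_int 8))     ; 16-bit signed (DS-form) offset
     (plus (reg 9) (reg 10))          ; indexed, reg + reg
     (pre_inc (reg 9))                ; auto-increment, with -mupdate

   whereas something like (plus (reg 9) (const_int 100000)) is only
   accepted when prefixed (34-bit offset) instructions are available,
   via address_is_prefixed.  */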
9868
9869/* Debug version of rs6000_legitimate_address_p.  */
9870static bool
9871rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9872				   bool reg_ok_strict)
9873{
9874  bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9875  fprintf (stderr,
9876	   "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9877	   "strict = %d, reload = %s, code = %s\n",
9878	   ret ? "true" : "false",
9879	   GET_MODE_NAME (mode),
9880	   reg_ok_strict,
9881	   (reload_completed ? "after" : "before"),
9882	   GET_RTX_NAME (GET_CODE (x)));
9883  debug_rtx (x);
9884
9885  return ret;
9886}
9887
9888/* Implement TARGET_MODE_DEPENDENT_ADDRESS_P.  */
9889
9890static bool
9891rs6000_mode_dependent_address_p (const_rtx addr,
9892				 addr_space_t as ATTRIBUTE_UNUSED)
9893{
9894  return rs6000_mode_dependent_address_ptr (addr);
9895}
9896
/* Return true if ADDR (a legitimate address expression)
   has an effect that depends on the machine mode it is used for.

   On the RS/6000 this is true of all integral offsets (since AltiVec
   and VSX modes don't allow them) and of pre-increment and
   pre-decrement addresses.
9902
9903   ??? Except that due to conceptual problems in offsettable_address_p
9904   we can't really report the problems of integral offsets.  So leave
9905   this assuming that the adjustable offset must be valid for the
9906   sub-words of a TFmode operand, which is what we had before.  */
9907
9908static bool
9909rs6000_mode_dependent_address (const_rtx addr)
9910{
9911  switch (GET_CODE (addr))
9912    {
9913    case PLUS:
9914      /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9915	 is considered a legitimate address before reload, so there
9916	 are no offset restrictions in that case.  Note that this
9917	 condition is safe in strict mode because any address involving
9918	 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9919	 been rejected as illegitimate.  */
9920      if (XEXP (addr, 0) != virtual_stack_vars_rtx
9921	  && XEXP (addr, 0) != arg_pointer_rtx
9922	  && CONST_INT_P (XEXP (addr, 1)))
9923	{
9924	  HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9925	  HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
9926	  if (TARGET_PREFIXED)
9927	    return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
9928	  else
9929	    return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
9930	}
9931      break;
9932
9933    case LO_SUM:
9934      /* Anything in the constant pool is sufficiently aligned that
9935	 all bytes have the same high part address.  */
9936      return !legitimate_constant_pool_address_p (addr, QImode, false);
9937
9938    /* Auto-increment cases are now treated generically in recog.cc.  */
9939    case PRE_MODIFY:
9940      return TARGET_UPDATE;
9941
9942    /* AND is only allowed in Altivec loads.  */
9943    case AND:
9944      return true;
9945
9946    default:
9947      break;
9948    }
9949
9950  return false;
9951}
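
/* Worked example (illustrative): on a 32-bit target without prefixed
   instructions, ADDR = (plus (reg 31) (const_int 32760)) gives
   val = 32760 and extra = 12, and since 32760 + 12 > 32767 the PLUS
   case above reports the address as mode-dependent: it is fine for
   SImode, but the last word of a 16-byte access would need a
   displacement beyond the 16-bit range.  */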
9952
9953/* Debug version of rs6000_mode_dependent_address.  */
9954static bool
9955rs6000_debug_mode_dependent_address (const_rtx addr)
9956{
9957  bool ret = rs6000_mode_dependent_address (addr);
9958
9959  fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9960	   ret ? "true" : "false");
9961  debug_rtx (addr);
9962
9963  return ret;
9964}
9965
9966/* Implement FIND_BASE_TERM.  */
9967
9968rtx
9969rs6000_find_base_term (rtx op)
9970{
9971  rtx base;
9972
9973  base = op;
9974  if (GET_CODE (base) == CONST)
9975    base = XEXP (base, 0);
9976  if (GET_CODE (base) == PLUS)
9977    base = XEXP (base, 0);
9978  if (GET_CODE (base) == UNSPEC)
9979    switch (XINT (base, 1))
9980      {
9981      case UNSPEC_TOCREL:
9982      case UNSPEC_MACHOPIC_OFFSET:
9983	/* OP represents SYM [+ OFFSET] - ANCHOR.  SYM is the base term
9984	   for aliasing purposes.  */
9985	return XVECEXP (base, 0, 0);
9986      }
9987
9988  return op;
9989}
9990
9991/* More elaborate version of recog's offsettable_memref_p predicate
9992   that works around the ??? note of rs6000_mode_dependent_address.
9993   In particular it accepts
9994
9995     (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9996
   in 32-bit mode, which the recog predicate rejects.  */
9998
9999static bool
10000rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
10001{
10002  bool worst_case;
10003
10004  if (!MEM_P (op))
10005    return false;
10006
10007  /* First mimic offsettable_memref_p.  */
10008  if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
10009    return true;
10010
10011  /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10012     the latter predicate knows nothing about the mode of the memory
10013     reference and, therefore, assumes that it is the largest supported
10014     mode (TFmode).  As a consequence, legitimate offsettable memory
10015     references are rejected.  rs6000_legitimate_offset_address_p contains
10016     the correct logic for the PLUS case of rs6000_mode_dependent_address,
10017     at least with a little bit of help here given that we know the
10018     actual registers used.  */
10019  worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10020		|| GET_MODE_SIZE (reg_mode) == 4);
10021  return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10022					     strict, worst_case);
10023}
10024
10025/* Determine the reassociation width to be used in reassociate_bb.
10026   This takes into account how many parallel operations we
10027   can actually do of a given type, and also the latency.
10028   P8:
10029     int add/sub 6/cycle
10030         mul 2/cycle
10031     vect add/sub/mul 2/cycle
10032     fp   add/sub/mul 2/cycle
10033     dfp  1/cycle
10034*/
10035
10036static int
10037rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10038                            machine_mode mode)
10039{
10040  switch (rs6000_tune)
10041    {
10042    case PROCESSOR_POWER8:
10043    case PROCESSOR_POWER9:
10044    case PROCESSOR_POWER10:
10045      if (DECIMAL_FLOAT_MODE_P (mode))
10046	return 1;
10047      if (VECTOR_MODE_P (mode))
10048	return 4;
10049      if (INTEGRAL_MODE_P (mode))
10050	return 1;
10051      if (FLOAT_MODE_P (mode))
10052	return 4;
10053      break;
10054    default:
10055      break;
10056    }
10057  return 1;
10058}
10059
10060/* Change register usage conditional on target flags.  */
10061static void
10062rs6000_conditional_register_usage (void)
10063{
10064  int i;
10065
10066  if (TARGET_DEBUG_TARGET)
10067    fprintf (stderr, "rs6000_conditional_register_usage called\n");
10068
10069  /* 64-bit AIX and Linux reserve GPR13 for thread-private data.  */
10070  if (TARGET_64BIT)
10071    fixed_regs[13] = call_used_regs[13] = 1;
10072
10073  /* Conditionally disable FPRs.  */
10074  if (TARGET_SOFT_FLOAT)
10075    for (i = 32; i < 64; i++)
10076      fixed_regs[i] = call_used_regs[i] = 1;
10077
10078  /* The TOC register is not killed across calls in a way that is
10079     visible to the compiler.  */
10080  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10081    call_used_regs[2] = 0;
10082
10083  if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10084    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10085
10086  if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10087    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10088      = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10089
10090  if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10091    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10092      = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10093
10094  if (TARGET_TOC && TARGET_MINIMAL_TOC)
10095    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10096
10097  if (!TARGET_ALTIVEC && !TARGET_VSX)
10098    {
10099      for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10100	fixed_regs[i] = call_used_regs[i] = 1;
10101      call_used_regs[VRSAVE_REGNO] = 1;
10102    }
10103
10104  if (TARGET_ALTIVEC || TARGET_VSX)
10105    global_regs[VSCR_REGNO] = 1;
10106
10107  if (TARGET_ALTIVEC_ABI)
10108    {
10109      for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10110	call_used_regs[i] = 1;
10111
10112      /* AIX reserves VR20:31 in non-extended ABI mode.  */
10113      if (TARGET_XCOFF && !rs6000_aix_extabi)
10114	for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10115	  fixed_regs[i] = call_used_regs[i] = 1;
10116    }
10117}
10118
10119
10120/* Output insns to set DEST equal to the constant SOURCE as a series of
10121   lis, ori and shl instructions and return TRUE.  */
10122
10123bool
10124rs6000_emit_set_const (rtx dest, rtx source)
10125{
10126  machine_mode mode = GET_MODE (dest);
10127  rtx temp, set;
10128  rtx_insn *insn;
10129  HOST_WIDE_INT c;
10130
10131  gcc_checking_assert (CONST_INT_P (source));
10132  c = INTVAL (source);
10133  switch (mode)
10134    {
10135    case E_QImode:
10136    case E_HImode:
10137      emit_insn (gen_rtx_SET (dest, source));
10138      return true;
10139
10140    case E_SImode:
10141      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10142
10143      emit_insn (gen_rtx_SET (copy_rtx (temp),
10144			      GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10145      emit_insn (gen_rtx_SET (dest,
10146			      gen_rtx_IOR (SImode, copy_rtx (temp),
10147					   GEN_INT (c & 0xffff))));
10148      break;
10149
10150    case E_DImode:
10151      if (!TARGET_POWERPC64)
10152	{
10153	  rtx hi, lo;
10154
10155	  hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
10156				      DImode);
10157	  lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
10158				      DImode);
10159	  emit_move_insn (hi, GEN_INT (c >> 32));
10160	  c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
10161	  emit_move_insn (lo, GEN_INT (c));
10162	}
10163      else
10164	rs6000_emit_set_long_const (dest, c);
10165      break;
10166
10167    default:
10168      gcc_unreachable ();
10169    }
10170
10171  insn = get_last_insn ();
10172  set = single_set (insn);
10173  if (! CONSTANT_P (SET_SRC (set)))
10174    set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10175
10176  return true;
10177}
10178
10179/* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10180   Output insns to set DEST equal to the constant C as a series of
10181   lis, ori and shl instructions.  */
10182
10183static void
10184rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10185{
10186  rtx temp;
10187  HOST_WIDE_INT ud1, ud2, ud3, ud4;
10188
10189  ud1 = c & 0xffff;
10190  c = c >> 16;
10191  ud2 = c & 0xffff;
10192  c = c >> 16;
10193  ud3 = c & 0xffff;
10194  c = c >> 16;
10195  ud4 = c & 0xffff;
10196
10197  if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10198      || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10199    emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
10200
10201  else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10202	   || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10203    {
10204      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10205
10206      emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10207		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10208      if (ud1 != 0)
10209	emit_move_insn (dest,
10210			gen_rtx_IOR (DImode, copy_rtx (temp),
10211				     GEN_INT (ud1)));
10212    }
10213  else if (ud3 == 0 && ud4 == 0)
10214    {
10215      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10216
10217      gcc_assert (ud2 & 0x8000);
10218      emit_move_insn (copy_rtx (temp),
10219		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10220      if (ud1 != 0)
10221	emit_move_insn (copy_rtx (temp),
10222			gen_rtx_IOR (DImode, copy_rtx (temp),
10223				     GEN_INT (ud1)));
10224      emit_move_insn (dest,
10225		      gen_rtx_ZERO_EXTEND (DImode,
10226					   gen_lowpart (SImode,
10227							copy_rtx (temp))));
10228    }
10229  else if (ud1 == ud3 && ud2 == ud4)
10230    {
10231      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10232      HOST_WIDE_INT num = (ud2 << 16) | ud1;
10233      rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
10234      rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
10235      rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
10236      emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
10237    }
10238  else if ((ud4 == 0xffff && (ud3 & 0x8000))
10239	   || (ud4 == 0 && ! (ud3 & 0x8000)))
10240    {
10241      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10242
10243      emit_move_insn (copy_rtx (temp),
10244		      GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10245      if (ud2 != 0)
10246	emit_move_insn (copy_rtx (temp),
10247			gen_rtx_IOR (DImode, copy_rtx (temp),
10248				     GEN_INT (ud2)));
10249      emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10250		      gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10251				      GEN_INT (16)));
10252      if (ud1 != 0)
10253	emit_move_insn (dest,
10254			gen_rtx_IOR (DImode, copy_rtx (temp),
10255				     GEN_INT (ud1)));
10256    }
10257  else
10258    {
10259      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10260
10261      emit_move_insn (copy_rtx (temp),
10262		      GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10263      if (ud3 != 0)
10264	emit_move_insn (copy_rtx (temp),
10265			gen_rtx_IOR (DImode, copy_rtx (temp),
10266				     GEN_INT (ud3)));
10267
10268      emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10269		      gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10270				      GEN_INT (32)));
10271      if (ud2 != 0)
10272	emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10273			gen_rtx_IOR (DImode, copy_rtx (temp),
10274				     GEN_INT (ud2 << 16)));
10275      if (ud1 != 0)
10276	emit_move_insn (dest,
10277			gen_rtx_IOR (DImode, copy_rtx (temp),
10278				     GEN_INT (ud1)));
10279    }
10280}
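
/* Illustrative worst-case expansion of the above, for
   c = 0x123456789abcdef0 (all four halfwords distinct):

     lis   rT,0x1234       ; load ud4, shifted, sign-extended
     ori   rT,rT,0x5678    ; or in ud3
     sldi  rT,rT,32        ; shift into the high 32 bits
     oris  rT,rT,0x9abc    ; or in ud2 << 16
     ori   rD,rT,0xdef0    ; or in ud1, writing the destination

   The earlier special cases shorten this when the upper halfwords are
   all zeros, all ones, or repeat the lower halfwords.  */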
10281
/* Helper for the following.  Get rid of [r+r] memory refs
   in cases where they won't work (TImode, TFmode, TDmode, PTImode).  */
10284
10285static void
10286rs6000_eliminate_indexed_memrefs (rtx operands[2])
10287{
10288  if (MEM_P (operands[0])
10289      && !REG_P (XEXP (operands[0], 0))
10290      && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10291					       GET_MODE (operands[0]), false))
10292    operands[0]
10293      = replace_equiv_address (operands[0],
10294			       copy_addr_to_reg (XEXP (operands[0], 0)));
10295
10296  if (MEM_P (operands[1])
10297      && !REG_P (XEXP (operands[1], 0))
10298      && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10299					       GET_MODE (operands[1]), false))
10300    operands[1]
10301      = replace_equiv_address (operands[1],
10302			       copy_addr_to_reg (XEXP (operands[1], 0)));
10303}
10304
10305/* Generate a vector of constants to permute MODE for a little-endian
10306   storage operation by swapping the two halves of a vector.  */
10307static rtvec
10308rs6000_const_vec (machine_mode mode)
10309{
10310  int i, subparts;
10311  rtvec v;
10312
10313  switch (mode)
10314    {
10315    case E_V1TImode:
10316      subparts = 1;
10317      break;
10318    case E_V2DFmode:
10319    case E_V2DImode:
10320      subparts = 2;
10321      break;
10322    case E_V4SFmode:
10323    case E_V4SImode:
10324      subparts = 4;
10325      break;
10326    case E_V8HImode:
10327      subparts = 8;
10328      break;
10329    case E_V16QImode:
10330      subparts = 16;
10331      break;
10332    default:
      gcc_unreachable ();
10334    }
10335
10336  v = rtvec_alloc (subparts);
10337
10338  for (i = 0; i < subparts / 2; ++i)
10339    RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10340  for (i = subparts / 2; i < subparts; ++i)
10341    RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10342
10343  return v;
10344}
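
/* For example (illustrative), for V4SImode the selector built above is
   { 2, 3, 0, 1 }, and for V16QImode it is { 8, ..., 15, 0, ..., 7 };
   in both cases a VEC_SELECT with it swaps the two 64-bit halves.  */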
10345
10346/* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10347   store operation.  */
10348void
10349rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
10350{
10351  gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode));
10352  gcc_assert (!altivec_indexed_or_indirect_operand (source, mode));
10353
  /* Scalar permutations are easier to express in integer modes than in
     floating-point modes, so cast them here.  We use V1TImode instead
10356     of TImode to ensure that the values don't go through GPRs.  */
10357  if (FLOAT128_VECTOR_P (mode))
10358    {
10359      dest = gen_lowpart (V1TImode, dest);
10360      source = gen_lowpart (V1TImode, source);
10361      mode = V1TImode;
10362    }
10363
10364  /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10365     scalar.  */
10366  if (mode == TImode || mode == V1TImode)
10367    emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
10368						  GEN_INT (64))));
10369  else
10370    {
10371      rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10372      emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
10373    }
10374}
10375
10376/* Emit a little-endian load from vector memory location SOURCE to VSX
   register DEST in mode MODE.  The load is done with two permuting
   insns that represent an lxvd2x and an xxpermdi.  */
10379void
10380rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10381{
  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
     V1TImode).  */
10384  if (mode == TImode || mode == V1TImode)
10385    {
10386      mode = V2DImode;
10387      dest = gen_lowpart (V2DImode, dest);
10388      source = adjust_address (source, V2DImode, 0);
10389    }
10390
10391  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10392  rs6000_emit_le_vsx_permute (tmp, source, mode);
10393  rs6000_emit_le_vsx_permute (dest, tmp, mode);
10394}
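
/* An illustrative sketch of the resulting code for a V4SImode load on
   little-endian power8, where lxvd2x loads the two doublewords in
   big-endian order:

     lxvd2x   vsT,0,rADDR      ; doubleword-swapped load
     xxpermdi vsD,vsT,vsT,2    ; swap the halves back

   The store path below is the mirror image: xxpermdi, then stxvd2x.  */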
10395
10396/* Emit a little-endian store to vector memory location DEST from VSX
   register SOURCE in mode MODE.  The store is done with two permuting
   insns that represent an xxpermdi and an stxvd2x.  */
10399void
10400rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10401{
10402  /* This should never be called after LRA.  */
10403  gcc_assert (can_create_pseudo_p ());
10404
10405  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10406     V1TImode).  */
10407  if (mode == TImode || mode == V1TImode)
10408    {
10409      mode = V2DImode;
10410      dest = adjust_address (dest, V2DImode, 0);
10411      source = gen_lowpart (V2DImode, source);
10412    }
10413
10414  rtx tmp = gen_reg_rtx_and_attrs (source);
10415  rs6000_emit_le_vsx_permute (tmp, source, mode);
10416  rs6000_emit_le_vsx_permute (dest, tmp, mode);
10417}
10418
10419/* Emit a sequence representing a little-endian VSX load or store,
10420   moving data from SOURCE to DEST in mode MODE.  This is done
10421   separately from rs6000_emit_move to ensure it is called only
10422   during expand.  LE VSX loads and stores introduced later are
10423   handled with a split.  The expand-time RTL generation allows
10424   us to optimize away redundant pairs of register-permutes.  */
10425void
10426rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10427{
10428  gcc_assert (!BYTES_BIG_ENDIAN
10429	      && VECTOR_MEM_VSX_P (mode)
10430	      && !TARGET_P9_VECTOR
10431	      && !gpr_or_gpr_p (dest, source)
10432	      && (MEM_P (source) ^ MEM_P (dest)));
10433
10434  if (MEM_P (source))
10435    {
10436      gcc_assert (REG_P (dest) || SUBREG_P (dest));
10437      rs6000_emit_le_vsx_load (dest, source, mode);
10438    }
10439  else
10440    {
10441      if (!REG_P (source))
10442	source = force_reg (mode, source);
10443      rs6000_emit_le_vsx_store (dest, source, mode);
10444    }
10445}
10446
/* Return whether an SFmode or SImode move can be done without converting one
   mode to another.  This arises when we have:
10449
10450	(SUBREG:SF (REG:SI ...))
10451	(SUBREG:SI (REG:SF ...))
10452
10453   and one of the values is in a floating point/vector register, where SFmode
10454   scalars are stored in DFmode format.  */
10455
10456bool
10457valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10458{
10459  if (TARGET_ALLOW_SF_SUBREG)
10460    return true;
10461
10462  if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10463    return true;
10464
10465  if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10466    return true;
10467
  /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))).  */
10469  if (SUBREG_P (dest))
10470    {
10471      rtx dest_subreg = SUBREG_REG (dest);
10472      rtx src_subreg = SUBREG_REG (src);
10473      return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10474    }
10475
10476  return false;
10477}
10478
10479
10480/* Helper function to change moves with:
10481
10482	(SUBREG:SF (REG:SI)) and
10483	(SUBREG:SI (REG:SF))
10484
10485   into separate UNSPEC insns.  In the PowerPC architecture, scalar SFmode
10486   values are stored as DFmode values in the VSX registers.  We need to convert
10487   the bits before we can use a direct move or operate on the bits in the
10488   vector register as an integer type.
10489
   Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))).  */
10491
10492static bool
10493rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10494{
10495  if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
10496      && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10497      && SUBREG_P (source) && sf_subreg_operand (source, mode))
10498    {
10499      rtx inner_source = SUBREG_REG (source);
10500      machine_mode inner_mode = GET_MODE (inner_source);
10501
10502      if (mode == SImode && inner_mode == SFmode)
10503	{
10504	  emit_insn (gen_movsi_from_sf (dest, inner_source));
10505	  return true;
10506	}
10507
10508      if (mode == SFmode && inner_mode == SImode)
10509	{
10510	  emit_insn (gen_movsf_from_si (dest, inner_source));
10511	  return true;
10512	}
10513    }
10514
10515  return false;
10516}
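
/* For example (illustrative), a move such as

     (set (reg:SI 3) (subreg:SI (reg:SF 33) 0))

   is rewritten into a movsi_from_sf insn, which first converts the
   DFmode-format value held in the VSX register to SFmode bits, rather
   than emitting a plain subreg copy.  */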
10517
10518/* Emit a move from SOURCE to DEST in mode MODE.  */
10519void
10520rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10521{
10522  rtx operands[2];
10523  operands[0] = dest;
10524  operands[1] = source;
10525
10526  if (TARGET_DEBUG_ADDR)
10527    {
10528      fprintf (stderr,
10529	       "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10530	       "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10531	       GET_MODE_NAME (mode),
10532	       lra_in_progress,
10533	       reload_completed,
10534	       can_create_pseudo_p ());
10535      debug_rtx (dest);
10536      fprintf (stderr, "source:\n");
10537      debug_rtx (source);
10538    }
10539
10540  /* Check that we get CONST_WIDE_INT only when we should.  */
10541  if (CONST_WIDE_INT_P (operands[1])
10542      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10543    gcc_unreachable ();
10544
10545#ifdef HAVE_AS_GNU_ATTRIBUTE
  /* If we use a long double type, set the flags in .gnu_attribute that say
     what the long double type is.  This is to allow the linker's warning
     message for the wrong long double to be useful, even if the function does
     not do a call (for example, doing a 128-bit add on power9 if the long
     double type is IEEE 128-bit).  Do not set this if __ibm128 or __float128
     are used and they aren't the default long double type.  */
10552  if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10553    {
10554      if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10555	rs6000_passes_float = rs6000_passes_long_double = true;
10556
10557      else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10558	rs6000_passes_float = rs6000_passes_long_double = true;
10559    }
10560#endif
10561
10562  /* See if we need to special case SImode/SFmode SUBREG moves.  */
10563  if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10564      && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10565    return;
10566
10567  /* Check if GCC is setting up a block move that will end up using FP
10568     registers as temporaries.  We must make sure this is acceptable.  */
10569  if (MEM_P (operands[0])
10570      && MEM_P (operands[1])
10571      && mode == DImode
10572      && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10573	  || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10574      && ! (rs6000_slow_unaligned_access (SImode,
10575					  (MEM_ALIGN (operands[0]) > 32
10576					   ? 32 : MEM_ALIGN (operands[0])))
10577	    || rs6000_slow_unaligned_access (SImode,
10578					     (MEM_ALIGN (operands[1]) > 32
10579					      ? 32 : MEM_ALIGN (operands[1]))))
10580      && ! MEM_VOLATILE_P (operands [0])
10581      && ! MEM_VOLATILE_P (operands [1]))
10582    {
10583      emit_move_insn (adjust_address (operands[0], SImode, 0),
10584		      adjust_address (operands[1], SImode, 0));
10585      emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10586		      adjust_address (copy_rtx (operands[1]), SImode, 4));
10587      return;
10588    }
10589
10590  if (can_create_pseudo_p () && MEM_P (operands[0])
10591      && !gpc_reg_operand (operands[1], mode))
10592    operands[1] = force_reg (mode, operands[1]);
10593
10594  /* Recognize the case where operand[1] is a reference to thread-local
10595     data and load its address to a register.  */
10596  if (tls_referenced_p (operands[1]))
10597    {
10598      enum tls_model model;
10599      rtx tmp = operands[1];
10600      rtx addend = NULL;
10601
10602      if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10603	{
10604          addend = XEXP (XEXP (tmp, 0), 1);
10605	  tmp = XEXP (XEXP (tmp, 0), 0);
10606	}
10607
10608      gcc_assert (SYMBOL_REF_P (tmp));
10609      model = SYMBOL_REF_TLS_MODEL (tmp);
10610      gcc_assert (model != 0);
10611
10612      tmp = rs6000_legitimize_tls_address (tmp, model);
10613      if (addend)
10614	{
10615	  tmp = gen_rtx_PLUS (mode, tmp, addend);
10616	  tmp = force_operand (tmp, operands[0]);
10617	}
10618      operands[1] = tmp;
10619    }
10620
10621  /* 128-bit constant floating-point values on Darwin should really be loaded
10622     as two parts.  However, this premature splitting is a problem when DFmode
10623     values can go into Altivec registers.  */
10624  if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
10625      && !reg_addr[DFmode].scalar_in_vmx_p)
10626    {
10627      rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10628			simplify_gen_subreg (DFmode, operands[1], mode, 0),
10629			DFmode);
10630      rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10631					     GET_MODE_SIZE (DFmode)),
10632			simplify_gen_subreg (DFmode, operands[1], mode,
10633					     GET_MODE_SIZE (DFmode)),
10634			DFmode);
10635      return;
10636    }
10637
10638  /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10639     p1:SD) if p1 is not of floating point class and p0 is spilled as
10640     we can have no analogous movsd_store for this.  */
10641  if (lra_in_progress && mode == DDmode
10642      && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10643      && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10644      && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
10645      && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10646    {
10647      enum reg_class cl;
10648      int regno = REGNO (SUBREG_REG (operands[1]));
10649
10650      if (!HARD_REGISTER_NUM_P (regno))
10651	{
10652	  cl = reg_preferred_class (regno);
10653	  regno = reg_renumber[regno];
10654	  if (regno < 0)
10655	    regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10656	}
10657      if (regno >= 0 && ! FP_REGNO_P (regno))
10658	{
10659	  mode = SDmode;
10660	  operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10661	  operands[1] = SUBREG_REG (operands[1]);
10662	}
10663    }
10664  if (lra_in_progress
10665      && mode == SDmode
10666      && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10667      && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10668      && (REG_P (operands[1])
10669	  || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
10670    {
10671      int regno = reg_or_subregno (operands[1]);
10672      enum reg_class cl;
10673
10674      if (!HARD_REGISTER_NUM_P (regno))
10675	{
10676	  cl = reg_preferred_class (regno);
10677	  gcc_assert (cl != NO_REGS);
10678	  regno = reg_renumber[regno];
10679	  if (regno < 0)
10680	    regno = ira_class_hard_regs[cl][0];
10681	}
10682      if (FP_REGNO_P (regno))
10683	{
10684	  if (GET_MODE (operands[0]) != DDmode)
10685	    operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10686	  emit_insn (gen_movsd_store (operands[0], operands[1]));
10687	}
10688      else if (INT_REGNO_P (regno))
10689	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10690      else
	gcc_unreachable ();
10692      return;
10693    }
  /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
     p1:DD)) if p0 is not of floating point class and p1 is spilled as
     we can have no analogous movsd_load for this.  */
10697  if (lra_in_progress && mode == DDmode
10698      && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
10699      && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10700      && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10701      && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10702    {
10703      enum reg_class cl;
10704      int regno = REGNO (SUBREG_REG (operands[0]));
10705
10706      if (!HARD_REGISTER_NUM_P (regno))
10707	{
10708	  cl = reg_preferred_class (regno);
10709	  regno = reg_renumber[regno];
10710	  if (regno < 0)
10711	    regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10712	}
10713      if (regno >= 0 && ! FP_REGNO_P (regno))
10714	{
10715	  mode = SDmode;
10716	  operands[0] = SUBREG_REG (operands[0]);
10717	  operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10718	}
10719    }
10720  if (lra_in_progress
10721      && mode == SDmode
10722      && (REG_P (operands[0])
10723	  || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
10724      && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10725      && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10726    {
10727      int regno = reg_or_subregno (operands[0]);
10728      enum reg_class cl;
10729
10730      if (!HARD_REGISTER_NUM_P (regno))
10731	{
10732	  cl = reg_preferred_class (regno);
10733	  gcc_assert (cl != NO_REGS);
10734	  regno = reg_renumber[regno];
10735	  if (regno < 0)
10736	    regno = ira_class_hard_regs[cl][0];
10737	}
10738      if (FP_REGNO_P (regno))
10739	{
10740	  if (GET_MODE (operands[1]) != DDmode)
10741	    operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10742	  emit_insn (gen_movsd_load (operands[0], operands[1]));
10743	}
10744      else if (INT_REGNO_P (regno))
10745	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10746      else
	gcc_unreachable ();
10748      return;
10749    }
10750
10751  /* FIXME:  In the long term, this switch statement should go away
10752     and be replaced by a sequence of tests based on things like
10753     mode == Pmode.  */
10754  switch (mode)
10755    {
10756    case E_HImode:
10757    case E_QImode:
10758      if (CONSTANT_P (operands[1])
10759	  && !CONST_INT_P (operands[1]))
10760	operands[1] = force_const_mem (mode, operands[1]);
10761      break;
10762
10763    case E_TFmode:
10764    case E_TDmode:
10765    case E_IFmode:
10766    case E_KFmode:
10767      if (FLOAT128_2REG_P (mode))
10768	rs6000_eliminate_indexed_memrefs (operands);
10769      /* fall through */
10770
10771    case E_DFmode:
10772    case E_DDmode:
10773    case E_SFmode:
10774    case E_SDmode:
10775      if (CONSTANT_P (operands[1])
10776	  && ! easy_fp_constant (operands[1], mode))
10777	operands[1] = force_const_mem (mode, operands[1]);
10778      break;
10779
10780    case E_V16QImode:
10781    case E_V8HImode:
10782    case E_V4SFmode:
10783    case E_V4SImode:
10784    case E_V2DFmode:
10785    case E_V2DImode:
10786    case E_V1TImode:
10787      if (CONSTANT_P (operands[1])
10788	  && !easy_vector_constant (operands[1], mode))
10789	operands[1] = force_const_mem (mode, operands[1]);
10790      break;
10791
10792    case E_OOmode:
10793    case E_XOmode:
10794      if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
10795	error ("%qs is an opaque type, and you cannot set it to other values",
10796	       (mode == OOmode) ? "__vector_pair" : "__vector_quad");
10797      break;
10798
10799    case E_SImode:
10800    case E_DImode:
      /* Use the default pattern for the address of ELF small data.  */
10802      if (TARGET_ELF
10803	  && mode == Pmode
10804	  && DEFAULT_ABI == ABI_V4
10805	  && (SYMBOL_REF_P (operands[1])
10806	      || GET_CODE (operands[1]) == CONST)
10807	  && small_data_operand (operands[1], mode))
10808	{
10809	  emit_insn (gen_rtx_SET (operands[0], operands[1]));
10810	  return;
10811	}
10812
10813      /* Use the default pattern for loading up PC-relative addresses.  */
10814      if (TARGET_PCREL && mode == Pmode
10815	  && pcrel_local_or_external_address (operands[1], Pmode))
10816	{
10817	  emit_insn (gen_rtx_SET (operands[0], operands[1]));
10818	  return;
10819	}
10820
10821      if (DEFAULT_ABI == ABI_V4
10822	  && mode == Pmode && mode == SImode
10823	  && flag_pic == 1 && got_operand (operands[1], mode))
10824	{
10825	  emit_insn (gen_movsi_got (operands[0], operands[1]));
10826	  return;
10827	}
10828
10829      if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10830	  && TARGET_NO_TOC_OR_PCREL
10831	  && ! flag_pic
10832	  && mode == Pmode
10833	  && CONSTANT_P (operands[1])
10834	  && GET_CODE (operands[1]) != HIGH
10835	  && !CONST_INT_P (operands[1]))
10836	{
10837	  rtx target = (!can_create_pseudo_p ()
10838			? operands[0]
10839			: gen_reg_rtx (mode));
10840
10841	  /* If this is a function address on -mcall-aixdesc,
10842	     convert it to the address of the descriptor.  */
10843	  if (DEFAULT_ABI == ABI_AIX
10844	      && SYMBOL_REF_P (operands[1])
10845	      && XSTR (operands[1], 0)[0] == '.')
10846	    {
10847	      const char *name = XSTR (operands[1], 0);
10848	      rtx new_ref;
10849	      while (*name == '.')
10850		name++;
10851	      new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10852	      CONSTANT_POOL_ADDRESS_P (new_ref)
10853		= CONSTANT_POOL_ADDRESS_P (operands[1]);
10854	      SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10855	      SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10856	      SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10857	      operands[1] = new_ref;
10858	    }
10859
10860	  if (DEFAULT_ABI == ABI_DARWIN)
10861	    {
10862#if TARGET_MACHO
10863	      /* This is not PIC code, but could require the subset of
10864		 indirections used by mdynamic-no-pic.  */
10865	      if (MACHO_DYNAMIC_NO_PIC_P)
10866		{
10867		  /* Take care of any required data indirection.  */
10868		  operands[1] = rs6000_machopic_legitimize_pic_address (
10869				  operands[1], mode, operands[0]);
10870		  if (operands[0] != operands[1])
10871		    emit_insn (gen_rtx_SET (operands[0], operands[1]));
10872		  return;
10873		}
10874#endif
10875	      emit_insn (gen_macho_high (Pmode, target, operands[1]));
10876	      emit_insn (gen_macho_low (Pmode, operands[0],
10877					target, operands[1]));
10878	      return;
10879	    }
10880
10881	  emit_insn (gen_elf_high (target, operands[1]));
10882	  emit_insn (gen_elf_low (operands[0], target, operands[1]));
10883	  return;
10884	}
10885
10886      /* If this is a SYMBOL_REF that refers to a constant pool entry,
10887	 and we have put it in the TOC, we just need to make a TOC-relative
10888	 reference to it.  */
10889      if (TARGET_TOC
10890	  && SYMBOL_REF_P (operands[1])
10891	  && use_toc_relative_ref (operands[1], mode))
10892	operands[1] = create_TOC_reference (operands[1], operands[0]);
10893      else if (mode == Pmode
10894	       && CONSTANT_P (operands[1])
10895	       && GET_CODE (operands[1]) != HIGH
10896	       && ((REG_P (operands[0])
10897		    && FP_REGNO_P (REGNO (operands[0])))
10898		   || !CONST_INT_P (operands[1])
10899		   || (num_insns_constant (operands[1], mode)
10900		       > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10901	       && !toc_relative_expr_p (operands[1], false, NULL, NULL)
10902	       && (TARGET_CMODEL == CMODEL_SMALL
10903		   || can_create_pseudo_p ()
10904		   || (REG_P (operands[0])
10905		       && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10906	{
10907
10908#if TARGET_MACHO
10909	  /* Darwin uses a special PIC legitimizer.  */
10910	  if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10911	    {
10912	      operands[1] =
10913		rs6000_machopic_legitimize_pic_address (operands[1], mode,
10914							operands[0]);
10915	      if (operands[0] != operands[1])
10916		emit_insn (gen_rtx_SET (operands[0], operands[1]));
10917	      return;
10918	    }
10919#endif
10920
10921	  /* If we are to limit the number of things we put in the TOC and
10922	     this is a symbol plus a constant we can add in one insn,
10923	     just put the symbol in the TOC and add the constant.  */
10924	  if (GET_CODE (operands[1]) == CONST
10925	      && TARGET_NO_SUM_IN_TOC
10926	      && GET_CODE (XEXP (operands[1], 0)) == PLUS
10927	      && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10928	      && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10929		  || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
10930	      && ! side_effects_p (operands[0]))
10931	    {
10932	      rtx sym =
10933		force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10934	      rtx other = XEXP (XEXP (operands[1], 0), 1);
10935
10936	      sym = force_reg (mode, sym);
10937	      emit_insn (gen_add3_insn (operands[0], sym, other));
10938	      return;
10939	    }
10940
10941	  operands[1] = force_const_mem (mode, operands[1]);
10942
10943	  if (TARGET_TOC
10944	      && SYMBOL_REF_P (XEXP (operands[1], 0))
10945	      && use_toc_relative_ref (XEXP (operands[1], 0), mode))
10946	    {
10947	      rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10948						 operands[0]);
10949	      operands[1] = gen_const_mem (mode, tocref);
10950	      set_mem_alias_set (operands[1], get_TOC_alias_set ());
10951	    }
10952	}
10953      break;
10954
10955    case E_TImode:
10956      if (!VECTOR_MEM_VSX_P (TImode))
10957	rs6000_eliminate_indexed_memrefs (operands);
10958      break;
10959
10960    case E_PTImode:
10961      rs6000_eliminate_indexed_memrefs (operands);
10962      break;
10963
10964    default:
10965      fatal_insn ("bad move", gen_rtx_SET (dest, source));
10966    }
10967
10968  /* Above, we may have called force_const_mem which may have returned
10969     an invalid address.  If we can, fix this up; otherwise, reload will
10970     have to deal with it.  */
10971  if (MEM_P (operands[1]))
10972    operands[1] = validize_mem (operands[1]);
10973
10974  emit_insn (gen_rtx_SET (operands[0], operands[1]));
10975}
10976
10977
10978/* Set up AIX/Darwin/64-bit Linux quad floating point routines.  */
10979static void
10980init_float128_ibm (machine_mode mode)
10981{
10982  if (!TARGET_XL_COMPAT)
10983    {
10984      set_optab_libfunc (add_optab, mode, "__gcc_qadd");
10985      set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
10986      set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
10987      set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
10988
10989      if (!TARGET_HARD_FLOAT)
10990	{
10991	  set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
10992	  set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
10993	  set_optab_libfunc (ne_optab, mode, "__gcc_qne");
10994	  set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
10995	  set_optab_libfunc (ge_optab, mode, "__gcc_qge");
10996	  set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
10997	  set_optab_libfunc (le_optab, mode, "__gcc_qle");
10998	  set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
10999
11000	  set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
11001	  set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
11002	  set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
11003	  set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
11004	  set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
11005	  set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
11006	  set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
11007	  set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
11008	}
11009    }
11010  else
11011    {
11012      set_optab_libfunc (add_optab, mode, "_xlqadd");
11013      set_optab_libfunc (sub_optab, mode, "_xlqsub");
11014      set_optab_libfunc (smul_optab, mode, "_xlqmul");
11015      set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
11016    }
11017
11018  /* Add various conversions for IFmode to use the traditional TFmode
11019     names.  */
11020  if (mode == IFmode)
11021    {
11022      set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
11023      set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
11024      set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
11025      set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
11026      set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
11027      set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
11028
11029      set_conv_libfunc (sfix_optab, DImode, mode, "__fixtfdi");
11030      set_conv_libfunc (ufix_optab, DImode, mode, "__fixunstfdi");
11031
11032      set_conv_libfunc (sfloat_optab, mode, DImode, "__floatditf");
11033      set_conv_libfunc (ufloat_optab, mode, DImode, "__floatunditf");
11034
11035      if (TARGET_POWERPC64)
11036	{
11037	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
11038	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
11039	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
11040	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
11041	}
11042    }
11043}
11044
11045/* Set up IEEE 128-bit floating point routines.  Use different names if the
11046   arguments can be passed in a vector register.  The historical PowerPC
11047   implementation of IEEE 128-bit floating point used _q_<op> for the names, so
11048   continue to use that if we aren't using vector registers to pass IEEE
11049   128-bit floating point.  */
11050
11051static void
11052init_float128_ieee (machine_mode mode)
11053{
11054  if (FLOAT128_VECTOR_P (mode))
11055    {
11056      set_optab_libfunc (add_optab, mode, "__addkf3");
11057      set_optab_libfunc (sub_optab, mode, "__subkf3");
11058      set_optab_libfunc (neg_optab, mode, "__negkf2");
11059      set_optab_libfunc (smul_optab, mode, "__mulkf3");
11060      set_optab_libfunc (sdiv_optab, mode, "__divkf3");
11061      set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
11062      set_optab_libfunc (abs_optab, mode, "__abskf2");
11063      set_optab_libfunc (powi_optab, mode, "__powikf2");
11064
11065      set_optab_libfunc (eq_optab, mode, "__eqkf2");
11066      set_optab_libfunc (ne_optab, mode, "__nekf2");
11067      set_optab_libfunc (gt_optab, mode, "__gtkf2");
11068      set_optab_libfunc (ge_optab, mode, "__gekf2");
11069      set_optab_libfunc (lt_optab, mode, "__ltkf2");
11070      set_optab_libfunc (le_optab, mode, "__lekf2");
11071      set_optab_libfunc (unord_optab, mode, "__unordkf2");
11072
11073      set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
11074      set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
11075      set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
11076      set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
11077
11078      set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
11079      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11080	set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
11081
11082      set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
11083      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11084	set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
11085
11086      set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
11087      set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
11088      set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
11089      set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
11090      set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
11091      set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
11092
11093      set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
11094      set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
11095      set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
11096      set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
11097
11098      set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
11099      set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
11100      set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
11101      set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
11102
11103      if (TARGET_POWERPC64)
11104	{
11105	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti_sw");
11106	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti_sw");
11107	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf_sw");
11108	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf_sw");
11109	}
11110    }
11111
11112  else
11113    {
11114      set_optab_libfunc (add_optab, mode, "_q_add");
11115      set_optab_libfunc (sub_optab, mode, "_q_sub");
11116      set_optab_libfunc (neg_optab, mode, "_q_neg");
11117      set_optab_libfunc (smul_optab, mode, "_q_mul");
11118      set_optab_libfunc (sdiv_optab, mode, "_q_div");
11119      if (TARGET_PPC_GPOPT)
11120	set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
11121
11122      set_optab_libfunc (eq_optab, mode, "_q_feq");
11123      set_optab_libfunc (ne_optab, mode, "_q_fne");
11124      set_optab_libfunc (gt_optab, mode, "_q_fgt");
11125      set_optab_libfunc (ge_optab, mode, "_q_fge");
11126      set_optab_libfunc (lt_optab, mode, "_q_flt");
11127      set_optab_libfunc (le_optab, mode, "_q_fle");
11128
11129      set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
11130      set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
11131      set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
11132      set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
11133      set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
11134      set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
11135      set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
11136      set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
11137    }
11138}
11139
11140static void
11141rs6000_init_libfuncs (void)
11142{
11143  /* __float128 support.  */
11144  if (TARGET_FLOAT128_TYPE)
11145    {
11146      init_float128_ibm (IFmode);
11147      init_float128_ieee (KFmode);
11148    }
11149
11150  /* AIX/Darwin/64-bit Linux quad floating point routines.  */
11151  if (TARGET_LONG_DOUBLE_128)
11152    {
11153      if (!TARGET_IEEEQUAD)
11154	init_float128_ibm (TFmode);
11155
11156      /* IEEE 128-bit including 32-bit SVR4 quad floating point routines.  */
11157      else
11158	init_float128_ieee (TFmode);
11159    }
11160}
11161
11162/* Emit a potentially record-form instruction, setting DST from SRC.
11163   If DOT is 0, that is all; otherwise, set CCREG to the result of the
11164   signed comparison of DST with zero.  If DOT is 1, the generated RTL
11165   doesn't care about the DST result; if DOT is 2, it does.  If CCREG
11166   is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
11167   a separate COMPARE.  */
11168
11169void
11170rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
11171{
11172  if (dot == 0)
11173    {
11174      emit_move_insn (dst, src);
11175      return;
11176    }
11177
11178  if (cc_reg_not_cr0_operand (ccreg, CCmode))
11179    {
11180      emit_move_insn (dst, src);
11181      emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
11182      return;
11183    }
11184
11185  rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
11186  if (dot == 1)
11187    {
11188      rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
11189      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
11190    }
11191  else
11192    {
11193      rtx set = gen_rtx_SET (dst, src);
11194      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
11195    }
11196}
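
/* Illustrative RTL shapes of the above with CCREG being CR0: for
   DOT == 1 we emit

     (parallel [(set (reg:CC CR0) (compare:CC (src) (const_int 0)))
                (clobber (dst))])

   while for DOT == 2 the clobber is replaced by (set (dst) (src)),
   so the DST result stays live as well.  */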
11197
11198
11199/* A validation routine: say whether CODE, a condition code, and MODE
11200   match.  The other alternatives either don't make sense or should
11201   never be generated.  */
11202
11203void
11204validate_condition_mode (enum rtx_code code, machine_mode mode)
11205{
11206  gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
11207	       || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
11208	      && GET_MODE_CLASS (mode) == MODE_CC);
11209
11210  /* These don't make sense.  */
11211  gcc_assert ((code != GT && code != LT && code != GE && code != LE)
11212	      || mode != CCUNSmode);
11213
11214  gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
11215	      || mode == CCUNSmode);
11216
11217  gcc_assert (mode == CCFPmode
11218	      || (code != ORDERED && code != UNORDERED
11219		  && code != UNEQ && code != LTGT
11220		  && code != UNGT && code != UNLT
11221		  && code != UNGE && code != UNLE));
11222
11223  /* These are invalid; the information is not there.  */
11224  gcc_assert (mode != CCEQmode || code == EQ || code == NE);
11225}
11226
11227
11228/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
11229   rldicl, rldicr, or rldic instruction in mode MODE.  If so, if E is
11230   not zero, store there the bit offset (counted from the right) where
11231   the single stretch of 1 bits begins; and similarly for B, the bit
11232   offset where it ends.  */
11233
11234bool
11235rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
11236{
11237  unsigned HOST_WIDE_INT val = INTVAL (mask);
11238  unsigned HOST_WIDE_INT bit;
11239  int nb, ne;
11240  int n = GET_MODE_PRECISION (mode);
11241
11242  if (mode != DImode && mode != SImode)
11243    return false;
11244
11245  if (INTVAL (mask) >= 0)
11246    {
11247      bit = val & -val;
11248      ne = exact_log2 (bit);
11249      nb = exact_log2 (val + bit);
11250    }
11251  else if (val + 1 == 0)
11252    {
11253      nb = n;
11254      ne = 0;
11255    }
11256  else if (val & 1)
11257    {
11258      val = ~val;
11259      bit = val & -val;
11260      nb = exact_log2 (bit);
11261      ne = exact_log2 (val + bit);
11262    }
11263  else
11264    {
11265      bit = val & -val;
11266      ne = exact_log2 (bit);
11267      if (val + bit == 0)
11268	nb = n;
11269      else
11270	nb = 0;
11271    }
11272
11273  nb--;
11274
11275  if (nb < 0 || ne < 0 || nb >= n || ne >= n)
11276    return false;
11277
11278  if (b)
11279    *b = nb;
11280  if (e)
11281    *e = ne;
11282
11283  return true;
11284}
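
/* Worked examples (illustrative), in SImode: MASK = 0x00ff0000 yields
   *E = 16 and *B = 23 (a single stretch of ones in bits 16..23); the
   wrap-around MASK = 0xff0000ff yields *E = 24 and *B = 7 (ones from
   bit 24 wrapping through bit 0 up to bit 7); MASK = 0x00ff00ff has
   two separate stretches of ones and is rejected.  */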
11285
11286bool
11287rs6000_is_valid_rotate_dot_mask (rtx mask, machine_mode mode)
11288{
11289  int nb, ne;
11290  if (rs6000_is_valid_mask (mask, &nb, &ne, mode) && nb >= ne && ne > 0)
11291    {
11292      if (TARGET_64BIT)
11293	return true;
11294      /* *rotldi3_mask_dot requires for -m32 -mpowerpc64 that the mask is
11295	 <= 0x7fffffff.  */
11296      return (UINTVAL (mask) << (63 - nb)) <= 0x7fffffff;
11297    }
11298
11299  return false;
11300}
11301
11302/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
11303   or rldicr instruction, to implement an AND with it in mode MODE.  */
11304
11305bool
11306rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
11307{
11308  int nb, ne;
11309
11310  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11311    return false;
11312
11313  /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
11314     does not wrap.  */
11315  if (mode == DImode)
11316    return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
11317
11318  /* For SImode, rlwinm can do everything.  */
11319  if (mode == SImode)
11320    return (nb < 32 && ne < 32);
11321
11322  return false;
11323}
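
/* For instance (illustrative), in DImode MASK = 0xffff000000000000
   (ne == 48, nb == 63) can be done with a single rldicr, while
   MASK = 0x0000ffffffff0000 (ne == 16, nb == 47) is contiguous but
   fits none of the three forms above, so it is rejected.  */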
11324
11325/* Return the instruction template for an AND with mask in mode MODE, with
11326   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */
11327
11328const char *
11329rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
11330{
11331  int nb, ne;
11332
11333  if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
11334    gcc_unreachable ();
11335
11336  if (mode == DImode && ne == 0)
11337    {
11338      operands[3] = GEN_INT (63 - nb);
11339      if (dot)
11340	return "rldicl. %0,%1,0,%3";
11341      return "rldicl %0,%1,0,%3";
11342    }
11343
11344  if (mode == DImode && nb == 63)
11345    {
11346      operands[3] = GEN_INT (63 - ne);
11347      if (dot)
11348	return "rldicr. %0,%1,0,%3";
11349      return "rldicr %0,%1,0,%3";
11350    }
11351
11352  if (nb < 32 && ne < 32)
11353    {
11354      operands[3] = GEN_INT (31 - nb);
11355      operands[4] = GEN_INT (31 - ne);
11356      if (dot)
11357	return "rlwinm. %0,%1,0,%3,%4";
11358      return "rlwinm %0,%1,0,%3,%4";
11359    }
11360
11361  gcc_unreachable ();
11362}
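
/* Example (illustrative): an SImode AND with mask 0xfffffff0 has
   nb == 31 and ne == 4, so the rlwinm arm above yields
   "rlwinm %0,%1,0,0,27", clearing just the low four bits.  */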

/* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
   rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
   shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE.  */

bool
rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  int n = GET_MODE_PRECISION (mode);
  int sh = -1;

  if (CONST_INT_P (XEXP (shift, 1)))
    {
      sh = INTVAL (XEXP (shift, 1));
      if (sh < 0 || sh >= n)
	return false;
    }

  rtx_code code = GET_CODE (shift);

  /* Convert any shift by 0 to a rotate, to simplify the code below.  */
  if (sh == 0)
    code = ROTATE;

  /* Convert rotate to simple shift if we can, to make analysis simpler.  */
  if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
    code = ASHIFT;
  if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
    {
      code = LSHIFTRT;
      sh = n - sh;
    }

  /* DImode rotates need rld*.  */
  if (mode == DImode && code == ROTATE)
    return (nb == 63 || ne == 0 || ne == sh);

  /* SImode rotates need rlw*.  */
  if (mode == SImode && code == ROTATE)
    return (nb < 32 && ne < 32 && sh < 32);

  /* Wrap-around masks are only okay for rotates.  */
  if (ne > nb)
    return false;

  /* Variable shifts are only okay for rotates.  */
  if (sh < 0)
    return false;

  /* Don't allow ASHIFT if the mask is wrong for that.  */
  if (code == ASHIFT && ne < sh)
    return false;

  /* If we can do it with an rlw*, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (nb < 32 && ne < 32 && sh < 32
      && !(code == LSHIFTRT && nb >= 32 - sh))
    return true;

  /* If we can do it with an rld*, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (code == LSHIFTRT)
    sh = 64 - sh;
  if (nb == 63 || ne == 0 || ne == sh)
    return !(code == LSHIFTRT && nb >= sh);

  return false;
}
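
/* For example (illustrative only): an SImode ASHIFT by 8 under the mask
   0xffffff00 has sh == 8, nb == 31, ne == 8; since ne >= sh and the mask
   fits in 32 bits, it is accepted and can be done as a single rlwinm.  */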

/* Return the instruction template for a shift with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */

const char *
rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
    gcc_unreachable ();

  if (mode == DImode && ne == 0)
    {
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (64 - INTVAL (operands[2]));
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rld%I2cl. %0,%1,%2,%3";
      return "rld%I2cl %0,%1,%2,%3";
    }

  if (mode == DImode && nb == 63)
    {
      operands[3] = GEN_INT (63 - ne);
      if (dot)
	return "rld%I2cr. %0,%1,%2,%3";
      return "rld%I2cr %0,%1,%2,%3";
    }

  if (mode == DImode
      && GET_CODE (operands[4]) != LSHIFTRT
      && CONST_INT_P (operands[2])
      && ne == INTVAL (operands[2]))
    {
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rld%I2c. %0,%1,%2,%3";
      return "rld%I2c %0,%1,%2,%3";
    }

  if (nb < 32 && ne < 32)
    {
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
      operands[3] = GEN_INT (31 - nb);
      operands[4] = GEN_INT (31 - ne);
      /* This insn can also be a 64-bit rotate with a mask that really makes
	 it just a shift right (with mask); the %h below adjusts for that
	 situation (the shift count is >= 32 in that case).  */
      if (dot)
	return "rlw%I2nm. %0,%1,%h2,%3,%4";
      return "rlw%I2nm %0,%1,%h2,%3,%4";
    }

  gcc_unreachable ();
}

/* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
   rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
   ASHIFT, or LSHIFTRT) in mode MODE.  */

bool
rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  int n = GET_MODE_PRECISION (mode);

  int sh = INTVAL (XEXP (shift, 1));
  if (sh < 0 || sh >= n)
    return false;

  rtx_code code = GET_CODE (shift);

  /* Convert any shift by 0 to a rotate, to simplify the code below.  */
  if (sh == 0)
    code = ROTATE;

  /* Convert rotate to simple shift if we can, to make analysis simpler.  */
  if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
    code = ASHIFT;
  if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
    {
      code = LSHIFTRT;
      sh = n - sh;
    }

  /* DImode rotates need rldimi.  */
  if (mode == DImode && code == ROTATE)
    return (ne == sh);

  /* SImode rotates need rlwimi.  */
  if (mode == SImode && code == ROTATE)
    return (nb < 32 && ne < 32 && sh < 32);

  /* Wrap-around masks are only okay for rotates.  */
  if (ne > nb)
    return false;

  /* Don't allow ASHIFT if the mask is wrong for that.  */
  if (code == ASHIFT && ne < sh)
    return false;

  /* If we can do it with an rlwimi, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (nb < 32 && ne < 32 && sh < 32
      && !(code == LSHIFTRT && nb >= 32 - sh))
    return true;

  /* If we can do it with an rldimi, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (code == LSHIFTRT)
    sh = 64 - sh;
  if (ne == sh)
    return !(code == LSHIFTRT && nb >= sh);

  return false;
}
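
/* For example: a DImode ASHIFT by 32 under the mask 0xffffffff00000000
   has sh == 32, nb == 63, ne == 32; ne == sh, so it is accepted and can
   be implemented with a single rldimi.  */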

/* Return the instruction template for an insert with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */

const char *
rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
    gcc_unreachable ();

  /* Prefer rldimi because rlwimi is cracked.  */
  if (TARGET_POWERPC64
      && (!dot || mode == DImode)
      && GET_CODE (operands[4]) != LSHIFTRT
      && ne == INTVAL (operands[2]))
    {
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rldimi. %0,%1,%2,%3";
      return "rldimi %0,%1,%2,%3";
    }

  if (nb < 32 && ne < 32)
    {
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
      operands[3] = GEN_INT (31 - nb);
      operands[4] = GEN_INT (31 - ne);
      if (dot)
	return "rlwimi. %0,%1,%2,%3,%4";
      return "rlwimi %0,%1,%2,%3,%4";
    }

  gcc_unreachable ();
}

/* Return whether an AND with C (a CONST_INT) in mode MODE can be done
   using two machine instructions.  */

bool
rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
{
  /* There are two kinds of AND we can handle with two insns:
     1) those we can do with two rl* insns;
     2) ori[s];xori[s].

     We do not handle that last case yet.  */

  /* If there is just one stretch of ones, we can do it.  */
  if (rs6000_is_valid_mask (c, NULL, NULL, mode))
    return true;

  /* Otherwise, fill in the lowest "hole"; if we can do the result with
     one insn, we can do the whole thing with two.  */
  unsigned HOST_WIDE_INT val = INTVAL (c);
  unsigned HOST_WIDE_INT bit1 = val & -val;
  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
  unsigned HOST_WIDE_INT bit3 = val1 & -val1;
  return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
}
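
/* A worked example of the hole-filling arithmetic above (illustrative
   only): for c = 0xff00ff00 (runs of ones at bits 8..15 and 24..31),
   bit1 = 0x100, bit2 = 0x10000 (the bottom of the lowest hole), and
   bit3 = 0x1000000 (the first one-bit above that hole), so the candidate
   mask is val + bit3 - bit2 = 0xffffff00, i.e. the value with its lowest
   hole filled in.  That is a single run of ones, so this AND can be done
   in two insns.  */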

/* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
   If EXPAND is true, split rotate-and-mask instructions we generate to
   their constituent parts as well (this is used during expand); if DOT
   is 1, make the last insn a record-form instruction clobbering the
   destination GPR and setting the CC reg (from operands[3]); if 2, set
   that GPR as well as the CC reg.  */

void
rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
{
  gcc_assert (!(expand && dot));

  unsigned HOST_WIDE_INT val = INTVAL (operands[2]);

  /* If it is one stretch of ones, it is DImode; shift left, mask, then
     shift right.  This generates better code than doing the masks without
     shifts, or shifting first right and then left.  */
  int nb, ne;
  if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
    {
      gcc_assert (mode == DImode);

      int shift = 63 - nb;
      if (expand)
	{
	  rtx tmp1 = gen_reg_rtx (DImode);
	  rtx tmp2 = gen_reg_rtx (DImode);
	  emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
	  emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
	  emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
	}
      else
	{
	  rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
	  tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
	  emit_move_insn (operands[0], tmp);
	  tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
	  rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
	}
      return;
    }

  /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
     that does the rest.  */
  unsigned HOST_WIDE_INT bit1 = val & -val;
  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
  unsigned HOST_WIDE_INT bit3 = val1 & -val1;

  unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
  unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;

  gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));

  /* Two "no-rotate"-and-mask instructions, for SImode.  */
  if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
    {
      gcc_assert (mode == SImode);

      rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
      rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
      emit_move_insn (reg, tmp);
      tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
      return;
    }

  gcc_assert (mode == DImode);

  /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
     insns; we have to do the first in SImode, because it wraps.  */
  if (mask2 <= 0xffffffff
      && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
    {
      rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
      rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
			     GEN_INT (mask1));
      rtx reg_low = gen_lowpart (SImode, reg);
      emit_move_insn (reg_low, tmp);
      tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
      return;
    }

  /* Two rld* insns: rotate, clear the hole in the middle (which now is
     at the top end), rotate back and clear the other hole.  */
  int right = exact_log2 (bit3);
  int left = 64 - right;

  /* Rotate the mask too.  */
  mask1 = (mask1 >> right) | ((bit2 - 1) << left);

  if (expand)
    {
      rtx tmp1 = gen_reg_rtx (DImode);
      rtx tmp2 = gen_reg_rtx (DImode);
      rtx tmp3 = gen_reg_rtx (DImode);
      emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
      emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
      emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
      emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
    }
  else
    {
      rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
      tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
      emit_move_insn (operands[0], tmp);
      tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
      tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
    }
}
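
/* As a rough example of the final two-rld* case: for
   val = 0xff0000000000ff00 we get bit3 = 0x0100000000000000, so
   right = 56 and left = 8, and the emitted sequence is a rotldi by 8,
   an rldicl keeping the low 24 bits (the rotated mask1 = 0xffffff),
   a rotldi by 56 to rotate back, and an rldicr clearing the low 8 bits
   (mask2 = 0xffffffffffffff00).  */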

/* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
   for lfq and stfq insns iff the registers are hard registers.  */

int
registers_ok_for_quad_peep (rtx reg1, rtx reg2)
{
  /* We might have been passed a SUBREG.  */
  if (!REG_P (reg1) || !REG_P (reg2))
    return 0;

  /* We might have been passed non floating point registers.  */
  if (!FP_REGNO_P (REGNO (reg1))
      || !FP_REGNO_P (REGNO (reg2)))
    return 0;

  return (REGNO (reg1) == REGNO (reg2) - 1);
}

/* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
   addr1 and addr2 must be in consecutive memory locations
   (addr2 == addr1 + 8).  */

int
mems_ok_for_quad_peep (rtx mem1, rtx mem2)
{
  rtx addr1, addr2;
  unsigned int reg1, reg2;
  int offset1, offset2;

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
    return 0;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  /* Extract an offset (if used) from the first addr.  */
  if (GET_CODE (addr1) == PLUS)
    {
      /* If not a REG, return zero.  */
      if (!REG_P (XEXP (addr1, 0)))
	return 0;
      else
	{
	  reg1 = REGNO (XEXP (addr1, 0));
	  /* The offset must be constant!  */
	  if (!CONST_INT_P (XEXP (addr1, 1)))
	    return 0;
	  offset1 = INTVAL (XEXP (addr1, 1));
	}
    }
  else if (!REG_P (addr1))
    return 0;
  else
    {
      reg1 = REGNO (addr1);
      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
      offset1 = 0;
    }

  /* And now for the second addr.  */
  if (GET_CODE (addr2) == PLUS)
    {
      /* If not a REG, return zero.  */
      if (!REG_P (XEXP (addr2, 0)))
	return 0;
      else
	{
	  reg2 = REGNO (XEXP (addr2, 0));
	  /* The offset must be constant.  */
	  if (!CONST_INT_P (XEXP (addr2, 1)))
	    return 0;
	  offset2 = INTVAL (XEXP (addr2, 1));
	}
    }
  else if (!REG_P (addr2))
    return 0;
  else
    {
      reg2 = REGNO (addr2);
      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
      offset2 = 0;
    }

  /* Both of these must have the same base register.  */
  if (reg1 != reg2)
    return 0;

  /* The offset for the second addr must be 8 more than the first addr.  */
  if (offset2 != offset1 + 8)
    return 0;

  /* All the tests passed.  addr1 and addr2 are valid for lfq or stfq
     instructions.  */
  return 1;
}
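
/* For example, a pair of DFmode mems with addresses (plus (reg 9)
   (const_int 16)) and (plus (reg 9) (const_int 24)) shares the base
   register and the offsets differ by 8, so the pair passes; swapping
   the two offsets would fail the offset2 == offset1 + 8 test.  */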

/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.  For SDmode values we
   need to use DDmode; in all other cases we can use the same mode.  */
static machine_mode
rs6000_secondary_memory_needed_mode (machine_mode mode)
{
  if (lra_in_progress && mode == SDmode)
    return DDmode;
  return mode;
}

/* Classify a register type.  Because the FMRGOW/FMRGEW instructions only work
   on traditional floating point registers, and the VMRGOW/VMRGEW instructions
   only work on the traditional altivec registers, note if an altivec register
   was chosen.  */

static enum rs6000_reg_type
register_to_reg_type (rtx reg, bool *is_altivec)
{
  HOST_WIDE_INT regno;
  enum reg_class rclass;

  if (SUBREG_P (reg))
    reg = SUBREG_REG (reg);

  if (!REG_P (reg))
    return NO_REG_TYPE;

  regno = REGNO (reg);
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!lra_in_progress && !reload_completed)
	return PSEUDO_REG_TYPE;

      regno = true_regnum (reg);
      if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
	return PSEUDO_REG_TYPE;
    }

  gcc_assert (regno >= 0);

  if (is_altivec && ALTIVEC_REGNO_P (regno))
    *is_altivec = true;

  rclass = rs6000_regno_regclass[regno];
  return reg_class_to_reg_type[(int)rclass];
}

/* Helper function to return the cost of adding a TOC entry address.  */

static inline int
rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
{
  int ret;

  if (TARGET_CMODEL != CMODEL_SMALL)
    ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;

  else
    ret = (TARGET_MINIMAL_TOC) ? 6 : 3;

  return ret;
}

/* Helper function for rs6000_secondary_reload to determine whether the memory
   address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
   needs reloading.  Return negative if the memory is not handled by the memory
   helper functions and to try a different reload method, 0 if no additional
   instructions are needed, and positive to give the extra cost for the
   memory.  */

static int
rs6000_secondary_reload_memory (rtx addr,
				enum reg_class rclass,
				machine_mode mode)
{
  int extra_cost = 0;
  rtx reg, and_arg, plus_arg0, plus_arg1;
  addr_mask_type addr_mask;
  const char *type = NULL;
  const char *fail_msg = NULL;

  if (GPR_REG_CLASS_P (rclass))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];

  else if (rclass == FLOAT_REGS)
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];

  else if (rclass == ALTIVEC_REGS)
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];

  /* For the combined VSX_REGS, turn off Altivec AND -16.  */
  else if (rclass == VSX_REGS)
    addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
		 & ~RELOAD_REG_AND_M16);

  /* If the register allocator hasn't made up its mind yet on the register
     class to use, settle on defaults to use.  */
  else if (rclass == NO_REGS)
    {
      addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
		   & ~RELOAD_REG_AND_M16);

      if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
	addr_mask &= ~(RELOAD_REG_INDEXED
		       | RELOAD_REG_PRE_INCDEC
		       | RELOAD_REG_PRE_MODIFY);
    }

  else
    addr_mask = 0;

  /* If the register isn't valid in this register class, just return now.  */
  if ((addr_mask & RELOAD_REG_VALID) == 0)
    {
      if (TARGET_DEBUG_ADDR)
	{
	  fprintf (stderr,
		   "rs6000_secondary_reload_memory: mode = %s, class = %s, "
		   "not valid in class\n",
		   GET_MODE_NAME (mode), reg_class_names[rclass]);
	  debug_rtx (addr);
	}

      return -1;
    }

  switch (GET_CODE (addr))
    {
      /* Does the register class support auto update forms for this mode?  We
	 don't need a scratch register, since the powerpc only supports
	 PRE_INC, PRE_DEC, and PRE_MODIFY.  */
    case PRE_INC:
    case PRE_DEC:
      reg = XEXP (addr, 0);
      if (!base_reg_operand (addr, GET_MODE (reg)))
	{
	  fail_msg = "no base register #1";
	  extra_cost = -1;
	}

      else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
	{
	  extra_cost = 1;
	  type = "update";
	}
      break;

    case PRE_MODIFY:
      reg = XEXP (addr, 0);
      plus_arg1 = XEXP (addr, 1);
      if (!base_reg_operand (reg, GET_MODE (reg))
	  || GET_CODE (plus_arg1) != PLUS
	  || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
	{
	  fail_msg = "bad PRE_MODIFY";
	  extra_cost = -1;
	}

      else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
	{
	  extra_cost = 1;
	  type = "update";
	}
      break;

      /* Do we need to simulate AND -16 to clear the bottom address bits used
	 in VMX load/stores?  Only allow the AND for vector sizes.  */
    case AND:
      and_arg = XEXP (addr, 0);
      if (GET_MODE_SIZE (mode) != 16
	  || !CONST_INT_P (XEXP (addr, 1))
	  || INTVAL (XEXP (addr, 1)) != -16)
	{
	  fail_msg = "bad Altivec AND #1";
	  extra_cost = -1;
	}

      if (rclass != ALTIVEC_REGS)
	{
	  if (legitimate_indirect_address_p (and_arg, false))
	    extra_cost = 1;

	  else if (legitimate_indexed_address_p (and_arg, false))
	    extra_cost = 2;

	  else
	    {
	      fail_msg = "bad Altivec AND #2";
	      extra_cost = -1;
	    }

	  type = "and";
	}
      break;

      /* If this is an indirect address, make sure it is a base register.  */
    case REG:
    case SUBREG:
      if (!legitimate_indirect_address_p (addr, false))
	{
	  extra_cost = 1;
	  type = "move";
	}
      break;

      /* If this is an indexed address, make sure the register class can handle
	 indexed addresses for this mode.  */
    case PLUS:
      plus_arg0 = XEXP (addr, 0);
      plus_arg1 = XEXP (addr, 1);

      /* (plus (plus (reg) (constant)) (constant)) is generated during
	 push_reload processing, so handle it now.  */
      if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      extra_cost = 1;
	      type = "offset";
	    }
	}

      /* (plus (plus (reg) (constant)) (reg)) is also generated during
	 push_reload processing, so handle it now.  */
      else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      extra_cost = 1;
	      type = "indexed #2";
	    }
	}

      else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
	{
	  fail_msg = "no base register #2";
	  extra_cost = -1;
	}

      else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0
	      || !legitimate_indexed_address_p (addr, false))
	    {
	      extra_cost = 1;
	      type = "indexed";
	    }
	}

      else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
	       && CONST_INT_P (plus_arg1))
	{
	  if (!quad_address_offset_p (INTVAL (plus_arg1)))
	    {
	      extra_cost = 1;
	      type = "vector d-form offset";
	    }
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      extra_cost = 1;
	      type = "offset #2";
	    }
	}

      else
	{
	  fail_msg = "bad PLUS";
	  extra_cost = -1;
	}

      break;

    case LO_SUM:
      /* Quad offsets are restricted and can't handle normal addresses.  */
      if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	{
	  extra_cost = -1;
	  type = "vector d-form lo_sum";
	}

      else if (!legitimate_lo_sum_address_p (mode, addr, false))
	{
	  fail_msg = "bad LO_SUM";
	  extra_cost = -1;
	}

      if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	{
	  extra_cost = 1;
	  type = "lo_sum";
	}
      break;

      /* Static addresses need to create a TOC entry.  */
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	{
	  extra_cost = -1;
	  type = "vector d-form lo_sum #2";
	}

      else
	{
	  type = "address";
	  extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
	}
      break;

      /* TOC references look like offsetable memory.  */
    case UNSPEC:
      if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
	{
	  fail_msg = "bad UNSPEC";
	  extra_cost = -1;
	}

      else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	{
	  extra_cost = -1;
	  type = "vector d-form lo_sum #3";
	}

      else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	{
	  extra_cost = 1;
	  type = "toc reference";
	}
      break;

    default:
	{
	  fail_msg = "bad address";
	  extra_cost = -1;
	}
    }

  if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
    {
      if (extra_cost < 0)
	fprintf (stderr,
		 "rs6000_secondary_reload_memory error: mode = %s, "
		 "class = %s, addr_mask = '%s', %s\n",
		 GET_MODE_NAME (mode),
		 reg_class_names[rclass],
		 rs6000_debug_addr_mask (addr_mask, false),
		 (fail_msg != NULL) ? fail_msg : "<bad address>");

      else
	fprintf (stderr,
		 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
		 "addr_mask = '%s', extra cost = %d, %s\n",
		 GET_MODE_NAME (mode),
		 reg_class_names[rclass],
		 rs6000_debug_addr_mask (addr_mask, false),
		 extra_cost,
		 (type) ? type : "<none>");

      debug_rtx (addr);
    }

  return extra_cost;
}

/* Helper function for rs6000_secondary_reload to return true if a move to a
   different register class is really a simple move.  */

static bool
rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
				     enum rs6000_reg_type from_type,
				     machine_mode mode)
{
  int size = GET_MODE_SIZE (mode);

  /* Add support for various direct moves available.  In this function, we only
     look at cases where we don't need any extra registers, and one or more
     simple move insns are issued.  Originally small integers are not allowed
     in FPR/VSX registers.  Single precision binary floating is not a simple
     move because we need to convert to the single precision memory layout.
     The 4-byte SDmode can be moved.  TDmode values are disallowed since they
     need special direct move handling, which we do not support yet.  */
  if (TARGET_DIRECT_MOVE
      && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	  || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
    {
      if (TARGET_POWERPC64)
	{
	  /* ISA 2.07: MTVSRD or MFVSRD.  */
	  if (size == 8)
	    return true;

	  /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD.  */
	  if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
	    return true;
	}

      /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
      if (TARGET_P8_VECTOR)
	{
	  if (mode == SImode)
	    return true;

	  if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
	    return true;
	}

      /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
      if (mode == SDmode)
	return true;
    }

  /* Move to/from SPR.  */
  else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
	   && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
	       || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
    return true;

  return false;
}
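
/* For example, a DImode move between a GPR and a VSX register on a 64-bit
   ISA 2.07 target is a simple move (a single mtvsrd or mfvsrd), whereas an
   SFmode move is not, since the value must be converted to or from the
   single precision memory layout as described above.  */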

/* Direct move helper function for rs6000_secondary_reload.  Handle all of the
   special direct moves that involve allocating an extra register; if there is
   a helper insn for the move, record its insn code and extra cost in SRI and
   return true, otherwise return false.  */

static bool
rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
				     enum rs6000_reg_type from_type,
				     machine_mode mode,
				     secondary_reload_info *sri,
				     bool altivec_p)
{
  bool ret = false;
  enum insn_code icode = CODE_FOR_nothing;
  int cost = 0;
  int size = GET_MODE_SIZE (mode);

  if (TARGET_POWERPC64 && size == 16)
    {
      /* Handle moving 128-bit values from GPRs to VSX registers on
	 ISA 2.07 (power8, power9) when running in 64-bit mode using
	 XXPERMDI to glue the two 64-bit values back together.  */
      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
	{
	  cost = 3;			/* 2 mtvsrd's, 1 xxpermdi.  */
	  icode = reg_addr[mode].reload_vsx_gpr;
	}

      /* Handle moving 128-bit values from VSX registers to GPRs on
	 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access
	 to the bottom 64-bit value.  */
      else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	{
	  cost = 3;			/* 2 mfvsrd's, 1 xxpermdi.  */
	  icode = reg_addr[mode].reload_gpr_vsx;
	}
    }

  else if (TARGET_POWERPC64 && mode == SFmode)
    {
      if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	{
	  cost = 3;			/* xscvdpspn, mfvsrd, and.  */
	  icode = reg_addr[mode].reload_gpr_vsx;
	}

      else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
	{
	  cost = 2;			/* mtvsrz, xscvspdpn.  */
	  icode = reg_addr[mode].reload_vsx_gpr;
	}
    }

  else if (!TARGET_POWERPC64 && size == 8)
    {
      /* Handle moving 64-bit values from GPRs to floating point registers on
	 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
	 32-bit values back together.  Altivec register classes must be handled
	 specially since a different instruction is used, and the secondary
	 reload support requires a single instruction class in the scratch
	 register constraint.  However, right now TFmode is not allowed in
	 Altivec registers, so the pattern will never match.  */
      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
	{
	  cost = 3;			/* 2 mtvsrwz's, 1 fmrgow.  */
	  icode = reg_addr[mode].reload_fpr_gpr;
	}
    }

  if (icode != CODE_FOR_nothing)
    {
      ret = true;
      if (sri)
	{
	  sri->icode = icode;
	  sri->extra_cost = cost;
	}
    }

  return ret;
}

/* Return whether a move between two register classes can be done either
   directly (simple move) or via a pattern that uses a single extra temporary
   (using ISA 2.07's direct move in this case).  */

static bool
rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
			      enum rs6000_reg_type from_type,
			      machine_mode mode,
			      secondary_reload_info *sri,
			      bool altivec_p)
{
  /* Fall back to load/store reloads if either type is not a register.  */
  if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
    return false;

  /* If we haven't allocated registers yet, assume the move can be done for the
     standard register types.  */
  if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
      || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
      || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
    return true;

  /* A move within the same set of registers is a simple move for
     non-specialized registers.  */
  if (to_type == from_type && IS_STD_REG_TYPE (to_type))
    return true;

  /* Check whether a simple move can be done directly.  */
  if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
    {
      if (sri)
	{
	  sri->icode = CODE_FOR_nothing;
	  sri->extra_cost = 0;
	}
      return true;
    }

  /* Now check if we can do it in a few steps.  */
  return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
					      altivec_p);
}

/* Inform reload about cases where moving X with a mode MODE to a register in
   RCLASS requires an extra scratch or immediate register.  Return the class
   needed for the immediate register.

   For VSX and Altivec, we may need a register to convert sp+offset into
   reg+sp.

   For misaligned 64-bit gpr loads and stores we need a register to
   convert an offset address to indirect.  */

static reg_class_t
rs6000_secondary_reload (bool in_p,
			 rtx x,
			 reg_class_t rclass_i,
			 machine_mode mode,
			 secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;
  reg_class_t ret = ALL_REGS;
  enum insn_code icode;
  bool default_p = false;
  bool done_p = false;

  /* Allow subreg of memory before/during reload.  */
  bool memory_p = (MEM_P (x)
		   || (!reload_completed && SUBREG_P (x)
		       && MEM_P (SUBREG_REG (x))));

  sri->icode = CODE_FOR_nothing;
  sri->t_icode = CODE_FOR_nothing;
  sri->extra_cost = 0;
  icode = ((in_p)
	   ? reg_addr[mode].reload_load
	   : reg_addr[mode].reload_store);

  if (REG_P (x) || register_operand (x, mode))
    {
      enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
      bool altivec_p = (rclass == ALTIVEC_REGS);
      enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);

      if (!in_p)
	std::swap (to_type, from_type);

      /* Can we do a direct move of some sort?  */
      if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
					altivec_p))
	{
	  icode = (enum insn_code)sri->icode;
	  default_p = false;
	  done_p = true;
	  ret = NO_REGS;
	}
    }

  /* Make sure 0.0 is not reloaded or forced into memory.  */
  if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
    {
      ret = NO_REGS;
      default_p = false;
      done_p = true;
    }

  /* If this is a scalar floating point value and we want to load it into the
     traditional Altivec registers, do it via a move via a traditional floating
     point register, unless we have D-form addressing.  Also make sure that
     non-zero constants use a FPR.  */
  if (!done_p && reg_addr[mode].scalar_in_vmx_p
      && !mode_supports_vmx_dform (mode)
      && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
      && (memory_p || CONST_DOUBLE_P (x)))
    {
      ret = FLOAT_REGS;
      default_p = false;
      done_p = true;
    }

  /* Handle reload of load/stores if we have reload helper functions.  */
  if (!done_p && icode != CODE_FOR_nothing && memory_p)
    {
      int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
						       mode);

      if (extra_cost >= 0)
	{
	  done_p = true;
	  ret = NO_REGS;
	  if (extra_cost > 0)
	    {
	      sri->extra_cost = extra_cost;
	      sri->icode = icode;
	    }
	}
    }

  /* Handle unaligned loads and stores of integer registers.  */
  if (!done_p && TARGET_POWERPC64
      && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
      && memory_p
      && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
    {
      rtx addr = XEXP (x, 0);
      rtx off = address_offset (addr);

      if (off != NULL_RTX)
	{
	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
	  unsigned HOST_WIDE_INT offset = INTVAL (off);

	  /* We need a secondary reload when our legitimate_address_p
	     says the address is good (as otherwise the entire address
	     will be reloaded), and the offset is not a multiple of
	     four or we have an address wrap.  Address wrap will only
	     occur for LO_SUMs since legitimate_offset_address_p
	     rejects addresses for 16-byte mems that will wrap.  */
	  if (GET_CODE (addr) == LO_SUM
	      ? (1 /* legitimate_address_p allows any offset for lo_sum */
		 && ((offset & 3) != 0
		     || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
	      : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
		 && (offset & 3) != 0))
	    {
	      /* -m32 -mpowerpc64 needs to use a 32-bit scratch register.  */
	      if (in_p)
		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
			      : CODE_FOR_reload_di_load);
	      else
		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
			      : CODE_FOR_reload_di_store);
	      sri->extra_cost = 2;
	      ret = NO_REGS;
	      done_p = true;
	    }
	  else
	    default_p = true;
	}
      else
	default_p = true;
    }

  if (!done_p && !TARGET_POWERPC64
      && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
      && memory_p
      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
    {
      rtx addr = XEXP (x, 0);
      rtx off = address_offset (addr);

      if (off != NULL_RTX)
	{
	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
	  unsigned HOST_WIDE_INT offset = INTVAL (off);

	  /* We need a secondary reload when our legitimate_address_p
	     says the address is good (as otherwise the entire address
	     will be reloaded), and we have a wrap.

	     legitimate_lo_sum_address_p allows LO_SUM addresses to
	     have any offset so test for wrap in the low 16 bits.

	     legitimate_offset_address_p checks for the range
	     [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
	     for mode size of 16.  We wrap at [0x7ffc,0x7fff] and
	     [0x7ff4,0x7fff] respectively, so test for the
	     intersection of these ranges, [0x7ffc,0x7fff] and
	     [0x7ff4,0x7ff7] respectively.

	     Note that the address we see here may have been
	     manipulated by legitimize_reload_address.  */
	  if (GET_CODE (addr) == LO_SUM
	      ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
	      : offset - (0x8000 - extra) < UNITS_PER_WORD)
	    {
	      if (in_p)
		sri->icode = CODE_FOR_reload_si_load;
	      else
		sri->icode = CODE_FOR_reload_si_store;
	      sri->extra_cost = 2;
	      ret = NO_REGS;
	      done_p = true;
	    }
	  else
	    default_p = true;
	}
      else
	default_p = true;
    }

  if (!done_p)
    default_p = true;

  if (default_p)
    ret = default_secondary_reload (in_p, x, rclass, mode, sri);

  gcc_assert (ret != ALL_REGS);

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
	       "mode = %s",
	       reg_class_names[ret],
	       in_p ? "true" : "false",
	       reg_class_names[rclass],
	       GET_MODE_NAME (mode));

      if (reload_completed)
	fputs (", after reload", stderr);

      if (!done_p)
	fputs (", done_p not set", stderr);

      if (default_p)
	fputs (", default secondary reload", stderr);

      if (sri->icode != CODE_FOR_nothing)
	fprintf (stderr, ", reload func = %s, extra cost = %d",
		 insn_data[sri->icode].name, sri->extra_cost);

      else if (sri->extra_cost > 0)
	fprintf (stderr, ", extra cost = %d", sri->extra_cost);

      fputs ("\n", stderr);
      debug_rtx (x);
    }

  return ret;
}

/* Better tracing for rs6000_secondary_reload_inner.  */

static void
rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
			       bool store_p)
{
  rtx set, clobber;

  gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);

  fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
	   store_p ? "store" : "load");

  if (store_p)
    set = gen_rtx_SET (mem, reg);
  else
    set = gen_rtx_SET (reg, mem);

  clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
  debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
}

static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
  ATTRIBUTE_NORETURN;

static void
rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
			      bool store_p)
{
  rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
  gcc_unreachable ();
}

/* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
   reload helper functions.  These were identified in
   rs6000_secondary_reload_memory, and if reload decided to use the secondary
   reload, it calls the insns:
	reload_<RELOAD:mode>_<P:mptrsize>_store
	reload_<RELOAD:mode>_<P:mptrsize>_load

   which in turn calls this function, to do whatever is necessary to create
   valid addresses.  */

void
rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
{
  int regno = true_regnum (reg);
  machine_mode mode = GET_MODE (reg);
  addr_mask_type addr_mask;
  rtx addr;
  rtx new_addr;
  rtx op_reg, op0, op1;
  rtx and_op;
  rtx cc_clobber;
  rtvec rv;

  if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
      || !base_reg_operand (scratch, GET_MODE (scratch)))
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];

  else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];

  else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];

  else
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  /* Make sure the mode is valid in this register class.  */
  if ((addr_mask & RELOAD_REG_VALID) == 0)
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  if (TARGET_DEBUG_ADDR)
    rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);

  new_addr = addr = XEXP (mem, 0);
  switch (GET_CODE (addr))
    {
      /* Does the register class support auto update forms for this mode?  If
	 not, do the update now.  We don't need a scratch register, since the
	 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY.  */
    case PRE_INC:
    case PRE_DEC:
      op_reg = XEXP (addr, 0);
      if (!base_reg_operand (op_reg, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
	{
	  int delta = GET_MODE_SIZE (mode);
	  if (GET_CODE (addr) == PRE_DEC)
	    delta = -delta;
	  emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
	  new_addr = op_reg;
	}
      break;

    case PRE_MODIFY:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode)
	  || GET_CODE (op1) != PLUS
	  || !rtx_equal_p (op0, XEXP (op1, 0)))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
	{
	  emit_insn (gen_rtx_SET (op0, op1));
	  new_addr = reg;
	}
      break;

      /* Do we need to simulate AND -16 to clear the bottom address bits used
	 in VMX load/stores?  */
    case AND:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if ((addr_mask & RELOAD_REG_AND_M16) == 0)
	{
	  if (REG_P (op0) || SUBREG_P (op0))
	    op_reg = op0;

	  else if (GET_CODE (op1) == PLUS)
	    {
	      emit_insn (gen_rtx_SET (scratch, op1));
	      op_reg = scratch;
	    }

	  else
	    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

	  and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
	  cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
	  rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
	  emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
	  new_addr = scratch;
	}
      break;

      /* If this is an indirect address, make sure it is a base register.  */
    case REG:
    case SUBREG:
      if (!base_reg_operand (addr, GET_MODE (addr)))
	{
	  emit_insn (gen_rtx_SET (scratch, addr));
	  new_addr = scratch;
	}
      break;

      /* If this is an indexed address, make sure the register class can handle
	 indexed addresses for this mode.  */
    case PLUS:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      else if (int_reg_operand (op1, Pmode))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
	{
	  if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
	      || !quad_address_p (addr, mode, false))
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      break;

    case LO_SUM:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      else if (int_reg_operand (op1, Pmode))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      /* Quad offsets are restricted and can't handle normal addresses.  */
      else if (mode_supports_dq_form (mode))
	{
	  emit_insn (gen_rtx_SET (scratch, addr));
	  new_addr = scratch;
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (legitimate_lo_sum_address_p (mode, addr, false))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      break;

    case SYMBOL_REF:
    case CONST:
    case LABEL_REF:
      rs6000_emit_move (scratch, addr, Pmode);
      new_addr = scratch;
      break;

    default:
      rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
    }

  /* Adjust the address if it changed.  */
  if (addr != new_addr)
    {
      mem = replace_equiv_address_nv (mem, new_addr);
      if (TARGET_DEBUG_ADDR)
	fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
    }

  /* Now create the move.  */
  if (store_p)
    emit_insn (gen_rtx_SET (mem, reg));
  else
    emit_insn (gen_rtx_SET (reg, mem));

  return;
}

/* Convert reloads involving 64-bit gprs and misaligned offset
   addressing, or multiple 32-bit gprs and offsets that are too large,
   to use indirect addressing.  */

void
rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
{
  int regno = true_regnum (reg);
  enum reg_class rclass;
  rtx addr;
  rtx scratch_or_premodify = scratch;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
	       store_p ? "store" : "load");
      fprintf (stderr, "reg:\n");
      debug_rtx (reg);
      fprintf (stderr, "mem:\n");
      debug_rtx (mem);
      fprintf (stderr, "scratch:\n");
      debug_rtx (scratch);
    }

  gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
  gcc_assert (MEM_P (mem));
  rclass = REGNO_REG_CLASS (regno);
  gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
  addr = XEXP (mem, 0);

  if (GET_CODE (addr) == PRE_MODIFY)
    {
      gcc_assert (REG_P (XEXP (addr, 0))
		  && GET_CODE (XEXP (addr, 1)) == PLUS
		  && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
      scratch_or_premodify = XEXP (addr, 0);
      addr = XEXP (addr, 1);
    }
  gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);

  rs6000_emit_move (scratch_or_premodify, addr, Pmode);

  mem = replace_equiv_address_nv (mem, scratch_or_premodify);

  /* Now create the move.  */
  if (store_p)
    emit_insn (gen_rtx_SET (mem, reg));
  else
    emit_insn (gen_rtx_SET (reg, mem));

  return;
}

/* Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS; but on some machines
   in some cases it is preferable to use a more restrictive class.

   On the RS/6000, we have to return NO_REGS when we want to reload a
   floating-point CONST_DOUBLE to force it to be copied to memory.

   We also don't want to reload integer values into floating-point
   registers if we can at all help it.  In fact, this can
   cause reload to die, if it tries to generate a reload of CTR
   into a FP register and discovers it doesn't have the memory location
   required.

   ??? Would it be a good idea to have reload do the converse, that is
   try to reload floating modes into FP registers if possible?
 */

static enum reg_class
rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
{
  machine_mode mode = GET_MODE (x);
  bool is_constant = CONSTANT_P (x);

  /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
     reload class for it.  */
  if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
    return NO_REGS;

  if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
    return NO_REGS;

  /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS.  Do not allow
     the reloading of address expressions using PLUS into floating point
     registers.  */
  if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
    {
      if (is_constant)
	{
	  /* Zero is always allowed in all VSX registers.  */
	  if (x == CONST0_RTX (mode))
	    return rclass;

	  /* If this is a vector constant that can be formed with a few Altivec
	     instructions, we want altivec registers.  */
	  if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
	    return ALTIVEC_REGS;

	  /* If this is an integer constant that can easily be loaded into
	     vector registers, allow it.  */
	  if (CONST_INT_P (x))
	    {
	      HOST_WIDE_INT value = INTVAL (x);

	      /* ISA 2.07 can generate -1 in all registers with XXLORC.  ISA
		 2.06 can generate it in the Altivec registers with
		 VSPLTI<x>.  */
	      if (value == -1)
		{
		  if (TARGET_P8_VECTOR)
		    return rclass;
		  else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
		    return ALTIVEC_REGS;
		  else
		    return NO_REGS;
		}

	      /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
		 a sign extend in the Altivec registers.  */
	      if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
		  && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
		return ALTIVEC_REGS;
	    }

	  /* Force constant to memory.  */
	  return NO_REGS;
	}

      /* D-form addressing can easily reload the value.  */
      if (mode_supports_vmx_dform (mode)
	  || mode_supports_dq_form (mode))
	return rclass;

      /* If this is a scalar floating point value and we don't have D-form
	 addressing, prefer the traditional floating point registers so that we
	 can use D-form (register+offset) addressing.  */
      if (rclass == VSX_REGS
	  && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
	return FLOAT_REGS;

      /* Prefer the Altivec registers if Altivec is handling the vector
	 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
	 loads.  */
      if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
	  || mode == V1TImode)
	return ALTIVEC_REGS;

      return rclass;
    }

  if (is_constant || GET_CODE (x) == PLUS)
    {
      if (reg_class_subset_p (GENERAL_REGS, rclass))
	return GENERAL_REGS;
      if (reg_class_subset_p (BASE_REGS, rclass))
	return BASE_REGS;
      return NO_REGS;
    }

  /* For the vector pair and vector quad modes, prefer their natural register
     (VSX or FPR) rather than GPR registers.  For other integer types, prefer
     the GPR registers.  */
  if (rclass == GEN_OR_FLOAT_REGS)
    {
      if (mode == OOmode)
	return VSX_REGS;

      if (mode == XOmode)
	return FLOAT_REGS;

      if (GET_MODE_CLASS (mode) == MODE_INT)
	return GENERAL_REGS;
    }

  return rclass;
}
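
/* For example, under this hook a V4SImode all-ones constant keeps the
   VSX_REGS class on ISA 2.07 targets (XXLORC can form it in any VSX
   register) but is narrowed to ALTIVEC_REGS on older VSX targets, where
   only the VSPLTI instructions can generate it.  */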
13053
13054/* Debug version of rs6000_preferred_reload_class.  */
13055static enum reg_class
13056rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
13057{
13058  enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
13059
13060  fprintf (stderr,
13061	   "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
13062	   "mode = %s, x:\n",
13063	   reg_class_names[ret], reg_class_names[rclass],
13064	   GET_MODE_NAME (GET_MODE (x)));
13065  debug_rtx (x);
13066
13067  return ret;
13068}
13069
/* If we are copying between FP or AltiVec registers and anything else, we need
   a memory location.  The exception is when we are targeting ppc64 and direct
   moves between the FPRs and GPRs are available.  Also, under VSX, you can
   copy vector registers from the FP register set to the Altivec register set
   and vice versa.  */
13075
13076static bool
13077rs6000_secondary_memory_needed (machine_mode mode,
13078				reg_class_t from_class,
13079				reg_class_t to_class)
13080{
13081  enum rs6000_reg_type from_type, to_type;
13082  bool altivec_p = ((from_class == ALTIVEC_REGS)
13083		    || (to_class == ALTIVEC_REGS));
13084
  /* If a simple/direct move is available, we don't need secondary memory.  */
13086  from_type = reg_class_to_reg_type[(int)from_class];
13087  to_type = reg_class_to_reg_type[(int)to_class];
13088
13089  if (rs6000_secondary_reload_move (to_type, from_type, mode,
13090				    (secondary_reload_info *)0, altivec_p))
13091    return false;
13092
13093  /* If we have a floating point or vector register class, we need to use
13094     memory to transfer the data.  */
13095  if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
13096    return true;
13097
13098  return false;
13099}
13100
13101/* Debug version of rs6000_secondary_memory_needed.  */
13102static bool
13103rs6000_debug_secondary_memory_needed (machine_mode mode,
13104				      reg_class_t from_class,
13105				      reg_class_t to_class)
13106{
13107  bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
13108
13109  fprintf (stderr,
13110	   "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
13111	   "to_class = %s, mode = %s\n",
13112	   ret ? "true" : "false",
13113	   reg_class_names[from_class],
13114	   reg_class_names[to_class],
13115	   GET_MODE_NAME (mode));
13116
13117  return ret;
13118}
13119
13120/* Return the register class of a scratch register needed to copy IN into
13121   or out of a register in RCLASS in MODE.  If it can be done directly,
13122   NO_REGS is returned.  */
13123
13124static enum reg_class
13125rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
13126			       rtx in)
13127{
13128  int regno;
13129
13130  if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
13131#if TARGET_MACHO
13132		     && MACHOPIC_INDIRECT
13133#endif
13134		     ))
13135    {
13136      /* We cannot copy a symbolic operand directly into anything
13137	 other than BASE_REGS for TARGET_ELF.  So indicate that a
13138	 register from BASE_REGS is needed as an intermediate
13139	 register.
13140
13141	 On Darwin, pic addresses require a load from memory, which
13142	 needs a base register.  */
13143      if (rclass != BASE_REGS
13144	  && (SYMBOL_REF_P (in)
13145	      || GET_CODE (in) == HIGH
13146	      || GET_CODE (in) == LABEL_REF
13147	      || GET_CODE (in) == CONST))
13148	return BASE_REGS;
13149    }
13150
13151  if (REG_P (in))
13152    {
13153      regno = REGNO (in);
13154      if (!HARD_REGISTER_NUM_P (regno))
13155	{
13156	  regno = true_regnum (in);
13157	  if (!HARD_REGISTER_NUM_P (regno))
13158	    regno = -1;
13159	}
13160    }
13161  else if (SUBREG_P (in))
13162    {
13163      regno = true_regnum (in);
13164      if (!HARD_REGISTER_NUM_P (regno))
13165	regno = -1;
13166    }
13167  else
13168    regno = -1;
13169
  /* If we have VSX register moves, prefer moving scalar values between
     Altivec registers and GPRs by going via an FPR (and then via memory)
     instead of reloading the secondary memory address for Altivec moves.  */
13173  if (TARGET_VSX
13174      && GET_MODE_SIZE (mode) < 16
13175      && !mode_supports_vmx_dform (mode)
13176      && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
13177           && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
13178          || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
13179              && (regno >= 0 && INT_REGNO_P (regno)))))
13180    return FLOAT_REGS;
13181
13182  /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13183     into anything.  */
13184  if (rclass == GENERAL_REGS || rclass == BASE_REGS
13185      || (regno >= 0 && INT_REGNO_P (regno)))
13186    return NO_REGS;
13187
13188  /* Constants, memory, and VSX registers can go into VSX registers (both the
13189     traditional floating point and the altivec registers).  */
13190  if (rclass == VSX_REGS
13191      && (regno == -1 || VSX_REGNO_P (regno)))
13192    return NO_REGS;
13193
13194  /* Constants, memory, and FP registers can go into FP registers.  */
13195  if ((regno == -1 || FP_REGNO_P (regno))
13196      && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
13197    return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
13198
  /* Memory and AltiVec registers can go into AltiVec registers.  */
13200  if ((regno == -1 || ALTIVEC_REGNO_P (regno))
13201      && rclass == ALTIVEC_REGS)
13202    return NO_REGS;
13203
13204  /* We can copy among the CR registers.  */
13205  if ((rclass == CR_REGS || rclass == CR0_REGS)
13206      && regno >= 0 && CR_REGNO_P (regno))
13207    return NO_REGS;
13208
13209  /* Otherwise, we need GENERAL_REGS.  */
13210  return GENERAL_REGS;
13211}
13212
13213/* Debug version of rs6000_secondary_reload_class.  */
13214static enum reg_class
13215rs6000_debug_secondary_reload_class (enum reg_class rclass,
13216				     machine_mode mode, rtx in)
13217{
13218  enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
13219  fprintf (stderr,
13220	   "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13221	   "mode = %s, input rtx:\n",
13222	   reg_class_names[ret], reg_class_names[rclass],
13223	   GET_MODE_NAME (mode));
13224  debug_rtx (in);
13225
13226  return ret;
13227}
13228
13229/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
13230
13231static bool
13232rs6000_can_change_mode_class (machine_mode from,
13233			      machine_mode to,
13234			      reg_class_t rclass)
13235{
13236  unsigned from_size = GET_MODE_SIZE (from);
13237  unsigned to_size = GET_MODE_SIZE (to);
13238
13239  if (from_size != to_size)
13240    {
13241      enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
13242
13243      if (reg_classes_intersect_p (xclass, rclass))
13244	{
13245	  unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
13246	  unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
13247	  bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
13248	  bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
13249
13250	  /* Don't allow 64-bit types to overlap with 128-bit types that take a
13251	     single register under VSX because the scalar part of the register
	     is in the upper 64 bits, and not the lower 64 bits.  Types like
	     TFmode/TDmode that take 2 scalar registers can overlap.  128-bit
13254	     IEEE floating point can't overlap, and neither can small
13255	     values.  */
13256
13257	  if (to_float128_vector_p && from_float128_vector_p)
13258	    return true;
13259
13260	  else if (to_float128_vector_p || from_float128_vector_p)
13261	    return false;
13262
	  /* TDmode in floating-point registers must always go into a register
13264	     pair with the most significant word in the even-numbered register
13265	     to match ISA requirements.  In little-endian mode, this does not
13266	     match subreg numbering, so we cannot allow subregs.  */
13267	  if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
13268	    return false;
13269
13270	  /* Allow SD<->DD changes, since SDmode values are stored in
13271	     the low half of the DDmode, just like target-independent
13272	     code expects.  We need to allow at least SD->DD since
13273	     rs6000_secondary_memory_needed_mode asks for that change
13274	     to be made for SD reloads.  */
13275	  if ((to == DDmode && from == SDmode)
13276	      || (to == SDmode && from == DDmode))
13277	    return true;
13278
13279	  if (from_size < 8 || to_size < 8)
13280	    return false;
13281
13282	  if (from_size == 8 && (8 * to_nregs) != to_size)
13283	    return false;
13284
13285	  if (to_size == 8 && (8 * from_nregs) != from_size)
13286	    return false;
13287
13288	  return true;
13289	}
13290      else
13291	return true;
13292    }
13293
  /* Since the VSX register set includes traditional floating point registers
     and altivec registers, just check for the size being different instead of
     trying to check whether the modes are vector modes.  Otherwise it won't
     allow, say, DF and DI to change classes.  For types like TFmode and TDmode
     that take 2 64-bit registers, rather than a single 128-bit register, don't
     allow subregs of those types to other 128-bit types.  */
13300  if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
13301    {
13302      unsigned num_regs = (from_size + 15) / 16;
13303      if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
13304	  || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
13305	return false;
13306
13307      return (from_size == 8 || from_size == 16);
13308    }
13309
13310  if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
13311      && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
13312    return false;
13313
13314  return true;
13315}
13316
13317/* Debug version of rs6000_can_change_mode_class.  */
13318static bool
13319rs6000_debug_can_change_mode_class (machine_mode from,
13320				    machine_mode to,
13321				    reg_class_t rclass)
13322{
13323  bool ret = rs6000_can_change_mode_class (from, to, rclass);
13324
13325  fprintf (stderr,
13326	   "rs6000_can_change_mode_class, return %s, from = %s, "
13327	   "to = %s, rclass = %s\n",
13328	   ret ? "true" : "false",
13329	   GET_MODE_NAME (from), GET_MODE_NAME (to),
13330	   reg_class_names[rclass]);
13331
13332  return ret;
13333}
13334
13335/* Return a string to do a move operation of 128 bits of data.  */
13336
13337const char *
13338rs6000_output_move_128bit (rtx operands[])
13339{
13340  rtx dest = operands[0];
13341  rtx src = operands[1];
13342  machine_mode mode = GET_MODE (dest);
13343  int dest_regno;
13344  int src_regno;
13345  bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
13346  bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
13347
13348  if (REG_P (dest))
13349    {
13350      dest_regno = REGNO (dest);
13351      dest_gpr_p = INT_REGNO_P (dest_regno);
13352      dest_fp_p = FP_REGNO_P (dest_regno);
13353      dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
13354      dest_vsx_p = dest_fp_p | dest_vmx_p;
13355    }
13356  else
13357    {
13358      dest_regno = -1;
13359      dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
13360    }
13361
13362  if (REG_P (src))
13363    {
13364      src_regno = REGNO (src);
13365      src_gpr_p = INT_REGNO_P (src_regno);
13366      src_fp_p = FP_REGNO_P (src_regno);
13367      src_vmx_p = ALTIVEC_REGNO_P (src_regno);
13368      src_vsx_p = src_fp_p | src_vmx_p;
13369    }
13370  else
13371    {
13372      src_regno = -1;
13373      src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
13374    }
13375
13376  /* Register moves.  */
13377  if (dest_regno >= 0 && src_regno >= 0)
13378    {
13379      if (dest_gpr_p)
13380	{
13381	  if (src_gpr_p)
13382	    return "#";
13383
13384	  if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
13385	    return (WORDS_BIG_ENDIAN
13386		    ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13387		    : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13388
13389	  else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
13390	    return "#";
13391	}
13392
13393      else if (TARGET_VSX && dest_vsx_p)
13394	{
13395	  if (src_vsx_p)
13396	    return "xxlor %x0,%x1,%x1";
13397
13398	  else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
13399	    return (WORDS_BIG_ENDIAN
13400		    ? "mtvsrdd %x0,%1,%L1"
13401		    : "mtvsrdd %x0,%L1,%1");
13402
13403	  else if (TARGET_DIRECT_MOVE && src_gpr_p)
13404	    return "#";
13405	}
13406
13407      else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
13408	return "vor %0,%1,%1";
13409
13410      else if (dest_fp_p && src_fp_p)
13411	return "#";
13412    }
13413
13414  /* Loads.  */
13415  else if (dest_regno >= 0 && MEM_P (src))
13416    {
13417      if (dest_gpr_p)
13418	{
13419	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13420	    return "lq %0,%1";
13421	  else
13422	    return "#";
13423	}
13424
13425      else if (TARGET_ALTIVEC && dest_vmx_p
13426	       && altivec_indexed_or_indirect_operand (src, mode))
13427	return "lvx %0,%y1";
13428
13429      else if (TARGET_VSX && dest_vsx_p)
13430	{
13431	  if (mode_supports_dq_form (mode)
13432	      && quad_address_p (XEXP (src, 0), mode, true))
13433	    return "lxv %x0,%1";
13434
13435	  else if (TARGET_P9_VECTOR)
13436	    return "lxvx %x0,%y1";
13437
13438	  else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13439	    return "lxvw4x %x0,%y1";
13440
13441	  else
13442	    return "lxvd2x %x0,%y1";
13443	}
13444
13445      else if (TARGET_ALTIVEC && dest_vmx_p)
13446	return "lvx %0,%y1";
13447
13448      else if (dest_fp_p)
13449	return "#";
13450    }
13451
13452  /* Stores.  */
13453  else if (src_regno >= 0 && MEM_P (dest))
13454    {
13455      if (src_gpr_p)
13456	{
	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13458	    return "stq %1,%0";
13459	  else
13460	    return "#";
13461	}
13462
13463      else if (TARGET_ALTIVEC && src_vmx_p
13464	       && altivec_indexed_or_indirect_operand (dest, mode))
13465	return "stvx %1,%y0";
13466
13467      else if (TARGET_VSX && src_vsx_p)
13468	{
13469	  if (mode_supports_dq_form (mode)
13470	      && quad_address_p (XEXP (dest, 0), mode, true))
13471	    return "stxv %x1,%0";
13472
13473	  else if (TARGET_P9_VECTOR)
13474	    return "stxvx %x1,%y0";
13475
13476	  else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13477	    return "stxvw4x %x1,%y0";
13478
13479	  else
13480	    return "stxvd2x %x1,%y0";
13481	}
13482
13483      else if (TARGET_ALTIVEC && src_vmx_p)
13484	return "stvx %1,%y0";
13485
13486      else if (src_fp_p)
13487	return "#";
13488    }
13489
13490  /* Constants.  */
13491  else if (dest_regno >= 0
13492	   && (CONST_INT_P (src)
13493	       || CONST_WIDE_INT_P (src)
13494	       || CONST_DOUBLE_P (src)
13495	       || GET_CODE (src) == CONST_VECTOR))
13496    {
13497      if (dest_gpr_p)
13498	return "#";
13499
13500      else if ((dest_vmx_p && TARGET_ALTIVEC)
13501	       || (dest_vsx_p && TARGET_VSX))
13502	return output_vec_const_move (operands);
13503    }
13504
13505  fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
13506}
13507
13508/* Validate a 128-bit move.  */
13509bool
13510rs6000_move_128bit_ok_p (rtx operands[])
13511{
13512  machine_mode mode = GET_MODE (operands[0]);
13513  return (gpc_reg_operand (operands[0], mode)
13514	  || gpc_reg_operand (operands[1], mode));
13515}
13516
13517/* Return true if a 128-bit move needs to be split.  */
13518bool
13519rs6000_split_128bit_ok_p (rtx operands[])
13520{
13521  if (!reload_completed)
13522    return false;
13523
13524  if (!gpr_or_gpr_p (operands[0], operands[1]))
13525    return false;
13526
13527  if (quad_load_store_p (operands[0], operands[1]))
13528    return false;
13529
13530  return true;
13531}
13532
13533
13534/* Given a comparison operation, return the bit number in CCR to test.  We
13535   know this is a valid comparison.
13536
13537   SCC_P is 1 if this is for an scc.  That means that %D will have been
13538   used instead of %C, so the bits will be in different places.
13539
13540   Return -1 if OP isn't a valid comparison for some reason.  */
13541
13542int
13543ccr_bit (rtx op, int scc_p)
13544{
13545  enum rtx_code code = GET_CODE (op);
13546  machine_mode cc_mode;
13547  int cc_regnum;
13548  int base_bit;
13549  rtx reg;
13550
13551  if (!COMPARISON_P (op))
13552    return -1;
13553
13554  reg = XEXP (op, 0);
13555
13556  if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
13557    return -1;
13558
13559  cc_mode = GET_MODE (reg);
13560  cc_regnum = REGNO (reg);
13561  base_bit = 4 * (cc_regnum - CR0_REGNO);
13562
13563  validate_condition_mode (code, cc_mode);
13564
13565  /* When generating a sCOND operation, only positive conditions are
13566     allowed.  */
13567  if (scc_p)
13568    switch (code)
13569      {
13570      case EQ:
13571      case GT:
13572      case LT:
13573      case UNORDERED:
13574      case GTU:
13575      case LTU:
13576	break;
13577      default:
13578	return -1;
13579      }
13580
13581  switch (code)
13582    {
13583    case NE:
13584      return scc_p ? base_bit + 3 : base_bit + 2;
13585    case EQ:
13586      return base_bit + 2;
13587    case GT:  case GTU:  case UNLE:
13588      return base_bit + 1;
13589    case LT:  case LTU:  case UNGE:
13590      return base_bit;
13591    case ORDERED:  case UNORDERED:
13592      return base_bit + 3;
13593
13594    case GE:  case GEU:
13595      /* If scc, we will have done a cror to put the bit in the
13596	 unordered position.  So test that bit.  For integer, this is ! LT
13597	 unless this is an scc insn.  */
13598      return scc_p ? base_bit + 3 : base_bit;
13599
13600    case LE:  case LEU:
13601      return scc_p ? base_bit + 3 : base_bit + 1;
13602
13603    default:
13604      return -1;
13605    }
13606}
13607
13608/* Return the GOT register.  */
13609
13610rtx
13611rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
13612{
13613  /* The second flow pass currently (June 1999) can't update
13614     regs_ever_live without disturbing other parts of the compiler, so
13615     update it here to make the prolog/epilogue code happy.  */
13616  if (!can_create_pseudo_p ()
13617      && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
13618    df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
13619
13620  crtl->uses_pic_offset_table = 1;
13621
13622  return pic_offset_table_rtx;
13623}
13624
13625#define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
13626
13627/* Write out a function code label.  */
13628
13629void
13630rs6000_output_function_entry (FILE *file, const char *fname)
13631{
13632  if (fname[0] != '.')
13633    {
13634      switch (DEFAULT_ABI)
13635	{
13636	default:
13637	  gcc_unreachable ();
13638
13639	case ABI_AIX:
13640	  if (DOT_SYMBOLS)
13641	    putc ('.', file);
13642	  else
13643	    ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
13644	  break;
13645
13646	case ABI_ELFv2:
13647	case ABI_V4:
13648	case ABI_DARWIN:
13649	  break;
13650	}
13651    }
13652
13653  RS6000_OUTPUT_BASENAME (file, fname);
13654}
13655
13656/* Print an operand.  Recognize special options, documented below.  */
13657
13658#if TARGET_ELF
13659/* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
13660   only introduced by the linker, when applying the sda21
13661   relocation.  */
13662#define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
13663#define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
13664#else
13665#define SMALL_DATA_RELOC "sda21"
13666#define SMALL_DATA_REG 0
13667#endif
13668
13669void
13670print_operand (FILE *file, rtx x, int code)
13671{
13672  int i;
13673  unsigned HOST_WIDE_INT uval;
13674
13675  switch (code)
13676    {
13677      /* %a is output_address.  */
13678
13679      /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
13680	 output_operand.  */
13681
13682    case 'A':
13683      /* Write the MMA accumulator number associated with VSX register X.  */
13684      if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
13685	output_operand_lossage ("invalid %%A value");
13686      else
13687	fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
13688      return;
13689
13690    case 'D':
13691      /* Like 'J' but get to the GT bit only.  */
13692      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13693	{
13694	  output_operand_lossage ("invalid %%D value");
13695	  return;
13696	}
13697
13698      /* Bit 1 is GT bit.  */
13699      i = 4 * (REGNO (x) - CR0_REGNO) + 1;
13700
13701      /* Add one for shift count in rlinm for scc.  */
13702      fprintf (file, "%d", i + 1);
13703      return;
13704
13705    case 'e':
13706      /* If the low 16 bits are 0, but some other bit is set, write 's'.  */
13707      if (! INT_P (x))
13708	{
13709	  output_operand_lossage ("invalid %%e value");
13710	  return;
13711	}
13712
13713      uval = INTVAL (x);
13714      if ((uval & 0xffff) == 0 && uval != 0)
13715	putc ('s', file);
13716      return;
13717
13718    case 'E':
      /* X is a CR register.  Print the number of the EQ bit of the CR.  */
13720      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13721	output_operand_lossage ("invalid %%E value");
13722      else
13723	fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
13724      return;
13725
13726    case 'f':
13727      /* X is a CR register.  Print the shift count needed to move it
13728	 to the high-order four bits.  */
13729      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13730	output_operand_lossage ("invalid %%f value");
13731      else
13732	fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
13733      return;
13734
13735    case 'F':
13736      /* Similar, but print the count for the rotate in the opposite
13737	 direction.  */
13738      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13739	output_operand_lossage ("invalid %%F value");
13740      else
13741	fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
13742      return;
13743
13744    case 'G':
13745      /* X is a constant integer.  If it is negative, print "m",
13746	 otherwise print "z".  This is to make an aze or ame insn.  */
13747      if (!CONST_INT_P (x))
13748	output_operand_lossage ("invalid %%G value");
13749      else if (INTVAL (x) >= 0)
13750	putc ('z', file);
13751      else
13752	putc ('m', file);
13753      return;
13754
13755    case 'h':
13756      /* If constant, output low-order five bits.  Otherwise, write
13757	 normally.  */
13758      if (INT_P (x))
13759	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
13760      else
13761	print_operand (file, x, 0);
13762      return;
13763
13764    case 'H':
13765      /* If constant, output low-order six bits.  Otherwise, write
13766	 normally.  */
13767      if (INT_P (x))
13768	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
13769      else
13770	print_operand (file, x, 0);
13771      return;
13772
13773    case 'I':
13774      /* Print `i' if this is a constant, else nothing.  */
13775      if (INT_P (x))
13776	putc ('i', file);
13777      return;
13778
13779    case 'j':
13780      /* Write the bit number in CCR for jump.  */
13781      i = ccr_bit (x, 0);
13782      if (i == -1)
13783	output_operand_lossage ("invalid %%j code");
13784      else
13785	fprintf (file, "%d", i);
13786      return;
13787
13788    case 'J':
13789      /* Similar, but add one for shift count in rlinm for scc and pass
13790	 scc flag to `ccr_bit'.  */
13791      i = ccr_bit (x, 1);
13792      if (i == -1)
13793	output_operand_lossage ("invalid %%J code");
13794      else
13795	/* If we want bit 31, write a shift count of zero, not 32.  */
13796	fprintf (file, "%d", i == 31 ? 0 : i + 1);
13797      return;
13798
13799    case 'k':
13800      /* X must be a constant.  Write the 1's complement of the
13801	 constant.  */
13802      if (! INT_P (x))
13803	output_operand_lossage ("invalid %%k value");
13804      else
13805	fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
13806      return;
13807
13808    case 'K':
13809      /* X must be a symbolic constant on ELF.  Write an
13810	 expression suitable for an 'addi' that adds in the low 16
13811	 bits of the MEM.  */
13812      if (GET_CODE (x) == CONST)
13813	{
13814	  if (GET_CODE (XEXP (x, 0)) != PLUS
13815	      || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
13816		  && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
13817	      || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
13818	    output_operand_lossage ("invalid %%K value");
13819	}
13820      print_operand_address (file, x);
13821      fputs ("@l", file);
13822      return;
13823
13824      /* %l is output_asm_label.  */
13825
13826    case 'L':
13827      /* Write second word of DImode or DFmode reference.  Works on register
13828	 or non-indexed memory only.  */
13829      if (REG_P (x))
13830	fputs (reg_names[REGNO (x) + 1], file);
13831      else if (MEM_P (x))
13832	{
13833	  machine_mode mode = GET_MODE (x);
	  /* Handle possible auto-increment.  Since it is pre-increment and
	     we have already done it, we can just use an offset of one word.  */
13836	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
13837	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13838	    output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13839						 UNITS_PER_WORD));
13840	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13841	    output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13842						 UNITS_PER_WORD));
13843	  else
13844	    output_address (mode, XEXP (adjust_address_nv (x, SImode,
13845							   UNITS_PER_WORD),
13846				  0));
13847
13848	  if (small_data_operand (x, GET_MODE (x)))
13849	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13850		     reg_names[SMALL_DATA_REG]);
13851	}
13852      return;
13853
13854    case 'N': /* Unused */
13855      /* Write the number of elements in the vector times 4.  */
13856      if (GET_CODE (x) != PARALLEL)
13857	output_operand_lossage ("invalid %%N value");
13858      else
13859	fprintf (file, "%d", XVECLEN (x, 0) * 4);
13860      return;
13861
13862    case 'O': /* Unused */
13863      /* Similar, but subtract 1 first.  */
13864      if (GET_CODE (x) != PARALLEL)
13865	output_operand_lossage ("invalid %%O value");
13866      else
13867	fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
13868      return;
13869
13870    case 'p':
13871      /* X is a CONST_INT that is a power of two.  Output the logarithm.  */
13872      if (! INT_P (x)
13873	  || INTVAL (x) < 0
13874	  || (i = exact_log2 (INTVAL (x))) < 0)
13875	output_operand_lossage ("invalid %%p value");
13876      else
13877	fprintf (file, "%d", i);
13878      return;
13879
13880    case 'P':
13881      /* The operand must be an indirect memory reference.  The result
13882	 is the register name.  */
13883      if (!MEM_P (x) || !REG_P (XEXP (x, 0))
13884	  || REGNO (XEXP (x, 0)) >= 32)
13885	output_operand_lossage ("invalid %%P value");
13886      else
13887	fputs (reg_names[REGNO (XEXP (x, 0))], file);
13888      return;
13889
13890    case 'q':
13891      /* This outputs the logical code corresponding to a boolean
13892	 expression.  The expression may have one or both operands
13893	 negated (if one, only the first one).  For condition register
13894	 logical operations, it will also treat the negated
13895	 CR codes as NOTs, but not handle NOTs of them.  */
13896      {
13897	const char *const *t = 0;
13898	const char *s;
13899	enum rtx_code code = GET_CODE (x);
13900	static const char * const tbl[3][3] = {
13901	  { "and", "andc", "nor" },
13902	  { "or", "orc", "nand" },
13903	  { "xor", "eqv", "xor" } };
13904
13905	if (code == AND)
13906	  t = tbl[0];
13907	else if (code == IOR)
13908	  t = tbl[1];
13909	else if (code == XOR)
13910	  t = tbl[2];
13911	else
13912	  output_operand_lossage ("invalid %%q value");
13913
13914	if (GET_CODE (XEXP (x, 0)) != NOT)
13915	  s = t[0];
13916	else
13917	  {
13918	    if (GET_CODE (XEXP (x, 1)) == NOT)
13919	      s = t[2];
13920	    else
13921	      s = t[1];
13922	  }
13923
13924	fputs (s, file);
13925      }
13926      return;
13927
13928    case 'Q':
13929      if (! TARGET_MFCRF)
13930	return;
13931      fputc (',', file);
13932      /* FALLTHRU */
13933
13934    case 'R':
13935      /* X is a CR register.  Print the mask for `mtcrf'.  */
13936      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13937	output_operand_lossage ("invalid %%R value");
13938      else
13939	fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
13940      return;
13941
13942    case 's':
      /* Low 5 bits of 32 - value.  */
13944      if (! INT_P (x))
13945	output_operand_lossage ("invalid %%s value");
13946      else
13947	fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
13948      return;
13949
13950    case 't':
13951      /* Like 'J' but get to the OVERFLOW/UNORDERED bit.  */
13952      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13953	{
13954	  output_operand_lossage ("invalid %%t value");
13955	  return;
13956	}
13957
13958      /* Bit 3 is OV bit.  */
13959      i = 4 * (REGNO (x) - CR0_REGNO) + 3;
13960
13961      /* If we want bit 31, write a shift count of zero, not 32.  */
13962      fprintf (file, "%d", i == 31 ? 0 : i + 1);
13963      return;
13964
13965    case 'T':
13966      /* Print the symbolic name of a branch target register.  */
13967      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13968	x = XVECEXP (x, 0, 0);
13969      if (!REG_P (x) || (REGNO (x) != LR_REGNO
13970			 && REGNO (x) != CTR_REGNO))
13971	output_operand_lossage ("invalid %%T value");
13972      else if (REGNO (x) == LR_REGNO)
13973	fputs ("lr", file);
13974      else
13975	fputs ("ctr", file);
13976      return;
13977
13978    case 'u':
13979      /* High-order or low-order 16 bits of constant, whichever is non-zero,
13980	 for use in unsigned operand.  */
13981      if (! INT_P (x))
13982	{
13983	  output_operand_lossage ("invalid %%u value");
13984	  return;
13985	}
13986
13987      uval = INTVAL (x);
13988      if ((uval & 0xffff) == 0)
13989	uval >>= 16;
13990
13991      fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
13992      return;
13993
13994    case 'v':
13995      /* High-order 16 bits of constant for use in signed operand.  */
13996      if (! INT_P (x))
13997	output_operand_lossage ("invalid %%v value");
13998      else
13999	fprintf (file, HOST_WIDE_INT_PRINT_HEX,
14000		 (INTVAL (x) >> 16) & 0xffff);
14001      return;
14002
14003    case 'U':
14004      /* Print `u' if this has an auto-increment or auto-decrement.  */
14005      if (MEM_P (x)
14006	  && (GET_CODE (XEXP (x, 0)) == PRE_INC
14007	      || GET_CODE (XEXP (x, 0)) == PRE_DEC
14008	      || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
14009	putc ('u', file);
14010      return;
14011
14012    case 'V':
14013      /* Print the trap code for this operand.  */
14014      switch (GET_CODE (x))
14015	{
14016	case EQ:
14017	  fputs ("eq", file);   /* 4 */
14018	  break;
14019	case NE:
14020	  fputs ("ne", file);   /* 24 */
14021	  break;
14022	case LT:
14023	  fputs ("lt", file);   /* 16 */
14024	  break;
14025	case LE:
14026	  fputs ("le", file);   /* 20 */
14027	  break;
14028	case GT:
14029	  fputs ("gt", file);   /* 8 */
14030	  break;
14031	case GE:
14032	  fputs ("ge", file);   /* 12 */
14033	  break;
14034	case LTU:
14035	  fputs ("llt", file);  /* 2 */
14036	  break;
14037	case LEU:
14038	  fputs ("lle", file);  /* 6 */
14039	  break;
14040	case GTU:
14041	  fputs ("lgt", file);  /* 1 */
14042	  break;
14043	case GEU:
14044	  fputs ("lge", file);  /* 5 */
14045	  break;
14046	default:
14047	  output_operand_lossage ("invalid %%V value");
14048	}
14049      break;
14050
14051    case 'w':
14052      /* If constant, low-order 16 bits of constant, signed.  Otherwise, write
14053	 normally.  */
14054      if (INT_P (x))
14055	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
14056		 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
14057      else
14058	print_operand (file, x, 0);
14059      return;
14060
14061    case 'x':
14062      /* X is a FPR or Altivec register used in a VSX context.  */
14063      if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
14064	output_operand_lossage ("invalid %%x value");
14065      else
14066	{
14067	  int reg = REGNO (x);
14068	  int vsx_reg = (FP_REGNO_P (reg)
14069			 ? reg - 32
14070			 : reg - FIRST_ALTIVEC_REGNO + 32);
14071
14072#ifdef TARGET_REGNAMES
14073	  if (TARGET_REGNAMES)
14074	    fprintf (file, "%%vs%d", vsx_reg);
14075	  else
14076#endif
14077	    fprintf (file, "%d", vsx_reg);
14078	}
14079      return;
14080
14081    case 'X':
14082      if (MEM_P (x)
14083	  && (legitimate_indexed_address_p (XEXP (x, 0), 0)
14084	      || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
14085		  && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
14086	putc ('x', file);
14087      return;
14088
14089    case 'Y':
      /* Like 'L', for third word of TImode/PTImode.  */
14091      if (REG_P (x))
14092	fputs (reg_names[REGNO (x) + 2], file);
14093      else if (MEM_P (x))
14094	{
14095	  machine_mode mode = GET_MODE (x);
14096	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
14097	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14098	    output_address (mode, plus_constant (Pmode,
14099						 XEXP (XEXP (x, 0), 0), 8));
14100	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14101	    output_address (mode, plus_constant (Pmode,
14102						 XEXP (XEXP (x, 0), 0), 8));
14103	  else
14104	    output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
14105	  if (small_data_operand (x, GET_MODE (x)))
14106	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14107		     reg_names[SMALL_DATA_REG]);
14108	}
14109      return;
14110
14111    case 'z':
14112      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14113	x = XVECEXP (x, 0, 1);
14114      /* X is a SYMBOL_REF.  Write out the name preceded by a
14115	 period and without any trailing data in brackets.  Used for function
14116	 names.  If we are configured for System V (or the embedded ABI) on
14117	 the PowerPC, do not emit the period, since those systems do not use
14118	 TOCs and the like.  */
14119      if (!SYMBOL_REF_P (x))
14120	{
14121	  output_operand_lossage ("invalid %%z value");
14122	  return;
14123	}
14124
14125      /* For macho, check to see if we need a stub.  */
14126      if (TARGET_MACHO)
14127	{
14128	  const char *name = XSTR (x, 0);
14129#if TARGET_MACHO
14130	  if (darwin_symbol_stubs
14131	      && MACHOPIC_INDIRECT
14132	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14133	    name = machopic_indirection_name (x, /*stub_p=*/true);
14134#endif
14135	  assemble_name (file, name);
14136	}
14137      else if (!DOT_SYMBOLS)
14138	assemble_name (file, XSTR (x, 0));
14139      else
14140	rs6000_output_function_entry (file, XSTR (x, 0));
14141      return;
14142
14143    case 'Z':
14144      /* Like 'L', for last word of TImode/PTImode.  */
14145      if (REG_P (x))
14146	fputs (reg_names[REGNO (x) + 3], file);
14147      else if (MEM_P (x))
14148	{
14149	  machine_mode mode = GET_MODE (x);
14150	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
14151	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14152	    output_address (mode, plus_constant (Pmode,
14153						 XEXP (XEXP (x, 0), 0), 12));
14154	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14155	    output_address (mode, plus_constant (Pmode,
14156						 XEXP (XEXP (x, 0), 0), 12));
14157	  else
14158	    output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
14159	  if (small_data_operand (x, GET_MODE (x)))
14160	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14161		     reg_names[SMALL_DATA_REG]);
14162	}
14163      return;
14164
14165      /* Print AltiVec memory operand.  */
14166    case 'y':
14167      {
14168	rtx tmp;
14169
14170	gcc_assert (MEM_P (x));
14171
14172	tmp = XEXP (x, 0);
14173
14174	if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
14175	    && GET_CODE (tmp) == AND
14176	    && CONST_INT_P (XEXP (tmp, 1))
14177	    && INTVAL (XEXP (tmp, 1)) == -16)
14178	  tmp = XEXP (tmp, 0);
14179	else if (VECTOR_MEM_VSX_P (GET_MODE (x))
14180		 && GET_CODE (tmp) == PRE_MODIFY)
14181	  tmp = XEXP (tmp, 1);
14182	if (REG_P (tmp))
14183	  fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
14184	else
14185	  {
14186	    if (GET_CODE (tmp) != PLUS
14187		|| !REG_P (XEXP (tmp, 0))
14188		|| !REG_P (XEXP (tmp, 1)))
14189	      {
14190		output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
14191		break;
14192	      }
14193
14194	    if (REGNO (XEXP (tmp, 0)) == 0)
14195	      fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
14196		       reg_names[ REGNO (XEXP (tmp, 0)) ]);
14197	    else
14198	      fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
14199		       reg_names[ REGNO (XEXP (tmp, 1)) ]);
14200	  }
14201	break;
14202      }
14203
14204    case 0:
14205      if (REG_P (x))
14206	fprintf (file, "%s", reg_names[REGNO (x)]);
14207      else if (MEM_P (x))
14208	{
14209	  /* We need to handle PRE_INC and PRE_DEC here, since we need to
14210	     know the width from the mode.  */
14211	  if (GET_CODE (XEXP (x, 0)) == PRE_INC)
14212	    fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
14213		     reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14214	  else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
14215	    fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
14216		     reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14217	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14218	    output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
14219	  else
14220	    output_address (GET_MODE (x), XEXP (x, 0));
14221	}
14222      else if (toc_relative_expr_p (x, false,
14223				    &tocrel_base_oac, &tocrel_offset_oac))
14224	/* This hack along with a corresponding hack in
14225	   rs6000_output_addr_const_extra arranges to output addends
14226	   where the assembler expects to find them.  eg.
14227	   (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14228	   without this hack would be output as "x@toc+4".  We
14229	   want "x+4@toc".  */
14230	output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14231      else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
14232	output_addr_const (file, XVECEXP (x, 0, 0));
14233      else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14234	output_addr_const (file, XVECEXP (x, 0, 1));
14235      else
14236	output_addr_const (file, x);
14237      return;
14238
14239    case '&':
14240      if (const char *name = get_some_local_dynamic_name ())
14241	assemble_name (file, name);
14242      else
14243	output_operand_lossage ("'%%&' used without any "
14244				"local dynamic TLS references");
14245      return;
14246
14247    default:
14248      output_operand_lossage ("invalid %%xn code");
14249    }
14250}
14251
14252/* Print the address of an operand.  */
14253
14254void
14255print_operand_address (FILE *file, rtx x)
14256{
14257  if (REG_P (x))
14258    fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
14259
14260  /* Is it a PC-relative address?  */
14261  else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
14262    {
14263      HOST_WIDE_INT offset;
14264
14265      if (GET_CODE (x) == CONST)
14266	x = XEXP (x, 0);
14267
14268      if (GET_CODE (x) == PLUS)
14269	{
14270	  offset = INTVAL (XEXP (x, 1));
14271	  x = XEXP (x, 0);
14272	}
14273      else
14274	offset = 0;
14275
14276      output_addr_const (file, x);
14277
14278      if (offset)
14279	fprintf (file, "%+" PRId64, offset);
14280
14281      if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
14282	fprintf (file, "@got");
14283
14284      fprintf (file, "@pcrel");
14285    }
14286  else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
14287	   || GET_CODE (x) == LABEL_REF)
14288    {
14289      output_addr_const (file, x);
14290      if (small_data_operand (x, GET_MODE (x)))
14291	fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14292		 reg_names[SMALL_DATA_REG]);
14293      else
14294	gcc_assert (!TARGET_TOC);
14295    }
14296  else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14297	   && REG_P (XEXP (x, 1)))
14298    {
14299      if (REGNO (XEXP (x, 0)) == 0)
14300	fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
14301		 reg_names[ REGNO (XEXP (x, 0)) ]);
14302      else
14303	fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
14304		 reg_names[ REGNO (XEXP (x, 1)) ]);
14305    }
14306  else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14307	   && CONST_INT_P (XEXP (x, 1)))
14308    fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
14309	     INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
14310#if TARGET_MACHO
14311  else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14312	   && CONSTANT_P (XEXP (x, 1)))
14313    {
14314      fprintf (file, "lo16(");
14315      output_addr_const (file, XEXP (x, 1));
14316      fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14317    }
14318#endif
14319#if TARGET_ELF
14320  else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14321	   && CONSTANT_P (XEXP (x, 1)))
14322    {
14323      output_addr_const (file, XEXP (x, 1));
14324      fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14325    }
14326#endif
14327  else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
14328    {
14329      /* This hack along with a corresponding hack in
14330	 rs6000_output_addr_const_extra arranges to output addends
14331	 where the assembler expects to find them.  eg.
14332	 (lo_sum (reg 9)
14333	 .       (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
14334	 without this hack would be output as "x@toc+8@l(9)".  We
14335	 want "x+8@toc@l(9)".  */
14336      output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14337      if (GET_CODE (x) == LO_SUM)
14338	fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
14339      else
14340	fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
14341    }
14342  else
14343    output_addr_const (file, x);
14344}
14345
14346/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
14347
14348bool
14349rs6000_output_addr_const_extra (FILE *file, rtx x)
14350{
14351  if (GET_CODE (x) == UNSPEC)
14352    switch (XINT (x, 1))
14353      {
14354      case UNSPEC_TOCREL:
14355	gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
14356			     && REG_P (XVECEXP (x, 0, 1))
14357			     && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
14358	output_addr_const (file, XVECEXP (x, 0, 0));
14359	if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
14360	  {
14361	    if (INTVAL (tocrel_offset_oac) >= 0)
14362	      fprintf (file, "+");
14363	    output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
14364	  }
14365	if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
14366	  {
14367	    putc ('-', file);
14368	    assemble_name (file, toc_label_name);
14369	    need_toc_init = 1;
14370	  }
14371	else if (TARGET_ELF)
14372	  fputs ("@toc", file);
14373	return true;
14374
14375#if TARGET_MACHO
14376      case UNSPEC_MACHOPIC_OFFSET:
14377	output_addr_const (file, XVECEXP (x, 0, 0));
14378	putc ('-', file);
14379	machopic_output_function_base_name (file);
14380	return true;
14381#endif
14382      }
14383  return false;
14384}
14385
14386/* Target hook for assembling integer objects.  The PowerPC version has
14387   to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
14388   is defined.  It also needs to handle DI-mode objects on 64-bit
14389   targets.  */
14390
14391static bool
14392rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
14393{
14394#ifdef RELOCATABLE_NEEDS_FIXUP
14395  /* Special handling for SI values.  */
14396  if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
14397    {
14398      static int recurse = 0;
14399
14400      /* For -mrelocatable, we mark all addresses that need to be fixed up in
14401	 the .fixup section.  Since the TOC section is already relocated, we
14402	 don't need to mark it here.  We used to skip the text section, but it
14403	 should never be valid for relocated addresses to be placed in the text
14404	 section.  */
14405      if (DEFAULT_ABI == ABI_V4
14406	  && (TARGET_RELOCATABLE || flag_pic > 1)
14407	  && in_section != toc_section
14408	  && !recurse
14409	  && !CONST_SCALAR_INT_P (x)
14410	  && CONSTANT_P (x))
14411	{
14412	  char buf[256];
14413
14414	  recurse = 1;
14415	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
14416	  fixuplabelno++;
14417	  ASM_OUTPUT_LABEL (asm_out_file, buf);
14418	  fprintf (asm_out_file, "\t.long\t(");
14419	  output_addr_const (asm_out_file, x);
14420	  fprintf (asm_out_file, ")@fixup\n");
14421	  fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
14422	  ASM_OUTPUT_ALIGN (asm_out_file, 2);
14423	  fprintf (asm_out_file, "\t.long\t");
14424	  assemble_name (asm_out_file, buf);
14425	  fprintf (asm_out_file, "\n\t.previous\n");
14426	  recurse = 0;
14427	  return true;
14428	}
14429      /* Remove initial .'s to turn a -mcall-aixdesc function
14430	 address into the address of the descriptor, not the function
14431	 itself.  */
14432      else if (SYMBOL_REF_P (x)
14433	       && XSTR (x, 0)[0] == '.'
14434	       && DEFAULT_ABI == ABI_AIX)
14435	{
14436	  const char *name = XSTR (x, 0);
14437	  while (*name == '.')
14438	    name++;
14439
14440	  fprintf (asm_out_file, "\t.long\t%s\n", name);
14441	  return true;
14442	}
14443    }
14444#endif /* RELOCATABLE_NEEDS_FIXUP */
14445  return default_assemble_integer (x, size, aligned_p);
14446}
14447
14448/* Return a template string for assembly to emit when making an
14449   external call.  FUNOP is the call mem argument operand number.  */
14450
14451static const char *
14452rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
14453{
14454  /* -Wformat-overflow workaround, without which gcc thinks that %u
14455      might produce 10 digits.  */
14456  gcc_assert (funop <= MAX_RECOG_OPERANDS);
14457
14458  char arg[12];
14459  arg[0] = 0;
14460  if (GET_CODE (operands[funop + 1]) == UNSPEC)
14461    {
14462      if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14463	sprintf (arg, "(%%%u@tlsgd)", funop + 1);
14464      else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14465	sprintf (arg, "(%%&@tlsld)");
14466    }
14467
14468  /* The magic 32768 offset here corresponds to the offset of
14469     r30 in .got2, as given by LCTOC1.  See sysv4.h:toc_section.  */
14470  char z[11];
14471  sprintf (z, "%%z%u%s", funop,
14472	   (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
14473	    ? "+32768" : ""));
14474
14475  static char str[32];  /* 1 spare */
14476  if (rs6000_pcrel_p ())
14477    sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
14478  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
14479    sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14480	     sibcall ? "" : "\n\tnop");
14481  else if (DEFAULT_ABI == ABI_V4)
14482    sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14483	     flag_pic ? "@plt" : "");
14484#if TARGET_MACHO
  /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case.  */
  else if (DEFAULT_ABI == ABI_DARWIN)
14487    {
14488      /* The cookie is in operand func+2.  */
14489      gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
14490      int cookie = INTVAL (operands[funop + 2]);
14491      if (cookie & CALL_LONG)
14492	{
14493	  tree funname = get_identifier (XSTR (operands[funop], 0));
14494	  tree labelname = get_prev_label (funname);
14495	  gcc_checking_assert (labelname && !sibcall);
14496
14497	  /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14498	     instruction will reach 'foo', otherwise link as 'bl L42'".
14499	     "L42" should be a 'branch island', that will do a far jump to
14500	     'foo'.  Branch islands are generated in
14501	     macho_branch_islands().  */
14502	  sprintf (str, "jbsr %%z%u,%.10s", funop,
14503		   IDENTIFIER_POINTER (labelname));
14504	}
14505      else
14506        /* Same as AIX or ELFv2, except to keep backwards compat, no nop
14507	   after the call.  */
14508	sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
14509    }
14510#endif
14511  else
14512    gcc_unreachable ();
14513  return str;
14514}
14515
14516const char *
14517rs6000_call_template (rtx *operands, unsigned int funop)
14518{
14519  return rs6000_call_template_1 (operands, funop, false);
14520}
14521
14522const char *
14523rs6000_sibcall_template (rtx *operands, unsigned int funop)
14524{
14525  return rs6000_call_template_1 (operands, funop, true);
14526}
14527
14528/* As above, for indirect calls.  */
14529
14530static const char *
14531rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
14532				 bool sibcall)
14533{
14534  /* -Wformat-overflow workaround, without which gcc thinks that %u
14535     might produce 10 digits.  Note that -Wformat-overflow will not
14536     currently warn here for str[], so do not rely on a warning to
14537     ensure str[] is correctly sized.  */
14538  gcc_assert (funop <= MAX_RECOG_OPERANDS);
14539
14540  /* Currently, funop is either 0 or 1.  The maximum string is always
14541     a !speculate 64-bit __tls_get_addr call.
14542
14543     ABI_ELFv2, pcrel:
14544     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
14545     . 35	.reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14546     .  9	crset 2\n\t
14547     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
14548     . 36	.reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14549     .  8	beq%T1l-
14550     .---
14551     .142
14552
14553     ABI_AIX:
14554     .  9	ld 2,%3\n\t
14555     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
14556     . 29	.reloc .,R_PPC64_PLTSEQ,%z1\n\t
14557     .  9	crset 2\n\t
14558     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
14559     . 30	.reloc .,R_PPC64_PLTCALL,%z1\n\t
14560     . 10	beq%T1l-\n\t
14561     . 10	ld 2,%4(1)
14562     .---
14563     .151
14564
14565     ABI_ELFv2:
14566     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
14567     . 29	.reloc .,R_PPC64_PLTSEQ,%z1\n\t
14568     .  9	crset 2\n\t
14569     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
14570     . 30	.reloc .,R_PPC64_PLTCALL,%z1\n\t
14571     . 10	beq%T1l-\n\t
14572     . 10	ld 2,%3(1)
14573     .---
14574     .142
14575
14576     ABI_V4:
14577     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
14578     . 35	.reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14579     .  9	crset 2\n\t
14580     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
14581     . 36	.reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14582     .  8	beq%T1l-
14583     .---
14584     .141  */
14585  static char str[160];  /* 8 spare */
14586  char *s = str;
14587  const char *ptrload = TARGET_64BIT ? "d" : "wz";
14588
14589  if (DEFAULT_ABI == ABI_AIX)
14590    s += sprintf (s,
14591		  "l%s 2,%%%u\n\t",
14592		  ptrload, funop + 3);
14593
14594  /* We don't need the extra code to stop indirect call speculation if
14595     calling via LR.  */
14596  bool speculate = (TARGET_MACHO
14597		    || rs6000_speculate_indirect_jumps
14598		    || (REG_P (operands[funop])
14599			&& REGNO (operands[funop]) == LR_REGNO));
14600
14601  if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
14602    {
14603      const char *rel64 = TARGET_64BIT ? "64" : "";
14604      char tls[29];
14605      tls[0] = 0;
14606      if (GET_CODE (operands[funop + 1]) == UNSPEC)
14607	{
14608	  if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14609	    sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
14610		     rel64, funop + 1);
14611	  else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14612	    sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
14613		     rel64);
14614	}
14615
14616      const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
14617      const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14618			    && flag_pic == 2 ? "+32768" : "");
14619      if (!speculate)
14620	{
14621	  s += sprintf (s,
14622			"%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
14623			tls, rel64, notoc, funop, addend);
14624	  s += sprintf (s, "crset 2\n\t");
14625	}
14626      s += sprintf (s,
14627		    "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
14628		    tls, rel64, notoc, funop, addend);
14629    }
14630  else if (!speculate)
14631    s += sprintf (s, "crset 2\n\t");
14632
14633  if (rs6000_pcrel_p ())
14634    {
14635      if (speculate)
14636	sprintf (s, "b%%T%ul", funop);
14637      else
14638	sprintf (s, "beq%%T%ul-", funop);
14639    }
14640  else if (DEFAULT_ABI == ABI_AIX)
14641    {
14642      if (speculate)
14643	sprintf (s,
14644		 "b%%T%ul\n\t"
14645		 "l%s 2,%%%u(1)",
14646		 funop, ptrload, funop + 4);
14647      else
14648	sprintf (s,
14649		 "beq%%T%ul-\n\t"
14650		 "l%s 2,%%%u(1)",
14651		 funop, ptrload, funop + 4);
14652    }
14653  else if (DEFAULT_ABI == ABI_ELFv2)
14654    {
14655      if (speculate)
14656	sprintf (s,
14657		 "b%%T%ul\n\t"
14658		 "l%s 2,%%%u(1)",
14659		 funop, ptrload, funop + 3);
14660      else
14661	sprintf (s,
14662		 "beq%%T%ul-\n\t"
14663		 "l%s 2,%%%u(1)",
14664		 funop, ptrload, funop + 3);
14665    }
14666  else
14667    {
14668      if (speculate)
14669	sprintf (s,
14670		 "b%%T%u%s",
14671		 funop, sibcall ? "" : "l");
14672      else
14673	sprintf (s,
14674		 "beq%%T%u%s-%s",
14675		 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
14676    }
14677  return str;
14678}
14679
14680const char *
14681rs6000_indirect_call_template (rtx *operands, unsigned int funop)
14682{
14683  return rs6000_indirect_call_template_1 (operands, funop, false);
14684}
14685
14686const char *
14687rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
14688{
14689  return rs6000_indirect_call_template_1 (operands, funop, true);
14690}
14691
14692#if HAVE_AS_PLTSEQ
14693/* Output indirect call insns.  WHICH identifies the type of sequence.  */
14694const char *
14695rs6000_pltseq_template (rtx *operands, int which)
14696{
14697  const char *rel64 = TARGET_64BIT ? "64" : "";
14698  char tls[30];
14699  tls[0] = 0;
14700  if (GET_CODE (operands[3]) == UNSPEC)
14701    {
14702      char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
14703      if (XINT (operands[3], 1) == UNSPEC_TLSGD)
14704	sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
14705		 off, rel64);
14706      else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
14707	sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
14708		 off, rel64);
14709    }
14710
14711  gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
14712  static char str[96];  /* 10 spare */
14713  char off = WORDS_BIG_ENDIAN ? '2' : '4';
14714  const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14715			&& flag_pic == 2 ? "+32768" : "");
14716  switch (which)
14717    {
14718    case RS6000_PLTSEQ_TOCSAVE:
14719      sprintf (str,
14720	       "st%s\n\t"
14721	       "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
14722	       TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
14723	       tls, rel64);
14724      break;
14725    case RS6000_PLTSEQ_PLT16_HA:
14726      if (DEFAULT_ABI == ABI_V4 && !flag_pic)
14727	sprintf (str,
14728		 "lis %%0,0\n\t"
14729		 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
14730		 tls, off, rel64);
14731      else
14732	sprintf (str,
14733		 "addis %%0,%%1,0\n\t"
14734		 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
14735		 tls, off, rel64, addend);
14736      break;
14737    case RS6000_PLTSEQ_PLT16_LO:
14738      sprintf (str,
14739	       "l%s %%0,0(%%1)\n\t"
14740	       "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
14741	       TARGET_64BIT ? "d" : "wz",
14742	       tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
14743      break;
14744    case RS6000_PLTSEQ_MTCTR:
14745      sprintf (str,
14746	       "mtctr %%1\n\t"
14747	       "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
14748	       tls, rel64, addend);
14749      break;
14750    case RS6000_PLTSEQ_PLT_PCREL34:
14751      sprintf (str,
14752	       "pl%s %%0,0(0),1\n\t"
14753	       "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
14754	       TARGET_64BIT ? "d" : "wz",
14755	       tls, rel64);
14756      break;
14757    default:
14758      gcc_unreachable ();
14759    }
14760  return str;
14761}
14762#endif
14763
14764#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
14765/* Emit an assembler directive to set symbol visibility for DECL to
14766   VISIBILITY_TYPE.  */
14767
14768static void
14769rs6000_assemble_visibility (tree decl, int vis)
14770{
14771  if (TARGET_XCOFF)
14772    return;
14773
14774  /* Functions need to have their entry point symbol visibility set as
14775     well as their descriptor symbol visibility.  */
14776  if (DEFAULT_ABI == ABI_AIX
14777      && DOT_SYMBOLS
14778      && TREE_CODE (decl) == FUNCTION_DECL)
14779    {
14780      static const char * const visibility_types[] = {
14781	NULL, "protected", "hidden", "internal"
14782      };
14783
14784      const char *name, *type;
14785
14786      name = ((* targetm.strip_name_encoding)
14787	      (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
14788      type = visibility_types[vis];
14789
14790      fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
14791      fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
14792    }
14793  else
14794    default_assemble_visibility (decl, vis);
14795}
14796#endif
14797
14798/* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
14799   entry.  If RECORD_P is true and the target supports named sections,
14800   the location of the NOPs will be recorded in a special object section
14801   called "__patchable_function_entries".  This routine may be called
14802   twice per function to put NOPs before and after the function
14803   entry.  */
14804
14805void
14806rs6000_print_patchable_function_entry (FILE *file,
14807				       unsigned HOST_WIDE_INT patch_area_size,
14808				       bool record_p)
14809{
14810  unsigned int flags = SECTION_WRITE | SECTION_RELRO;
  /* When the .opd section is emitted, the function symbol is emitted into
     the .opd section while the patchable area is emitted into the function
     section, so the two end up in different sections.  Don't use
     SECTION_LINK_ORDER in that case.  */
14815  if (!(TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
14816      && HAVE_GAS_SECTION_LINK_ORDER)
14817    flags |= SECTION_LINK_ORDER;
14818  default_print_patchable_function_entry_1 (file, patch_area_size, record_p,
14819					    flags);
14820}
14821
14822enum rtx_code
14823rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
14824{
  /* Reversal of FP compares needs care -- an ordered compare
     becomes an unordered compare and vice versa.  */
14827  if (mode == CCFPmode
14828      && (!flag_finite_math_only
14829	  || code == UNLT || code == UNLE || code == UNGT || code == UNGE
14830	  || code == UNEQ || code == LTGT))
14831    return reverse_condition_maybe_unordered (code);
14832  else
14833    return reverse_condition (code);
14834}
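
/* For example, when NaNs must be honored, reversing GE in CCFPmode yields
   UNLT rather than LT, so the reversed test remains true when either
   operand is a NaN; reversing an integer GE simply yields LT.  */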
14835
14836/* Generate a compare for CODE.  Return a brand-new rtx that
14837   represents the result of the compare.  */
14838
14839static rtx
14840rs6000_generate_compare (rtx cmp, machine_mode mode)
14841{
14842  machine_mode comp_mode;
14843  rtx compare_result;
14844  enum rtx_code code = GET_CODE (cmp);
14845  rtx op0 = XEXP (cmp, 0);
14846  rtx op1 = XEXP (cmp, 1);
14847
14848  if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14849    comp_mode = CCmode;
14850  else if (FLOAT_MODE_P (mode))
14851    comp_mode = CCFPmode;
14852  else if (code == GTU || code == LTU
14853	   || code == GEU || code == LEU)
14854    comp_mode = CCUNSmode;
14855  else if ((code == EQ || code == NE)
14856	   && unsigned_reg_p (op0)
14857	   && (unsigned_reg_p (op1)
14858	       || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
    /* These are unsigned values; perhaps there will be a later
       ordering compare that can be shared with this one.  */
14861    comp_mode = CCUNSmode;
14862  else
14863    comp_mode = CCmode;
14864
14865  /* If we have an unsigned compare, make sure we don't have a signed value as
14866     an immediate.  */
14867  if (comp_mode == CCUNSmode && CONST_INT_P (op1)
14868      && INTVAL (op1) < 0)
14869    {
14870      op0 = copy_rtx_if_shared (op0);
14871      op1 = force_reg (GET_MODE (op0), op1);
14872      cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
14873    }
14874
14875  /* First, the compare.  */
14876  compare_result = gen_reg_rtx (comp_mode);
14877
14878  /* IEEE 128-bit support in VSX registers when we do not have hardware
14879     support.  */
14880  if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14881    {
14882      rtx libfunc = NULL_RTX;
14883      bool check_nan = false;
14884      rtx dest;
14885
14886      switch (code)
14887	{
14888	case EQ:
14889	case NE:
14890	  libfunc = optab_libfunc (eq_optab, mode);
14891	  break;
14892
14893	case GT:
14894	case GE:
14895	  libfunc = optab_libfunc (ge_optab, mode);
14896	  break;
14897
14898	case LT:
14899	case LE:
14900	  libfunc = optab_libfunc (le_optab, mode);
14901	  break;
14902
14903	case UNORDERED:
14904	case ORDERED:
14905	  libfunc = optab_libfunc (unord_optab, mode);
14906	  code = (code == UNORDERED) ? NE : EQ;
14907	  break;
14908
14909	case UNGE:
14910	case UNGT:
14911	  check_nan = true;
14912	  libfunc = optab_libfunc (ge_optab, mode);
14913	  code = (code == UNGE) ? GE : GT;
14914	  break;
14915
14916	case UNLE:
14917	case UNLT:
14918	  check_nan = true;
14919	  libfunc = optab_libfunc (le_optab, mode);
14920	  code = (code == UNLE) ? LE : LT;
14921	  break;
14922
14923	case UNEQ:
14924	case LTGT:
14925	  check_nan = true;
14926	  libfunc = optab_libfunc (eq_optab, mode);
	  code = (code == UNEQ) ? EQ : NE;
14928	  break;
14929
14930	default:
14931	  gcc_unreachable ();
14932	}
14933
14934      gcc_assert (libfunc);
14935
14936      if (!check_nan)
14937	dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14938					SImode, op0, mode, op1, mode);
14939
14940      /* The library signals an exception for signalling NaNs, so we need to
14941	 handle isgreater, etc. by first checking isordered.  */
14942      else
14943	{
14944	  rtx ne_rtx, normal_dest, unord_dest;
14945	  rtx unord_func = optab_libfunc (unord_optab, mode);
14946	  rtx join_label = gen_label_rtx ();
14947	  rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
14948	  rtx unord_cmp = gen_reg_rtx (comp_mode);
14949
14950
14951	  /* Test for either value being a NaN.  */
14952	  gcc_assert (unord_func);
14953	  unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
14954						SImode, op0, mode, op1, mode);
14955
	  /* Set the value to 1 (true once the NE test at the join label is
	     applied) if either value is a NaN, and jump to the join
	     label.  */
14958	  dest = gen_reg_rtx (SImode);
14959	  emit_move_insn (dest, const1_rtx);
14960	  emit_insn (gen_rtx_SET (unord_cmp,
14961				  gen_rtx_COMPARE (comp_mode, unord_dest,
14962						   const0_rtx)));
14963
14964	  ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
14965	  emit_jump_insn (gen_rtx_SET (pc_rtx,
14966				       gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
14967							     join_ref,
14968							     pc_rtx)));
14969
14970	  /* Do the normal comparison, knowing that the values are not
14971	     NaNs.  */
14972	  normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14973						 SImode, op0, mode, op1, mode);
14974
14975	  emit_insn (gen_cstoresi4 (dest,
14976				    gen_rtx_fmt_ee (code, SImode, normal_dest,
14977						    const0_rtx),
14978				    normal_dest, const0_rtx));
14979
	  /* Join NaN and non-NaN paths.  Compare dest against 0.  */
14981	  emit_label (join_label);
14982	  code = NE;
14983	}
14984
14985      emit_insn (gen_rtx_SET (compare_result,
14986			      gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
14987    }
14988
14989  else
14990    {
14991      /* Generate XLC-compatible TFmode compare as PARALLEL with extra
14992	 CLOBBERs to match cmptf_internal2 pattern.  */
14993      if (comp_mode == CCFPmode && TARGET_XL_COMPAT
14994	  && FLOAT128_IBM_P (GET_MODE (op0))
14995	  && TARGET_HARD_FLOAT)
14996	emit_insn (gen_rtx_PARALLEL (VOIDmode,
14997	  gen_rtvec (10,
14998		     gen_rtx_SET (compare_result,
14999				  gen_rtx_COMPARE (comp_mode, op0, op1)),
15000		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15001		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15002		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15003		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15004		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15005		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15006		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15007		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15008		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
15009      else if (GET_CODE (op1) == UNSPEC
15010	       && XINT (op1, 1) == UNSPEC_SP_TEST)
15011	{
15012	  rtx op1b = XVECEXP (op1, 0, 0);
15013	  comp_mode = CCEQmode;
15014	  compare_result = gen_reg_rtx (CCEQmode);
15015	  if (TARGET_64BIT)
15016	    emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
15017	  else
15018	    emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
15019	}
15020      else
15021	emit_insn (gen_rtx_SET (compare_result,
15022				gen_rtx_COMPARE (comp_mode, op0, op1)));
15023    }
15024
15025  validate_condition_mode (code, GET_MODE (compare_result));
15026
15027  return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
15028}
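
/* Illustration of the check_nan path above for UNGT (an isgreater-style
   test) on IEEE 128-bit types without hardware support, in C-like pseudo
   code, assuming the KFmode libcalls __unordkf2 and __gekf2:

	dest = 1;
	if (__unordkf2 (a, b) != 0)	   (either operand is a NaN)
	  goto join;
	dest = (__gekf2 (a, b) > 0);	   (plain a > b)
      join:
	result = (dest != 0);

   so the result is true when the operands compare unordered or when a is
   greater than b.  */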
15029
15030
15031/* Return the diagnostic message string if the binary operation OP is
15032   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
15033
15034static const char*
15035rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
15036			  const_tree type1,
15037			  const_tree type2)
15038{
15039  machine_mode mode1 = TYPE_MODE (type1);
15040  machine_mode mode2 = TYPE_MODE (type2);
15041
15042  /* For complex modes, use the inner type.  */
15043  if (COMPLEX_MODE_P (mode1))
15044    mode1 = GET_MODE_INNER (mode1);
15045
15046  if (COMPLEX_MODE_P (mode2))
15047    mode2 = GET_MODE_INNER (mode2);
15048
15049  /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
15050     double to intermix unless -mfloat128-convert.  */
15051  if (mode1 == mode2)
15052    return NULL;
15053
15054  if (!TARGET_FLOAT128_CVT)
15055    {
15056      if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
15057	  || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
15058	return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
15059		  "point types");
15060    }
15061
15062  return NULL;
15063}
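
/* For example, without -mfloat128-convert the following addition is
   rejected with the diagnostic above, because the operands use different
   128-bit formats:

	__float128 a;
	__ibm128 b;
	... a + b ...

   while combining two values of the same 128-bit type is always allowed.  */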
15064
15065
15066/* Expand floating point conversion to/from __float128 and __ibm128.  */
15067
15068void
15069rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
15070{
15071  machine_mode dest_mode = GET_MODE (dest);
15072  machine_mode src_mode = GET_MODE (src);
15073  convert_optab cvt = unknown_optab;
15074  bool do_move = false;
15075  rtx libfunc = NULL_RTX;
15076  rtx dest2;
15077  typedef rtx (*rtx_2func_t) (rtx, rtx);
15078  rtx_2func_t hw_convert = (rtx_2func_t)0;
15079  size_t kf_or_tf;
15080
15081  struct hw_conv_t {
15082    rtx_2func_t	from_df;
15083    rtx_2func_t from_sf;
15084    rtx_2func_t from_si_sign;
15085    rtx_2func_t from_si_uns;
15086    rtx_2func_t from_di_sign;
15087    rtx_2func_t from_di_uns;
15088    rtx_2func_t to_df;
15089    rtx_2func_t to_sf;
15090    rtx_2func_t to_si_sign;
15091    rtx_2func_t to_si_uns;
15092    rtx_2func_t to_di_sign;
15093    rtx_2func_t to_di_uns;
15094  } hw_conversions[2] = {
    /* conversions to/from KFmode */
15096    {
15097      gen_extenddfkf2_hw,		/* KFmode <- DFmode.  */
15098      gen_extendsfkf2_hw,		/* KFmode <- SFmode.  */
15099      gen_float_kfsi2_hw,		/* KFmode <- SImode (signed).  */
15100      gen_floatuns_kfsi2_hw,		/* KFmode <- SImode (unsigned).  */
15101      gen_float_kfdi2_hw,		/* KFmode <- DImode (signed).  */
15102      gen_floatuns_kfdi2_hw,		/* KFmode <- DImode (unsigned).  */
15103      gen_trunckfdf2_hw,		/* DFmode <- KFmode.  */
15104      gen_trunckfsf2_hw,		/* SFmode <- KFmode.  */
15105      gen_fix_kfsi2_hw,			/* SImode <- KFmode (signed).  */
15106      gen_fixuns_kfsi2_hw,		/* SImode <- KFmode (unsigned).  */
15107      gen_fix_kfdi2_hw,			/* DImode <- KFmode (signed).  */
15108      gen_fixuns_kfdi2_hw,		/* DImode <- KFmode (unsigned).  */
15109    },
15110
    /* conversions to/from TFmode */
15112    {
15113      gen_extenddftf2_hw,		/* TFmode <- DFmode.  */
15114      gen_extendsftf2_hw,		/* TFmode <- SFmode.  */
15115      gen_float_tfsi2_hw,		/* TFmode <- SImode (signed).  */
15116      gen_floatuns_tfsi2_hw,		/* TFmode <- SImode (unsigned).  */
15117      gen_float_tfdi2_hw,		/* TFmode <- DImode (signed).  */
15118      gen_floatuns_tfdi2_hw,		/* TFmode <- DImode (unsigned).  */
15119      gen_trunctfdf2_hw,		/* DFmode <- TFmode.  */
15120      gen_trunctfsf2_hw,		/* SFmode <- TFmode.  */
15121      gen_fix_tfsi2_hw,			/* SImode <- TFmode (signed).  */
15122      gen_fixuns_tfsi2_hw,		/* SImode <- TFmode (unsigned).  */
15123      gen_fix_tfdi2_hw,			/* DImode <- TFmode (signed).  */
15124      gen_fixuns_tfdi2_hw,		/* DImode <- TFmode (unsigned).  */
15125    },
15126  };
15127
15128  if (dest_mode == src_mode)
15129    gcc_unreachable ();
15130
15131  /* Eliminate memory operations.  */
15132  if (MEM_P (src))
15133    src = force_reg (src_mode, src);
15134
15135  if (MEM_P (dest))
15136    {
15137      rtx tmp = gen_reg_rtx (dest_mode);
15138      rs6000_expand_float128_convert (tmp, src, unsigned_p);
15139      rs6000_emit_move (dest, tmp, dest_mode);
15140      return;
15141    }
15142
15143  /* Convert to IEEE 128-bit floating point.  */
15144  if (FLOAT128_IEEE_P (dest_mode))
15145    {
15146      if (dest_mode == KFmode)
15147	kf_or_tf = 0;
15148      else if (dest_mode == TFmode)
15149	kf_or_tf = 1;
15150      else
15151	gcc_unreachable ();
15152
15153      switch (src_mode)
15154	{
15155	case E_DFmode:
15156	  cvt = sext_optab;
15157	  hw_convert = hw_conversions[kf_or_tf].from_df;
15158	  break;
15159
15160	case E_SFmode:
15161	  cvt = sext_optab;
15162	  hw_convert = hw_conversions[kf_or_tf].from_sf;
15163	  break;
15164
15165	case E_KFmode:
15166	case E_IFmode:
15167	case E_TFmode:
15168	  if (FLOAT128_IBM_P (src_mode))
15169	    cvt = sext_optab;
15170	  else
15171	    do_move = true;
15172	  break;
15173
15174	case E_SImode:
15175	  if (unsigned_p)
15176	    {
15177	      cvt = ufloat_optab;
15178	      hw_convert = hw_conversions[kf_or_tf].from_si_uns;
15179	    }
15180	  else
15181	    {
15182	      cvt = sfloat_optab;
15183	      hw_convert = hw_conversions[kf_or_tf].from_si_sign;
15184	    }
15185	  break;
15186
15187	case E_DImode:
15188	  if (unsigned_p)
15189	    {
15190	      cvt = ufloat_optab;
15191	      hw_convert = hw_conversions[kf_or_tf].from_di_uns;
15192	    }
15193	  else
15194	    {
15195	      cvt = sfloat_optab;
15196	      hw_convert = hw_conversions[kf_or_tf].from_di_sign;
15197	    }
15198	  break;
15199
15200	default:
15201	  gcc_unreachable ();
15202	}
15203    }
15204
15205  /* Convert from IEEE 128-bit floating point.  */
15206  else if (FLOAT128_IEEE_P (src_mode))
15207    {
15208      if (src_mode == KFmode)
15209	kf_or_tf = 0;
15210      else if (src_mode == TFmode)
15211	kf_or_tf = 1;
15212      else
15213	gcc_unreachable ();
15214
15215      switch (dest_mode)
15216	{
15217	case E_DFmode:
15218	  cvt = trunc_optab;
15219	  hw_convert = hw_conversions[kf_or_tf].to_df;
15220	  break;
15221
15222	case E_SFmode:
15223	  cvt = trunc_optab;
15224	  hw_convert = hw_conversions[kf_or_tf].to_sf;
15225	  break;
15226
15227	case E_KFmode:
15228	case E_IFmode:
15229	case E_TFmode:
15230	  if (FLOAT128_IBM_P (dest_mode))
15231	    cvt = trunc_optab;
15232	  else
15233	    do_move = true;
15234	  break;
15235
15236	case E_SImode:
15237	  if (unsigned_p)
15238	    {
15239	      cvt = ufix_optab;
15240	      hw_convert = hw_conversions[kf_or_tf].to_si_uns;
15241	    }
15242	  else
15243	    {
15244	      cvt = sfix_optab;
15245	      hw_convert = hw_conversions[kf_or_tf].to_si_sign;
15246	    }
15247	  break;
15248
15249	case E_DImode:
15250	  if (unsigned_p)
15251	    {
15252	      cvt = ufix_optab;
15253	      hw_convert = hw_conversions[kf_or_tf].to_di_uns;
15254	    }
15255	  else
15256	    {
15257	      cvt = sfix_optab;
15258	      hw_convert = hw_conversions[kf_or_tf].to_di_sign;
15259	    }
15260	  break;
15261
15262	default:
15263	  gcc_unreachable ();
15264	}
15265    }
15266
15267  /* Both IBM format.  */
15268  else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
15269    do_move = true;
15270
15271  else
15272    gcc_unreachable ();
15273
15274  /* Handle conversion between TFmode/KFmode/IFmode.  */
15275  if (do_move)
15276    emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
15277
15278  /* Handle conversion if we have hardware support.  */
15279  else if (TARGET_FLOAT128_HW && hw_convert)
15280    emit_insn ((hw_convert) (dest, src));
15281
15282  /* Call an external function to do the conversion.  */
15283  else if (cvt != unknown_optab)
15284    {
15285      libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
15286      gcc_assert (libfunc != NULL_RTX);
15287
15288      dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
15289				       src, src_mode);
15290
15291      gcc_assert (dest2 != NULL_RTX);
15292      if (!rtx_equal_p (dest, dest2))
15293	emit_move_insn (dest, dest2);
15294    }
15295
15296  else
15297    gcc_unreachable ();
15298
15299  return;
15300}
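
/* For example, a signed DImode to KFmode conversion uses gen_float_kfdi2_hw
   when IEEE 128-bit hardware support is available, and otherwise falls back
   to the sfloat_optab libcall (normally __floatdikf in libgcc).  */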
15301
15302
15303/* Emit RTL that sets a register to zero if OP1 and OP2 are equal.  SCRATCH
15304   can be used as that dest register.  Return the dest register.  */
15305
15306rtx
15307rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
15308{
15309  if (op2 == const0_rtx)
15310    return op1;
15311
15312  if (GET_CODE (scratch) == SCRATCH)
15313    scratch = gen_reg_rtx (mode);
15314
15315  if (logical_operand (op2, mode))
15316    emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
15317  else
15318    emit_insn (gen_rtx_SET (scratch,
15319			    gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
15320
15321  return scratch;
15322}
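
/* For example, for (a == 0x7fff) the constant satisfies logical_operand, so
   this emits scratch = a ^ 0x7fff; for (a == -16) it instead emits the
   equivalent scratch = a + 16.  Either way, scratch is zero exactly when
   the two operands are equal.  */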
15323
15324/* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
15325   requires this.  The result is mode MODE.  */
15326rtx
15327rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
15328{
15329  rtx cond[2];
15330  int n = 0;
15331  if (code == LTGT || code == LE || code == UNLT)
15332    cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
15333  if (code == LTGT || code == GE || code == UNGT)
15334    cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
15335  if (code == LE || code == GE || code == UNEQ)
15336    cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
15337  if (code == UNLT || code == UNGT || code == UNEQ)
15338    cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
15339
15340  gcc_assert (n == 2);
15341
15342  rtx cc = gen_reg_rtx (CCEQmode);
15343  rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
15344  emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
15345
15346  return cc;
15347}
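
/* For example, a floating-point LE is not a single CR bit, so it is
   computed here as LT | EQ: the two conditions are combined with a cror
   into a CCEQmode register that branch patterns can test directly.  */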
15348
15349void
15350rs6000_emit_sCOND (machine_mode mode, rtx operands[])
15351{
15352  rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
15353  rtx_code cond_code = GET_CODE (condition_rtx);
15354
15355  if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
15356      && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
15357    ;
15358  else if (cond_code == NE
15359	   || cond_code == GE || cond_code == LE
15360	   || cond_code == GEU || cond_code == LEU
15361	   || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
15362    {
15363      rtx not_result = gen_reg_rtx (CCEQmode);
15364      rtx not_op, rev_cond_rtx;
15365      machine_mode cc_mode;
15366
15367      cc_mode = GET_MODE (XEXP (condition_rtx, 0));
15368
15369      rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
15370				     SImode, XEXP (condition_rtx, 0), const0_rtx);
15371      not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
15372      emit_insn (gen_rtx_SET (not_result, not_op));
15373      condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
15374    }
15375
15376  machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
15377  if (op_mode == VOIDmode)
15378    op_mode = GET_MODE (XEXP (operands[1], 1));
15379
15380  if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
15381    {
15382      PUT_MODE (condition_rtx, DImode);
15383      convert_move (operands[0], condition_rtx, 0);
15384    }
15385  else
15386    {
15387      PUT_MODE (condition_rtx, SImode);
15388      emit_insn (gen_rtx_SET (operands[0], condition_rtx));
15389    }
15390}
15391
/* Emit a conditional branch.  OPERANDS[0] is the comparison and OPERANDS[3]
   is the branch target label; MODE is the mode of the comparison.  */
15393
15394void
15395rs6000_emit_cbranch (machine_mode mode, rtx operands[])
15396{
15397  rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
15398  rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
15399  rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
15400  emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
15401}
15402
15403/* Return the string to output a conditional branch to LABEL, which is
15404   the operand template of the label, or NULL if the branch is really a
15405   conditional return.
15406
15407   OP is the conditional expression.  XEXP (OP, 0) is assumed to be a
15408   condition code register and its mode specifies what kind of
15409   comparison we made.
15410
15411   REVERSED is nonzero if we should reverse the sense of the comparison.
15412
15413   INSN is the insn.  */
15414
15415char *
15416output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
15417{
15418  static char string[64];
15419  enum rtx_code code = GET_CODE (op);
15420  rtx cc_reg = XEXP (op, 0);
15421  machine_mode mode = GET_MODE (cc_reg);
15422  int cc_regno = REGNO (cc_reg) - CR0_REGNO;
15423  int need_longbranch = label != NULL && get_attr_length (insn) == 8;
15424  int really_reversed = reversed ^ need_longbranch;
15425  char *s = string;
15426  const char *ccode;
15427  const char *pred;
15428  rtx note;
15429
15430  validate_condition_mode (code, mode);
15431
15432  /* Work out which way this really branches.  We could use
15433     reverse_condition_maybe_unordered here always but this
15434     makes the resulting assembler clearer.  */
15435  if (really_reversed)
15436    {
      /* Reversal of FP compares needs care -- an ordered compare
	 becomes an unordered compare and vice versa.  */
15439      if (mode == CCFPmode)
15440	code = reverse_condition_maybe_unordered (code);
15441      else
15442	code = reverse_condition (code);
15443    }
15444
15445  switch (code)
15446    {
15447      /* Not all of these are actually distinct opcodes, but
15448	 we distinguish them for clarity of the resulting assembler.  */
15449    case NE: case LTGT:
15450      ccode = "ne"; break;
15451    case EQ: case UNEQ:
15452      ccode = "eq"; break;
15453    case GE: case GEU:
15454      ccode = "ge"; break;
15455    case GT: case GTU: case UNGT:
15456      ccode = "gt"; break;
15457    case LE: case LEU:
15458      ccode = "le"; break;
15459    case LT: case LTU: case UNLT:
15460      ccode = "lt"; break;
15461    case UNORDERED: ccode = "un"; break;
15462    case ORDERED: ccode = "nu"; break;
15463    case UNGE: ccode = "nl"; break;
15464    case UNLE: ccode = "ng"; break;
15465    default:
15466      gcc_unreachable ();
15467    }
15468
15469  /* Maybe we have a guess as to how likely the branch is.  */
15470  pred = "";
15471  note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
15472  if (note != NULL_RTX)
15473    {
15474      /* PROB is the difference from 50%.  */
15475      int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
15476		   .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
15477
15478      /* Only hint for highly probable/improbable branches on newer cpus when
15479	 we have real profile data, as static prediction overrides processor
15480	 dynamic prediction.  For older cpus we may as well always hint, but
15481	 assume not taken for branches that are very close to 50% as a
15482	 mispredicted taken branch is more expensive than a
15483	 mispredicted not-taken branch.  */
15484      if (rs6000_always_hint
15485	  || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
15486	      && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
15487	      && br_prob_note_reliable_p (note)))
15488	{
15489	  if (abs (prob) > REG_BR_PROB_BASE / 20
15490	      && ((prob > 0) ^ need_longbranch))
15491	    pred = "+";
15492	  else
15493	    pred = "-";
15494	}
15495    }
15496
15497  if (label == NULL)
15498    s += sprintf (s, "b%slr%s ", ccode, pred);
15499  else
15500    s += sprintf (s, "b%s%s ", ccode, pred);
15501
15502  /* We need to escape any '%' characters in the reg_names string.
15503     Assume they'd only be the first character....  */
15504  if (reg_names[cc_regno + CR0_REGNO][0] == '%')
15505    *s++ = '%';
15506  s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
15507
15508  if (label != NULL)
15509    {
15510      /* If the branch distance was too far, we may have to use an
15511	 unconditional branch to go the distance.  */
15512      if (need_longbranch)
15513	s += sprintf (s, ",$+8\n\tb %s", label);
15514      else
15515	s += sprintf (s, ",%s", label);
15516    }
15517
15518  return string;
15519}
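
/* For example, (EQ cr0 0) with label ".L5" produces "beq 0,.L5" (using the
   default numeric CR field names), with "+" or "-" appended to the mnemonic
   when a usable branch-probability note exists.  If the label is out of
   range of the 16-bit conditional-branch displacement, the condition is
   reversed and an unconditional branch goes the distance:

	bne 0,$+8
	b .L5  */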
15520
/* Emit a VSX or Altivec compare and return the mask register, or NULL_RTX
   if the comparison cannot be done with a single instruction.  */
15522
15523static rtx
15524rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
15525{
15526  rtx mask;
15527  machine_mode mode = GET_MODE (op0);
15528
15529  switch (code)
15530    {
15531    default:
15532      break;
15533
15534    case GE:
15535      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15536	return NULL_RTX;
15537      /* FALLTHRU */
15538
15539    case EQ:
15540    case GT:
15541    case GTU:
15542    case ORDERED:
15543    case UNORDERED:
15544    case UNEQ:
15545    case LTGT:
15546      mask = gen_reg_rtx (mode);
15547      emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
15548      return mask;
15549    }
15550
15551  return NULL_RTX;
15552}
15553
15554/* Emit vector compare for operands OP0 and OP1 using code RCODE.
15555   DMODE is expected destination mode. This is a recursive function.  */
15556
15557static rtx
15558rs6000_emit_vector_compare (enum rtx_code rcode,
15559			    rtx op0, rtx op1,
15560			    machine_mode dmode)
15561{
15562  rtx mask;
15563  bool swap_operands = false;
15564  bool try_again = false;
15565
15566  gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
15567  gcc_assert (GET_MODE (op0) == GET_MODE (op1));
15568
15569  /* See if the comparison works as is.  */
15570  mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15571  if (mask)
15572    return mask;
15573
15574  switch (rcode)
15575    {
15576    case LT:
15577      rcode = GT;
15578      swap_operands = true;
15579      try_again = true;
15580      break;
15581    case LTU:
15582      rcode = GTU;
15583      swap_operands = true;
15584      try_again = true;
15585      break;
15586    case NE:
15587    case UNLE:
15588    case UNLT:
15589    case UNGE:
15590    case UNGT:
15591      /* Invert condition and try again.
15592	 e.g., A != B becomes ~(A==B).  */
15593      {
15594	enum rtx_code rev_code;
15595	enum insn_code nor_code;
15596	rtx mask2;
15597
15598	rev_code = reverse_condition_maybe_unordered (rcode);
15599	if (rev_code == UNKNOWN)
15600	  return NULL_RTX;
15601
15602	nor_code = optab_handler (one_cmpl_optab, dmode);
15603	if (nor_code == CODE_FOR_nothing)
15604	  return NULL_RTX;
15605
15606	mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
15607	if (!mask2)
15608	  return NULL_RTX;
15609
15610	mask = gen_reg_rtx (dmode);
15611	emit_insn (GEN_FCN (nor_code) (mask, mask2));
15612	return mask;
15613      }
15614      break;
15615    case GE:
15616    case GEU:
15617    case LE:
15618    case LEU:
15619      /* Try GT/GTU/LT/LTU OR EQ */
15620      {
15621	rtx c_rtx, eq_rtx;
15622	enum insn_code ior_code;
15623	enum rtx_code new_code;
15624
15625	switch (rcode)
15626	  {
15627	  case  GE:
15628	    new_code = GT;
15629	    break;
15630
15631	  case GEU:
15632	    new_code = GTU;
15633	    break;
15634
15635	  case LE:
15636	    new_code = LT;
15637	    break;
15638
15639	  case LEU:
15640	    new_code = LTU;
15641	    break;
15642
15643	  default:
15644	    gcc_unreachable ();
15645	  }
15646
15647	ior_code = optab_handler (ior_optab, dmode);
15648	if (ior_code == CODE_FOR_nothing)
15649	  return NULL_RTX;
15650
15651	c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
15652	if (!c_rtx)
15653	  return NULL_RTX;
15654
15655	eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
15656	if (!eq_rtx)
15657	  return NULL_RTX;
15658
15659	mask = gen_reg_rtx (dmode);
15660	emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
15661	return mask;
15662      }
15663      break;
15664    default:
15665      return NULL_RTX;
15666    }
15667
15668  if (try_again)
15669    {
15670      if (swap_operands)
15671	std::swap (op0, op1);
15672
15673      mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15674      if (mask)
15675	return mask;
15676    }
15677
15678  /* You only get two chances.  */
15679  return NULL_RTX;
15680}
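
/* For example, integer vectors have no GE instruction, so (a GE b) is
   emitted as (a GT b) IOR (a EQ b), and (a NE b) is emitted as the one's
   complement of (a EQ b).  */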
15681
15682/* Emit vector conditional expression.  DEST is destination. OP_TRUE and
15683   OP_FALSE are two VEC_COND_EXPR operands.  CC_OP0 and CC_OP1 are the two
15684   operands for the relation operation COND.  */
15685
15686int
15687rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
15688			      rtx cond, rtx cc_op0, rtx cc_op1)
15689{
15690  machine_mode dest_mode = GET_MODE (dest);
15691  machine_mode mask_mode = GET_MODE (cc_op0);
15692  enum rtx_code rcode = GET_CODE (cond);
15693  rtx mask;
15694  bool invert_move = false;
15695
15696  if (VECTOR_UNIT_NONE_P (dest_mode))
15697    return 0;
15698
15699  gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
15700	      && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
15701
15702  switch (rcode)
15703    {
15704      /* Swap operands if we can, and fall back to doing the operation as
15705	 specified, and doing a NOR to invert the test.  */
15706    case NE:
15707    case UNLE:
15708    case UNLT:
15709    case UNGE:
15710    case UNGT:
15711      /* Invert condition and try again.
15712	 e.g., A  = (B != C) ? D : E becomes A = (B == C) ? E : D.  */
15713      invert_move = true;
15714      rcode = reverse_condition_maybe_unordered (rcode);
15715      if (rcode == UNKNOWN)
15716	return 0;
15717      break;
15718
15719    case GE:
15720    case LE:
15721      if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
15722	{
15723	  /* Invert condition to avoid compound test.  */
15724	  invert_move = true;
15725	  rcode = reverse_condition (rcode);
15726	}
15727      break;
15728
15729    case GTU:
15730    case GEU:
15731    case LTU:
15732    case LEU:
15733
15734      /* Invert condition to avoid compound test if necessary.  */
15735      if (rcode == GEU || rcode == LEU)
15736	{
15737	  invert_move = true;
15738	  rcode = reverse_condition (rcode);
15739	}
15740      break;
15741
15742    default:
15743      break;
15744    }
15745
15746  /* Get the vector mask for the given relational operations.  */
15747  mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
15748
15749  if (!mask)
15750    return 0;
15751
15752  if (mask_mode != dest_mode)
15753    mask = simplify_gen_subreg (dest_mode, mask, mask_mode, 0);
15754
15755  if (invert_move)
15756    std::swap (op_true, op_false);
15757
  /* Optimize vec1 == vec2, using the fact that the mask is -1/0.  */
15759  if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
15760      && (GET_CODE (op_true) == CONST_VECTOR
15761	  || GET_CODE (op_false) == CONST_VECTOR))
15762    {
15763      rtx constant_0 = CONST0_RTX (dest_mode);
15764      rtx constant_m1 = CONSTM1_RTX (dest_mode);
15765
15766      if (op_true == constant_m1 && op_false == constant_0)
15767	{
15768	  emit_move_insn (dest, mask);
15769	  return 1;
15770	}
15771
15772      else if (op_true == constant_0 && op_false == constant_m1)
15773	{
15774	  emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
15775	  return 1;
15776	}
15777
15778      /* If we can't use the vector comparison directly, perhaps we can use
15779	 the mask for the true or false fields, instead of loading up a
15780	 constant.  */
15781      if (op_true == constant_m1)
15782	op_true = mask;
15783
15784      if (op_false == constant_0)
15785	op_false = mask;
15786    }
15787
15788  if (!REG_P (op_true) && !SUBREG_P (op_true))
15789    op_true = force_reg (dest_mode, op_true);
15790
15791  if (!REG_P (op_false) && !SUBREG_P (op_false))
15792    op_false = force_reg (dest_mode, op_false);
15793
15794  rtx tmp = gen_rtx_IOR (dest_mode,
15795			 gen_rtx_AND (dest_mode, gen_rtx_NOT (dest_mode, mask),
15796				      op_false),
15797			 gen_rtx_AND (dest_mode, mask, op_true));
15798  emit_insn (gen_rtx_SET (dest, tmp));
15799  return 1;
15800}
15801
15802/* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a
15803   maximum or minimum with "C" semantics.
15804
   Unless you use -ffast-math, you can't use these instructions to replace
   conditions that implicitly reverse the condition because the comparison
   might generate a NaN or signed zero.
15808
15809   I.e. the following can be replaced all of the time
15810	ret = (op1 >  op2) ? op1 : op2	; generate xsmaxcdp
15811	ret = (op1 >= op2) ? op1 : op2	; generate xsmaxcdp
15812	ret = (op1 <  op2) ? op1 : op2;	; generate xsmincdp
15813	ret = (op1 <= op2) ? op1 : op2;	; generate xsmincdp
15814
15815   The following can be replaced only if -ffast-math is used:
15816	ret = (op1 <  op2) ? op2 : op1	; generate xsmaxcdp
15817	ret = (op1 <= op2) ? op2 : op1	; generate xsmaxcdp
15818	ret = (op1 >  op2) ? op2 : op1;	; generate xsmincdp
15819	ret = (op1 >= op2) ? op2 : op1;	; generate xsmincdp
15820
15821   Move TRUE_COND to DEST if OP of the operands of the last comparison is
15822   nonzero/true, FALSE_COND if it is zero/false.
15823
   Return false if we can't generate the appropriate minimum or maximum, and
   true if we did generate it.  */
15826
15827static bool
15828rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15829{
15830  enum rtx_code code = GET_CODE (op);
15831  rtx op0 = XEXP (op, 0);
15832  rtx op1 = XEXP (op, 1);
15833  machine_mode compare_mode = GET_MODE (op0);
15834  machine_mode result_mode = GET_MODE (dest);
15835
15836  if (result_mode != compare_mode)
15837    return false;
15838
15839  /* See the comments of this function, it simply expects GE/GT/LE/LT in
15840     the checks, but for the reversible equivalent UNLT/UNLE/UNGT/UNGE,
15841     we need to do the reversions first to make the following checks
15842     support fewer cases, like:
15843
15844	(a UNLT b) ? op1 : op2 =>  (a >= b) ? op2 : op1;
15845	(a UNLE b) ? op1 : op2 =>  (a >  b) ? op2 : op1;
15846	(a UNGT b) ? op1 : op2 =>  (a <= b) ? op2 : op1;
15847	(a UNGE b) ? op1 : op2 =>  (a <  b) ? op2 : op1;
15848
15849     By the way, if we see these UNLT/UNLE/UNGT/UNGE it's guaranteed
15850     that we have 4-way condition codes (LT/GT/EQ/UN), so we do not
15851     have to check for fast-math or the like.  */
15852  if (code == UNGE || code == UNGT || code == UNLE || code == UNLT)
15853    {
15854      code = reverse_condition_maybe_unordered (code);
15855      std::swap (true_cond, false_cond);
15856    }
15857
15858  bool max_p;
15859  if (code == GE || code == GT)
15860    max_p = true;
15861  else if (code == LE || code == LT)
15862    max_p = false;
15863  else
15864    return false;
15865
15866  if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
15867    ;
15868
  /* Only when NaNs and signed zeros need not be honored can smax be used
     for `op0 < op1 ? op1 : op0`, and smin for `op0 > op1 ? op1 : op0`.  */
15872  else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
15873	   && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
15874    max_p = !max_p;
15875
15876  else
15877    return false;
15878
15879  rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
15880  return true;
15881}
15882
/* Possibly emit a floating point conditional move by generating a compare
   instruction that sets a mask, followed by an XXSEL select instruction.
15885
15886   Move TRUE_COND to DEST if OP of the operands of the last comparison is
15887   nonzero/true, FALSE_COND if it is zero/false.
15888
15889   Return false if the operation cannot be generated, and true if we could
15890   generate the instruction.  */
15891
15892static bool
15893rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15894{
15895  enum rtx_code code = GET_CODE (op);
15896  rtx op0 = XEXP (op, 0);
15897  rtx op1 = XEXP (op, 1);
15898  machine_mode compare_mode = GET_MODE (op0);
15899  machine_mode result_mode = GET_MODE (dest);
15900  rtx compare_rtx;
15901  rtx cmove_rtx;
15902  rtx clobber_rtx;
15903
15904  if (!can_create_pseudo_p ())
    return false;
15906
15907  /* We allow the comparison to be either SFmode/DFmode and the true/false
15908     condition to be either SFmode/DFmode.  I.e. we allow:
15909
15910	float a, b;
15911	double c, d, r;
15912
15913	r = (a == b) ? c : d;
15914
15915    and:
15916
15917	double a, b;
15918	float c, d, r;
15919
15920	r = (a == b) ? c : d;
15921
15922    but we don't allow intermixing the IEEE 128-bit floating point types with
15923    the 32/64-bit scalar types.  */
15924
15925  if (!(compare_mode == result_mode
15926	|| (compare_mode == SFmode && result_mode == DFmode)
15927	|| (compare_mode == DFmode && result_mode == SFmode)))
15928    return false;
15929
15930  switch (code)
15931    {
15932    case EQ:
15933    case GE:
15934    case GT:
15935      break;
15936
15937    case NE:
15938    case LT:
15939    case LE:
15940      code = swap_condition (code);
15941      std::swap (op0, op1);
15942      break;
15943
15944    default:
15945      return false;
15946    }
15947
15948  /* Generate:	[(parallel [(set (dest)
15949				 (if_then_else (op (cmp1) (cmp2))
15950					       (true)
15951					       (false)))
15952			    (clobber (scratch))])].  */
15953
15954  compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
15955  cmove_rtx = gen_rtx_SET (dest,
15956			   gen_rtx_IF_THEN_ELSE (result_mode,
15957						 compare_rtx,
15958						 true_cond,
15959						 false_cond));
15960
15961  clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
15962  emit_insn (gen_rtx_PARALLEL (VOIDmode,
15963			       gen_rtvec (2, cmove_rtx, clobber_rtx)));
15964
15965  return true;
15966}
15967
15968/* Helper function to return true if the target has instructions to do a
15969   compare and set mask instruction that can be used with XXSEL to implement a
15970   conditional move.  It is also assumed that such a target also supports the
15971   "C" minimum and maximum instructions. */
15972
15973static bool
15974have_compare_and_set_mask (machine_mode mode)
15975{
15976  switch (mode)
15977    {
15978    case E_SFmode:
15979    case E_DFmode:
15980      return TARGET_P9_MINMAX;
15981
15982    case E_KFmode:
15983    case E_TFmode:
15984      return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode);
15985
15986    default:
15987      break;
15988    }
15989
15990  return false;
15991}
15992
15993/* Emit a conditional move: move TRUE_COND to DEST if OP of the
15994   operands of the last comparison is nonzero/true, FALSE_COND if it
   is zero/false.  Return false if the hardware has no such operation.  */
15996
15997bool
15998rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15999{
16000  enum rtx_code code = GET_CODE (op);
16001  rtx op0 = XEXP (op, 0);
16002  rtx op1 = XEXP (op, 1);
16003  machine_mode compare_mode = GET_MODE (op0);
16004  machine_mode result_mode = GET_MODE (dest);
16005  rtx temp;
16006  bool is_against_zero;
16007
16008  /* These modes should always match.  */
16009  if (GET_MODE (op1) != compare_mode
16010      /* In the isel case however, we can use a compare immediate, so
16011	 op1 may be a small constant.  */
16012      && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
16013    return false;
16014  if (GET_MODE (true_cond) != result_mode)
16015    return false;
16016  if (GET_MODE (false_cond) != result_mode)
16017    return false;
16018
16019  /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
16020     instructions.  */
16021  if (have_compare_and_set_mask (compare_mode)
16022      && have_compare_and_set_mask (result_mode))
16023    {
16024      if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
16025	return true;
16026
16027      if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
16028	return true;
16029    }
16030
16031  /* Don't allow using floating point comparisons for integer results for
16032     now.  */
16033  if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
16034    return false;
16035
16036  /* First, work out if the hardware can do this at all, or
16037     if it's too slow....  */
16038  if (!FLOAT_MODE_P (compare_mode))
16039    {
16040      if (TARGET_ISEL)
16041	return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
16042      return false;
16043    }
16044
16045  is_against_zero = op1 == CONST0_RTX (compare_mode);
16046
16047  /* A floating-point subtract might overflow, underflow, or produce
16048     an inexact result, thus changing the floating-point flags, so it
16049     can't be generated if we care about that.  It's safe if one side
16050     of the construct is zero, since then no subtract will be
16051     generated.  */
16052  if (SCALAR_FLOAT_MODE_P (compare_mode)
16053      && flag_trapping_math && ! is_against_zero)
16054    return false;
16055
16056  /* Eliminate half of the comparisons by switching operands, this
16057     makes the remaining code simpler.  */
16058  if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
16059      || code == LTGT || code == LT || code == UNLE)
16060    {
16061      code = reverse_condition_maybe_unordered (code);
16062      temp = true_cond;
16063      true_cond = false_cond;
16064      false_cond = temp;
16065    }
16066
  /* UNEQ and LTGT take four instructions for a comparison with zero,
     so it'll probably be faster to use a branch here too.  */
16069  if (code == UNEQ && HONOR_NANS (compare_mode))
16070    return false;
16071
16072  /* We're going to try to implement comparisons by performing
16073     a subtract, then comparing against zero.  Unfortunately,
16074     Inf - Inf is NaN which is not zero, and so if we don't
16075     know that the operand is finite and the comparison
     would treat EQ differently from UNORDERED, we can't do it.  */
16077  if (HONOR_INFINITIES (compare_mode)
16078      && code != GT && code != UNGE
16079      && (!CONST_DOUBLE_P (op1)
16080	  || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
16081      /* Constructs of the form (a OP b ? a : b) are safe.  */
16082      && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
16083	  || (! rtx_equal_p (op0, true_cond)
16084	      && ! rtx_equal_p (op1, true_cond))))
16085    return false;
16086
16087  /* At this point we know we can use fsel.  */
16088
16089  /* Don't allow compare_mode other than SFmode or DFmode, for others there
16090     is no fsel instruction.  */
16091  if (compare_mode != SFmode && compare_mode != DFmode)
16092    return false;
16093
16094  /* Reduce the comparison to a comparison against zero.  */
16095  if (! is_against_zero)
16096    {
16097      temp = gen_reg_rtx (compare_mode);
16098      emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
16099      op0 = temp;
16100      op1 = CONST0_RTX (compare_mode);
16101    }
16102
16103  /* If we don't care about NaNs we can reduce some of the comparisons
16104     down to faster ones.  */
16105  if (! HONOR_NANS (compare_mode))
16106    switch (code)
16107      {
16108      case GT:
16109	code = LE;
16110	temp = true_cond;
16111	true_cond = false_cond;
16112	false_cond = temp;
16113	break;
16114      case UNGE:
16115	code = GE;
16116	break;
16117      case UNEQ:
16118	code = EQ;
16119	break;
16120      default:
16121	break;
16122      }
16123
16124  /* Now, reduce everything down to a GE.  */
16125  switch (code)
16126    {
16127    case GE:
16128      break;
16129
16130    case LE:
16131      temp = gen_reg_rtx (compare_mode);
16132      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16133      op0 = temp;
16134      break;
16135
16136    case ORDERED:
16137      temp = gen_reg_rtx (compare_mode);
16138      emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
16139      op0 = temp;
16140      break;
16141
16142    case EQ:
16143      temp = gen_reg_rtx (compare_mode);
16144      emit_insn (gen_rtx_SET (temp,
16145			      gen_rtx_NEG (compare_mode,
16146					   gen_rtx_ABS (compare_mode, op0))));
16147      op0 = temp;
16148      break;
16149
16150    case UNGE:
16151      /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16152      temp = gen_reg_rtx (result_mode);
16153      emit_insn (gen_rtx_SET (temp,
16154			      gen_rtx_IF_THEN_ELSE (result_mode,
16155						    gen_rtx_GE (VOIDmode,
16156								op0, op1),
16157						    true_cond, false_cond)));
16158      false_cond = true_cond;
16159      true_cond = temp;
16160
16161      temp = gen_reg_rtx (compare_mode);
16162      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16163      op0 = temp;
16164      break;
16165
16166    case GT:
16167      /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16168      temp = gen_reg_rtx (result_mode);
16169      emit_insn (gen_rtx_SET (temp,
16170			      gen_rtx_IF_THEN_ELSE (result_mode,
16171						    gen_rtx_GE (VOIDmode,
16172								op0, op1),
16173						    true_cond, false_cond)));
16174      true_cond = false_cond;
16175      false_cond = temp;
16176
16177      temp = gen_reg_rtx (compare_mode);
16178      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16179      op0 = temp;
16180      break;
16181
16182    default:
16183      gcc_unreachable ();
16184    }
16185
16186  emit_insn (gen_rtx_SET (dest,
16187			  gen_rtx_IF_THEN_ELSE (result_mode,
16188						gen_rtx_GE (VOIDmode,
16189							    op0, op1),
16190						true_cond, false_cond)));
16191  return true;
16192}
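
/* A worked example of the fsel path: with -ffast-math (so NaNs and
   infinities are not honored and the subtract cannot trap),
   r = (a <= b) ? c : d is first reduced to a compare against zero via
   t = a - b, and the LE is then turned into a GE by negating t, so the
   final selection is effectively fsel r,-(a-b),c,d.  */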
16193
16194/* Same as above, but for ints (isel).  */
16195
16196bool
16197rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16198{
16199  rtx condition_rtx, cr;
16200  machine_mode mode = GET_MODE (dest);
16201  enum rtx_code cond_code;
16202  rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
16203  bool signedp;
16204
16205  if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
16206    return false;
16207
16208  /* PR104335: We now need to expect CC-mode "comparisons"
16209     coming from ifcvt.  The following code expects proper
16210     comparisons so better abort here.  */
16211  if (GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC)
16212    return false;
16213
16214  /* We still have to do the compare, because isel doesn't do a
16215     compare, it just looks at the CRx bits set by a previous compare
16216     instruction.  */
16217  condition_rtx = rs6000_generate_compare (op, mode);
16218  cond_code = GET_CODE (condition_rtx);
16219  cr = XEXP (condition_rtx, 0);
16220  signedp = GET_MODE (cr) == CCmode;
16221
16222  isel_func = (mode == SImode
16223	       ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
16224	       : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
16225
16226  switch (cond_code)
16227    {
16228    case LT: case GT: case LTU: case GTU: case EQ:
16229      /* isel handles these directly.  */
16230      break;
16231
16232    default:
16233      /* We need to swap the sense of the comparison.  */
16234      {
16235	std::swap (false_cond, true_cond);
16236	PUT_CODE (condition_rtx, reverse_condition (cond_code));
16237      }
16238      break;
16239    }
16240
16241  false_cond = force_reg (mode, false_cond);
16242  if (true_cond != const0_rtx)
16243    true_cond = force_reg (mode, true_cond);
16244
16245  emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
16246
16247  return true;
16248}
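
/* For example, r = (a < b) ? x : y in SImode emits a compare that sets a
   CR field, then an isel that copies x or y into r according to the LT bit
   of that field.  Codes isel cannot test directly (GE, LE, NE, ...) are
   handled by reversing the comparison and swapping x and y.  */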
16249
16250void
16251rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16252{
16253  machine_mode mode = GET_MODE (op0);
16254  enum rtx_code c;
16255  rtx target;
16256
16257  /* VSX/altivec have direct min/max insns.  */
16258  if ((code == SMAX || code == SMIN)
16259      && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
16260	  || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))
16261	  || (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))))
16262    {
16263      emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
16264      return;
16265    }
16266
16267  if (code == SMAX || code == SMIN)
16268    c = GE;
16269  else
16270    c = GEU;
16271
16272  if (code == SMAX || code == UMAX)
16273    target = emit_conditional_move (dest, { c, op0, op1, mode },
16274				    op0, op1, mode, 0);
16275  else
16276    target = emit_conditional_move (dest, { c, op0, op1, mode },
16277				    op1, op0, mode, 0);
16278  gcc_assert (target);
16279  if (target != dest)
16280    emit_move_insn (dest, target);
16281}
16282
16283/* A subroutine of the atomic operation splitters.  Jump to LABEL if
16284   COND is true.  Mark the jump as unlikely to be taken.  */
16285
16286static void
16287emit_unlikely_jump (rtx cond, rtx label)
16288{
16289  rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
16290  rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
16291  add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
16292}
16293
16294/* A subroutine of the atomic operation splitters.  Emit a load-locked
   instruction in MODE.  For QI/HImode, possibly use a pattern that includes
16296   the zero_extend operation.  */
16297
16298static void
16299emit_load_locked (machine_mode mode, rtx reg, rtx mem)
16300{
16301  rtx (*fn) (rtx, rtx) = NULL;
16302
16303  switch (mode)
16304    {
16305    case E_QImode:
16306      fn = gen_load_lockedqi;
16307      break;
16308    case E_HImode:
16309      fn = gen_load_lockedhi;
16310      break;
16311    case E_SImode:
16312      if (GET_MODE (mem) == QImode)
16313	fn = gen_load_lockedqi_si;
16314      else if (GET_MODE (mem) == HImode)
16315	fn = gen_load_lockedhi_si;
16316      else
16317	fn = gen_load_lockedsi;
16318      break;
16319    case E_DImode:
16320      fn = gen_load_lockeddi;
16321      break;
16322    case E_TImode:
16323      fn = gen_load_lockedti;
16324      break;
16325    default:
16326      gcc_unreachable ();
16327    }
16328  emit_insn (fn (reg, mem));
16329}
16330
16331/* A subroutine of the atomic operation splitters.  Emit a store-conditional
16332   instruction in MODE.  */
16333
16334static void
16335emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
16336{
16337  rtx (*fn) (rtx, rtx, rtx) = NULL;
16338
16339  switch (mode)
16340    {
16341    case E_QImode:
16342      fn = gen_store_conditionalqi;
16343      break;
16344    case E_HImode:
16345      fn = gen_store_conditionalhi;
16346      break;
16347    case E_SImode:
16348      fn = gen_store_conditionalsi;
16349      break;
16350    case E_DImode:
16351      fn = gen_store_conditionaldi;
16352      break;
16353    case E_TImode:
16354      fn = gen_store_conditionalti;
16355      break;
16356    default:
16357      gcc_unreachable ();
16358    }
16359
16360  /* Emit sync before stwcx. to address PPC405 Erratum.  */
16361  if (PPC405_ERRATUM77)
16362    emit_insn (gen_hwsync ());
16363
16364  emit_insn (fn (res, mem, val));
16365}
16366
16367/* Expand barriers before and after a load_locked/store_cond sequence.  */
16368
16369static rtx
16370rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
16371{
16372  rtx addr = XEXP (mem, 0);
16373
16374  if (!legitimate_indirect_address_p (addr, reload_completed)
16375      && !legitimate_indexed_address_p (addr, reload_completed))
16376    {
16377      addr = force_reg (Pmode, addr);
16378      mem = replace_equiv_address_nv (mem, addr);
16379    }
16380
16381  switch (model)
16382    {
16383    case MEMMODEL_RELAXED:
16384    case MEMMODEL_CONSUME:
16385    case MEMMODEL_ACQUIRE:
16386      break;
16387    case MEMMODEL_RELEASE:
16388    case MEMMODEL_ACQ_REL:
16389      emit_insn (gen_lwsync ());
16390      break;
16391    case MEMMODEL_SEQ_CST:
16392      emit_insn (gen_hwsync ());
16393      break;
16394    default:
16395      gcc_unreachable ();
16396    }
16397  return mem;
16398}
16399
16400static void
16401rs6000_post_atomic_barrier (enum memmodel model)
16402{
16403  switch (model)
16404    {
16405    case MEMMODEL_RELAXED:
16406    case MEMMODEL_CONSUME:
16407    case MEMMODEL_RELEASE:
16408      break;
16409    case MEMMODEL_ACQUIRE:
16410    case MEMMODEL_ACQ_REL:
16411    case MEMMODEL_SEQ_CST:
16412      emit_insn (gen_isync ());
16413      break;
16414    default:
16415      gcc_unreachable ();
16416    }
16417}
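
/* Together with rs6000_pre_atomic_barrier this yields the usual PowerPC
   mapping: e.g. a SEQ_CST operation is bracketed by a hwsync before the
   load-locked and an isync after the store-conditional, while ACQUIRE needs
   only the trailing isync and RELEASE only the leading lwsync.  */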
16418
16419/* A subroutine of the various atomic expanders.  For sub-word operations,
16420   we must adjust things to operate on SImode.  Given the original MEM,
16421   return a new aligned memory.  Also build and return the quantities by
16422   which to shift and mask.  */
16423
16424static rtx
16425rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
16426{
16427  rtx addr, align, shift, mask, mem;
16428  HOST_WIDE_INT shift_mask;
16429  machine_mode mode = GET_MODE (orig_mem);
16430
16431  /* For smaller modes, we have to implement this via SImode.  */
16432  shift_mask = (mode == QImode ? 0x18 : 0x10);
16433
16434  addr = XEXP (orig_mem, 0);
16435  addr = force_reg (GET_MODE (addr), addr);
16436
16437  /* Aligned memory containing subword.  Generate a new memory.  We
16438     do not want any of the existing MEM_ATTR data, as we're now
16439     accessing memory outside the original object.  */
16440  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
16441			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
16442  mem = gen_rtx_MEM (SImode, align);
16443  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
16444  if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
16445    set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
16446
16447  /* Shift amount for subword relative to aligned word.  */
16448  shift = gen_reg_rtx (SImode);
16449  addr = gen_lowpart (SImode, addr);
16450  rtx tmp = gen_reg_rtx (SImode);
16451  emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
16452  emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
16453  if (BYTES_BIG_ENDIAN)
16454    shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
16455			         shift, 1, OPTAB_LIB_WIDEN);
16456  *pshift = shift;
16457
16458  /* Mask for insertion.  */
16459  mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
16460			      shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
16461  *pmask = mask;
16462
16463  return mem;
16464}
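
/* A worked example of the shift computation: for a QImode access whose
   address has low two bits 0b01 (byte 1 of the aligned word), the shift is
   (1 << 3) & 0x18 = 8 on little-endian targets; big-endian targets XOR this
   with 0x18 to get 16, since byte 1 then occupies bits 16..23 of the
   aligned word.  */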
16465
16466/* A subroutine of the various atomic expanders.  For sub-word operands,
   combine OLDVAL and NEWVAL via MASK.  Returns a new pseudo.  */
16468
16469static rtx
16470rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
16471{
16472  rtx x;
16473
16474  x = gen_reg_rtx (SImode);
16475  emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
16476					  gen_rtx_NOT (SImode, mask),
16477					  oldval)));
16478
16479  x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
16480
16481  return x;
16482}
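
/* I.e. the combined value is (OLDVAL & ~MASK) | NEWVAL, where the caller
   has already shifted and masked NEWVAL into position within the word.  */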
16483
16484/* A subroutine of the various atomic expanders.  For sub-word operands,
16485   extract WIDE to NARROW via SHIFT.  */
16486
16487static void
16488rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
16489{
16490  wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
16491			      wide, 1, OPTAB_LIB_WIDEN);
16492  emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
16493}
16494
16495/* Expand an atomic compare and swap operation.  */
16496
16497void
16498rs6000_expand_atomic_compare_and_swap (rtx operands[])
16499{
16500  rtx boolval, retval, mem, oldval, newval, cond;
16501  rtx label1, label2, x, mask, shift;
16502  machine_mode mode, orig_mode;
16503  enum memmodel mod_s, mod_f;
16504  bool is_weak;
16505
16506  boolval = operands[0];
16507  retval = operands[1];
16508  mem = operands[2];
16509  oldval = operands[3];
16510  newval = operands[4];
16511  is_weak = (INTVAL (operands[5]) != 0);
16512  mod_s = memmodel_base (INTVAL (operands[6]));
16513  mod_f = memmodel_base (INTVAL (operands[7]));
16514  orig_mode = mode = GET_MODE (mem);
16515
16516  mask = shift = NULL_RTX;
16517  if (mode == QImode || mode == HImode)
16518    {
      /* Before power8, we didn't have access to lbarx/lharx, so generate a
	 lwarx plus shift/mask operations.  With power8, we need to do the
16521	 comparison in SImode, but the store is still done in QI/HImode.  */
16522      oldval = convert_modes (SImode, mode, oldval, 1);
16523
16524      if (!TARGET_SYNC_HI_QI)
16525	{
16526	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16527
16528	  /* Shift and mask OLDVAL into position with the word.  */
	  oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  /* Shift and mask NEWVAL into position within the word.  */
	  newval = convert_modes (SImode, mode, newval, 1);
	  newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);
	}

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }
  else if (reg_overlap_mentioned_p (retval, oldval))
    oldval = copy_to_reg (oldval);

  if (mode != TImode && !reg_or_short_operand (oldval, mode))
    oldval = copy_to_mode_reg (mode, oldval);

  if (reg_overlap_mentioned_p (retval, newval))
    newval = copy_to_reg (newval);

  mem = rs6000_pre_atomic_barrier (mem, mod_s);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
      emit_label (XEXP (label1, 0));
    }
  label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());

  emit_load_locked (mode, retval, mem);

  x = retval;
  if (mask)
    x = expand_simple_binop (SImode, AND, retval, mask,
			     NULL_RTX, 1, OPTAB_LIB_WIDEN);

  cond = gen_reg_rtx (CCmode);
  /* TImode has no compare instruction, so for TImode synthesize a
     comparison from the DImode halves.  */
  if (mode != TImode)
    x = gen_rtx_COMPARE (CCmode, x, oldval);
  else
    {
      rtx xor1_result = gen_reg_rtx (DImode);
      rtx xor2_result = gen_reg_rtx (DImode);
      rtx or_result = gen_reg_rtx (DImode);
      rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
      rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
      rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
      rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);

      emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
      emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
      emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
      x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
    }

  emit_insn (gen_rtx_SET (cond, x));

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label2);

  x = newval;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, newval, mask);

  emit_store_conditional (orig_mode, cond, mem, x);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      emit_unlikely_jump (x, label1);
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  rs6000_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  if (shift)
    rs6000_finish_atomic_subword (operands[1], retval, shift);
  else if (mode != GET_MODE (operands[1]))
    convert_move (operands[1], retval, 1);

  /* In all cases, CR0 contains EQ on success, and NE on failure.  */
  x = gen_rtx_EQ (SImode, cond, const0_rtx);
  emit_insn (gen_rtx_SET (boolval, x));
}

/* Expand an atomic exchange operation.  */
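/* The loop emitted below is, roughly (barriers omitted):

	1:	lwarx  ret,0,mem	# load and reserve
		stwcx. val,0,mem	# store conditional
		bne-   cr0,1b		# lost reservation, retry
 */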

void
rs6000_expand_atomic_exchange (rtx operands[])
{
  rtx retval, mem, val, cond;
  machine_mode mode;
  enum memmodel model;
  rtx label, x, mask, shift;

  retval = operands[0];
  mem = operands[1];
  val = operands[2];
  model = memmodel_base (INTVAL (operands[3]));
  mode = GET_MODE (mem);

  mask = shift = NULL_RTX;
  if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
    {
      mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

      /* Shift and mask VAL into position within the word.  */
      val = convert_modes (SImode, mode, val, 1);
      val = expand_simple_binop (SImode, ASHIFT, val, shift,
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
  emit_label (XEXP (label, 0));

  emit_load_locked (mode, retval, mem);

  x = val;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, val, mask);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (mode, cond, mem, x);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  if (shift)
    rs6000_finish_atomic_subword (operands[0], retval, shift);
}

/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
   to perform.  MEM is the memory on which to operate.  VAL is the second
   operand of the binary operator.  BEFORE and AFTER are optional locations to
   return the value of MEM either before or after the operation.  MODEL_RTX
   is a CONST_INT containing the memory model to use.  */
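/* For example, for a word-size atomic PLUS the loop emitted below is
   roughly (barriers omitted):

	1:	lwarx  before,0,mem	# load and reserve
		add    after,before,val
		stwcx. after,0,mem	# store conditional
		bne-   cr0,1b		# lost reservation, retry
 */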

void
rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
			 rtx orig_before, rtx orig_after, rtx model_rtx)
{
  enum memmodel model = memmodel_base (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode store_mode = mode;
  rtx label, x, cond, mask, shift;
  rtx before = orig_before, after = orig_after;

  mask = shift = NULL_RTX;
  /* On power8, we want to use SImode for the operation.  On earlier
     systems, do the operation on the word containing the subword, then
     shift/mask to get the proper byte or halfword.  */
  if (mode == QImode || mode == HImode)
    {
      if (TARGET_SYNC_HI_QI)
	{
	  val = convert_modes (SImode, mode, val, 1);

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  mode = SImode;
	}
      else
	{
	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

	  /* Shift and mask VAL into position within the word.  */
	  val = convert_modes (SImode, mode, val, 1);
	  val = expand_simple_binop (SImode, ASHIFT, val, shift,
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  switch (code)
	    {
	    case IOR:
	    case XOR:
	      /* We've already zero-extended VAL.  That is sufficient to
		 make certain that it does not affect other bits.  */
	      mask = NULL;
	      break;

	    case AND:
	      /* If we make certain that all of the other bits in VAL are
		 set, that will be sufficient to not affect other bits.  */
	      x = gen_rtx_NOT (SImode, mask);
	      x = gen_rtx_IOR (SImode, x, val);
	      emit_insn (gen_rtx_SET (val, x));
	      mask = NULL;
	      break;

	    case NOT:
	    case PLUS:
	    case MINUS:
	      /* These will all affect bits outside the field and need
		 adjustment via MASK within the loop.  */
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  store_mode = mode = SImode;
	}
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  label = gen_label_rtx ();
  emit_label (label);
  label = gen_rtx_LABEL_REF (VOIDmode, label);

  if (before == NULL_RTX)
    before = gen_reg_rtx (mode);

  emit_load_locked (mode, before, mem);

  if (code == NOT)
    {
      x = expand_simple_binop (mode, AND, before, val,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      after = expand_simple_unop (mode, NOT, x, after, 1);
    }
  else
    {
      after = expand_simple_binop (mode, code, before, val,
				   after, 1, OPTAB_LIB_WIDEN);
    }

  x = after;
  if (mask)
    {
      x = expand_simple_binop (SImode, AND, after, mask,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      x = rs6000_mask_atomic_subword (before, x, mask);
    }
  else if (store_mode != mode)
    x = convert_modes (store_mode, mode, x, 1);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (store_mode, cond, mem, x);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  if (shift)
    {
      /* QImode/HImode on machines without lbarx/lharx, where we do a lwarx
	 and then do the calculations in an SImode register.  */
      if (orig_before)
	rs6000_finish_atomic_subword (orig_before, before, shift);
      if (orig_after)
	rs6000_finish_atomic_subword (orig_after, after, shift);
    }
  else if (store_mode != mode)
    {
      /* QImode/HImode on machines with lbarx/lharx, where we do the native
	 operation and then do the calculations in an SImode register.  */
      if (orig_before)
	convert_move (orig_before, before, 1);
      if (orig_after)
	convert_move (orig_after, after, 1);
    }
  else if (orig_after && after != orig_after)
    emit_move_insn (orig_after, after);
}

static GTY(()) alias_set_type TOC_alias_set = -1;

alias_set_type
get_TOC_alias_set (void)
{
  if (TOC_alias_set == -1)
    TOC_alias_set = new_alias_set ();
  return TOC_alias_set;
}

/* The mode the ABI uses for a word.  This is not the same as word_mode
   for -m32 -mpowerpc64.  This is used to implement various target hooks.  */

static scalar_int_mode
rs6000_abi_word_mode (void)
{
  return TARGET_32BIT ? SImode : DImode;
}

/* Implement the TARGET_OFFLOAD_OPTIONS hook.  */
static char *
rs6000_offload_options (void)
{
  if (TARGET_64BIT)
    return xstrdup ("-foffload-abi=lp64");
  else
    return xstrdup ("-foffload-abi=ilp32");
}


/* A quick summary of the various types of 'constant-pool tables'
   under PowerPC:

   Target	Flags		Name		One table per
   AIX		(none)		AIX TOC		object file
   AIX		-mfull-toc	AIX TOC		object file
   AIX		-mminimal-toc	AIX minimal TOC	translation unit
   SVR4/EABI	(none)		SVR4 SDATA	object file
   SVR4/EABI	-fpic		SVR4 pic	object file
   SVR4/EABI	-fPIC		SVR4 PIC	translation unit
   SVR4/EABI	-mrelocatable	EABI TOC	function
   SVR4/EABI	-maix		AIX TOC		object file
   SVR4/EABI	-maix -mminimal-toc
				AIX minimal TOC	translation unit

   Name			Reg.	Set by	entries	      contains:
					made by	 addrs?	fp?	sum?

   AIX TOC		2	crt0	as	 Y	option	option
   AIX minimal TOC	30	prolog	gcc	 Y	Y	option
   SVR4 SDATA		13	crt0	gcc	 N	Y	N
   SVR4 pic		30	prolog	ld	 Y	not yet	N
   SVR4 PIC		30	prolog	gcc	 Y	option	option
   EABI TOC		30	prolog	gcc	 Y	option	option

*/

/* Hash functions for the hash table.  */

static unsigned
rs6000_hash_constant (rtx k)
{
  enum rtx_code code = GET_CODE (k);
  machine_mode mode = GET_MODE (k);
  unsigned result = (code << 3) ^ mode;
  const char *format;
  int flen, fidx;

  format = GET_RTX_FORMAT (code);
  flen = strlen (format);
  fidx = 0;

  switch (code)
    {
    case LABEL_REF:
      return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));

    case CONST_WIDE_INT:
      {
	int i;
	flen = CONST_WIDE_INT_NUNITS (k);
	for (i = 0; i < flen; i++)
	  result = result * 613 + CONST_WIDE_INT_ELT (k, i);
	return result;
      }

    case CONST_DOUBLE:
      return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;

    case CODE_LABEL:
      fidx = 3;
      break;

    default:
      break;
    }

  for (; fidx < flen; fidx++)
    switch (format[fidx])
      {
      case 's':
	{
	  unsigned i, len;
	  const char *str = XSTR (k, fidx);
	  len = strlen (str);
	  result = result * 613 + len;
	  for (i = 0; i < len; i++)
	    result = result * 613 + (unsigned) str[i];
	  break;
	}
      case 'u':
      case 'e':
	result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
	break;
      case 'i':
      case 'n':
	result = result * 613 + (unsigned) XINT (k, fidx);
	break;
      case 'w':
	if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
	  result = result * 613 + (unsigned) XWINT (k, fidx);
	else
	  {
	    size_t i;
	    for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
	      result = result * 613 + (unsigned) (XWINT (k, fidx)
						  >> CHAR_BIT * i);
	  }
	break;
      case '0':
	break;
      default:
	gcc_unreachable ();
      }

  return result;
}

hashval_t
toc_hasher::hash (toc_hash_struct *thc)
{
  return rs6000_hash_constant (thc->key) ^ thc->key_mode;
}

/* Compare H1 and H2 for equivalence.  */

bool
toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
{
  rtx r1 = h1->key;
  rtx r2 = h2->key;

  if (h1->key_mode != h2->key_mode)
    return 0;

  return rtx_equal_p (r1, r2);
}

/* These are the names given by the C++ front-end to vtables, and
   vtable-like objects.  Ideally, this logic should not be here;
   instead, there should be some programmatic way of inquiring as
   to whether or not an object is a vtable.  */

#define VTABLE_NAME_P(NAME)	  \
  (startswith (name, "_vt.")	  \
  || startswith (name, "_ZTV")	  \
  || startswith (name, "_ZTT")	  \
  || startswith (name, "_ZTI")	  \
  || startswith (name, "_ZTC"))
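/* For example, under the Itanium C++ ABI the vtable for class Foo is
   mangled as "_ZTV3Foo" and its typeinfo object as "_ZTI3Foo".  */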

#ifdef NO_DOLLAR_IN_LABEL
/* Return a GGC-allocated character string translating dollar signs in
   input NAME to underscores.  Used by XCOFF ASM_OUTPUT_LABELREF.  */
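/* For instance, "rs6000$handler" becomes "rs6000_handler"; a NAME whose
   first character is '$' is returned unchanged.  */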

const char *
rs6000_xcoff_strip_dollar (const char *name)
{
  char *strip, *p;
  const char *q;
  size_t len;

  q = (const char *) strchr (name, '$');

  if (q == 0 || q == name)
    return name;

  len = strlen (name);
  strip = XALLOCAVEC (char, len + 1);
  strcpy (strip, name);
  p = strip + (q - name);
  while (p)
    {
      *p = '_';
      p = strchr (p + 1, '$');
    }

  return ggc_alloc_string (strip, len);
}
#endif

void
rs6000_output_symbol_ref (FILE *file, rtx x)
{
  const char *name = XSTR (x, 0);

  /* Currently C++ toc references to vtables can be emitted before it
     is decided whether the vtable is public or private.  If this is
     the case, then the linker will eventually complain that there is
     a reference to an unknown section.  Thus, for vtables only,
     we emit the TOC reference to reference the identifier and not the
     symbol.  */
  if (VTABLE_NAME_P (name))
    {
      RS6000_OUTPUT_BASENAME (file, name);
    }
  else
    assemble_name (file, name);
}

/* Output a TOC entry.  We derive the entry name from what is being
   written.  */
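/* As an illustration, on 64-bit AIX with a full TOC the DFmode constant
   1.0 (bit pattern 0x3ff0000000000000) comes out roughly as

	LC..42:
		.tc FD_3ff00000_0[TC],0x3ff0000000000000

   where 42 is the internal label number.  */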

void
output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
{
  char buf[256];
  const char *name = buf;
  rtx base = x;
  HOST_WIDE_INT offset = 0;

  gcc_assert (!TARGET_NO_TOC_OR_PCREL);

  /* When the linker won't eliminate them, don't output duplicate
     TOC entries (this happens on AIX if there is any kind of TOC,
     and on SVR4 under -fPIC or -mrelocatable).  Don't do this for
     CODE_LABELs.  */
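  /* A duplicate just becomes an alias for the earlier entry: e.g. if
     label 5 refers to the same constant as an earlier label 3, we emit
     (on XCOFF, roughly) ".set LC..5,LC..3" instead of a second entry.  */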
  if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
    {
      struct toc_hash_struct *h;

      /* Create toc_hash_table.  This can't be done at TARGET_OPTION_OVERRIDE
	 time because GGC is not initialized at that point.  */
      if (toc_hash_table == NULL)
	toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);

      h = ggc_alloc<toc_hash_struct> ();
      h->key = x;
      h->key_mode = mode;
      h->labelno = labelno;

      toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
      if (*found == NULL)
	*found = h;
      else  /* This is indeed a duplicate.
	       Set this label equal to that label.  */
	{
	  fputs ("\t.set ", file);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
	  fprintf (file, "%d,", labelno);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
	  fprintf (file, "%d\n", ((*found)->labelno));

#ifdef HAVE_AS_TLS
	  if (TARGET_XCOFF && SYMBOL_REF_P (x)
	      && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
		  || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
	    {
	      fputs ("\t.set ", file);
	      ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
	      fprintf (file, "%d,", labelno);
	      ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
	      fprintf (file, "%d\n", ((*found)->labelno));
	    }
#endif
	  return;
	}
    }

  /* If we're going to put a double constant in the TOC, make sure it's
     aligned properly when strict alignment is on.  */
  if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
      && STRICT_ALIGNMENT
      && GET_MODE_BITSIZE (mode) >= 64
      && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
    ASM_OUTPUT_ALIGN (file, 3);

  (*targetm.asm_out.internal_label) (file, "LC", labelno);

  /* Handle FP constants specially.  Note that if we have a minimal
     TOC, things we put here aren't actually in the TOC, so we can allow
     FP constants.  */
  if (CONST_DOUBLE_P (x)
      && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
	  || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
    {
      long k[4];

      if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
	REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
      else
	REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff,
		     k[2] & 0xffffffff, k[3] & 0xffffffff);
	  fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
		   k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
	  return;
	}
      else
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs ("\t.long ", file);
	  else
	    fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff,
		     k[2] & 0xffffffff, k[3] & 0xffffffff);
	  fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
		   k[0] & 0xffffffff, k[1] & 0xffffffff,
		   k[2] & 0xffffffff, k[3] & 0xffffffff);
	  return;
	}
    }
  else if (CONST_DOUBLE_P (x)
	   && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
    {
      long k[2];

      if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
	REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
      else
	REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc FD_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff);
	  fprintf (file, "0x%lx%08lx\n",
		   k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
	  return;
	}
      else
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs ("\t.long ", file);
	  else
	    fprintf (file, "\t.tc FD_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff);
	  fprintf (file, "0x%lx,0x%lx\n",
		   k[0] & 0xffffffff, k[1] & 0xffffffff);
	  return;
	}
    }
  else if (CONST_DOUBLE_P (x)
	   && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
    {
      long l;

      if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
	REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
      else
	REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
	  if (WORDS_BIG_ENDIAN)
	    fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
	  else
	    fprintf (file, "0x%lx\n", l & 0xffffffff);
	  return;
	}
      else
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs ("\t.long ", file);
	  else
	    fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
	  fprintf (file, "0x%lx\n", l & 0xffffffff);
	  return;
	}
    }
  else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
    {
      unsigned HOST_WIDE_INT low;
      HOST_WIDE_INT high;

      low = INTVAL (x) & 0xffffffff;
      high = (HOST_WIDE_INT) INTVAL (x) >> 32;

      /* TOC entries are always Pmode-sized, so when big-endian
	 smaller integer constants in the TOC need to be padded.
	 (This is still a win over putting the constants in
	 a separate constant pool, because then we'd have
	 to have both a TOC entry _and_ the actual constant.)

	 For a 32-bit target, CONST_INT values are loaded and shifted
	 entirely within `low' and can be stored in one TOC entry.  */

      /* It would be easy to make this work, but it doesn't now.  */
      gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));

      if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
	{
	  low |= high << 32;
	  low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
	  high = (HOST_WIDE_INT) low >> 32;
	  low &= 0xffffffff;
	}

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc ID_%lx_%lx[TC],",
		     (long) high & 0xffffffff, (long) low & 0xffffffff);
	  fprintf (file, "0x%lx%08lx\n",
		   (long) high & 0xffffffff, (long) low & 0xffffffff);
	  return;
	}
      else
	{
	  if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
	    {
	      if (TARGET_ELF || TARGET_MINIMAL_TOC)
		fputs ("\t.long ", file);
	      else
		fprintf (file, "\t.tc ID_%lx_%lx[TC],",
			 (long) high & 0xffffffff, (long) low & 0xffffffff);
	      fprintf (file, "0x%lx,0x%lx\n",
		       (long) high & 0xffffffff, (long) low & 0xffffffff);
	    }
	  else
	    {
	      if (TARGET_ELF || TARGET_MINIMAL_TOC)
		fputs ("\t.long ", file);
	      else
		fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
	      fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
	    }
	  return;
	}
    }

  if (GET_CODE (x) == CONST)
    {
      gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (x, 0), 1)));

      base = XEXP (XEXP (x, 0), 0);
      offset = INTVAL (XEXP (XEXP (x, 0), 1));
    }

  switch (GET_CODE (base))
    {
    case SYMBOL_REF:
      name = XSTR (base, 0);
      break;

    case LABEL_REF:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L",
				   CODE_LABEL_NUMBER (XEXP (base, 0)));
      break;

    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_ELF || TARGET_MINIMAL_TOC)
    fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
  else
    {
      fputs ("\t.tc ", file);
      RS6000_OUTPUT_BASENAME (file, name);

      if (offset < 0)
	fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
      else if (offset)
	fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);

      /* Mark large TOC symbols on AIX with [TE] so they are mapped
	 after other TOC symbols, reducing overflow of small TOC access
	 to [TC] symbols.  */
      fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
	     ? "[TE]," : "[TC],", file);
    }

  /* Currently C++ toc references to vtables can be emitted before it
     is decided whether the vtable is public or private.  If this is
     the case, then the linker will eventually complain that there is
     a TOC reference to an unknown section.  Thus, for vtables only,
     we emit the TOC reference to reference the symbol and not the
     section.  */
  if (VTABLE_NAME_P (name))
    {
      RS6000_OUTPUT_BASENAME (file, name);
      if (offset < 0)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
      else if (offset > 0)
	fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
    }
  else
    output_addr_const (file, x);

#if HAVE_AS_TLS
  if (TARGET_XCOFF && SYMBOL_REF_P (base))
    {
      switch (SYMBOL_REF_TLS_MODEL (base))
	{
	case 0:
	  break;
	case TLS_MODEL_LOCAL_EXEC:
	  fputs ("@le", file);
	  break;
	case TLS_MODEL_INITIAL_EXEC:
	  fputs ("@ie", file);
	  break;
	/* Use global-dynamic for local-dynamic.  */
	case TLS_MODEL_GLOBAL_DYNAMIC:
	case TLS_MODEL_LOCAL_DYNAMIC:
	  putc ('\n', file);
	  (*targetm.asm_out.internal_label) (file, "LCM", labelno);
	  fputs ("\t.tc .", file);
	  RS6000_OUTPUT_BASENAME (file, name);
	  fputs ("[TC],", file);
	  output_addr_const (file, x);
	  fputs ("@m", file);
	  break;
	default:
	  gcc_unreachable ();
	}
    }
#endif

  putc ('\n', file);
}

/* Output an assembler pseudo-op to write an ASCII string of N characters
   starting at P to FILE.

   On the RS/6000, we have to do this using the .byte operation and
   write out special characters outside the quoted string.
   Also, the assembler is broken; very long strings are truncated,
   so we must artificially break them up early.  */
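/* For example, the five characters a, b, '"', c, newline come out
   roughly as

	.byte "ab""c"
	.byte 10
 */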

void
output_ascii (FILE *file, const char *p, int n)
{
  char c;
  int i, count_string;
  const char *for_string = "\t.byte \"";
  const char *for_decimal = "\t.byte ";
  const char *to_close = NULL;

  count_string = 0;
  for (i = 0; i < n; i++)
    {
      c = *p++;
      if (c >= ' ' && c < 0177)
	{
	  if (for_string)
	    fputs (for_string, file);
	  putc (c, file);

	  /* Write two quotes to get one.  */
	  if (c == '"')
	    {
	      putc (c, file);
	      ++count_string;
	    }

	  for_string = NULL;
	  for_decimal = "\"\n\t.byte ";
	  to_close = "\"\n";
	  ++count_string;

	  if (count_string >= 512)
	    {
	      fputs (to_close, file);

	      for_string = "\t.byte \"";
	      for_decimal = "\t.byte ";
	      to_close = NULL;
	      count_string = 0;
	    }
	}
      else
	{
	  if (for_decimal)
	    fputs (for_decimal, file);
	  fprintf (file, "%d", c);

	  for_string = "\n\t.byte \"";
	  for_decimal = ", ";
	  to_close = "\n";
	  count_string = 0;
	}
    }

  /* Now close the string if we have written one.  Then end the line.  */
  if (to_close)
    fputs (to_close, file);
}

/* Generate a unique section name for FILENAME for a section type
   represented by SECTION_DESC.  Output goes into BUF.

   SECTION_DESC can be any string, as long as it is different for each
   possible section type.

   We name the section in the same manner as xlc.  The name begins with an
   underscore followed by the filename (after stripping any leading directory
   names) with the last period replaced by the string SECTION_DESC.  If
   FILENAME does not contain a period, SECTION_DESC is appended to the end of
   the name.  */
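/* For example, FILENAME "../src/mycode.c" with SECTION_DESC "_bss_"
   yields "_mycode_bss_"; non-alphanumeric characters in the base name
   are dropped.  */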

void
rs6000_gen_section_name (char **buf, const char *filename,
			 const char *section_desc)
{
  const char *q, *after_last_slash, *last_period = 0;
  char *p;
  int len;

  after_last_slash = filename;
  for (q = filename; *q; q++)
    {
      if (*q == '/')
	after_last_slash = q + 1;
      else if (*q == '.')
	last_period = q;
    }

  len = strlen (after_last_slash) + strlen (section_desc) + 2;
  *buf = (char *) xmalloc (len);

  p = *buf;
  *p++ = '_';

  for (q = after_last_slash; *q; q++)
    {
      if (q == last_period)
	{
	  strcpy (p, section_desc);
	  p += strlen (section_desc);
	  break;
	}

      else if (ISALNUM (*q))
	*p++ = *q;
    }

  if (last_period == 0)
    strcpy (p, section_desc);
  else
    *p = '\0';
}

/* Emit profile function.  */

void
output_profile_hook (int labelno ATTRIBUTE_UNUSED)
{
  /* Non-standard profiling for kernels, which just saves LR then calls
     _mcount without worrying about arg saves.  The idea is to change
     the function prologue as little as possible as it isn't easy to
     account for arg save/restore code added just for _mcount.  */
  if (TARGET_PROFILE_KERNEL)
    return;

  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    {
#ifndef NO_PROFILE_COUNTERS
# define NO_PROFILE_COUNTERS 0
#endif
      if (NO_PROFILE_COUNTERS)
	emit_library_call (init_one_libfunc (RS6000_MCOUNT),
			   LCT_NORMAL, VOIDmode);
      else
	{
	  char buf[30];
	  const char *label_name;
	  rtx fun;

	  ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
	  label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
	  fun = gen_rtx_SYMBOL_REF (Pmode, label_name);

	  emit_library_call (init_one_libfunc (RS6000_MCOUNT),
			     LCT_NORMAL, VOIDmode, fun, Pmode);
	}
    }
  else if (DEFAULT_ABI == ABI_DARWIN)
    {
      const char *mcount_name = RS6000_MCOUNT;
      int caller_addr_regno = LR_REGNO;

      /* Be conservative and always set this, at least for now.  */
      crtl->uses_pic_offset_table = 1;

#if TARGET_MACHO
      /* For PIC code, set up a stub and collect the caller's address
	 from r0, which is where the prologue puts it.  */
      if (MACHOPIC_INDIRECT
	  && crtl->uses_pic_offset_table)
	caller_addr_regno = 0;
#endif
      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
			 LCT_NORMAL, VOIDmode,
			 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
    }
}

/* Write function profiler code.  */

void
output_function_profiler (FILE *file, int labelno)
{
  char buf[100];

  switch (DEFAULT_ABI)
    {
    default:
      gcc_unreachable ();

    case ABI_V4:
      if (!TARGET_32BIT)
	{
	  warning (0, "no profiling of 64-bit code for this ABI");
	  return;
	}
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      fprintf (file, "\tmflr %s\n", reg_names[0]);
      if (NO_PROFILE_COUNTERS)
	{
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	}
      else if (TARGET_SECURE_PLT && flag_pic)
	{
	  if (TARGET_LINK_STACK)
	    {
	      char name[32];
	      get_ppc476_thunk_name (name);
	      asm_fprintf (file, "\tbl %s\n", name);
	    }
	  else
	    asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
	  asm_fprintf (file, "\taddis %s,%s,",
		       reg_names[12], reg_names[12]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
	}
      else if (flag_pic == 1)
	{
	  fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
	  asm_fprintf (file, "\tlwz %s,", reg_names[0]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "@got(%s)\n", reg_names[12]);
	}
      else if (flag_pic > 1)
	{
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  /* Now, we need to get the address of the label.  */
	  if (TARGET_LINK_STACK)
	    {
	      char name[32];
	      get_ppc476_thunk_name (name);
	      asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
	      assemble_name (file, buf);
	      fputs ("-.\n1:", file);
	      asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
	      asm_fprintf (file, "\taddi %s,%s,4\n",
			   reg_names[11], reg_names[11]);
	    }
	  else
	    {
	      fputs ("\tbcl 20,31,1f\n\t.long ", file);
	      assemble_name (file, buf);
	      fputs ("-.\n1:", file);
	      asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
	    }
	  asm_fprintf (file, "\tlwz %s,0(%s)\n",
		       reg_names[0], reg_names[11]);
	  asm_fprintf (file, "\tadd %s,%s,%s\n",
		       reg_names[0], reg_names[0], reg_names[11]);
	}
      else
	{
	  asm_fprintf (file, "\tlis %s,", reg_names[12]);
	  assemble_name (file, buf);
	  fputs ("@ha\n", file);
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  asm_fprintf (file, "\tla %s,", reg_names[0]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "@l(%s)\n", reg_names[12]);
	}

      /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH.  */
      fprintf (file, "\tbl %s%s\n",
	       RS6000_MCOUNT, flag_pic ? "@plt" : "");
      break;

    case ABI_AIX:
    case ABI_ELFv2:
    case ABI_DARWIN:
      /* Don't do anything, done in output_profile_hook ().  */
      break;
    }
}


/* The following variable holds the last issued insn.  */

static rtx_insn *last_scheduled_insn;

/* The following variable helps to balance the issuing of load and
   store instructions.  */

static int load_store_pendulum;

/* The following variable helps pair divide insns during scheduling.  */
static int divide_cnt;
/* The following variable helps pair and alternate vector and vector load
   insns during scheduling.  */
static int vec_pairing;


/* Power4 load update and store update instructions are cracked into a
   load or store and an integer insn which are executed in the same cycle.
   Branches have their own dispatch slot which does not count against the
   GCC issue rate, but it changes the program flow so there are no other
   instructions to issue in this cycle.  */

static int
rs6000_variable_issue_1 (rtx_insn *insn, int more)
{
  last_scheduled_insn = insn;
  if (GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    {
      cached_can_issue_more = more;
      return cached_can_issue_more;
    }

  if (insn_terminates_group_p (insn, current_group))
    {
      cached_can_issue_more = 0;
      return cached_can_issue_more;
    }

  /* If the insn has no reservation (it was not recognized), leave the
     issue count unchanged.  */
  if (recog_memoized (insn) < 0)
    return more;

  if (rs6000_sched_groups)
    {
      if (is_microcoded_insn (insn))
	cached_can_issue_more = 0;
      else if (is_cracked_insn (insn))
	cached_can_issue_more = more > 2 ? more - 2 : 0;
      else
	cached_can_issue_more = more - 1;

      return cached_can_issue_more;
    }

  if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
    return 0;

  cached_can_issue_more = more - 1;
  return cached_can_issue_more;
}

static int
rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
{
  int r = rs6000_variable_issue_1 (insn, more);
  if (verbose)
    fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
  return r;
}

/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */

static int
rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
		    unsigned int)
{
  enum attr_type attr_type;

  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  switch (dep_type)
    {
    case REG_DEP_TRUE:
      {
	/* Data dependency; DEP_INSN writes a register that INSN reads
	   some cycles later.  */

	/* Separate a load from a narrower, dependent store.  */
	if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
	     || rs6000_tune == PROCESSOR_POWER10)
	    && GET_CODE (PATTERN (insn)) == SET
	    && GET_CODE (PATTERN (dep_insn)) == SET
	    && MEM_P (XEXP (PATTERN (insn), 1))
	    && MEM_P (XEXP (PATTERN (dep_insn), 0))
	    && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
		> GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
	  return cost + 14;

	attr_type = get_attr_type (insn);

	switch (attr_type)
	  {
	  case TYPE_JMPREG:
	    /* Tell the first scheduling pass about the latency between
	       a mtctr and bctr (and mtlr and br/blr).  The first
	       scheduling pass will not know about this latency since
	       the mtctr instruction, which has the latency associated
	       to it, will be generated by reload.  */
	    return 4;
	  case TYPE_BRANCH:
	    /* Leave some extra cycles between a compare and its
	       dependent branch, to inhibit expensive mispredicts.  */
	    if ((rs6000_tune == PROCESSOR_PPC603
		 || rs6000_tune == PROCESSOR_PPC604
		 || rs6000_tune == PROCESSOR_PPC604e
		 || rs6000_tune == PROCESSOR_PPC620
		 || rs6000_tune == PROCESSOR_PPC630
		 || rs6000_tune == PROCESSOR_PPC750
		 || rs6000_tune == PROCESSOR_PPC7400
		 || rs6000_tune == PROCESSOR_PPC7450
		 || rs6000_tune == PROCESSOR_PPCE5500
		 || rs6000_tune == PROCESSOR_PPCE6500
		 || rs6000_tune == PROCESSOR_POWER4
		 || rs6000_tune == PROCESSOR_POWER5
		 || rs6000_tune == PROCESSOR_POWER7
		 || rs6000_tune == PROCESSOR_POWER8
		 || rs6000_tune == PROCESSOR_POWER9
		 || rs6000_tune == PROCESSOR_POWER10
		 || rs6000_tune == PROCESSOR_CELL)
		&& recog_memoized (dep_insn)
		&& (INSN_CODE (dep_insn) >= 0))

	      switch (get_attr_type (dep_insn))
		{
		case TYPE_CMP:
		case TYPE_FPCOMPARE:
		case TYPE_CR_LOGICAL:
		  return cost + 2;
		case TYPE_EXTS:
		case TYPE_MUL:
		  if (get_attr_dot (dep_insn) == DOT_YES)
		    return cost + 2;
		  else
		    break;
		case TYPE_SHIFT:
		  if (get_attr_dot (dep_insn) == DOT_YES
		      && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
		    return cost + 2;
		  else
		    break;
		default:
		  break;
		}
	    break;

	  case TYPE_STORE:
	  case TYPE_FPSTORE:
	    if ((rs6000_tune == PROCESSOR_POWER6)
		&& recog_memoized (dep_insn)
		&& (INSN_CODE (dep_insn) >= 0))
	      {

		if (GET_CODE (PATTERN (insn)) != SET)
		  /* If this happens, we have to extend this to schedule
		     optimally.  Return default for now.  */
		  return cost;

		/* Adjust the cost for the case where the value written
		   by a fixed point operation is used as the address
		   gen value on a store.  */
		switch (get_attr_type (dep_insn))
		  {
		  case TYPE_LOAD:
		  case TYPE_CNTLZ:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return get_attr_sign_extend (dep_insn)
			       == SIGN_EXTEND_YES ? 6 : 4;
		      break;
		    }
		  case TYPE_SHIFT:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
			       6 : 3;
		      break;
		    }
		  case TYPE_INTEGER:
		  case TYPE_ADD:
		  case TYPE_LOGICAL:
		  case TYPE_EXTS:
		  case TYPE_INSERT:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return 3;
		      break;
		    }
		  case TYPE_STORE:
		  case TYPE_FPLOAD:
		  case TYPE_FPSTORE:
		    {
		      if (get_attr_update (dep_insn) == UPDATE_YES
			  && ! rs6000_store_data_bypass_p (dep_insn, insn))
			return 3;
		      break;
		    }
		  case TYPE_MUL:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return 17;
		      break;
		    }
		  case TYPE_DIV:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
		      break;
		    }
		  default:
		    break;
		  }
	      }
	    break;

	  case TYPE_LOAD:
	    if ((rs6000_tune == PROCESSOR_POWER6)
		&& recog_memoized (dep_insn)
		&& (INSN_CODE (dep_insn) >= 0))
	      {

		/* Adjust the cost for the case where the value written
		   by a fixed point instruction is used within the address
		   gen portion of a subsequent load(u)(x).  */
		switch (get_attr_type (dep_insn))
		  {
		  case TYPE_LOAD:
		  case TYPE_CNTLZ:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return get_attr_sign_extend (dep_insn)
			       == SIGN_EXTEND_YES ? 6 : 4;
		      break;
		    }
		  case TYPE_SHIFT:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
			       6 : 3;
		      break;
		    }
		  case TYPE_INTEGER:
		  case TYPE_ADD:
		  case TYPE_LOGICAL:
		  case TYPE_EXTS:
		  case TYPE_INSERT:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return 3;
		      break;
		    }
		  case TYPE_STORE:
		  case TYPE_FPLOAD:
		  case TYPE_FPSTORE:
		    {
		      if (get_attr_update (dep_insn) == UPDATE_YES
			  && set_to_load_agen (dep_insn, insn))
			return 3;
		      break;
		    }
		  case TYPE_MUL:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return 17;
		      break;
		    }
		  case TYPE_DIV:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
		      break;
		    }
		  default:
		    break;
		  }
	      }
	    break;

	  default:
	    break;
	  }

	/* Fall out to return default cost.  */
      }
      break;

    case REG_DEP_OUTPUT:
      /* Output dependency; DEP_INSN writes a register that INSN writes some
	 cycles later.  */
      if ((rs6000_tune == PROCESSOR_POWER6)
	  && recog_memoized (dep_insn)
	  && (INSN_CODE (dep_insn) >= 0))
	{
	  attr_type = get_attr_type (insn);

	  switch (attr_type)
	    {
	    case TYPE_FP:
	    case TYPE_FPSIMPLE:
	      if (get_attr_type (dep_insn) == TYPE_FP
		  || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
		return 1;
	      break;
	    default:
	      break;
	    }
	}
      /* Fall through, no cost for output dependency.  */
      /* FALLTHRU */

    case REG_DEP_ANTI:
      /* Anti dependency; DEP_INSN reads a register that INSN writes some
	 cycles later.  */
      return 0;

    default:
      gcc_unreachable ();
    }

  return cost;
}

/* Debug version of rs6000_adjust_cost.  */

static int
rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
			  int cost, unsigned int dw)
{
  int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);

  if (ret != cost)
    {
      const char *dep;

      switch (dep_type)
	{
	default:	     dep = "unknown dependency"; break;
	case REG_DEP_TRUE:   dep = "data dependency";	 break;
	case REG_DEP_OUTPUT: dep = "output dependency";  break;
	case REG_DEP_ANTI:   dep = "anti dependency";	 break;
	}

      fprintf (stderr,
	       "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
	       "%s, insn:\n", ret, cost, dep);

      debug_rtx (insn);
    }

  return ret;
}

/* Return true if INSN is microcoded, false otherwise.  */

static bool
is_microcoded_insn (rtx_insn *insn)
{
  if (!insn || !NONDEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  if (rs6000_tune == PROCESSOR_CELL)
    return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;

  if (rs6000_sched_groups
      && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
    {
      enum attr_type type = get_attr_type (insn);
      if ((type == TYPE_LOAD
	   && get_attr_update (insn) == UPDATE_YES
	   && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
	  || ((type == TYPE_LOAD || type == TYPE_STORE)
	      && get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_YES)
	  || type == TYPE_MFCR)
	return true;
    }

  return false;
}

/* The function returns true if INSN is cracked into 2 instructions
   by the processor (and therefore occupies 2 issue slots).  */

static bool
is_cracked_insn (rtx_insn *insn)
{
  if (!insn || !NONDEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  if (rs6000_sched_groups
      && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
    {
      enum attr_type type = get_attr_type (insn);
      if ((type == TYPE_LOAD
	   && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	   && get_attr_update (insn) == UPDATE_NO)
	  || (type == TYPE_LOAD
	      && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
	      && get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_NO)
	  || (type == TYPE_STORE
	      && get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_NO)
	  || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
	      && get_attr_update (insn) == UPDATE_YES)
	  || (type == TYPE_CR_LOGICAL
	      && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
	  || (type == TYPE_EXTS
	      && get_attr_dot (insn) == DOT_YES)
	  || (type == TYPE_SHIFT
	      && get_attr_dot (insn) == DOT_YES
	      && get_attr_var_shift (insn) == VAR_SHIFT_NO)
	  || (type == TYPE_MUL
	      && get_attr_dot (insn) == DOT_YES)
	  || type == TYPE_DIV
	  || (type == TYPE_INSERT
	      && get_attr_size (insn) == SIZE_32))
	return true;
    }

  return false;
}

/* The function returns true if INSN can be issued only from
   the branch slot.  */

static bool
is_branch_slot_insn (rtx_insn *insn)
{
  if (!insn || !NONDEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  if (rs6000_sched_groups)
    {
      enum attr_type type = get_attr_type (insn);
      if (type == TYPE_BRANCH || type == TYPE_JMPREG)
	return true;
      return false;
    }

  return false;
}

/* Return true if OUT_INSN sets a value that is used in the address
   generation computation of IN_INSN.  */
static bool
set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;

  /* For performance reasons, only handle the simple case where
     both insns are a single_set.  */
  out_set = single_set (out_insn);
  if (out_set)
    {
      in_set = single_set (in_insn);
      if (in_set)
	return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
    }

  return false;
}

/* Try to determine the base/offset/size parts of the given MEM.
   Return true if successful, false if any of the values couldn't
   be determined.

   This function only looks for REG or REG+CONST address forms.
   REG+REG address forms return false.  */
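/* For instance, (mem:DI (plus:DI (reg:DI 9) (const_int 16))) yields
   BASE = r9, OFFSET = 16 and SIZE = 8, provided MEM_SIZE is known.  */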

static bool
get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
		  HOST_WIDE_INT *size)
{
  rtx addr_rtx;
  if (MEM_SIZE_KNOWN_P (mem))
    *size = MEM_SIZE (mem);
  else
    return false;

  addr_rtx = (XEXP (mem, 0));
  if (GET_CODE (addr_rtx) == PRE_MODIFY)
    addr_rtx = XEXP (addr_rtx, 1);

  *offset = 0;
  while (GET_CODE (addr_rtx) == PLUS
	 && CONST_INT_P (XEXP (addr_rtx, 1)))
    {
      *offset += INTVAL (XEXP (addr_rtx, 1));
      addr_rtx = XEXP (addr_rtx, 0);
    }
  if (!REG_P (addr_rtx))
    return false;

  *base = addr_rtx;
  return true;
}

/* If the target storage locations of arguments MEM1 and MEM2 are
   adjacent, then return the argument that has the lower address.
   Otherwise, return NULL_RTX.  */
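/* E.g. a 4-byte access at (r9)+0 and another 4-byte access at (r9)+4
   are adjacent, and the one at the lower address is returned.  */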

static rtx
adjacent_mem_locations (rtx mem1, rtx mem2)
{
  rtx reg1, reg2;
  HOST_WIDE_INT off1, size1, off2, size2;

  if (MEM_P (mem1)
      && MEM_P (mem2)
      && get_memref_parts (mem1, &reg1, &off1, &size1)
      && get_memref_parts (mem2, &reg2, &off2, &size2)
      && REGNO (reg1) == REGNO (reg2))
    {
      if (off1 + size1 == off2)
	return mem1;
      else if (off2 + size2 == off1)
	return mem2;
    }

  return NULL_RTX;
}

/* This function returns true if it can be determined that the two MEM
   locations overlap by at least 1 byte based on base reg/offset/size.  */

static bool
mem_locations_overlap (rtx mem1, rtx mem2)
{
  rtx reg1, reg2;
  HOST_WIDE_INT off1, size1, off2, size2;

  if (get_memref_parts (mem1, &reg1, &off1, &size1)
      && get_memref_parts (mem2, &reg2, &off2, &size2))
    return ((REGNO (reg1) == REGNO (reg2))
	    && (((off1 <= off2) && (off1 + size1 > off2))
		|| ((off2 <= off1) && (off2 + size2 > off1))));

  return false;
}

/* Update the integer scheduling priority INSN_PRIORITY (INSN).  Increase
   the priority to execute INSN earlier, and reduce the priority to
   execute INSN later.  */
18227
18228static int
18229rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
18230{
18231  rtx load_mem, str_mem;
18232  /* On machines (like the 750) which have asymmetric integer units,
18233     where one integer unit can do multiply and divides and the other
18234     can't, reduce the priority of multiply/divide so it is scheduled
18235     before other integer operations.  */
18236
18237#if 0
18238  if (! INSN_P (insn))
18239    return priority;
18240
18241  if (GET_CODE (PATTERN (insn)) == USE)
18242    return priority;
18243
18244  switch (rs6000_tune) {
18245  case PROCESSOR_PPC750:
18246    switch (get_attr_type (insn))
18247      {
18248      default:
18249	break;
18250
18251      case TYPE_MUL:
18252      case TYPE_DIV:
18253	fprintf (stderr, "priority was %#x (%d) before adjustment\n",
18254		 priority, priority);
18255	if (priority >= 0 && priority < 0x01000000)
18256	  priority >>= 3;
18257	break;
18258      }
18259  }
18260#endif
18261
18262  if (insn_must_be_first_in_group (insn)
18263      && reload_completed
18264      && current_sched_info->sched_max_insns_priority
18265      && rs6000_sched_restricted_insns_priority)
18266    {
18267
18268      /* Prioritize insns that can be dispatched only in the first
18269	 dispatch slot.  */
18270      if (rs6000_sched_restricted_insns_priority == 1)
18271	/* Attach highest priority to insn. This means that in
18272	   haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18273	   precede 'priority' (critical path) considerations.  */
18274	return current_sched_info->sched_max_insns_priority;
18275      else if (rs6000_sched_restricted_insns_priority == 2)
18276	/* Increase priority of insn by a minimal amount. This means that in
18277	   haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18278	   considerations precede dispatch-slot restriction considerations.  */
18279	return (priority + 1);
18280    }
18281
18282  if (rs6000_tune == PROCESSOR_POWER6
18283      && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
18284          || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
    /* Attach highest priority to insn if the scheduler has just issued two
       stores and this instruction is a load, or two loads and this instruction
       is a store.  Power6 wants loads and stores scheduled alternately
       when possible.  */
18289    return current_sched_info->sched_max_insns_priority;
18290
18291  return priority;
18292}
18293
/* Return true if the instruction is non-pipelined on the Cell.  */
18295static bool
18296is_nonpipeline_insn (rtx_insn *insn)
18297{
18298  enum attr_type type;
18299  if (!insn || !NONDEBUG_INSN_P (insn)
18300      || GET_CODE (PATTERN (insn)) == USE
18301      || GET_CODE (PATTERN (insn)) == CLOBBER)
18302    return false;
18303
18304  type = get_attr_type (insn);
18305  if (type == TYPE_MUL
18306      || type == TYPE_DIV
18307      || type == TYPE_SDIV
18308      || type == TYPE_DDIV
18309      || type == TYPE_SSQRT
18310      || type == TYPE_DSQRT
18311      || type == TYPE_MFCR
18312      || type == TYPE_MFCRF
18313      || type == TYPE_MFJMPR)
18314    {
18315      return true;
18316    }
18317  return false;
18318}
18319
18320
18321/* Return how many instructions the machine can issue per cycle.  */
18322
18323static int
18324rs6000_issue_rate (void)
18325{
18326  /* Unless scheduling for register pressure, use issue rate of 1 for
18327     first scheduling pass to decrease degradation.  */
18328  if (!reload_completed && !flag_sched_pressure)
18329    return 1;
18330
18331  switch (rs6000_tune) {
18332  case PROCESSOR_RS64A:
18333  case PROCESSOR_PPC601: /* ? */
18334  case PROCESSOR_PPC7450:
18335    return 3;
18336  case PROCESSOR_PPC440:
18337  case PROCESSOR_PPC603:
18338  case PROCESSOR_PPC750:
18339  case PROCESSOR_PPC7400:
18340  case PROCESSOR_PPC8540:
18341  case PROCESSOR_PPC8548:
18342  case PROCESSOR_CELL:
18343  case PROCESSOR_PPCE300C2:
18344  case PROCESSOR_PPCE300C3:
18345  case PROCESSOR_PPCE500MC:
18346  case PROCESSOR_PPCE500MC64:
18347  case PROCESSOR_PPCE5500:
18348  case PROCESSOR_PPCE6500:
18349  case PROCESSOR_TITAN:
18350    return 2;
18351  case PROCESSOR_PPC476:
18352  case PROCESSOR_PPC604:
18353  case PROCESSOR_PPC604e:
18354  case PROCESSOR_PPC620:
18355  case PROCESSOR_PPC630:
18356    return 4;
18357  case PROCESSOR_POWER4:
18358  case PROCESSOR_POWER5:
18359  case PROCESSOR_POWER6:
18360  case PROCESSOR_POWER7:
18361    return 5;
18362  case PROCESSOR_POWER8:
18363    return 7;
18364  case PROCESSOR_POWER9:
18365    return 6;
18366  case PROCESSOR_POWER10:
18367    return 8;
18368  default:
18369    return 1;
18370  }
18371}
18372
18373/* Return how many instructions to look ahead for better insn
18374   scheduling.  */
18375
18376static int
18377rs6000_use_sched_lookahead (void)
18378{
18379  switch (rs6000_tune)
18380    {
18381    case PROCESSOR_PPC8540:
18382    case PROCESSOR_PPC8548:
18383      return 4;
18384
18385    case PROCESSOR_CELL:
18386      return (reload_completed ? 8 : 0);
18387
18388    default:
18389      return 0;
18390    }
18391}
18392
18393/* We are choosing insn from the ready queue.  Return zero if INSN can be
18394   chosen.  */
18395static int
18396rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
18397{
18398  if (ready_index == 0)
18399    return 0;
18400
18401  if (rs6000_tune != PROCESSOR_CELL)
18402    return 0;
18403
18404  gcc_assert (insn != NULL_RTX && INSN_P (insn));
18405
18406  if (!reload_completed
18407      || is_nonpipeline_insn (insn)
18408      || is_microcoded_insn (insn))
18409    return 1;
18410
18411  return 0;
18412}
18413
18414/* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18415   and return true.  */
18416
18417static bool
18418find_mem_ref (rtx pat, rtx *mem_ref)
18419{
18420  const char * fmt;
18421  int i, j;
18422
18423  /* stack_tie does not produce any real memory traffic.  */
18424  if (tie_operand (pat, VOIDmode))
18425    return false;
18426
18427  if (MEM_P (pat))
18428    {
18429      *mem_ref = pat;
18430      return true;
18431    }
18432
18433  /* Recursively process the pattern.  */
18434  fmt = GET_RTX_FORMAT (GET_CODE (pat));
18435
18436  for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
18437    {
18438      if (fmt[i] == 'e')
18439	{
18440	  if (find_mem_ref (XEXP (pat, i), mem_ref))
18441	    return true;
18442	}
18443      else if (fmt[i] == 'E')
18444	for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
18445	  {
18446	    if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
18447	      return true;
18448	  }
18449    }
18450
18451  return false;
18452}
18453
18454/* Determine if PAT is a PATTERN of a load insn.  */
18455
18456static bool
18457is_load_insn1 (rtx pat, rtx *load_mem)
18458{
  if (!pat)
18460    return false;
18461
18462  if (GET_CODE (pat) == SET)
18463    {
18464      if (REG_P (SET_DEST (pat)))
18465	return find_mem_ref (SET_SRC (pat), load_mem);
18466      else
18467	return false;
18468    }
18469
18470  if (GET_CODE (pat) == PARALLEL)
18471    {
18472      int i;
18473
18474      for (i = 0; i < XVECLEN (pat, 0); i++)
18475	if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
18476	  return true;
18477    }
18478
18479  return false;
18480}
18481
18482/* Determine if INSN loads from memory.  */
18483
18484static bool
18485is_load_insn (rtx insn, rtx *load_mem)
18486{
18487  if (!insn || !INSN_P (insn))
18488    return false;
18489
18490  if (CALL_P (insn))
18491    return false;
18492
18493  return is_load_insn1 (PATTERN (insn), load_mem);
18494}
18495
18496/* Determine if PAT is a PATTERN of a store insn.  */
18497
18498static bool
18499is_store_insn1 (rtx pat, rtx *str_mem)
18500{
  if (!pat)
18502    return false;
18503
18504  if (GET_CODE (pat) == SET)
18505    {
18506      if (REG_P (SET_SRC (pat)) || SUBREG_P (SET_SRC (pat)))
18507	return find_mem_ref (SET_DEST (pat), str_mem);
18508      else
18509	return false;
18510    }
18511
18512  if (GET_CODE (pat) == PARALLEL)
18513    {
18514      int i;
18515
18516      for (i = 0; i < XVECLEN (pat, 0); i++)
18517	if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
18518	  return true;
18519    }
18520
18521  return false;
18522}
18523
18524/* Determine if INSN stores to memory.  */
18525
18526static bool
18527is_store_insn (rtx insn, rtx *str_mem)
18528{
18529  if (!insn || !INSN_P (insn))
18530    return false;
18531
18532  return is_store_insn1 (PATTERN (insn), str_mem);
18533}
18534
18535/* Return whether TYPE is a Power9 pairable vector instruction type.  */
18536
18537static bool
18538is_power9_pairable_vec_type (enum attr_type type)
18539{
18540  switch (type)
18541    {
18542      case TYPE_VECSIMPLE:
18543      case TYPE_VECCOMPLEX:
18544      case TYPE_VECDIV:
18545      case TYPE_VECCMP:
18546      case TYPE_VECPERM:
18547      case TYPE_VECFLOAT:
18548      case TYPE_VECFDIV:
18549      case TYPE_VECDOUBLE:
18550	return true;
18551      default:
18552	break;
18553    }
18554  return false;
18555}
18556
18557/* Returns whether the dependence between INSN and NEXT is considered
18558   costly by the given target.  */
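/* E.g. (illustrative, via the -msched-costly-dep= option that backs
   rs6000_sched_costly_dep): with =store_to_load, any dependence from a
   store to a later load keeps the two insns in separate dispatch
   groups; with =true_store_to_load, only a true dependence whose byte
   ranges provably overlap (see mem_locations_overlap) does; a numeric
   value N makes every dependence whose distance-adjusted latency is at
   least N costly.  */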
18559
18560static bool
18561rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
18562{
18563  rtx insn;
18564  rtx next;
18565  rtx load_mem, str_mem;
18566
18567  /* If the flag is not enabled - no dependence is considered costly;
18568     allow all dependent insns in the same group.
18569     This is the most aggressive option.  */
18570  if (rs6000_sched_costly_dep == no_dep_costly)
18571    return false;
18572
18573  /* If the flag is set to 1 - a dependence is always considered costly;
18574     do not allow dependent instructions in the same group.
18575     This is the most conservative option.  */
18576  if (rs6000_sched_costly_dep == all_deps_costly)
18577    return true;
18578
18579  insn = DEP_PRO (dep);
18580  next = DEP_CON (dep);
18581
18582  if (rs6000_sched_costly_dep == store_to_load_dep_costly
18583      && is_load_insn (next, &load_mem)
18584      && is_store_insn (insn, &str_mem))
18585    /* Prevent load after store in the same group.  */
18586    return true;
18587
18588  if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
18589      && is_load_insn (next, &load_mem)
18590      && is_store_insn (insn, &str_mem)
18591      && DEP_TYPE (dep) == REG_DEP_TRUE
      && mem_locations_overlap (str_mem, load_mem))
18593     /* Prevent load after store in the same group if it is a true
18594	dependence.  */
18595     return true;
18596
18597  /* The flag is set to X; dependences with latency >= X are considered costly,
18598     and will not be scheduled in the same group.  */
18599  if (rs6000_sched_costly_dep <= max_dep_latency
18600      && ((cost - distance) >= (int)rs6000_sched_costly_dep))
18601    return true;
18602
18603  return false;
18604}
18605
18606/* Return the next insn after INSN that is found before TAIL is reached,
18607   skipping any "non-active" insns - insns that will not actually occupy
18608   an issue slot.  Return NULL_RTX if such an insn is not found.  */
18609
18610static rtx_insn *
18611get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
18612{
18613  if (insn == NULL_RTX || insn == tail)
18614    return NULL;
18615
18616  while (1)
18617    {
18618      insn = NEXT_INSN (insn);
18619      if (insn == NULL_RTX || insn == tail)
18620	return NULL;
18621
18622      if (CALL_P (insn)
18623	  || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
18624	  || (NONJUMP_INSN_P (insn)
18625	      && GET_CODE (PATTERN (insn)) != USE
18626	      && GET_CODE (PATTERN (insn)) != CLOBBER
18627	      && INSN_CODE (insn) != CODE_FOR_stack_tie))
18628	break;
18629    }
18630  return insn;
18631}
18632
18633/* Move instruction at POS to the end of the READY list.  */
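/* E.g. (illustrative): READY = {a, b, c, d} with POS == 1 and
   LASTPOS == 3 becomes {a, c, d, b}.  The scheduler issues insns from
   the end of the ready list, so the moved insn is considered next.  */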
18634
18635static void
18636move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
18637{
18638  rtx_insn *tmp;
18639  int i;
18640
18641  tmp = ready[pos];
18642  for (i = pos; i < lastpos; i++)
18643    ready[i] = ready[i + 1];
18644  ready[lastpos] = tmp;
18645}
18646
18647/* Do Power6 specific sched_reorder2 reordering of ready list.  */
18648
18649static int
18650power6_sched_reorder2 (rtx_insn **ready, int lastpos)
18651{
18652  /* For Power6, we need to handle some special cases to try and keep the
18653     store queue from overflowing and triggering expensive flushes.
18654
18655     This code monitors how load and store instructions are being issued
18656     and skews the ready list one way or the other to increase the likelihood
18657     that a desired instruction is issued at the proper time.
18658
18659     A couple of things are done.  First, we maintain a "load_store_pendulum"
18660     to track the current state of load/store issue.
18661
18662       - If the pendulum is at zero, then no loads or stores have been
18663	 issued in the current cycle so we do nothing.
18664
18665       - If the pendulum is 1, then a single load has been issued in this
18666	 cycle and we attempt to locate another load in the ready list to
18667	 issue with it.
18668
18669       - If the pendulum is -2, then two stores have already been
18670	 issued in this cycle, so we increase the priority of the first load
	 in the ready list to increase its likelihood of being chosen first
18672	 in the next cycle.
18673
18674       - If the pendulum is -1, then a single store has been issued in this
18675	 cycle and we attempt to locate another store in the ready list to
18676	 issue with it, preferring a store to an adjacent memory location to
18677	 facilitate store pairing in the store queue.
18678
18679       - If the pendulum is 2, then two loads have already been
18680	 issued in this cycle, so we increase the priority of the first store
	 in the ready list to increase its likelihood of being chosen first
18682	 in the next cycle.
18683
18684       - If the pendulum < -2 or > 2, then do nothing.
18685
       Note: This code covers the most common scenarios.  There exist
	     non-load/store instructions which make use of the LSU and which
18688	     would need to be accounted for to strictly model the behavior
18689	     of the machine.  Those instructions are currently unaccounted
18690	     for to help minimize compile time overhead of this code.
18691   */
18692  int pos;
18693  rtx load_mem, str_mem;
18694
  if (is_store_insn (last_scheduled_insn, &str_mem))
    /* Issuing a store, swing the load_store_pendulum to the left.  */
    load_store_pendulum--;
  else if (is_load_insn (last_scheduled_insn, &load_mem))
    /* Issuing a load, swing the load_store_pendulum to the right.  */
    load_store_pendulum++;
18701  else
18702    return cached_can_issue_more;
18703
18704  /* If the pendulum is balanced, or there is only one instruction on
18705     the ready list, then all is well, so return. */
18706  if ((load_store_pendulum == 0) || (lastpos <= 0))
18707    return cached_can_issue_more;
18708
18709  if (load_store_pendulum == 1)
18710    {
      /* A load has been issued in this cycle.  Scan the ready list
	 for another load to issue with it.  */
18713      pos = lastpos;
18714
18715      while (pos >= 0)
18716	{
18717	  if (is_load_insn (ready[pos], &load_mem))
18718	    {
	      /* Found a load.  Move it to the end of the ready list,
		 from which the scheduler issues first, and bump its
		 priority so that it is more likely to stay there.  */
18722	      move_to_end_of_ready (ready, pos, lastpos);
18723
18724	      if (!sel_sched_p ()
18725		  && INSN_PRIORITY_KNOWN (ready[lastpos]))
18726		INSN_PRIORITY (ready[lastpos])++;
18727	      break;
18728	    }
18729	  pos--;
18730	}
18731    }
18732  else if (load_store_pendulum == -2)
18733    {
18734      /* Two stores have been issued in this cycle.  Increase the
18735	 priority of the first load in the ready list to favor it for
18736	 issuing in the next cycle. */
18737      pos = lastpos;
18738
18739      while (pos >= 0)
18740	{
18741	  if (is_load_insn (ready[pos], &load_mem)
18742	      && !sel_sched_p ()
18743	      && INSN_PRIORITY_KNOWN (ready[pos]))
18744	    {
18745	      INSN_PRIORITY (ready[pos])++;
18746
	      /* Adjust the pendulum to account for the fact that a load
		 was found and increased in priority.  This is to prevent
		 increasing the priority of multiple loads.  */
18750	      load_store_pendulum--;
18751
18752	      break;
18753	    }
18754	  pos--;
18755	}
18756    }
18757  else if (load_store_pendulum == -1)
18758    {
      /* A store has been issued in this cycle.  Scan the ready list for
	 another store to issue with it, preferring a store to an adjacent
	 memory location.  */
18762      int first_store_pos = -1;
18763
18764      pos = lastpos;
18765
18766      while (pos >= 0)
18767	{
18768	  if (is_store_insn (ready[pos], &str_mem))
18769	    {
18770	      rtx str_mem2;
	      /* Maintain the index of the first store found on the
		 list.  */
18773	      if (first_store_pos == -1)
18774		first_store_pos = pos;
18775
18776	      if (is_store_insn (last_scheduled_insn, &str_mem2)
18777		  && adjacent_mem_locations (str_mem, str_mem2))
18778		{
		  /* Found an adjacent store.  Move it to the end of the
		     ready list, and adjust its priority so that it is
		     more likely to stay there.  */
18782		  move_to_end_of_ready (ready, pos, lastpos);
18783
18784		  if (!sel_sched_p ()
18785		      && INSN_PRIORITY_KNOWN (ready[lastpos]))
18786		    INSN_PRIORITY (ready[lastpos])++;
18787
18788		  first_store_pos = -1;
18789
18790		  break;
18791		};
18792	    }
18793	  pos--;
18794	}
18795
18796      if (first_store_pos >= 0)
18797	{
18798	  /* An adjacent store wasn't found, but a non-adjacent store was,
18799	     so move the non-adjacent store to the front of the ready
18800	     list, and adjust its priority so that it is more likely to
18801	     stay there. */
18802	  move_to_end_of_ready (ready, first_store_pos, lastpos);
18803	  if (!sel_sched_p ()
18804	      && INSN_PRIORITY_KNOWN (ready[lastpos]))
18805	    INSN_PRIORITY (ready[lastpos])++;
18806	}
18807    }
18808  else if (load_store_pendulum == 2)
18809    {
18810      /* Two loads have been issued in this cycle.  Increase the priority
18811	 of the first store in the ready list to favor it for issuing in
18812	 the next cycle. */
18813      pos = lastpos;
18814
18815      while (pos >= 0)
18816	{
18817	  if (is_store_insn (ready[pos], &str_mem)
18818	      && !sel_sched_p ()
18819	      && INSN_PRIORITY_KNOWN (ready[pos]))
18820	    {
18821	      INSN_PRIORITY (ready[pos])++;
18822
	      /* Adjust the pendulum to account for the fact that a store
		 was found and increased in priority.  This is to prevent
		 increasing the priority of multiple stores.  */
18826	      load_store_pendulum++;
18827
18828	      break;
18829	    }
18830	  pos--;
18831	}
18832    }
18833
18834  return cached_can_issue_more;
18835}
18836
18837/* Do Power9 specific sched_reorder2 reordering of ready list.  */
18838
18839static int
18840power9_sched_reorder2 (rtx_insn **ready, int lastpos)
18841{
18842  int pos;
18843  enum attr_type type, type2;
18844
18845  type = get_attr_type (last_scheduled_insn);
18846
18847  /* Try to issue fixed point divides back-to-back in pairs so they will be
18848     routed to separate execution units and execute in parallel.  */
18849  if (type == TYPE_DIV && divide_cnt == 0)
18850    {
18851      /* First divide has been scheduled.  */
18852      divide_cnt = 1;
18853
18854      /* Scan the ready list looking for another divide, if found move it
18855	 to the end of the list so it is chosen next.  */
18856      pos = lastpos;
18857      while (pos >= 0)
18858	{
18859	  if (recog_memoized (ready[pos]) >= 0
18860	      && get_attr_type (ready[pos]) == TYPE_DIV)
18861	    {
18862	      move_to_end_of_ready (ready, pos, lastpos);
18863	      break;
18864	    }
18865	  pos--;
18866	}
18867    }
18868  else
18869    {
18870      /* Last insn was the 2nd divide or not a divide, reset the counter.  */
18871      divide_cnt = 0;
18872
18873      /* The best dispatch throughput for vector and vector load insns can be
18874	 achieved by interleaving a vector and vector load such that they'll
18875	 dispatch to the same superslice. If this pairing cannot be achieved
18876	 then it is best to pair vector insns together and vector load insns
18877	 together.
18878
18879	 To aid in this pairing, vec_pairing maintains the current state with
18880	 the following values:
18881
18882	     0  : Initial state, no vecload/vector pairing has been started.
18883
18884	     1  : A vecload or vector insn has been issued and a candidate for
18885		  pairing has been found and moved to the end of the ready
18886		  list.  */
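      /* Illustrative sequence: a vecload issues while vec_pairing == 0;
	 a pairable vector insn found on the ready list is moved to the
	 end so it issues next, and vec_pairing becomes 1.  On the
	 following call the pair is complete and the state resets to 0
	 below.  */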
18887      if (type == TYPE_VECLOAD)
18888	{
18889	  /* Issued a vecload.  */
18890	  if (vec_pairing == 0)
18891	    {
18892	      int vecload_pos = -1;
18893	      /* We issued a single vecload, look for a vector insn to pair it
18894		 with.  If one isn't found, try to pair another vecload.  */
18895	      pos = lastpos;
18896	      while (pos >= 0)
18897		{
18898		  if (recog_memoized (ready[pos]) >= 0)
18899		    {
18900		      type2 = get_attr_type (ready[pos]);
18901		      if (is_power9_pairable_vec_type (type2))
18902			{
18903			  /* Found a vector insn to pair with, move it to the
18904			     end of the ready list so it is scheduled next.  */
18905			  move_to_end_of_ready (ready, pos, lastpos);
18906			  vec_pairing = 1;
18907			  return cached_can_issue_more;
18908			}
18909		      else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
18910			/* Remember position of first vecload seen.  */
18911			vecload_pos = pos;
18912		    }
18913		  pos--;
18914		}
18915	      if (vecload_pos >= 0)
18916		{
18917		  /* Didn't find a vector to pair with but did find a vecload,
18918		     move it to the end of the ready list.  */
18919		  move_to_end_of_ready (ready, vecload_pos, lastpos);
18920		  vec_pairing = 1;
18921		  return cached_can_issue_more;
18922		}
18923	    }
18924	}
18925      else if (is_power9_pairable_vec_type (type))
18926	{
18927	  /* Issued a vector operation.  */
18928	  if (vec_pairing == 0)
18929	    {
18930	      int vec_pos = -1;
18931	      /* We issued a single vector insn, look for a vecload to pair it
18932		 with.  If one isn't found, try to pair another vector.  */
18933	      pos = lastpos;
18934	      while (pos >= 0)
18935		{
18936		  if (recog_memoized (ready[pos]) >= 0)
18937		    {
18938		      type2 = get_attr_type (ready[pos]);
18939		      if (type2 == TYPE_VECLOAD)
18940			{
18941			  /* Found a vecload insn to pair with, move it to the
18942			     end of the ready list so it is scheduled next.  */
18943			  move_to_end_of_ready (ready, pos, lastpos);
18944			  vec_pairing = 1;
18945			  return cached_can_issue_more;
18946			}
18947		      else if (is_power9_pairable_vec_type (type2)
18948			       && vec_pos == -1)
18949			/* Remember position of first vector insn seen.  */
18950			vec_pos = pos;
18951		    }
18952		  pos--;
18953		}
18954	      if (vec_pos >= 0)
18955		{
18956		  /* Didn't find a vecload to pair with but did find a vector
18957		     insn, move it to the end of the ready list.  */
18958		  move_to_end_of_ready (ready, vec_pos, lastpos);
18959		  vec_pairing = 1;
18960		  return cached_can_issue_more;
18961		}
18962	    }
18963	}
18964
18965      /* We've either finished a vec/vecload pair, couldn't find an insn to
	 continue the current pair, or the last insn had nothing to do
	 with pairing.  In any case, reset the state.  */
18968      vec_pairing = 0;
18969    }
18970
18971  return cached_can_issue_more;
18972}
18973
18974/* Determine if INSN is a store to memory that can be fused with a similar
18975   adjacent store.  */
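/* E.g. (illustrative asm): 'std r4,8(r3)' and 'stw r4,8(r3)' qualify;
   update ('stdu'), indexed ('stdx') and prefixed ('pstd') forms do
   not, nor do integer stores narrower than a word or FP stores
   narrower than a doubleword.  */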
18976
18977static bool
18978is_fusable_store (rtx_insn *insn, rtx *str_mem)
18979{
18980  /* Insn must be a non-prefixed base+disp form store.  */
18981  if (is_store_insn (insn, str_mem)
18982      && get_attr_prefixed (insn) == PREFIXED_NO
18983      && get_attr_update (insn) == UPDATE_NO
18984      && get_attr_indexed (insn) == INDEXED_NO)
18985    {
18986      /* Further restrictions by mode and size.  */
18987      if (!MEM_SIZE_KNOWN_P (*str_mem))
18988	return false;
18989
18990      machine_mode mode = GET_MODE (*str_mem);
18991      HOST_WIDE_INT size = MEM_SIZE (*str_mem);
18992
18993      if (INTEGRAL_MODE_P (mode))
18994	/* Must be word or dword size.  */
18995	return (size == 4 || size == 8);
18996      else if (FLOAT_MODE_P (mode))
18997	/* Must be dword size.  */
18998	return (size == 8);
18999    }
19000
19001  return false;
19002}
19003
19004/* Do Power10 specific reordering of the ready list.  */
19005
19006static int
19007power10_sched_reorder (rtx_insn **ready, int lastpos)
19008{
19009  rtx mem1;
19010
19011  /* Do store fusion during sched2 only.  */
19012  if (!reload_completed)
19013    return cached_can_issue_more;
19014
  /* If the prior insn finished off a store fusion pair then simply
     reset the counter and return; there is nothing more to do.  */
19017  if (load_store_pendulum != 0)
19018    {
19019      load_store_pendulum = 0;
19020      return cached_can_issue_more;
19021    }
19022
19023  /* Try to pair certain store insns to adjacent memory locations
19024     so that the hardware will fuse them to a single operation.  */
19025  if (TARGET_P10_FUSION && is_fusable_store (last_scheduled_insn, &mem1))
19026    {
19027
19028      /* A fusable store was just scheduled.  Scan the ready list for another
19029	 store that it can fuse with.  */
19030      int pos = lastpos;
19031      while (pos >= 0)
19032	{
19033	  rtx mem2;
	  /* GPR stores can be fused at ascending or descending offsets;
	     FPR/VSR stores must be ascending only.  */
19036	  if (is_fusable_store (ready[pos], &mem2)
19037	      && ((INTEGRAL_MODE_P (GET_MODE (mem1))
19038		   && adjacent_mem_locations (mem1, mem2))
19039		  || (FLOAT_MODE_P (GET_MODE (mem1))
19040		   && (adjacent_mem_locations (mem1, mem2) == mem1))))
19041	    {
19042	      /* Found a fusable store.  Move it to the end of the ready list
19043		 so it is scheduled next.  */
19044	      move_to_end_of_ready (ready, pos, lastpos);
19045
19046	      load_store_pendulum = -1;
19047	      break;
19048	    }
19049	  pos--;
19050	}
19051    }
19052
19053  return cached_can_issue_more;
19054}
19055
/* We are about to begin issuing insns for this clock cycle.  */
19057
19058static int
19059rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
19060                        rtx_insn **ready ATTRIBUTE_UNUSED,
19061                        int *pn_ready ATTRIBUTE_UNUSED,
19062		        int clock_var ATTRIBUTE_UNUSED)
19063{
19064  int n_ready = *pn_ready;
19065
19066  if (sched_verbose)
19067    fprintf (dump, "// rs6000_sched_reorder :\n");
19068
  /* Reorder the ready list if the insn at its head (the next insn
     to issue) is a non-pipelined insn.  */
  if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
    {
      if (is_nonpipeline_insn (ready[n_ready - 1])
	  && (recog_memoized (ready[n_ready - 2]) > 0))
	/* Simply swap first two insns.  */
	std::swap (ready[n_ready - 1], ready[n_ready - 2]);
    }
19078
19079  if (rs6000_tune == PROCESSOR_POWER6)
19080    load_store_pendulum = 0;
19081
19082  /* Do Power10 dependent reordering.  */
19083  if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
19084    power10_sched_reorder (ready, n_ready - 1);
19085
19086  return rs6000_issue_rate ();
19087}
19088
19089/* Like rs6000_sched_reorder, but called after issuing each insn.  */
19090
19091static int
19092rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
19093		         int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
19094{
19095  if (sched_verbose)
19096    fprintf (dump, "// rs6000_sched_reorder2 :\n");
19097
19098  /* Do Power6 dependent reordering if necessary.  */
19099  if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
19100    return power6_sched_reorder2 (ready, *pn_ready - 1);
19101
19102  /* Do Power9 dependent reordering if necessary.  */
19103  if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
19104      && recog_memoized (last_scheduled_insn) >= 0)
19105    return power9_sched_reorder2 (ready, *pn_ready - 1);
19106
19107  /* Do Power10 dependent reordering.  */
19108  if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
19109    return power10_sched_reorder (ready, *pn_ready - 1);
19110
19111  return cached_can_issue_more;
19112}
19113
19114/* Return whether the presence of INSN causes a dispatch group termination
19115   of group WHICH_GROUP.
19116
   If WHICH_GROUP == current_group, this function will return true if INSN
   causes the termination of the current group (i.e., the dispatch group to
   which INSN belongs).  This means that INSN will be the last insn in the
   group it belongs to.

   If WHICH_GROUP == previous_group, this function will return true if INSN
   causes the termination of the previous group (i.e., the dispatch group that
   precedes the group to which INSN belongs).  This means that INSN will be
   the first insn in the group it belongs to.  */
19126
19127static bool
19128insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
19129{
19130  bool first, last;
19131
19132  if (! insn)
19133    return false;
19134
19135  first = insn_must_be_first_in_group (insn);
19136  last = insn_must_be_last_in_group (insn);
19137
19138  if (first && last)
19139    return true;
19140
19141  if (which_group == current_group)
19142    return last;
19143  else if (which_group == previous_group)
19144    return first;
19145
19146  return false;
19147}
19148
19149
19150static bool
19151insn_must_be_first_in_group (rtx_insn *insn)
19152{
19153  enum attr_type type;
19154
19155  if (!insn
19156      || NOTE_P (insn)
19157      || DEBUG_INSN_P (insn)
19158      || GET_CODE (PATTERN (insn)) == USE
19159      || GET_CODE (PATTERN (insn)) == CLOBBER)
19160    return false;
19161
19162  switch (rs6000_tune)
19163    {
19164    case PROCESSOR_POWER5:
19165      if (is_cracked_insn (insn))
19166        return true;
19167      /* FALLTHRU */
19168    case PROCESSOR_POWER4:
19169      if (is_microcoded_insn (insn))
19170        return true;
19171
19172      if (!rs6000_sched_groups)
19173        return false;
19174
19175      type = get_attr_type (insn);
19176
19177      switch (type)
19178        {
19179        case TYPE_MFCR:
19180        case TYPE_MFCRF:
19181        case TYPE_MTCR:
19182        case TYPE_CR_LOGICAL:
19183        case TYPE_MTJMPR:
19184        case TYPE_MFJMPR:
19185        case TYPE_DIV:
19186        case TYPE_LOAD_L:
19187        case TYPE_STORE_C:
19188        case TYPE_ISYNC:
19189        case TYPE_SYNC:
19190          return true;
19191        default:
19192          break;
19193        }
19194      break;
19195    case PROCESSOR_POWER6:
19196      type = get_attr_type (insn);
19197
19198      switch (type)
19199        {
19200        case TYPE_EXTS:
19201        case TYPE_CNTLZ:
19202        case TYPE_TRAP:
19203        case TYPE_MUL:
19204        case TYPE_INSERT:
19205        case TYPE_FPCOMPARE:
19206        case TYPE_MFCR:
19207        case TYPE_MTCR:
19208        case TYPE_MFJMPR:
19209        case TYPE_MTJMPR:
19210        case TYPE_ISYNC:
19211        case TYPE_SYNC:
19212        case TYPE_LOAD_L:
19213        case TYPE_STORE_C:
19214          return true;
19215        case TYPE_SHIFT:
19216          if (get_attr_dot (insn) == DOT_NO
19217              || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19218            return true;
19219          else
19220            break;
19221        case TYPE_DIV:
19222          if (get_attr_size (insn) == SIZE_32)
19223            return true;
19224          else
19225            break;
19226        case TYPE_LOAD:
19227        case TYPE_STORE:
19228        case TYPE_FPLOAD:
19229        case TYPE_FPSTORE:
19230          if (get_attr_update (insn) == UPDATE_YES)
19231            return true;
19232          else
19233            break;
19234        default:
19235          break;
19236        }
19237      break;
19238    case PROCESSOR_POWER7:
19239      type = get_attr_type (insn);
19240
19241      switch (type)
19242        {
19243        case TYPE_CR_LOGICAL:
19244        case TYPE_MFCR:
19245        case TYPE_MFCRF:
19246        case TYPE_MTCR:
19247        case TYPE_DIV:
19248        case TYPE_ISYNC:
19249        case TYPE_LOAD_L:
19250        case TYPE_STORE_C:
19251        case TYPE_MFJMPR:
19252        case TYPE_MTJMPR:
19253          return true;
19254        case TYPE_MUL:
19255        case TYPE_SHIFT:
19256        case TYPE_EXTS:
19257          if (get_attr_dot (insn) == DOT_YES)
19258            return true;
19259          else
19260            break;
19261        case TYPE_LOAD:
19262          if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19263              || get_attr_update (insn) == UPDATE_YES)
19264            return true;
19265          else
19266            break;
19267        case TYPE_STORE:
19268        case TYPE_FPLOAD:
19269        case TYPE_FPSTORE:
19270          if (get_attr_update (insn) == UPDATE_YES)
19271            return true;
19272          else
19273            break;
19274        default:
19275          break;
19276        }
19277      break;
19278    case PROCESSOR_POWER8:
19279      type = get_attr_type (insn);
19280
19281      switch (type)
19282        {
19283        case TYPE_CR_LOGICAL:
19284        case TYPE_MFCR:
19285        case TYPE_MFCRF:
19286        case TYPE_MTCR:
19287        case TYPE_SYNC:
19288        case TYPE_ISYNC:
19289        case TYPE_LOAD_L:
19290        case TYPE_STORE_C:
19291        case TYPE_VECSTORE:
19292        case TYPE_MFJMPR:
19293        case TYPE_MTJMPR:
19294          return true;
19295        case TYPE_SHIFT:
19296        case TYPE_EXTS:
19297        case TYPE_MUL:
19298          if (get_attr_dot (insn) == DOT_YES)
19299            return true;
19300          else
19301            break;
19302        case TYPE_LOAD:
19303          if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19304              || get_attr_update (insn) == UPDATE_YES)
19305            return true;
19306          else
19307            break;
19308        case TYPE_STORE:
19309          if (get_attr_update (insn) == UPDATE_YES
19310              && get_attr_indexed (insn) == INDEXED_YES)
19311            return true;
19312          else
19313            break;
19314        default:
19315          break;
19316        }
19317      break;
19318    default:
19319      break;
19320    }
19321
19322  return false;
19323}
19324
19325static bool
19326insn_must_be_last_in_group (rtx_insn *insn)
19327{
19328  enum attr_type type;
19329
19330  if (!insn
19331      || NOTE_P (insn)
19332      || DEBUG_INSN_P (insn)
19333      || GET_CODE (PATTERN (insn)) == USE
19334      || GET_CODE (PATTERN (insn)) == CLOBBER)
19335    return false;
19336
19337  switch (rs6000_tune) {
19338  case PROCESSOR_POWER4:
19339  case PROCESSOR_POWER5:
19340    if (is_microcoded_insn (insn))
19341      return true;
19342
19343    if (is_branch_slot_insn (insn))
19344      return true;
19345
19346    break;
19347  case PROCESSOR_POWER6:
19348    type = get_attr_type (insn);
19349
19350    switch (type)
19351      {
19352      case TYPE_EXTS:
19353      case TYPE_CNTLZ:
19354      case TYPE_TRAP:
19355      case TYPE_MUL:
19356      case TYPE_FPCOMPARE:
19357      case TYPE_MFCR:
19358      case TYPE_MTCR:
19359      case TYPE_MFJMPR:
19360      case TYPE_MTJMPR:
19361      case TYPE_ISYNC:
19362      case TYPE_SYNC:
19363      case TYPE_LOAD_L:
19364      case TYPE_STORE_C:
19365        return true;
19366      case TYPE_SHIFT:
19367        if (get_attr_dot (insn) == DOT_NO
19368            || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19369          return true;
19370        else
19371          break;
19372      case TYPE_DIV:
19373        if (get_attr_size (insn) == SIZE_32)
19374          return true;
19375        else
19376          break;
19377      default:
19378        break;
19379    }
19380    break;
19381  case PROCESSOR_POWER7:
19382    type = get_attr_type (insn);
19383
19384    switch (type)
19385      {
19386      case TYPE_ISYNC:
19387      case TYPE_SYNC:
19388      case TYPE_LOAD_L:
19389      case TYPE_STORE_C:
19390        return true;
19391      case TYPE_LOAD:
19392        if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19393            && get_attr_update (insn) == UPDATE_YES)
19394          return true;
19395        else
19396          break;
19397      case TYPE_STORE:
19398        if (get_attr_update (insn) == UPDATE_YES
19399            && get_attr_indexed (insn) == INDEXED_YES)
19400          return true;
19401        else
19402          break;
19403      default:
19404        break;
19405    }
19406    break;
19407  case PROCESSOR_POWER8:
19408    type = get_attr_type (insn);
19409
19410    switch (type)
19411      {
19412      case TYPE_MFCR:
19413      case TYPE_MTCR:
19414      case TYPE_ISYNC:
19415      case TYPE_SYNC:
19416      case TYPE_LOAD_L:
19417      case TYPE_STORE_C:
19418        return true;
19419      case TYPE_LOAD:
19420        if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19421            && get_attr_update (insn) == UPDATE_YES)
19422          return true;
19423        else
19424          break;
19425      case TYPE_STORE:
19426        if (get_attr_update (insn) == UPDATE_YES
19427            && get_attr_indexed (insn) == INDEXED_YES)
19428          return true;
19429        else
19430          break;
19431      default:
19432        break;
19433    }
19434    break;
19435  default:
19436    break;
19437  }
19438
19439  return false;
19440}
19441
19442/* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19443   dispatch group) from the insns in GROUP_INSNS.  Return false otherwise.  */
19444
19445static bool
19446is_costly_group (rtx *group_insns, rtx next_insn)
19447{
19448  int i;
19449  int issue_rate = rs6000_issue_rate ();
19450
19451  for (i = 0; i < issue_rate; i++)
19452    {
19453      sd_iterator_def sd_it;
19454      dep_t dep;
19455      rtx insn = group_insns[i];
19456
19457      if (!insn)
19458	continue;
19459
19460      FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
19461	{
19462	  rtx next = DEP_CON (dep);
19463
19464	  if (next == next_insn
19465	      && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
19466	    return true;
19467	}
19468    }
19469
19470  return false;
19471}
19472
/* Utility function for redefine_groups.
   Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
   in the same dispatch group.  If so, insert nops before NEXT_INSN, in order
   to keep it "far" (in a separate group) from GROUP_INSNS, following
   one of the following schemes, depending on the value of the flag
   -minsert-sched-nops = X:
   (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
       in order to force NEXT_INSN into a separate group.
   (2) X < sched_finish_regroup_exact: insert exactly X nops.
   GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
   insertion (has a group just ended, how many vacant issue slots remain in
   the last group, and how many dispatch groups were encountered so far).  */
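/* E.g. (illustrative): with -minsert-sched-nops=2, exactly two nops
   are emitted before NEXT_INSN.  With the regroup_exact scheme and
   three vacant slots, two nops suffice for a non-branch NEXT_INSN
   (which cannot use the branch slot anyway) while a branch needs all
   three; POWER6/7/8 instead emit a single group-ending nop.  */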
19485
19486static int
19487force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
19488		 rtx_insn *next_insn, bool *group_end, int can_issue_more,
19489		 int *group_count)
19490{
19491  rtx nop;
19492  bool force;
19493  int issue_rate = rs6000_issue_rate ();
19494  bool end = *group_end;
19495  int i;
19496
19497  if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
19498    return can_issue_more;
19499
19500  if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
19501    return can_issue_more;
19502
19503  force = is_costly_group (group_insns, next_insn);
19504  if (!force)
19505    return can_issue_more;
19506
19507  if (sched_verbose > 6)
    fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
	     *group_count, can_issue_more);
19510
19511  if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
19512    {
19513      if (*group_end)
19514	can_issue_more = 0;
19515
19516      /* Since only a branch can be issued in the last issue_slot, it is
19517	 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
19518	 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
19519	 in this case the last nop will start a new group and the branch
19520	 will be forced to the new group.  */
19521      if (can_issue_more && !is_branch_slot_insn (next_insn))
19522	can_issue_more--;
19523
19524      /* Do we have a special group ending nop? */
19525      if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
19526	  || rs6000_tune == PROCESSOR_POWER8)
19527	{
19528	  nop = gen_group_ending_nop ();
19529	  emit_insn_before (nop, next_insn);
19530	  can_issue_more = 0;
19531	}
19532      else
19533	while (can_issue_more > 0)
19534	  {
19535	    nop = gen_nop ();
19536	    emit_insn_before (nop, next_insn);
19537	    can_issue_more--;
19538	  }
19539
19540      *group_end = true;
19541      return 0;
19542    }
19543
19544  if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
19545    {
19546      int n_nops = rs6000_sched_insert_nops;
19547
19548      /* Nops can't be issued from the branch slot, so the effective
19549	 issue_rate for nops is 'issue_rate - 1'.  */
19550      if (can_issue_more == 0)
19551	can_issue_more = issue_rate;
19552      can_issue_more--;
19553      if (can_issue_more == 0)
19554	{
19555	  can_issue_more = issue_rate - 1;
19556	  (*group_count)++;
19557	  end = true;
19558	  for (i = 0; i < issue_rate; i++)
19559	    {
19560	      group_insns[i] = 0;
19561	    }
19562	}
19563
19564      while (n_nops > 0)
19565	{
19566	  nop = gen_nop ();
19567	  emit_insn_before (nop, next_insn);
19568	  if (can_issue_more == issue_rate - 1) /* new group begins */
19569	    end = false;
19570	  can_issue_more--;
19571	  if (can_issue_more == 0)
19572	    {
19573	      can_issue_more = issue_rate - 1;
19574	      (*group_count)++;
19575	      end = true;
19576	      for (i = 0; i < issue_rate; i++)
19577		{
19578		  group_insns[i] = 0;
19579		}
19580	    }
19581	  n_nops--;
19582	}
19583
19584      /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1').  */
19585      can_issue_more++;
19586
19587      /* Is next_insn going to start a new group?  */
19588      *group_end
19589	= (end
19590	   || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19591	   || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19592	   || (can_issue_more < issue_rate &&
19593	       insn_terminates_group_p (next_insn, previous_group)));
19594      if (*group_end && end)
19595	(*group_count)--;
19596
19597      if (sched_verbose > 6)
19598	fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
19599		 *group_count, can_issue_more);
19600      return can_issue_more;
19601    }
19602
19603  return can_issue_more;
19604}
19605
19606/* This function tries to synch the dispatch groups that the compiler "sees"
19607   with the dispatch groups that the processor dispatcher is expected to
19608   form in practice.  It tries to achieve this synchronization by forcing the
   estimated processor grouping on the compiler (as opposed to the function
   'pad_groups', which tries to force the scheduler's grouping on the processor).
19611
19612   The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
19613   examines the (estimated) dispatch groups that will be formed by the processor
19614   dispatcher.  It marks these group boundaries to reflect the estimated
19615   processor grouping, overriding the grouping that the scheduler had marked.
19616   Depending on the value of the flag '-minsert-sched-nops' this function can
19617   force certain insns into separate groups or force a certain distance between
19618   them by inserting nops, for example, if there exists a "costly dependence"
19619   between the insns.
19620
19621   The function estimates the group boundaries that the processor will form as
19622   follows:  It keeps track of how many vacant issue slots are available after
19623   each insn.  A subsequent insn will start a new group if one of the following
19624   4 cases applies:
19625   - no more vacant issue slots remain in the current dispatch group.
19626   - only the last issue slot, which is the branch slot, is vacant, but the next
19627     insn is not a branch.
   - only the last 2 or fewer issue slots, including the branch slot, are
     vacant, which means that a cracked insn (which occupies two issue slots)
     can't be issued in this group.
   - fewer than 'issue_rate' slots are vacant, and the next insn always needs
     to start a new group.  */
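/* E.g. (illustrative, issue_rate == 5): after four insns only the
   branch slot remains, so a following add starts a new group (case 2)
   while a following branch may still join the current one; with two
   slots left, a cracked insn cannot fit and starts a new group
   (case 3).  */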
19633
19634static int
19635redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19636		 rtx_insn *tail)
19637{
19638  rtx_insn *insn, *next_insn;
19639  int issue_rate;
19640  int can_issue_more;
19641  int slot, i;
19642  bool group_end;
19643  int group_count = 0;
19644  rtx *group_insns;
19645
19646  /* Initialize.  */
19647  issue_rate = rs6000_issue_rate ();
19648  group_insns = XALLOCAVEC (rtx, issue_rate);
19649  for (i = 0; i < issue_rate; i++)
19650    {
19651      group_insns[i] = 0;
19652    }
19653  can_issue_more = issue_rate;
19654  slot = 0;
19655  insn = get_next_active_insn (prev_head_insn, tail);
19656  group_end = false;
19657
19658  while (insn != NULL_RTX)
19659    {
19660      slot = (issue_rate - can_issue_more);
19661      group_insns[slot] = insn;
19662      can_issue_more =
19663	rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19664      if (insn_terminates_group_p (insn, current_group))
19665	can_issue_more = 0;
19666
19667      next_insn = get_next_active_insn (insn, tail);
19668      if (next_insn == NULL_RTX)
19669	return group_count + 1;
19670
19671      /* Is next_insn going to start a new group?  */
19672      group_end
19673	= (can_issue_more == 0
19674	   || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19675	   || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19676	   || (can_issue_more < issue_rate &&
19677	       insn_terminates_group_p (next_insn, previous_group)));
19678
19679      can_issue_more = force_new_group (sched_verbose, dump, group_insns,
19680					next_insn, &group_end, can_issue_more,
19681					&group_count);
19682
19683      if (group_end)
19684	{
19685	  group_count++;
19686	  can_issue_more = 0;
19687	  for (i = 0; i < issue_rate; i++)
19688	    {
19689	      group_insns[i] = 0;
19690	    }
19691	}
19692
19693      if (GET_MODE (next_insn) == TImode && can_issue_more)
19694	PUT_MODE (next_insn, VOIDmode);
19695      else if (!can_issue_more && GET_MODE (next_insn) != TImode)
19696	PUT_MODE (next_insn, TImode);
19697
19698      insn = next_insn;
19699      if (can_issue_more == 0)
19700	can_issue_more = issue_rate;
19701    } /* while */
19702
19703  return group_count;
19704}
19705
19706/* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
19707   dispatch group boundaries that the scheduler had marked.  Pad with nops
19708   any dispatch groups which have vacant issue slots, in order to force the
19709   scheduler's grouping on the processor dispatcher.  The function
19710   returns the number of dispatch groups found.  */
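/* E.g. (illustrative, issue_rate == 5): if the scheduler marked a
   boundary while two slots were still vacant, one nop is emitted when
   the next insn is not a branch (it could not use the branch slot
   anyway) and two when it is, so the hardware dispatcher sees the
   same boundary.  */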
19711
19712static int
19713pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19714	    rtx_insn *tail)
19715{
19716  rtx_insn *insn, *next_insn;
19717  rtx nop;
19718  int issue_rate;
19719  int can_issue_more;
19720  int group_end;
19721  int group_count = 0;
19722
19723  /* Initialize issue_rate.  */
19724  issue_rate = rs6000_issue_rate ();
19725  can_issue_more = issue_rate;
19726
19727  insn = get_next_active_insn (prev_head_insn, tail);
19728  next_insn = get_next_active_insn (insn, tail);
19729
19730  while (insn != NULL_RTX)
19731    {
      can_issue_more =
	rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19734
19735      group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
19736
19737      if (next_insn == NULL_RTX)
19738	break;
19739
19740      if (group_end)
19741	{
19742	  /* If the scheduler had marked group termination at this location
19743	     (between insn and next_insn), and neither insn nor next_insn will
19744	     force group termination, pad the group with nops to force group
19745	     termination.  */
19746	  if (can_issue_more
19747	      && (rs6000_sched_insert_nops == sched_finish_pad_groups)
19748	      && !insn_terminates_group_p (insn, current_group)
19749	      && !insn_terminates_group_p (next_insn, previous_group))
19750	    {
19751	      if (!is_branch_slot_insn (next_insn))
19752		can_issue_more--;
19753
19754	      while (can_issue_more)
19755		{
19756		  nop = gen_nop ();
19757		  emit_insn_before (nop, next_insn);
19758		  can_issue_more--;
19759		}
19760	    }
19761
19762	  can_issue_more = issue_rate;
19763	  group_count++;
19764	}
19765
19766      insn = next_insn;
19767      next_insn = get_next_active_insn (insn, tail);
19768    }
19769
19770  return group_count;
19771}
19772
19773/* We're beginning a new block.  Initialize data structures as necessary.  */
19774
19775static void
19776rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
19777		     int sched_verbose ATTRIBUTE_UNUSED,
19778		     int max_ready ATTRIBUTE_UNUSED)
19779{
19780  last_scheduled_insn = NULL;
19781  load_store_pendulum = 0;
19782  divide_cnt = 0;
19783  vec_pairing = 0;
19784}
19785
/* The following function is called at the end of scheduling BB.
   After reload, it inserts nops to enforce insn group bundling.  */
19788
19789static void
19790rs6000_sched_finish (FILE *dump, int sched_verbose)
19791{
19792  int n_groups;
19793
19794  if (sched_verbose)
19795    fprintf (dump, "=== Finishing schedule.\n");
19796
19797  if (reload_completed && rs6000_sched_groups)
19798    {
19799      /* Do not run sched_finish hook when selective scheduling enabled.  */
19800      if (sel_sched_p ())
19801	return;
19802
19803      if (rs6000_sched_insert_nops == sched_finish_none)
19804	return;
19805
19806      if (rs6000_sched_insert_nops == sched_finish_pad_groups)
19807	n_groups = pad_groups (dump, sched_verbose,
19808			       current_sched_info->prev_head,
19809			       current_sched_info->next_tail);
19810      else
19811	n_groups = redefine_groups (dump, sched_verbose,
19812				    current_sched_info->prev_head,
19813				    current_sched_info->next_tail);
19814
19815      if (sched_verbose >= 6)
19816	{
	  fprintf (dump, "ngroups = %d\n", n_groups);
19818	  print_rtl (dump, current_sched_info->prev_head);
19819	  fprintf (dump, "Done finish_sched\n");
19820	}
19821    }
19822}
19823
19824struct rs6000_sched_context
19825{
19826  short cached_can_issue_more;
19827  rtx_insn *last_scheduled_insn;
19828  int load_store_pendulum;
19829  int divide_cnt;
19830  int vec_pairing;
19831};
19832
19833typedef struct rs6000_sched_context rs6000_sched_context_def;
19834typedef rs6000_sched_context_def *rs6000_sched_context_t;
19835
19836/* Allocate store for new scheduling context.  */
19837static void *
19838rs6000_alloc_sched_context (void)
19839{
19840  return xmalloc (sizeof (rs6000_sched_context_def));
19841}
19842
/* If CLEAN_P is true, initialize _SC with clean data;
   otherwise initialize it from the global context.  */
19845static void
19846rs6000_init_sched_context (void *_sc, bool clean_p)
19847{
19848  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19849
19850  if (clean_p)
19851    {
19852      sc->cached_can_issue_more = 0;
19853      sc->last_scheduled_insn = NULL;
19854      sc->load_store_pendulum = 0;
19855      sc->divide_cnt = 0;
19856      sc->vec_pairing = 0;
19857    }
19858  else
19859    {
19860      sc->cached_can_issue_more = cached_can_issue_more;
19861      sc->last_scheduled_insn = last_scheduled_insn;
19862      sc->load_store_pendulum = load_store_pendulum;
19863      sc->divide_cnt = divide_cnt;
19864      sc->vec_pairing = vec_pairing;
19865    }
19866}
19867
19868/* Sets the global scheduling context to the one pointed to by _SC.  */
19869static void
19870rs6000_set_sched_context (void *_sc)
19871{
19872  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19873
19874  gcc_assert (sc != NULL);
19875
19876  cached_can_issue_more = sc->cached_can_issue_more;
19877  last_scheduled_insn = sc->last_scheduled_insn;
19878  load_store_pendulum = sc->load_store_pendulum;
19879  divide_cnt = sc->divide_cnt;
19880  vec_pairing = sc->vec_pairing;
19881}
19882
19883/* Free _SC.  */
19884static void
19885rs6000_free_sched_context (void *_sc)
19886{
19887  gcc_assert (_sc != NULL);
19888
19889  free (_sc);
19890}
19891
19892static bool
19893rs6000_sched_can_speculate_insn (rtx_insn *insn)
19894{
19895  switch (get_attr_type (insn))
19896    {
19897    case TYPE_DIV:
19898    case TYPE_SDIV:
19899    case TYPE_DDIV:
19900    case TYPE_VECDIV:
19901    case TYPE_SSQRT:
19902    case TYPE_DSQRT:
19903      return false;
19904
19905    default:
19906      return true;
19907  }
19908}
19909
19910/* Length in units of the trampoline for entering a nested function.  */
19911
19912int
19913rs6000_trampoline_size (void)
19914{
19915  int ret = 0;
19916
19917  switch (DEFAULT_ABI)
19918    {
19919    default:
19920      gcc_unreachable ();
19921
19922    case ABI_AIX:
19923      ret = (TARGET_32BIT) ? 12 : 24;
19924      break;
19925
19926    case ABI_ELFv2:
19927      gcc_assert (!TARGET_32BIT);
19928      ret = 32;
19929      break;
19930
19931    case ABI_DARWIN:
19932    case ABI_V4:
19933      ret = (TARGET_32BIT) ? 40 : 48;
19934      break;
19935    }
19936
19937  return ret;
19938}
19939
19940/* Emit RTL insns to initialize the variable parts of a trampoline.
19941   FNADDR is an RTX for the address of the function's pure code.
19942   CXT is an RTX for the static chain value for the function.  */
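/* In outline, for the AIX ABI the trampoline is filled in as a
   three-word function descriptor { code address, TOC pointer, static
   chain }: words 0 and 1 are copied from the descriptor FNADDR points
   to, and word 2 receives CXT.  The other ABIs defer to the
   __trampoline_setup library routine.  */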
19943
19944static void
19945rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
19946{
19947  int regsize = (TARGET_32BIT) ? 4 : 8;
19948  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
19949  rtx ctx_reg = force_reg (Pmode, cxt);
19950  rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
19951
19952  switch (DEFAULT_ABI)
19953    {
19954    default:
19955      gcc_unreachable ();
19956
    /* Under AIX, just build the 3-word function descriptor.  */
19958    case ABI_AIX:
19959      {
19960	rtx fnmem, fn_reg, toc_reg;
19961
19962	if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
19963	  error ("you cannot take the address of a nested function if you use "
19964		 "the %qs option", "-mno-pointers-to-nested-functions");
19965
19966	fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
19967	fn_reg = gen_reg_rtx (Pmode);
19968	toc_reg = gen_reg_rtx (Pmode);
19969
19970  /* Macro to shorten the code expansions below.  */
19971# define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
19972
19973	m_tramp = replace_equiv_address (m_tramp, addr);
19974
19975	emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
19976	emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
19977	emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
19978	emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
19979	emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
19980
19981# undef MEM_PLUS
19982      }
19983      break;
19984
19985    /* Under V.4/eabi/darwin, __trampoline_setup does the real work.  */
19986    case ABI_ELFv2:
19987    case ABI_DARWIN:
19988    case ABI_V4:
19989      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
19990			 LCT_NORMAL, VOIDmode,
19991			 addr, Pmode,
19992			 GEN_INT (rs6000_trampoline_size ()), SImode,
19993			 fnaddr, Pmode,
19994			 ctx_reg, Pmode);
19995      break;
19996    }
19997}
19998
19999
20000/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
20001   identifier as an argument, so the front end shouldn't look it up.  */
20002
20003static bool
20004rs6000_attribute_takes_identifier_p (const_tree attr_id)
20005{
20006  return is_attribute_p ("altivec", attr_id);
20007}
20008
20009/* Handle the "altivec" attribute.  The attribute may have
20010   arguments as follows:
20011
20012	__attribute__((altivec(vector__)))
20013	__attribute__((altivec(pixel__)))	(always followed by 'unsigned short')
20014	__attribute__((altivec(bool__)))	(always followed by 'unsigned')
20015
20016  and may appear more than once (e.g., 'vector bool char') in a
20017  given declaration.  */
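/* E.g. (illustrative): the source-level type 'vector unsigned int'
   reaches this handler as
     __attribute__((altivec(vector__))) unsigned int
   and is mapped below onto unsigned_V4SI_type_node, while
   'vector bool int' goes through the 'b' case and yields
   bool_V4SI_type_node.  */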
20018
20019static tree
20020rs6000_handle_altivec_attribute (tree *node,
20021				 tree name ATTRIBUTE_UNUSED,
20022				 tree args,
20023				 int flags ATTRIBUTE_UNUSED,
20024				 bool *no_add_attrs)
20025{
20026  tree type = *node, result = NULL_TREE;
20027  machine_mode mode;
20028  int unsigned_p;
20029  char altivec_type
20030    = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
20031	&& TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
20032       ? *IDENTIFIER_POINTER (TREE_VALUE (args))
20033       : '?');
20034
20035  while (POINTER_TYPE_P (type)
20036	 || TREE_CODE (type) == FUNCTION_TYPE
20037	 || TREE_CODE (type) == METHOD_TYPE
20038	 || TREE_CODE (type) == ARRAY_TYPE)
20039    type = TREE_TYPE (type);
20040
20041  mode = TYPE_MODE (type);
20042
20043  /* Check for invalid AltiVec type qualifiers.  */
20044  if (type == long_double_type_node)
20045    error ("use of %<long double%> in AltiVec types is invalid");
20046  else if (type == boolean_type_node)
20047    error ("use of boolean types in AltiVec types is invalid");
20048  else if (TREE_CODE (type) == COMPLEX_TYPE)
20049    error ("use of %<complex%> in AltiVec types is invalid");
20050  else if (DECIMAL_FLOAT_MODE_P (mode))
20051    error ("use of decimal floating-point types in AltiVec types is invalid");
20052  else if (!TARGET_VSX)
20053    {
20054      if (type == long_unsigned_type_node || type == long_integer_type_node)
20055	{
20056	  if (TARGET_64BIT)
20057	    error ("use of %<long%> in AltiVec types is invalid for "
20058		   "64-bit code without %qs", "-mvsx");
20059	  else if (rs6000_warn_altivec_long)
20060	    warning (0, "use of %<long%> in AltiVec types is deprecated; "
20061		     "use %<int%>");
20062	}
20063      else if (type == long_long_unsigned_type_node
20064	       || type == long_long_integer_type_node)
20065	error ("use of %<long long%> in AltiVec types is invalid without %qs",
20066	       "-mvsx");
20067      else if (type == double_type_node)
20068	error ("use of %<double%> in AltiVec types is invalid without %qs",
20069	       "-mvsx");
20070    }
20071
20072  switch (altivec_type)
20073    {
20074    case 'v':
20075      unsigned_p = TYPE_UNSIGNED (type);
20076      switch (mode)
20077	{
20078	case E_TImode:
20079	  result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
20080	  break;
20081	case E_DImode:
20082	  result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
20083	  break;
20084	case E_SImode:
20085	  result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
20086	  break;
20087	case E_HImode:
20088	  result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
20089	  break;
20090	case E_QImode:
20091	  result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
20092	  break;
20093	case E_SFmode: result = V4SF_type_node; break;
20094	case E_DFmode: result = V2DF_type_node; break;
20095	  /* If the user says 'vector int bool', we may be handed the 'bool'
20096	     attribute _before_ the 'vector' attribute, and so select the
20097	     proper type in the 'b' case below.  */
20098	case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
20099	case E_V2DImode: case E_V2DFmode:
	  result = type;
	  break;
	default: break;
20102	}
20103      break;
20104    case 'b':
20105      switch (mode)
20106	{
20107	case E_TImode: case E_V1TImode: result = bool_V1TI_type_node; break;
20108	case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
20109	case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
20110	case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
	case E_QImode: case E_V16QImode: result = bool_V16QI_type_node; break;
	default: break;
20113	}
20114      break;
    case 'p':
      switch (mode)
	{
	case E_V8HImode: result = pixel_V8HI_type_node; break;
	default: break;
	}
      break;
    default: break;
20122    }
20123
20124  /* Propagate qualifiers attached to the element type
20125     onto the vector type.  */
20126  if (result && result != type && TYPE_QUALS (type))
20127    result = build_qualified_type (result, TYPE_QUALS (type));
20128
20129  *no_add_attrs = true;  /* No need to hang on to the attribute.  */
20130
20131  if (result)
20132    *node = lang_hooks.types.reconstruct_complex_type (*node, result);
20133
20134  return NULL_TREE;
20135}
20136
20137/* AltiVec defines five built-in scalar types that serve as vector
20138   elements; we must teach the compiler how to mangle them.  The 128-bit
20139   floating point mangling is target-specific as well.  MMA defines
20140   two built-in types to be used as opaque vector types.  */
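
/* For example, "U6__boolc" decodes under the Itanium mangling rules as
   the vendor-extended qualifier 'U' "6__bool" applied to the builtin
   code 'c' (char), while "u7__pixel" names the vendor-extended builtin
   type "__pixel".  */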
20141
20142static const char *
20143rs6000_mangle_type (const_tree type)
20144{
20145  type = TYPE_MAIN_VARIANT (type);
20146
20147  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
20148      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
20149      && TREE_CODE (type) != OPAQUE_TYPE)
20150    return NULL;
20151
20152  if (type == bool_char_type_node) return "U6__boolc";
20153  if (type == bool_short_type_node) return "U6__bools";
20154  if (type == pixel_type_node) return "u7__pixel";
20155  if (type == bool_int_type_node) return "U6__booli";
20156  if (type == bool_long_long_type_node) return "U6__boolx";
20157
20158  if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
20159    return "g";
20160  if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
20161    return "u9__ieee128";
20162
20163  if (type == vector_pair_type_node)
20164    return "u13__vector_pair";
20165  if (type == vector_quad_type_node)
20166    return "u13__vector_quad";
20167
20168  /* For all other types, use the default mangling.  */
20169  return NULL;
20170}
20171
20172/* Handle a "longcall" or "shortcall" attribute; arguments as in
20173   struct attribute_spec.handler.  */
20174
20175static tree
20176rs6000_handle_longcall_attribute (tree *node, tree name,
20177				  tree args ATTRIBUTE_UNUSED,
20178				  int flags ATTRIBUTE_UNUSED,
20179				  bool *no_add_attrs)
20180{
20181  if (TREE_CODE (*node) != FUNCTION_TYPE
20182      && TREE_CODE (*node) != FIELD_DECL
20183      && TREE_CODE (*node) != TYPE_DECL)
20184    {
20185      warning (OPT_Wattributes, "%qE attribute only applies to functions",
20186	       name);
20187      *no_add_attrs = true;
20188    }
20189
20190  return NULL_TREE;
20191}
20192
20193/* Set longcall attributes on all functions declared when
20194   rs6000_default_long_calls is true.  */
20195static void
20196rs6000_set_default_type_attributes (tree type)
20197{
20198  if (rs6000_default_long_calls
20199      && (TREE_CODE (type) == FUNCTION_TYPE
20200	  || TREE_CODE (type) == METHOD_TYPE))
20201    TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
20202					NULL_TREE,
20203					TYPE_ATTRIBUTES (type));
20204
20205#if TARGET_MACHO
20206  darwin_set_default_type_attributes (type);
20207#endif
20208}
20209
20210/* Return a reference suitable for calling a function with the
20211   longcall attribute.  */
20212
20213static rtx
20214rs6000_longcall_ref (rtx call_ref, rtx arg)
20215{
  /* System V adds '.' to the internal name, so skip any leading dots.  */
20217  const char *call_name = XSTR (call_ref, 0);
20218  if (*call_name == '.')
20219    {
20220      while (*call_name == '.')
20221	call_name++;
20222
20223      tree node = get_identifier (call_name);
20224      call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
20225    }
20226
20227  if (TARGET_PLTSEQ)
20228    {
20229      rtx base = const0_rtx;
20230      int regno = 12;
20231      if (rs6000_pcrel_p ())
20232	{
20233	  rtx reg = gen_rtx_REG (Pmode, regno);
20234	  rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
20235					   gen_rtvec (3, base, call_ref, arg),
20236					   UNSPECV_PLT_PCREL);
20237	  emit_insn (gen_rtx_SET (reg, u));
20238	  return reg;
20239	}
20240
20241      if (DEFAULT_ABI == ABI_ELFv2)
20242	base = gen_rtx_REG (Pmode, TOC_REGISTER);
20243      else
20244	{
20245	  if (flag_pic)
20246	    base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
20247	  regno = 11;
20248	}
20249      /* Reg must match that used by linker PLT stubs.  For ELFv2, r12
20250	 may be used by a function global entry point.  For SysV4, r11
20251	 is used by __glink_PLTresolve lazy resolver entry.  */
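      /* A sketch of the resulting code: the HA/LO pair below expands to
	 an addis/load sequence against the symbol's PLT entry, roughly

		addis rN,rBASE,sym@plt@ha
		l(w|d) rN,sym@plt@l(rN)

	 The exact mnemonics and relocation spellings here are
	 illustrative; the authoritative output lives in the PLT16
	 patterns in rs6000.md.  */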
20252      rtx reg = gen_rtx_REG (Pmode, regno);
20253      rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
20254			       UNSPEC_PLT16_HA);
20255      rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
20256					gen_rtvec (3, reg, call_ref, arg),
20257					UNSPECV_PLT16_LO);
20258      emit_insn (gen_rtx_SET (reg, hi));
20259      emit_insn (gen_rtx_SET (reg, lo));
20260      return reg;
20261    }
20262
20263  return force_reg (Pmode, call_ref);
20264}
20265
20266#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20267#define TARGET_USE_MS_BITFIELD_LAYOUT 0
20268#endif
20269
20270/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20271   struct attribute_spec.handler.  */
20272static tree
20273rs6000_handle_struct_attribute (tree *node, tree name,
20274				tree args ATTRIBUTE_UNUSED,
20275				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20276{
20277  tree *type = NULL;
20278  if (DECL_P (*node))
20279    {
20280      if (TREE_CODE (*node) == TYPE_DECL)
20281        type = &TREE_TYPE (*node);
20282    }
20283  else
20284    type = node;
20285
20286  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20287                 || TREE_CODE (*type) == UNION_TYPE)))
20288    {
20289      warning (OPT_Wattributes, "%qE attribute ignored", name);
20290      *no_add_attrs = true;
20291    }
20292
20293  else if ((is_attribute_p ("ms_struct", name)
20294            && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20295           || ((is_attribute_p ("gcc_struct", name)
20296                && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20297    {
20298      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
20299               name);
20300      *no_add_attrs = true;
20301    }
20302
20303  return NULL_TREE;
20304}
20305
20306static bool
20307rs6000_ms_bitfield_layout_p (const_tree record_type)
20308{
  return ((TARGET_USE_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
20312}
20313
20314#ifdef USING_ELFOS_H
20315
20316/* A get_unnamed_section callback, used for switching to toc_section.  */
20317
20318static void
20319rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
20320{
20321  if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20322      && TARGET_MINIMAL_TOC)
20323    {
20324      if (!toc_initialized)
20325	{
20326	  fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20327	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20328	  (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
20329	  fprintf (asm_out_file, "\t.tc ");
20330	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
20331	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20332	  fprintf (asm_out_file, "\n");
20333
20334	  fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20335	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20336	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20337	  fprintf (asm_out_file, " = .+32768\n");
20338	  toc_initialized = 1;
20339	}
20340      else
20341	fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20342    }
20343  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20344    {
20345      fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20346      if (!toc_initialized)
20347	{
20348	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20349	  toc_initialized = 1;
20350	}
20351    }
20352  else
20353    {
20354      fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20355      if (!toc_initialized)
20356	{
20357	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20358	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20359	  fprintf (asm_out_file, " = .+32768\n");
20360	  toc_initialized = 1;
20361	}
20362    }
20363}
20364
20365/* Implement TARGET_ASM_INIT_SECTIONS.  */
20366
20367static void
20368rs6000_elf_asm_init_sections (void)
20369{
20370  toc_section
20371    = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
20372
20373  sdata2_section
20374    = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
20375			   SDATA2_SECTION_ASM_OP);
20376}
20377
20378/* Implement TARGET_SELECT_RTX_SECTION.  */
20379
20380static section *
20381rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
20382			       unsigned HOST_WIDE_INT align)
20383{
20384  if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20385    return toc_section;
20386  else
20387    return default_elf_select_rtx_section (mode, x, align);
20388}
20389
20390/* For a SYMBOL_REF, set generic flags and then perform some
20391   target-specific processing.
20392
20393   When the AIX ABI is requested on a non-AIX system, replace the
20394   function name with the real name (with a leading .) rather than the
20395   function descriptor name.  This saves a lot of overriding code to
20396   read the prefixes.  */
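
/* For example, when the AIX ABI is in use on a non-AIX (ELF) target, a
   function "foo" has its SYMBOL_REF renamed to ".foo" here, so later
   output code can refer to the code entry point directly instead of the
   descriptor.  */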
20397
20398static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
20399static void
20400rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
20401{
20402  default_encode_section_info (decl, rtl, first);
20403
20404  if (first
20405      && TREE_CODE (decl) == FUNCTION_DECL
20406      && !TARGET_AIX
20407      && DEFAULT_ABI == ABI_AIX)
20408    {
20409      rtx sym_ref = XEXP (rtl, 0);
20410      size_t len = strlen (XSTR (sym_ref, 0));
20411      char *str = XALLOCAVEC (char, len + 2);
20412      str[0] = '.';
20413      memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
20414      XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
20415    }
20416}
20417
20418static inline bool
20419compare_section_name (const char *section, const char *templ)
20420{
20421  int len;
20422
20423  len = strlen (templ);
20424  return (strncmp (section, templ, len) == 0
20425	  && (section[len] == 0 || section[len] == '.'));
20426}
20427
20428bool
20429rs6000_elf_in_small_data_p (const_tree decl)
20430{
20431  if (rs6000_sdata == SDATA_NONE)
20432    return false;
20433
20434  /* We want to merge strings, so we never consider them small data.  */
20435  if (TREE_CODE (decl) == STRING_CST)
20436    return false;
20437
20438  /* Functions are never in the small data area.  */
20439  if (TREE_CODE (decl) == FUNCTION_DECL)
20440    return false;
20441
20442  if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
20443    {
20444      const char *section = DECL_SECTION_NAME (decl);
20445      if (compare_section_name (section, ".sdata")
20446	  || compare_section_name (section, ".sdata2")
20447	  || compare_section_name (section, ".gnu.linkonce.s")
20448	  || compare_section_name (section, ".sbss")
20449	  || compare_section_name (section, ".sbss2")
20450	  || compare_section_name (section, ".gnu.linkonce.sb")
20451	  || strcmp (section, ".PPC.EMB.sdata0") == 0
20452	  || strcmp (section, ".PPC.EMB.sbss0") == 0)
20453	return true;
20454    }
20455  else
20456    {
20457      /* If we are told not to put readonly data in sdata, then don't.  */
20458      if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
20459	  && !rs6000_readonly_in_sdata)
20460	return false;
20461
20462      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
20463
20464      if (size > 0
20465	  && size <= g_switch_value
	  /* If it's not public, and we're not going to reference it via
	     the small data area, there's no need to put it in the small
	     data section.  */
20468	  && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
20469	return true;
20470    }
20471
20472  return false;
20473}
20474
20475#endif /* USING_ELFOS_H */
20476
20477/* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P.  */
20478
20479static bool
20480rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
20481{
20482  return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
20483}
20484
20485/* Do not place thread-local symbols refs in the object blocks.  */
20486
20487static bool
20488rs6000_use_blocks_for_decl_p (const_tree decl)
20489{
20490  return !DECL_THREAD_LOCAL_P (decl);
20491}
20492
20493/* Return a REG that occurs in ADDR with coefficient 1.
20494   ADDR can be effectively incremented by incrementing REG.
20495
20496   r0 is special and we must not select it as an address
20497   register by this routine since our caller will try to
20498   increment the returned register via an "la" instruction.  */
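
/* For example, given ADDR = (plus (reg r9) (const_int 16)) this returns
   the rtx for r9; for nested sums the loop below walks down whichever
   arm can still contain a register.  */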
20499
20500rtx
20501find_addr_reg (rtx addr)
20502{
20503  while (GET_CODE (addr) == PLUS)
20504    {
20505      if (REG_P (XEXP (addr, 0))
20506	  && REGNO (XEXP (addr, 0)) != 0)
20507	addr = XEXP (addr, 0);
20508      else if (REG_P (XEXP (addr, 1))
20509	       && REGNO (XEXP (addr, 1)) != 0)
20510	addr = XEXP (addr, 1);
20511      else if (CONSTANT_P (XEXP (addr, 0)))
20512	addr = XEXP (addr, 1);
20513      else if (CONSTANT_P (XEXP (addr, 1)))
20514	addr = XEXP (addr, 0);
20515      else
20516	gcc_unreachable ();
20517    }
20518  gcc_assert (REG_P (addr) && REGNO (addr) != 0);
20519  return addr;
20520}
20521
20522void
20523rs6000_fatal_bad_address (rtx op)
20524{
20525  fatal_insn ("bad address", op);
20526}
20527
20528#if TARGET_MACHO
20529
20530vec<branch_island, va_gc> *branch_islands;
20531
20532/* Remember to generate a branch island for far calls to the given
20533   function.  */
20534
20535static void
20536add_compiler_branch_island (tree label_name, tree function_name,
20537			    int line_number)
20538{
20539  branch_island bi = {function_name, label_name, line_number};
20540  vec_safe_push (branch_islands, bi);
20541}
20542
/* NO_PREVIOUS_DEF checks whether a branch island for the function name
   has already been recorded.  */
20545
20546static int
20547no_previous_def (tree function_name)
20548{
20549  branch_island *bi;
20550  unsigned ix;
20551
20552  FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20553    if (function_name == bi->function_name)
20554      return 0;
20555  return 1;
20556}
20557
20558/* GET_PREV_LABEL gets the label name from the previous definition of
20559   the function.  */
20560
20561static tree
20562get_prev_label (tree function_name)
20563{
20564  branch_island *bi;
20565  unsigned ix;
20566
20567  FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20568    if (function_name == bi->function_name)
20569      return bi->label_name;
20570  return NULL_TREE;
20571}
20572
20573/* Generate external symbol indirection stubs (PIC and non-PIC).  */
20574
20575void
20576machopic_output_stub (FILE *file, const char *symb, const char *stub)
20577{
20578  unsigned int length;
20579  char *symbol_name, *lazy_ptr_name;
20580  char *local_label_0;
20581  static unsigned label = 0;
20582
20583  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
20584  symb = (*targetm.strip_name_encoding) (symb);
20585
20586  length = strlen (symb);
20587  symbol_name = XALLOCAVEC (char, length + 32);
20588  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20589
20590  lazy_ptr_name = XALLOCAVEC (char, length + 32);
20591  GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
20592
20593  if (MACHOPIC_PURE)
20594    {
20595      switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
20596      fprintf (file, "\t.align 5\n");
20597
20598      fprintf (file, "%s:\n", stub);
20599      fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20600
20601      label++;
20602      local_label_0 = XALLOCAVEC (char, 16);
20603      sprintf (local_label_0, "L%u$spb", label);
20604
20605      fprintf (file, "\tmflr r0\n");
20606      fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
20607      fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
20608      fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
20609	       lazy_ptr_name, local_label_0);
20610      fprintf (file, "\tmtlr r0\n");
20611      fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
20612	       (TARGET_64BIT ? "ldu" : "lwzu"),
20613	       lazy_ptr_name, local_label_0);
20614      fprintf (file, "\tmtctr r12\n");
20615      fprintf (file, "\tbctr\n");
20616    }
20617  else /* mdynamic-no-pic or mkernel.  */
20618    {
20619      switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
20620      fprintf (file, "\t.align 4\n");
20621
20622      fprintf (file, "%s:\n", stub);
20623      fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20624
20625      fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
20626      fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
20627	       (TARGET_64BIT ? "ldu" : "lwzu"),
20628	       lazy_ptr_name);
20629      fprintf (file, "\tmtctr r12\n");
20630      fprintf (file, "\tbctr\n");
20631    }
20632
20633  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20634  fprintf (file, "%s:\n", lazy_ptr_name);
20635  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20636  fprintf (file, "%sdyld_stub_binding_helper\n",
20637	   (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
20638}
20639
20640/* Legitimize PIC addresses.  If the address is already
20641   position-independent, we return ORIG.  Newly generated
   position-independent addresses go into a reg.  This is REG if
   nonzero, otherwise we allocate register(s) as necessary.  */
20644
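/* True iff X fits a signed 16-bit immediate, i.e. -0x8000 <= X <= 0x7fff;
   adding 0x8000 maps exactly that range onto [0, 0xffff].  */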
20645#define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
20646
20647rtx
20648rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
20649					rtx reg)
20650{
20651  rtx base, offset;
20652
20653  if (reg == NULL && !reload_completed)
20654    reg = gen_reg_rtx (Pmode);
20655
20656  if (GET_CODE (orig) == CONST)
20657    {
20658      rtx reg_temp;
20659
20660      if (GET_CODE (XEXP (orig, 0)) == PLUS
20661	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
20662	return orig;
20663
20664      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
20665
20666      /* Use a different reg for the intermediate value, as
20667	 it will be marked UNCHANGING.  */
20668      reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
20669      base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
20670						     Pmode, reg_temp);
20671      offset =
20672	rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
20673						Pmode, reg);
20674
20675      if (CONST_INT_P (offset))
20676	{
20677	  if (SMALL_INT (offset))
20678	    return plus_constant (Pmode, base, INTVAL (offset));
20679	  else if (!reload_completed)
20680	    offset = force_reg (Pmode, offset);
20681	  else
20682	    {
20683 	      rtx mem = force_const_mem (Pmode, orig);
20684	      return machopic_legitimize_pic_address (mem, Pmode, reg);
20685	    }
20686	}
20687      return gen_rtx_PLUS (Pmode, base, offset);
20688    }
20689
20690  /* Fall back on generic machopic code.  */
20691  return machopic_legitimize_pic_address (orig, mode, reg);
20692}
20693
20694/* Output a .machine directive for the Darwin assembler, and call
20695   the generic start_file routine.  */
20696
20697static void
20698rs6000_darwin_file_start (void)
20699{
20700  static const struct
20701  {
20702    const char *arg;
20703    const char *name;
20704    HOST_WIDE_INT if_set;
20705  } mapping[] = {
20706    { "ppc64", "ppc64", MASK_64BIT },
20707    { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
20708    { "power4", "ppc970", 0 },
20709    { "G5", "ppc970", 0 },
20710    { "7450", "ppc7450", 0 },
20711    { "7400", "ppc7400", MASK_ALTIVEC },
20712    { "G4", "ppc7400", 0 },
20713    { "750", "ppc750", 0 },
20714    { "740", "ppc750", 0 },
20715    { "G3", "ppc750", 0 },
20716    { "604e", "ppc604e", 0 },
20717    { "604", "ppc604", 0 },
20718    { "603e", "ppc603", 0 },
20719    { "603", "ppc603", 0 },
20720    { "601", "ppc601", 0 },
20721    { NULL, "ppc", 0 } };
20722  const char *cpu_id = "";
20723  size_t i;
20724
20725  rs6000_file_start ();
20726  darwin_file_start ();
20727
20728  /* Determine the argument to -mcpu=.  Default to G3 if not specified.  */
20729
20730  if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
20731    cpu_id = rs6000_default_cpu;
20732
20733  if (OPTION_SET_P (rs6000_cpu_index))
20734    cpu_id = processor_target_table[rs6000_cpu_index].name;
20735
20736  /* Look through the mapping array.  Pick the first name that either
20737     matches the argument, has a bit set in IF_SET that is also set
20738     in the target flags, or has a NULL name.  */
20739
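  /* For example, "-mcpu=G5" stops at the "G5" row and emits
     ".machine ppc970", while an unrecognized CPU string with
     MASK_ALTIVEC set in the ISA flags stops at the "7400" row; the
     NULL-terminated final row makes plain "ppc" the fallback.  */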
20740  i = 0;
20741  while (mapping[i].arg != NULL
20742	 && strcmp (mapping[i].arg, cpu_id) != 0
20743	 && (mapping[i].if_set & rs6000_isa_flags) == 0)
20744    i++;
20745
20746  fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
20747}
20748
20749#endif /* TARGET_MACHO */
20750
20751#if TARGET_ELF
20752static int
20753rs6000_elf_reloc_rw_mask (void)
20754{
20755  if (flag_pic)
20756    return 3;
20757  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20758#if defined (POWERPC_NETBSD)
20759    return 3;
20760#else
20761    return 2;
20762#endif
20763  else
20764    return 0;
20765}
20766
20767/* Record an element in the table of global constructors.  SYMBOL is
20768   a SYMBOL_REF of the function to be called; PRIORITY is a number
20769   between 0 and MAX_INIT_PRIORITY.
20770
20771   This differs from default_named_section_asm_out_constructor in
20772   that we have special handling for -mrelocatable.  */
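
/* For example, with -mrelocatable a default-priority constructor lands
   in ".ctors" as "\t.long (symbol)@fixup", while one with priority 100
   lands in ".ctors.65435" (MAX_INIT_PRIORITY - 100), so the linker's
   increasing sort runs constructors in the intended order.  */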
20773
20774static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
20775static void
20776rs6000_elf_asm_out_constructor (rtx symbol, int priority)
20777{
20778  const char *section = ".ctors";
20779  char buf[18];
20780
20781  if (priority != DEFAULT_INIT_PRIORITY)
20782    {
20783      sprintf (buf, ".ctors.%.5u",
20784	       /* Invert the numbering so the linker puts us in the proper
20785		  order; constructors are run from right to left, and the
20786		  linker sorts in increasing order.  */
20787	       MAX_INIT_PRIORITY - priority);
20788      section = buf;
20789    }
20790
20791  switch_to_section (get_section (section, SECTION_WRITE, NULL));
20792  assemble_align (POINTER_SIZE);
20793
20794  if (DEFAULT_ABI == ABI_V4
20795      && (TARGET_RELOCATABLE || flag_pic > 1))
20796    {
20797      fputs ("\t.long (", asm_out_file);
20798      output_addr_const (asm_out_file, symbol);
20799      fputs (")@fixup\n", asm_out_file);
20800    }
20801  else
20802    assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20803}
20804
20805static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
20806static void
20807rs6000_elf_asm_out_destructor (rtx symbol, int priority)
20808{
20809  const char *section = ".dtors";
20810  char buf[18];
20811
20812  if (priority != DEFAULT_INIT_PRIORITY)
20813    {
20814      sprintf (buf, ".dtors.%.5u",
20815	       /* Invert the numbering so the linker puts us in the proper
20816		  order; constructors are run from right to left, and the
20817		  linker sorts in increasing order.  */
20818	       MAX_INIT_PRIORITY - priority);
20819      section = buf;
20820    }
20821
20822  switch_to_section (get_section (section, SECTION_WRITE, NULL));
20823  assemble_align (POINTER_SIZE);
20824
20825  if (DEFAULT_ABI == ABI_V4
20826      && (TARGET_RELOCATABLE || flag_pic > 1))
20827    {
20828      fputs ("\t.long (", asm_out_file);
20829      output_addr_const (asm_out_file, symbol);
20830      fputs (")@fixup\n", asm_out_file);
20831    }
20832  else
20833    assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20834}
20835
20836void
20837rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
20838{
20839  if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
20840    {
20841      fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
20842      ASM_OUTPUT_LABEL (file, name);
20843      fputs (DOUBLE_INT_ASM_OP, file);
20844      rs6000_output_function_entry (file, name);
20845      fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
20846      if (DOT_SYMBOLS)
20847	{
20848	  fputs ("\t.size\t", file);
20849	  assemble_name (file, name);
20850	  fputs (",24\n\t.type\t.", file);
20851	  assemble_name (file, name);
20852	  fputs (",@function\n", file);
20853	  if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
20854	    {
20855	      fputs ("\t.globl\t.", file);
20856	      assemble_name (file, name);
20857	      putc ('\n', file);
20858	    }
20859	}
20860      else
20861	ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20862      ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20863      rs6000_output_function_entry (file, name);
20864      fputs (":\n", file);
20865      return;
20866    }
20867
20868  int uses_toc;
20869  if (DEFAULT_ABI == ABI_V4
20870      && (TARGET_RELOCATABLE || flag_pic > 1)
20871      && !TARGET_SECURE_PLT
20872      && (!constant_pool_empty_p () || crtl->profile)
20873      && (uses_toc = uses_TOC ()))
20874    {
20875      char buf[256];
20876
20877      if (uses_toc == 2)
20878	switch_to_other_text_partition ();
20879      (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20880
20881      fprintf (file, "\t.long ");
20882      assemble_name (file, toc_label_name);
20883      need_toc_init = 1;
20884      putc ('-', file);
20885      ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20886      assemble_name (file, buf);
20887      putc ('\n', file);
20888      if (uses_toc == 2)
20889	switch_to_other_text_partition ();
20890    }
20891
20892  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20893  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20894
20895  if (TARGET_CMODEL == CMODEL_LARGE
20896      && rs6000_global_entry_point_prologue_needed_p ())
20897    {
20898      char buf[256];
20899
20900      (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20901
20902      fprintf (file, "\t.quad .TOC.-");
20903      ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20904      assemble_name (file, buf);
20905      putc ('\n', file);
20906    }
20907
20908  if (DEFAULT_ABI == ABI_AIX)
20909    {
20910      const char *desc_name, *orig_name;
20911
20912      orig_name = (*targetm.strip_name_encoding) (name);
20913      desc_name = orig_name;
20914      while (*desc_name == '.')
20915	desc_name++;
20916
20917      if (TREE_PUBLIC (decl))
20918	fprintf (file, "\t.globl %s\n", desc_name);
20919
20920      fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20921      fprintf (file, "%s:\n", desc_name);
20922      fprintf (file, "\t.long %s\n", orig_name);
20923      fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
20924      fputs ("\t.long 0\n", file);
20925      fprintf (file, "\t.previous\n");
20926    }
20927  ASM_OUTPUT_LABEL (file, name);
20928}
20929
20930static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
20931static void
20932rs6000_elf_file_end (void)
20933{
20934#ifdef HAVE_AS_GNU_ATTRIBUTE
20935  /* ??? The value emitted depends on options active at file end.
20936     Assume anyone using #pragma or attributes that might change
20937     options knows what they are doing.  */
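  /* Tag_GNU_Power_ABI_FP (attribute 4): the low two bits describe the
     scalar float ABI (1 = hard, 2 = soft) and the next two bits the
     long double format (1 = 128-bit IBM, 2 = 64-bit, 3 = 128-bit IEEE),
     matching the values computed below.  */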
20938  if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
20939      && rs6000_passes_float)
20940    {
20941      int fp;
20942
20943      if (TARGET_HARD_FLOAT)
20944	fp = 1;
20945      else
20946	fp = 2;
20947      if (rs6000_passes_long_double)
20948	{
20949	  if (!TARGET_LONG_DOUBLE_128)
20950	    fp |= 2 * 4;
20951	  else if (TARGET_IEEEQUAD)
20952	    fp |= 3 * 4;
20953	  else
20954	    fp |= 1 * 4;
20955	}
20956      fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
20957    }
20958  if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
20959    {
20960      if (rs6000_passes_vector)
20961	fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
20962		 (TARGET_ALTIVEC_ABI ? 2 : 1));
20963      if (rs6000_returns_struct)
20964	fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
20965		 aix_struct_return ? 2 : 1);
20966    }
20967#endif
20968#if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
20969  if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
20970    file_end_indicate_exec_stack ();
20971#endif
20972
20973  if (flag_split_stack)
20974    file_end_indicate_split_stack ();
20975
20976  if (cpu_builtin_p)
20977    {
      /* We have expanded a CPU builtin, so we need to emit a reference to
	 the special symbol that libc uses to declare that it supports the
	 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  */
20981      switch_to_section (data_section);
20982      fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
20983      fprintf (asm_out_file, "\t%s %s\n",
20984	       TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
20985    }
20986}
20987#endif
20988
20989#if TARGET_XCOFF
20990
20991#ifndef HAVE_XCOFF_DWARF_EXTRAS
20992#define HAVE_XCOFF_DWARF_EXTRAS 0
20993#endif
20994
20995static enum unwind_info_type
20996rs6000_xcoff_debug_unwind_info (void)
20997{
20998  return UI_NONE;
20999}
21000
21001static void
21002rs6000_xcoff_asm_output_anchor (rtx symbol)
21003{
21004  char buffer[100];
21005
21006  sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
21007	   SYMBOL_REF_BLOCK_OFFSET (symbol));
21008  fprintf (asm_out_file, "%s", SET_ASM_OP);
21009  RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
21010  fprintf (asm_out_file, ",");
21011  RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
21012  fprintf (asm_out_file, "\n");
21013}
21014
21015static void
21016rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
21017{
21018  fputs (GLOBAL_ASM_OP, stream);
21019  RS6000_OUTPUT_BASENAME (stream, name);
21020  putc ('\n', stream);
21021}
21022
/* A get_unnamed_section callback, used for read-only sections.  The
   DIRECTIVE argument is non-null for the private read-only data
   section.  */
21025
21026static void
21027rs6000_xcoff_output_readonly_section_asm_op (const char *directive)
21028{
21029  fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
21030	   directive
21031	   ? xcoff_private_rodata_section_name
21032	   : xcoff_read_only_section_name,
21033	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21034}
21035
21036/* Likewise for read-write sections.  */
21037
21038static void
21039rs6000_xcoff_output_readwrite_section_asm_op (const char *)
21040{
21041  fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
21042	   xcoff_private_data_section_name,
21043	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21044}
21045
21046static void
21047rs6000_xcoff_output_tls_section_asm_op (const char *directive)
21048{
21049  fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
21050	   directive
21051	   ? xcoff_private_data_section_name
21052	   : xcoff_tls_data_section_name,
21053	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21054}
21055
21056/* A get_unnamed_section callback, used for switching to toc_section.  */
21057
21058static void
21059rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
21060{
21061  if (TARGET_MINIMAL_TOC)
21062    {
21063      /* toc_section is always selected at least once from
21064	 rs6000_xcoff_file_start, so this is guaranteed to
21065	 always be defined once and only once in each file.  */
21066      if (!toc_initialized)
21067	{
21068	  fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
21069	  fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
21070	  toc_initialized = 1;
21071	}
21072      fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
21073	       (TARGET_32BIT ? "" : ",3"));
21074    }
21075  else
21076    fputs ("\t.toc\n", asm_out_file);
21077}
21078
21079/* Implement TARGET_ASM_INIT_SECTIONS.  */
21080
21081static void
21082rs6000_xcoff_asm_init_sections (void)
21083{
21084  read_only_data_section
21085    = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21086			   NULL);
21087
21088  private_data_section
21089    = get_unnamed_section (SECTION_WRITE,
21090			   rs6000_xcoff_output_readwrite_section_asm_op,
21091			   NULL);
21092
21093  read_only_private_data_section
21094    = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21095			   "");
21096
21097  tls_data_section
21098    = get_unnamed_section (SECTION_TLS,
21099			   rs6000_xcoff_output_tls_section_asm_op,
21100			   NULL);
21101
21102  tls_private_data_section
21103    = get_unnamed_section (SECTION_TLS,
21104			   rs6000_xcoff_output_tls_section_asm_op,
21105			   "");
21106
21107  toc_section
21108    = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
21109
21110  readonly_data_section = read_only_data_section;
21111}
21112
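/* AIX is always PIC, so any section containing relocations must be
   writable; returning 3 here tells varasm that relocations against both
   local and global symbols force read-write placement (a reading of the
   reloc_rw_mask convention, cf. rs6000_elf_reloc_rw_mask above).  */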
21113static int
21114rs6000_xcoff_reloc_rw_mask (void)
21115{
21116  return 3;
21117}
21118
21119static void
21120rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
21121				tree decl ATTRIBUTE_UNUSED)
21122{
21123  int smclass;
21124  static const char * const suffix[7]
21125    = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
21126
21127  if (flags & SECTION_EXCLUDE)
21128    smclass = 6;
21129  else if (flags & SECTION_DEBUG)
21130    {
21131      fprintf (asm_out_file, "\t.dwsect %s\n", name);
21132      return;
21133    }
21134  else if (flags & SECTION_CODE)
21135    smclass = 0;
21136  else if (flags & SECTION_TLS)
21137    {
21138      if (flags & SECTION_BSS)
21139	smclass = 5;
21140      else
21141	smclass = 4;
21142    }
21143  else if (flags & SECTION_WRITE)
21144    {
21145      if (flags & SECTION_BSS)
21146	smclass = 3;
21147      else
21148	smclass = 2;
21149    }
21150  else
21151    smclass = 1;
21152
21153  fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
21154	   (flags & SECTION_CODE) ? "." : "",
21155	   name, suffix[smclass], flags & SECTION_ENTSIZE);
21156}
21157
21158#define IN_NAMED_SECTION(DECL) \
21159  ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
21160   && DECL_SECTION_NAME (DECL) != NULL)
21161
21162static section *
21163rs6000_xcoff_select_section (tree decl, int reloc,
21164			     unsigned HOST_WIDE_INT align)
21165{
  /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
     a named section.  */
21168  if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
21169    {
21170      resolve_unique_section (decl, reloc, true);
21171      if (IN_NAMED_SECTION (decl))
21172	return get_named_section (decl, NULL, reloc);
21173    }
21174
21175  if (decl_readonly_section (decl, reloc))
21176    {
21177      if (TREE_PUBLIC (decl))
21178	return read_only_data_section;
21179      else
21180	return read_only_private_data_section;
21181    }
21182  else
21183    {
21184#if HAVE_AS_TLS
21185      if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21186	{
21187	  if (bss_initializer_p (decl))
21188	    return tls_comm_section;
21189	  else if (TREE_PUBLIC (decl))
21190	    return tls_data_section;
21191	  else
21192	    return tls_private_data_section;
21193	}
21194      else
21195#endif
	if (TREE_PUBLIC (decl))
	  return data_section;
	else
	  return private_data_section;
21200    }
21201}
21202
21203static void
21204rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
21205{
21206  const char *name;
21207
21208  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
21209  name = (*targetm.strip_name_encoding) (name);
21210  set_decl_section_name (decl, name);
21211}
21212
21213/* Select section for constant in constant pool.
21214
21215   On RS/6000, all constants are in the private read-only data area.
21216   However, if this is being placed in the TOC it must be output as a
21217   toc entry.  */
21218
21219static section *
21220rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
21221				 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
21222{
21223  if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
21224    return toc_section;
21225  else
21226    return read_only_private_data_section;
21227}
21228
21229/* Remove any trailing [DS] or the like from the symbol name.  */
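
/* For example, "foo[DS]" (or "*foo[DS]") becomes "foo".  The mapping
   classes appended by rs6000_xcoff_encode_section_info are all four
   characters including the brackets, which is what the 'len - 4' below
   assumes.  */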
21230
21231static const char *
21232rs6000_xcoff_strip_name_encoding (const char *name)
21233{
21234  size_t len;
21235  if (*name == '*')
21236    name++;
21237  len = strlen (name);
21238  if (name[len - 1] == ']')
21239    return ggc_alloc_string (name, len - 4);
21240  else
21241    return name;
21242}
21243
21244/* Section attributes.  AIX is always PIC.  */
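
/* For example, a code csect aligned to 32 bytes stores
   exact_log2 (32) == 5 in the SECTION_ENTSIZE bits of the flags
   returned below; rs6000_xcoff_asm_named_section above then prints that
   field back out as the numeric alignment operand of .csect.  */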
21245
21246static unsigned int
21247rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
21248{
21249  unsigned int align;
21250  unsigned int flags = default_section_type_flags (decl, name, reloc);
21251
21252  if (decl && DECL_P (decl) && VAR_P (decl) && bss_initializer_p (decl))
21253    flags |= SECTION_BSS;
21254
21255  /* Align to at least UNIT size.  */
21256  if (!decl || !DECL_P (decl))
21257    align = MIN_UNITS_PER_WORD;
21258  /* Align code CSECT to at least 32 bytes.  */
21259  else if ((flags & SECTION_CODE) != 0)
21260    align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 32);
21261  else
21262    /* Increase alignment of large objects if not already stricter.  */
21263    align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
21264		 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
21265		 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
21266
21267  return flags | (exact_log2 (align) & SECTION_ENTSIZE);
21268}
21269
21270/* Output at beginning of assembler file.
21271
21272   Initialize the section names for the RS/6000 at this point.
21273
21274   Specify filename, including full path, to assembler.
21275
21276   We want to go into the TOC section so at least one .toc will be emitted.
21277   Also, in order to output proper .bs/.es pairs, we need at least one static
21278   [RW] section emitted.
21279
21280   Finally, declare mcount when profiling to make the assembler happy.  */
21281
21282static void
21283rs6000_xcoff_file_start (void)
21284{
21285  rs6000_gen_section_name (&xcoff_bss_section_name,
21286			   main_input_filename, ".bss_");
21287  rs6000_gen_section_name (&xcoff_private_data_section_name,
21288			   main_input_filename, ".rw_");
21289  rs6000_gen_section_name (&xcoff_private_rodata_section_name,
21290			   main_input_filename, ".rop_");
21291  rs6000_gen_section_name (&xcoff_read_only_section_name,
21292			   main_input_filename, ".ro_");
21293  rs6000_gen_section_name (&xcoff_tls_data_section_name,
21294			   main_input_filename, ".tls_");
21295
21296  fputs ("\t.file\t", asm_out_file);
21297  output_quoted_string (asm_out_file, main_input_filename);
21298  fputc ('\n', asm_out_file);
21299  if (write_symbols != NO_DEBUG)
21300    switch_to_section (private_data_section);
21301  switch_to_section (toc_section);
21302  switch_to_section (text_section);
21303  if (profile_flag)
21304    fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
21305  rs6000_file_start ();
21306}
21307
21308/* Output at end of assembler file.
21309   On the RS/6000, referencing data should automatically pull in text.  */
21310
21311static void
21312rs6000_xcoff_file_end (void)
21313{
21314  switch_to_section (text_section);
21315  if (xcoff_tls_exec_model_detected)
21316    {
21317      /* Add a .ref to __tls_get_addr to force libpthread dependency.  */
21318      fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file);
21319    }
21320  fputs ("_section_.text:\n", asm_out_file);
21321  switch_to_section (data_section);
21322  fputs (TARGET_32BIT
21323	 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21324	 asm_out_file);
21326}
21327
21328struct declare_alias_data
21329{
21330  FILE *file;
21331  bool function_descriptor;
21332};
21333
/* Declare alias N.  A helper function for call_for_symbol_and_aliases.  */
21335
21336static bool
21337rs6000_declare_alias (struct symtab_node *n, void *d)
21338{
21339  struct declare_alias_data *data = (struct declare_alias_data *)d;
21340  /* Main symbol is output specially, because varasm machinery does part of
21341     the job for us - we do not need to declare .globl/lglobs and such.  */
21342  if (!n->alias || n->weakref)
21343    return false;
21344
21345  if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
21346    return false;
21347
  /* Prevent assemble_alias from trying to use the .set pseudo operation,
     which does not behave as expected by the middle-end.  */
21350  TREE_ASM_WRITTEN (n->decl) = true;
21351
21352  const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
21353  char *buffer = (char *) alloca (strlen (name) + 2);
21354  char *p;
21355  int dollar_inside = 0;
21356
  strcpy (buffer, name);
  p = strchr (buffer, '$');
  while (p)
    {
      *p = '_';
      dollar_inside++;
      p = strchr (p + 1, '$');
    }
21364  if (TREE_PUBLIC (n->decl))
21365    {
21366      if (!RS6000_WEAK || !DECL_WEAK (n->decl))
21367	{
	  if (dollar_inside)
	    {
	      if (data->function_descriptor)
		fprintf (data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
	      fprintf (data->file, "\t.rename %s,\"%s\"\n", buffer, name);
	    }
21373	  if (data->function_descriptor)
21374	    {
21375	      fputs ("\t.globl .", data->file);
21376	      RS6000_OUTPUT_BASENAME (data->file, buffer);
21377	      putc ('\n', data->file);
21378	    }
21379	  fputs ("\t.globl ", data->file);
21380	  assemble_name (data->file, buffer);
21381	  putc ('\n', data->file);
21382	}
21383#ifdef ASM_WEAKEN_DECL
21384      else if (DECL_WEAK (n->decl) && !data->function_descriptor)
21385	ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
21386#endif
21387    }
21388  else
21389    {
21390      if (dollar_inside)
21391	{
21392	  if (data->function_descriptor)
	    fprintf (data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
	  fprintf (data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21395	}
21396      if (data->function_descriptor)
21397	{
21398	  fputs ("\t.lglobl .", data->file);
21399	  RS6000_OUTPUT_BASENAME (data->file, buffer);
21400	  putc ('\n', data->file);
21401	}
21402      fputs ("\t.lglobl ", data->file);
21403      assemble_name (data->file, buffer);
21404      putc ('\n', data->file);
21405    }
21406  if (data->function_descriptor)
21407    putc ('.', data->file);
21408  ASM_OUTPUT_LABEL (data->file, buffer);
21409  return false;
21410}
21411
21412
21413#ifdef HAVE_GAS_HIDDEN
21414/* Helper function to calculate visibility of a DECL
21415   and return the value as a const string.  */
21416
21417static const char *
21418rs6000_xcoff_visibility (tree decl)
21419{
21420  static const char * const visibility_types[] = {
21421    "", ",protected", ",hidden", ",internal"
21422  };
21423
21424  enum symbol_visibility vis = DECL_VISIBILITY (decl);
21425  return visibility_types[vis];
21426}
21427#endif
21428
21429
21430/* This macro produces the initial definition of a function name.
21431   On the RS/6000, we need to place an extra '.' in the function name and
21432   output the function descriptor.
21433   Dollar signs are converted to underscores.
21434
21435   The csect for the function will have already been created when
21436   text_section was selected.  We do have to go back to that csect, however.
21437
21438   The third and fourth parameters to the .function pseudo-op (16 and 044)
21439   are placeholders which no longer have any use.
21440
   Because the AIX assembler's .set command has unexpected semantics, we
   output all aliases as alternative labels in front of the definition.  */
21443
21444void
21445rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
21446{
21447  char *buffer = (char *) alloca (strlen (name) + 1);
21448  char *p;
21449  int dollar_inside = 0;
21450  struct declare_alias_data data = {file, false};
21451
  strcpy (buffer, name);
  p = strchr (buffer, '$');
  while (p)
    {
      *p = '_';
      dollar_inside++;
      p = strchr (p + 1, '$');
    }
21459  if (TREE_PUBLIC (decl))
21460    {
21461      if (!RS6000_WEAK || !DECL_WEAK (decl))
21462	{
	  if (dollar_inside)
	    {
	      fprintf (file, "\t.rename .%s,\".%s\"\n", buffer, name);
	      fprintf (file, "\t.rename %s,\"%s\"\n", buffer, name);
	    }
21467	  fputs ("\t.globl .", file);
21468	  RS6000_OUTPUT_BASENAME (file, buffer);
21469#ifdef HAVE_GAS_HIDDEN
21470	  fputs (rs6000_xcoff_visibility (decl), file);
21471#endif
21472	  putc ('\n', file);
21473	}
21474    }
21475  else
21476    {
      if (dollar_inside)
	{
	  fprintf (file, "\t.rename .%s,\".%s\"\n", buffer, name);
	  fprintf (file, "\t.rename %s,\"%s\"\n", buffer, name);
	}
21481      fputs ("\t.lglobl .", file);
21482      RS6000_OUTPUT_BASENAME (file, buffer);
21483      putc ('\n', file);
21484    }
21485
21486  fputs ("\t.csect ", file);
21487  assemble_name (file, buffer);
21488  fputs (TARGET_32BIT ? "\n" : ",3\n", file);
21489
21490  ASM_OUTPUT_LABEL (file, buffer);
21491
21492  symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21493							&data, true);
21494  fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
21495  RS6000_OUTPUT_BASENAME (file, buffer);
21496  fputs (", TOC[tc0], 0\n", file);
21497
21498  in_section = NULL;
21499  switch_to_section (function_section (decl));
21500  putc ('.', file);
21501  ASM_OUTPUT_LABEL (file, buffer);
21502
21503  data.function_descriptor = true;
21504  symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21505							&data, true);
21506  if (!DECL_IGNORED_P (decl))
21507    {
21508      if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
21509	xcoffout_declare_function (file, decl, buffer);
21510      else if (dwarf_debuginfo_p ())
21511	{
21512	  name = (*targetm.strip_name_encoding) (name);
21513	  fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
21514	}
21515    }
21516  return;
21517}
21518
21519
21520/* Output assembly language to globalize a symbol from a DECL,
21521   possibly with visibility.  */
21522
21523void
21524rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
21525{
21526  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
21527  fputs (GLOBAL_ASM_OP, stream);
21528  assemble_name (stream, name);
21529#ifdef HAVE_GAS_HIDDEN
21530  fputs (rs6000_xcoff_visibility (decl), stream);
21531#endif
21532  putc ('\n', stream);
21533}
21534
21535/* Output assembly language to define a symbol as COMMON from a DECL,
21536   possibly with visibility.  */
21537
21538void
21539rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
					     tree decl,
21541					     const char *name,
21542					     unsigned HOST_WIDE_INT size,
21543					     unsigned int align)
21544{
21545  unsigned int align2 = 2;
21546
21547  if (align == 0)
21548    align = DATA_ABI_ALIGNMENT (TREE_TYPE (decl), DECL_ALIGN (decl));
21549
21550  if (align > 32)
21551    align2 = floor_log2 (align / BITS_PER_UNIT);
21552  else if (size > 4)
21553    align2 = 3;
21554
21555  if (! DECL_COMMON (decl))
21556    {
21557      /* Forget section.  */
21558      in_section = NULL;
21559
21560      /* Globalize TLS BSS.  */
21561      if (TREE_PUBLIC (decl) && DECL_THREAD_LOCAL_P (decl))
21562	{
21563	  fputs (GLOBAL_ASM_OP, stream);
21564	  assemble_name (stream, name);
21565	  fputc ('\n', stream);
21566	}
21567
21568      /* Switch to section and skip space.  */
21569      fputs ("\t.csect ", stream);
21570      assemble_name (stream, name);
21571      fprintf (stream, ",%u\n", align2);
21572      ASM_DECLARE_OBJECT_NAME (stream, name, decl);
21573      ASM_OUTPUT_SKIP (stream, size ? size : 1);
21574      return;
21575    }
21576
21577  if (TREE_PUBLIC (decl))
21578    {
21579      fprintf (stream,
21580	       "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%u" ,
21581	       name, size, align2);
21582
21583#ifdef HAVE_GAS_HIDDEN
21584      if (decl != NULL)
21585	fputs (rs6000_xcoff_visibility (decl), stream);
21586#endif
21587      putc ('\n', stream);
21588    }
21589  else
21590      fprintf (stream,
21591	       "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%s,%u\n",
21592	       (*targetm.strip_name_encoding) (name), size, name, align2);
21593}
21594
/* This macro produces the initial definition of an object (variable) name.
   Because the AIX assembler's .set command has unexpected semantics, we
   output all aliases as alternative labels in front of the definition.  */
21598
21599void
21600rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
21601{
21602  struct declare_alias_data data = {file, false};
21603  ASM_OUTPUT_LABEL (file, name);
21604  symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21605							       &data, true);
21606}
21607
/* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'.  */
21609
21610void
21611rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
21612{
21613  fputs (integer_asm_op (size, FALSE), file);
21614  assemble_name (file, label);
21615  fputs ("-$", file);
21616}
21617
21618/* Output a symbol offset relative to the dbase for the current object.
21619   We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
21620   signed offsets.
21621
21622   __gcc_unwind_dbase is embedded in all executables/libraries through
21623   libgcc/config/rs6000/crtdbase.S.  */
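
/* For example, with SIZE == 4 this emits the 4-byte integer directive
   followed by "LABEL-__gcc_unwind_dbase", yielding a signed offset that
   the unwinder adds back to the runtime address of the dbase symbol
   (LABEL here is whatever dwarf2out passes in).  */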
21624
21625void
21626rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
21627{
21628  fputs (integer_asm_op (size, FALSE), file);
21629  assemble_name (file, label);
21630  fputs("-__gcc_unwind_dbase", file);
21631}
21632
21633#ifdef HAVE_AS_TLS
21634static void
21635rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
21636{
21637  rtx symbol;
21638  int flags;
21639  const char *symname;
21640
21641  default_encode_section_info (decl, rtl, first);
21642
21643  /* Careful not to prod global register variables.  */
21644  if (!MEM_P (rtl))
21645    return;
21646  symbol = XEXP (rtl, 0);
21647  if (!SYMBOL_REF_P (symbol))
21648    return;
21649
21650  flags = SYMBOL_REF_FLAGS (symbol);
21651
21652  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21653    flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
21654
21655  SYMBOL_REF_FLAGS (symbol) = flags;
21656
21657  symname = XSTR (symbol, 0);
21658
21659  /* Append CSECT mapping class, unless the symbol already is qualified.
21660     Aliases are implemented as labels, so the symbol name should not add
21661     a mapping class.  */
21662  if (decl
21663      && DECL_P (decl)
21664      && VAR_OR_FUNCTION_DECL_P (decl)
21665      && (symtab_node::get (decl) == NULL
21666	  || symtab_node::get (decl)->alias == 0)
21667      && symname[strlen (symname) - 1] != ']')
21668    {
21669      const char *smclass = NULL;
21670
21671      if (TREE_CODE (decl) == FUNCTION_DECL)
21672	smclass = "[DS]";
21673      else if (DECL_THREAD_LOCAL_P (decl))
21674	{
21675	  if (bss_initializer_p (decl))
21676	    smclass = "[UL]";
21677	  else if (flag_data_sections)
21678	    smclass = "[TL]";
21679	}
21680      else if (DECL_EXTERNAL (decl))
21681	smclass = "[UA]";
21682      else if (bss_initializer_p (decl))
21683	smclass = "[BS]";
21684      else if (flag_data_sections)
21685	{
	  /* This must exactly match the logic of
	     rs6000_xcoff_select_section.  */
21687	  if (decl_readonly_section (decl, compute_reloc_for_var (decl)))
21688	    smclass = "[RO]";
21689	  else
21690	    smclass = "[RW]";
21691	}
21692
21693      if (smclass != NULL)
21694	{
21695	  char *newname = XALLOCAVEC (char, strlen (symname) + 5);
21696
21697	  strcpy (newname, symname);
21698	  strcat (newname, smclass);
21699	  XSTR (symbol, 0) = ggc_strdup (newname);
21700	}
21701    }
21702}
21703#endif /* HAVE_AS_TLS */
21704#endif /* TARGET_XCOFF */
21705
21706void
21707rs6000_asm_weaken_decl (FILE *stream, tree decl,
21708			const char *name, const char *val)
21709{
21710  fputs ("\t.weak\t", stream);
21711  assemble_name (stream, name);
21712  if (decl && TREE_CODE (decl) == FUNCTION_DECL
21713      && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21714    {
21715#if TARGET_XCOFF && HAVE_GAS_HIDDEN
21716      if (TARGET_XCOFF)
21717	fputs (rs6000_xcoff_visibility (decl), stream);
21718#endif
21719      fputs ("\n\t.weak\t.", stream);
21720      RS6000_OUTPUT_BASENAME (stream, name);
21721    }
21722#if TARGET_XCOFF && HAVE_GAS_HIDDEN
21723  if (TARGET_XCOFF)
21724    fputs (rs6000_xcoff_visibility (decl), stream);
21725#endif
21726  fputc ('\n', stream);
21727
21728  if (val)
21729    {
21730#ifdef ASM_OUTPUT_DEF
21731      ASM_OUTPUT_DEF (stream, name, val);
21732#endif
21733      if (decl && TREE_CODE (decl) == FUNCTION_DECL
21734	  && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21735	{
21736	  fputs ("\t.set\t.", stream);
21737	  RS6000_OUTPUT_BASENAME (stream, name);
21738	  fputs (",.", stream);
21739	  RS6000_OUTPUT_BASENAME (stream, val);
21740	  fputc ('\n', stream);
21741	}
21742    }
21743}
21744
21745
21746/* Return true if INSN should not be copied.  */
21747
21748static bool
21749rs6000_cannot_copy_insn_p (rtx_insn *insn)
21750{
21751  return recog_memoized (insn) >= 0
21752	 && get_attr_cannot_copy (insn);
21753}
21754
21755/* Compute a (partial) cost for rtx X.  Return true if the complete
21756   cost has been computed, and false if subexpressions should be
21757   scanned.  In either case, *TOTAL contains the cost result.  */
21758
21759static bool
21760rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
21761		  int opno ATTRIBUTE_UNUSED, int *total, bool speed)
21762{
21763  int code = GET_CODE (x);
21764
21765  switch (code)
21766    {
21767      /* On the RS/6000, if it is valid in the insn, it is free.  */
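      /* E.g., a CONST_INT satisfying the "I" constraint (a signed 16-bit
	 immediate) folds into a single addi when used under PLUS, so it
	 contributes no cost of its own.  */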
21768    case CONST_INT:
21769      if (((outer_code == SET
21770	    || outer_code == PLUS
21771	    || outer_code == MINUS)
21772	   && (satisfies_constraint_I (x)
21773	       || satisfies_constraint_L (x)))
21774	  || (outer_code == AND
21775	      && (satisfies_constraint_K (x)
21776		  || (mode == SImode
21777		      ? satisfies_constraint_L (x)
21778		      : satisfies_constraint_J (x))))
21779	  || ((outer_code == IOR || outer_code == XOR)
21780	      && (satisfies_constraint_K (x)
21781		  || (mode == SImode
21782		      ? satisfies_constraint_L (x)
21783		      : satisfies_constraint_J (x))))
21784	  || outer_code == ASHIFT
21785	  || outer_code == ASHIFTRT
21786	  || outer_code == LSHIFTRT
21787	  || outer_code == ROTATE
21788	  || outer_code == ROTATERT
21789	  || outer_code == ZERO_EXTRACT
21790	  || (outer_code == MULT
21791	      && satisfies_constraint_I (x))
21792	  || ((outer_code == DIV || outer_code == UDIV
21793	       || outer_code == MOD || outer_code == UMOD)
21794	      && exact_log2 (INTVAL (x)) >= 0)
21795	  || (outer_code == COMPARE
21796	      && (satisfies_constraint_I (x)
21797		  || satisfies_constraint_K (x)))
21798	  || ((outer_code == EQ || outer_code == NE)
21799	      && (satisfies_constraint_I (x)
21800		  || satisfies_constraint_K (x)
21801		  || (mode == SImode
21802		      ? satisfies_constraint_L (x)
21803		      : satisfies_constraint_J (x))))
21804	  || (outer_code == GTU
21805	      && satisfies_constraint_I (x))
21806	  || (outer_code == LTU
21807	      && satisfies_constraint_P (x)))
21808	{
21809	  *total = 0;
21810	  return true;
21811	}
21812      else if ((outer_code == PLUS
21813		&& reg_or_add_cint_operand (x, mode))
21814	       || (outer_code == MINUS
21815		   && reg_or_sub_cint_operand (x, mode))
21816	       || ((outer_code == SET
21817		    || outer_code == IOR
21818		    || outer_code == XOR)
21819		   && (INTVAL (x)
21820		       & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
21821	{
21822	  *total = COSTS_N_INSNS (1);
21823	  return true;
21824	}
21825      /* FALLTHRU */
21826
21827    case CONST_DOUBLE:
21828    case CONST_WIDE_INT:
21829    case CONST:
21830    case HIGH:
21831    case SYMBOL_REF:
21832      *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21833      return true;
21834
21835    case MEM:
      /* When optimizing for size, a MEM should be slightly more expensive
	 than generating the address, e.g., (plus (reg) (const)).
	 L1 cache latency is about two instructions.  */
21839      *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21840      if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
21841	*total += COSTS_N_INSNS (100);
21842      return true;
21843
21844    case LABEL_REF:
21845      *total = 0;
21846      return true;
21847
21848    case PLUS:
21849    case MINUS:
21850      if (FLOAT_MODE_P (mode))
21851	*total = rs6000_cost->fp;
21852      else
21853	*total = COSTS_N_INSNS (1);
21854      return false;
21855
21856    case MULT:
21857      if (CONST_INT_P (XEXP (x, 1))
21858	  && satisfies_constraint_I (XEXP (x, 1)))
21859	{
21860	  if (INTVAL (XEXP (x, 1)) >= -256
21861	      && INTVAL (XEXP (x, 1)) <= 255)
21862	    *total = rs6000_cost->mulsi_const9;
21863	  else
21864	    *total = rs6000_cost->mulsi_const;
21865	}
21866      else if (mode == SFmode)
21867	*total = rs6000_cost->fp;
21868      else if (FLOAT_MODE_P (mode))
21869	*total = rs6000_cost->dmul;
21870      else if (mode == DImode)
21871	*total = rs6000_cost->muldi;
21872      else
21873	*total = rs6000_cost->mulsi;
21874      return false;
21875
21876    case FMA:
21877      if (mode == SFmode)
21878	*total = rs6000_cost->fp;
21879      else
21880	*total = rs6000_cost->dmul;
21881      break;
21882
21883    case DIV:
21884    case MOD:
21885      if (FLOAT_MODE_P (mode))
21886	{
21887	  *total = mode == DFmode ? rs6000_cost->ddiv
21888				  : rs6000_cost->sdiv;
21889	  return false;
21890	}
21891      /* FALLTHRU */
21892
21893    case UDIV:
21894    case UMOD:
21895      if (CONST_INT_P (XEXP (x, 1))
21896	  && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
21897	{
21898	  if (code == DIV || code == MOD)
21899	    /* Shift, addze */
21900	    *total = COSTS_N_INSNS (2);
21901	  else
21902	    /* Shift */
21903	    *total = COSTS_N_INSNS (1);
21904	}
21905      else
21906	{
21907	  if (GET_MODE (XEXP (x, 1)) == DImode)
21908	    *total = rs6000_cost->divdi;
21909	  else
21910	    *total = rs6000_cost->divsi;
21911	}
      /* Add in shift and subtract for MOD unless we have a mod instruction.  */
21913      if (!TARGET_MODULO && (code == MOD || code == UMOD))
21914	*total += COSTS_N_INSNS (2);
21915      return false;
21916
21917    case CTZ:
21918      *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
21919      return false;
21920
21921    case FFS:
21922      *total = COSTS_N_INSNS (4);
21923      return false;
21924
21925    case POPCOUNT:
21926      *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
21927      return false;
21928
21929    case PARITY:
21930      *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
21931      return false;
21932
21933    case NOT:
21934      if (outer_code == AND || outer_code == IOR || outer_code == XOR)
21935	*total = 0;
21936      else
21937	*total = COSTS_N_INSNS (1);
21938      return false;
21939
21940    case AND:
21941      if (CONST_INT_P (XEXP (x, 1)))
21942	{
21943	  rtx left = XEXP (x, 0);
21944	  rtx_code left_code = GET_CODE (left);
21945
21946	  /* rotate-and-mask: 1 insn.  */
21947	  if ((left_code == ROTATE
21948	       || left_code == ASHIFT
21949	       || left_code == LSHIFTRT)
21950	      && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
21951	    {
21952	      *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
21953	      if (!CONST_INT_P (XEXP (left, 1)))
21954		*total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
21955	      *total += COSTS_N_INSNS (1);
21956	      return true;
21957	    }
21958
21959	  /* rotate-and-mask (no rotate), andi., andis.: 1 insn.  */
21960	  HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
21961	  if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
21962	      || (val & 0xffff) == val
21963	      || (val & 0xffff0000) == val
21964	      || ((val & 0xffff) == 0 && mode == SImode))
21965	    {
21966	      *total = rtx_cost (left, mode, AND, 0, speed);
21967	      *total += COSTS_N_INSNS (1);
21968	      return true;
21969	    }
21970
21971	  /* 2 insns.  */
21972	  if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
21973	    {
21974	      *total = rtx_cost (left, mode, AND, 0, speed);
21975	      *total += COSTS_N_INSNS (2);
21976	      return true;
21977	    }
21978	}
21979
21980      *total = COSTS_N_INSNS (1);
21981      return false;
21982
21983    case IOR:
21984      /* FIXME */
21985      *total = COSTS_N_INSNS (1);
21986      return true;
21987
21988    case CLZ:
21989    case XOR:
21990    case ZERO_EXTRACT:
21991      *total = COSTS_N_INSNS (1);
21992      return false;
21993
21994    case ASHIFT:
21995      /* The EXTSWSLI instruction is a combined instruction.  Don't count both
21996	 the sign extend and shift separately within the insn.  */
21997      if (TARGET_EXTSWSLI && mode == DImode
21998	  && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
21999	  && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
22000	{
22001	  *total = 0;
22002	  return false;
22003	}
22004      /* fall through */
22005
22006    case ASHIFTRT:
22007    case LSHIFTRT:
22008    case ROTATE:
22009    case ROTATERT:
22010      /* Handle mul_highpart.  */
22011      if (outer_code == TRUNCATE
22012	  && GET_CODE (XEXP (x, 0)) == MULT)
22013	{
22014	  if (mode == DImode)
22015	    *total = rs6000_cost->muldi;
22016	  else
22017	    *total = rs6000_cost->mulsi;
22018	  return true;
22019	}
22020      else if (outer_code == AND)
22021	*total = 0;
22022      else
22023	*total = COSTS_N_INSNS (1);
22024      return false;
22025
22026    case SIGN_EXTEND:
22027    case ZERO_EXTEND:
22028      if (MEM_P (XEXP (x, 0)))
22029	*total = 0;
22030      else
22031	*total = COSTS_N_INSNS (1);
22032      return false;
22033
22034    case COMPARE:
22035    case NEG:
22036    case ABS:
22037      if (!FLOAT_MODE_P (mode))
22038	{
22039	  *total = COSTS_N_INSNS (1);
22040	  return false;
22041	}
22042      /* FALLTHRU */
22043
22044    case FLOAT:
22045    case UNSIGNED_FLOAT:
22046    case FIX:
22047    case UNSIGNED_FIX:
22048    case FLOAT_TRUNCATE:
22049      *total = rs6000_cost->fp;
22050      return false;
22051
22052    case FLOAT_EXTEND:
22053      if (mode == DFmode)
22054	*total = rs6000_cost->sfdf_convert;
22055      else
22056	*total = rs6000_cost->fp;
22057      return false;
22058
22059    case CALL:
22060    case IF_THEN_ELSE:
22061      if (!speed)
22062	{
22063	  *total = COSTS_N_INSNS (1);
22064	  return true;
22065	}
22066      else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
22067	{
22068	  *total = rs6000_cost->fp;
22069	  return false;
22070	}
22071      break;
22072
22073    case NE:
22074    case EQ:
22075    case GTU:
22076    case LTU:
22077      /* Carry bit requires mode == Pmode.
22078	 NEG or PLUS already counted so only add one.  */
22079      if (mode == Pmode
22080	  && (outer_code == NEG || outer_code == PLUS))
22081	{
22082	  *total = COSTS_N_INSNS (1);
22083	  return true;
22084	}
22085      /* FALLTHRU */
22086
22087    case GT:
22088    case LT:
22089    case UNORDERED:
22090      if (outer_code == SET)
22091	{
22092	  if (XEXP (x, 1) == const0_rtx)
22093	    {
22094	      *total = COSTS_N_INSNS (2);
22095	      return true;
22096	    }
22097	  else
22098	    {
22099	      *total = COSTS_N_INSNS (3);
22100	      return false;
22101	    }
22102	}
22103      /* CC COMPARE.  */
22104      if (outer_code == COMPARE)
22105	{
22106	  *total = 0;
22107	  return true;
22108	}
22109      break;
22110
22111    case UNSPEC:
22112      if (XINT (x, 1) == UNSPECV_MMA_XXSETACCZ)
22113	{
22114	  *total = 0;
22115	  return true;
22116	}
22117      break;
22118
22119    default:
22120      break;
22121    }
22122
22123  return false;
22124}
22125
/* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost.  */
22127
22128static bool
22129rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
22130			int opno, int *total, bool speed)
22131{
22132  bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
22133
22134  fprintf (stderr,
22135	   "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
22136	   "opno = %d, total = %d, speed = %s, x:\n",
22137	   ret ? "complete" : "scan inner",
22138	   GET_MODE_NAME (mode),
22139	   GET_RTX_NAME (outer_code),
22140	   opno,
22141	   *total,
22142	   speed ? "true" : "false");
22143
22144  debug_rtx (x);
22145
22146  return ret;
22147}
22148
22149static int
22150rs6000_insn_cost (rtx_insn *insn, bool speed)
22151{
22152  if (recog_memoized (insn) < 0)
22153    return 0;
22154
22155  /* If we are optimizing for size, just use the length.  */
22156  if (!speed)
22157    return get_attr_length (insn);
22158
22159  /* Use the cost if provided.  */
22160  int cost = get_attr_cost (insn);
22161  if (cost > 0)
22162    return cost;
22163
22164  /* If the insn tells us how many insns there are, use that.  Otherwise use
22165     the length/4.  Adjust the insn length to remove the extra size that
22166     prefixed instructions take.  */
22167  int n = get_attr_num_insns (insn);
22168  if (n == 0)
22169    {
22170      int length = get_attr_length (insn);
22171      if (get_attr_prefixed (insn) == PREFIXED_YES)
22172	{
22173	  int adjust = 0;
22174	  ADJUST_INSN_LENGTH (insn, adjust);
22175	  length -= adjust;
22176	}
22177
22178      n = length / 4;
22179    }
22180
22181  enum attr_type type = get_attr_type (insn);
22182
22183  switch (type)
22184    {
22185    case TYPE_LOAD:
22186    case TYPE_FPLOAD:
22187    case TYPE_VECLOAD:
22188      cost = COSTS_N_INSNS (n + 1);
22189      break;
22190
22191    case TYPE_MUL:
22192      switch (get_attr_size (insn))
22193	{
22194	case SIZE_8:
22195	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
22196	  break;
22197	case SIZE_16:
22198	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
22199	  break;
22200	case SIZE_32:
22201	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
22202	  break;
22203	case SIZE_64:
22204	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
22205	  break;
22206	default:
22207	  gcc_unreachable ();
22208	}
22209      break;
22210    case TYPE_DIV:
22211      switch (get_attr_size (insn))
22212	{
22213	case SIZE_32:
22214	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
22215	  break;
22216	case SIZE_64:
22217	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
22218	  break;
22219	default:
22220	  gcc_unreachable ();
22221	}
22222      break;
22223
22224    case TYPE_FP:
22225      cost = n * rs6000_cost->fp;
22226      break;
22227    case TYPE_DMUL:
22228      cost = n * rs6000_cost->dmul;
22229      break;
22230    case TYPE_SDIV:
22231      cost = n * rs6000_cost->sdiv;
22232      break;
22233    case TYPE_DDIV:
22234      cost = n * rs6000_cost->ddiv;
22235      break;
22236
22237    case TYPE_SYNC:
22238    case TYPE_LOAD_L:
22239    case TYPE_MFCR:
22240    case TYPE_MFCRF:
22241      cost = COSTS_N_INSNS (n + 2);
22242      break;
22243
22244    default:
22245      cost = COSTS_N_INSNS (n);
22246    }
22247
22248  return cost;
22249}
22250
22251/* Debug form of ADDRESS_COST that is selected if -mdebug=cost.  */
22252
22253static int
22254rs6000_debug_address_cost (rtx x, machine_mode mode,
22255			   addr_space_t as, bool speed)
22256{
22257  int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
22258
22259  fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
22260	   ret, speed ? "true" : "false");
22261  debug_rtx (x);
22262
22263  return ret;
22264}
22265
22266
22267/* A C expression returning the cost of moving data from a register of class
22268   CLASS1 to one of CLASS2.  */
22269
22270static int
22271rs6000_register_move_cost (machine_mode mode,
22272			   reg_class_t from, reg_class_t to)
22273{
22274  int ret;
22275  reg_class_t rclass;
22276
22277  if (TARGET_DEBUG_COST)
22278    dbg_cost_ctrl++;
22279
22280  /* If we have VSX, we can easily move between FPR or Altivec registers,
22281     otherwise we can only easily move within classes.
22282     Do this first so we give best-case answers for union classes
22283     containing both gprs and vsx regs.  */
22284  HARD_REG_SET to_vsx, from_vsx;
22285  to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
22286  from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
22287  if (!hard_reg_set_empty_p (to_vsx)
22288      && !hard_reg_set_empty_p (from_vsx)
22289      && (TARGET_VSX
22290	  || hard_reg_set_intersect_p (to_vsx, from_vsx)))
22291    {
22292      int reg = FIRST_FPR_REGNO;
22293      if (TARGET_VSX
22294	  || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
22295	      && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
22296	reg = FIRST_ALTIVEC_REGNO;
22297      ret = 2 * hard_regno_nregs (reg, mode);
22298    }
22299
  /* Moves from/to GENERAL_REGS.  */
22301  else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
22302	   || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
22303    {
22304      if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22305	{
22306	  if (TARGET_DIRECT_MOVE)
22307	    {
22308	      /* Keep the cost for direct moves above that for within
22309		 a register class even if the actual processor cost is
22310		 comparable.  We do this because a direct move insn
22311		 can't be a nop, whereas with ideal register
22312		 allocation a move within the same class might turn
22313		 out to be a nop.  */
22314	      if (rs6000_tune == PROCESSOR_POWER9
22315		  || rs6000_tune == PROCESSOR_POWER10)
22316		ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22317	      else
22318		ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22319	      /* SFmode requires a conversion when moving between gprs
22320		 and vsx.  */
22321	      if (mode == SFmode)
22322		ret += 2;
22323	    }
22324	  else
22325	    ret = (rs6000_memory_move_cost (mode, rclass, false)
22326		   + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
22327	}
22328
22329      /* It's more expensive to move CR_REGS than CR0_REGS because of the
22330	 shift.  */
22331      else if (rclass == CR_REGS)
22332	ret = 4;
22333
      /* For those processors that have slow LR/CTR moves, make them more
	 expensive than memory in order to bias spills to memory.  */
22336      else if ((rs6000_tune == PROCESSOR_POWER6
22337		|| rs6000_tune == PROCESSOR_POWER7
22338		|| rs6000_tune == PROCESSOR_POWER8
22339		|| rs6000_tune == PROCESSOR_POWER9)
22340	       && reg_class_subset_p (rclass, SPECIAL_REGS))
22341        ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22342
22343      else
22344	/* A move will cost one instruction per GPR moved.  */
22345	ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22346    }
22347
22348  /* Everything else has to go through GENERAL_REGS.  */
22349  else
22350    ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
22351	   + rs6000_register_move_cost (mode, from, GENERAL_REGS));
22352
22353  if (TARGET_DEBUG_COST)
22354    {
22355      if (dbg_cost_ctrl == 1)
22356	fprintf (stderr,
22357		 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22358		 ret, GET_MODE_NAME (mode), reg_class_names[from],
22359		 reg_class_names[to]);
22360      dbg_cost_ctrl--;
22361    }
22362
22363  return ret;
22364}
22365
/* A C expression returning the cost of moving data of MODE from a register
   to or from memory.  */
22368
22369static int
22370rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
22371			 bool in ATTRIBUTE_UNUSED)
22372{
22373  int ret;
22374
22375  if (TARGET_DEBUG_COST)
22376    dbg_cost_ctrl++;
22377
22378  if (reg_classes_intersect_p (rclass, GENERAL_REGS))
22379    ret = 4 * hard_regno_nregs (0, mode);
22380  else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
22381	    || reg_classes_intersect_p (rclass, VSX_REGS)))
22382    ret = 4 * hard_regno_nregs (32, mode);
22383  else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
22384    ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
22385  else
22386    ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
22387
22388  if (TARGET_DEBUG_COST)
22389    {
22390      if (dbg_cost_ctrl == 1)
22391	fprintf (stderr,
22392		 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22393		 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
22394      dbg_cost_ctrl--;
22395    }
22396
22397  return ret;
22398}
22399
22400/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22401
22402   The register allocator chooses GEN_OR_VSX_REGS for the allocno
22403   class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22404   cost.  This happens a lot when TARGET_DIRECT_MOVE makes the register
22405   move cost between GENERAL_REGS and VSX_REGS low.
22406
22407   It might seem reasonable to use a union class.  After all, if usage
22408   of vsr is low and gpr high, it might make sense to spill gpr to vsr
22409   rather than memory.  However, in cases where register pressure of
22410   both is high, like the cactus_adm spec test, allowing
22411   GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22412   the first scheduling pass.  This is partly due to an allocno of
22413   GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22414   class, which gives too high a pressure for GENERAL_REGS and too low
22415   for VSX_REGS.  So, force a choice of the subclass here.
22416
22417   The best class is also the union if GENERAL_REGS and VSX_REGS have
22418   the same cost.  In that case we do use GEN_OR_VSX_REGS as the
22419   allocno class, since trying to narrow down the class by regno mode
22420   is prone to error.  For example, SImode is allowed in VSX regs and
22421   in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22422   it would be wrong to choose an allocno of GENERAL_REGS based on
22423   SImode.  */
22424
22425static reg_class_t
22426rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
22427					reg_class_t allocno_class,
22428					reg_class_t best_class)
22429{
22430  switch (allocno_class)
22431    {
22432    case GEN_OR_VSX_REGS:
22433      /* best_class must be a subset of allocno_class.  */
22434      gcc_checking_assert (best_class == GEN_OR_VSX_REGS
22435			   || best_class == GEN_OR_FLOAT_REGS
22436			   || best_class == VSX_REGS
22437			   || best_class == ALTIVEC_REGS
22438			   || best_class == FLOAT_REGS
22439			   || best_class == GENERAL_REGS
22440			   || best_class == BASE_REGS);
22441      /* Use best_class but choose wider classes when copying from the
22442	 wider class to best_class is cheap.  This mimics IRA choice
22443	 of allocno class.  */
22444      if (best_class == BASE_REGS)
22445	return GENERAL_REGS;
22446      if (TARGET_VSX && best_class == FLOAT_REGS)
22447	return VSX_REGS;
22448      return best_class;
22449
22450    case VSX_REGS:
22451      if (best_class == ALTIVEC_REGS)
	return ALTIVEC_REGS;
      /* FALLTHRU */

22454    default:
22455      break;
22456    }
22457
22458  return allocno_class;
22459}
22460
22461/* Load up a constant.  If the mode is a vector mode, splat the value across
22462   all of the vector elements.  */
22463
22464static rtx
22465rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
22466{
22467  rtx reg;
22468
22469  if (mode == SFmode || mode == DFmode)
22470    {
22471      rtx d = const_double_from_real_value (dconst, mode);
22472      reg = force_reg (mode, d);
22473    }
22474  else if (mode == V4SFmode)
22475    {
22476      rtx d = const_double_from_real_value (dconst, SFmode);
22477      rtvec v = gen_rtvec (4, d, d, d, d);
22478      reg = gen_reg_rtx (mode);
22479      rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22480    }
22481  else if (mode == V2DFmode)
22482    {
22483      rtx d = const_double_from_real_value (dconst, DFmode);
22484      rtvec v = gen_rtvec (2, d, d);
22485      reg = gen_reg_rtx (mode);
22486      rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22487    }
22488  else
22489    gcc_unreachable ();
22490
22491  return reg;
22492}
22493
22494/* Generate an FMA instruction.  */
22495
22496static void
22497rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
22498{
22499  machine_mode mode = GET_MODE (target);
22500  rtx dst;
22501
22502  dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
22503  gcc_assert (dst != NULL);
22504
22505  if (dst != target)
22506    emit_move_insn (target, dst);
22507}
22508
22509/* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a).  */
22510
22511static void
22512rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
22513{
22514  machine_mode mode = GET_MODE (dst);
22515  rtx r;
22516
22517  /* This is a tad more complicated, since the fnma_optab is for
22518     a different expression: fma(-m1, m2, a), which is the same
22519     thing except in the case of signed zeros.
22520
22521     Fortunately we know that if FMA is supported that FNMSUB is
22522     also supported in the ISA.  Just expand it directly.  */
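
  /* The two forms differ only in the sign of a zero result: when
     m1 * m2 == a exactly, fma (-m1, m2, a) rounds to +0.0 under
     round-to-nearest, while -fma (m1, m2, -a) gives -0.0.  */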
22523
22524  gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
22525
22526  r = gen_rtx_NEG (mode, a);
22527  r = gen_rtx_FMA (mode, m1, m2, r);
22528  r = gen_rtx_NEG (mode, r);
22529  emit_insn (gen_rtx_SET (dst, r));
22530}
22531
22532/* Newton-Raphson approximation of floating point divide DST = N/D.  If NOTE_P,
22533   add a reg_note saying that this was a division.  Support both scalar and
22534   vector divide.  Assumes no trapping math and finite arguments.  */
22535
22536void
22537rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
22538{
22539  machine_mode mode = GET_MODE (dst);
22540  rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
22541  int i;
22542
22543  /* Low precision estimates guarantee 5 bits of accuracy.  High
22544     precision estimates guarantee 14 bits of accuracy.  SFmode
22545     requires 23 bits of accuracy.  DFmode requires 52 bits of
22546     accuracy.  Each pass at least doubles the accuracy, leading
22547     to the following.  */
22548  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22549  if (mode == DFmode || mode == V2DFmode)
22550    passes++;
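
  /* E.g., from a 5-bit estimate three passes give 5 -> 10 -> 20 -> 40
     correct bits (two passes, at 20 bits, would fall short of SFmode's 23),
     and DFmode's 52 bits needs the fourth pass.  With TARGET_RECIP_PRECISION
     the 14-bit estimate covers SFmode in one pass (28 bits) and DFmode in
     two (56 bits).  */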
22551
22552  enum insn_code code = optab_handler (smul_optab, mode);
22553  insn_gen_fn gen_mul = GEN_FCN (code);
22554
22555  gcc_assert (code != CODE_FOR_nothing);
22556
22557  one = rs6000_load_constant_and_splat (mode, dconst1);
22558
22559  /* x0 = 1./d estimate */
22560  x0 = gen_reg_rtx (mode);
22561  emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
22562					      UNSPEC_FRES)));
22563
22564  /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i).  */
  if (passes > 1)
    {
      /* e0 = 1. - d * x0  */
      e0 = gen_reg_rtx (mode);
      rs6000_emit_nmsub (e0, d, x0, one);

      /* x1 = x0 + e0 * x0  */
      x1 = gen_reg_rtx (mode);
      rs6000_emit_madd (x1, e0, x0, x0);

      for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
	   ++i, xprev = xnext, eprev = enext)
	{
	  /* enext = eprev * eprev  */
	  enext = gen_reg_rtx (mode);
	  emit_insn (gen_mul (enext, eprev, eprev));

	  /* xnext = xprev + enext * xprev  */
	  xnext = gen_reg_rtx (mode);
	  rs6000_emit_madd (xnext, enext, xprev, xprev);
	}
    }
  else
    xprev = x0;
22589
22590  /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i).  */
22591
22592  /* u = n * xprev  */
22593  u = gen_reg_rtx (mode);
22594  emit_insn (gen_mul (u, n, xprev));
22595
22596  /* v = n - (d * u)  */
22597  v = gen_reg_rtx (mode);
22598  rs6000_emit_nmsub (v, d, u, n);
22599
22600  /* dst = (v * xprev) + u  */
22601  rs6000_emit_madd (dst, v, xprev, u);
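
  /* Altogether dst = u + v * xprev
     = n * x + (n - d * n * x) * x = n * x * (2 - d * x),
     one final Newton-Raphson step folded into the quotient computation.  */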
22602
22603  if (note_p)
22604    add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
22605}
22606
22607/* Goldschmidt's Algorithm for single/double-precision floating point
22608   sqrt and rsqrt.  Assumes no trapping math and finite arguments.  */
22609
22610void
22611rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
22612{
22613  machine_mode mode = GET_MODE (src);
22614  rtx e = gen_reg_rtx (mode);
22615  rtx g = gen_reg_rtx (mode);
22616  rtx h = gen_reg_rtx (mode);
22617
22618  /* Low precision estimates guarantee 5 bits of accuracy.  High
22619     precision estimates guarantee 14 bits of accuracy.  SFmode
22620     requires 23 bits of accuracy.  DFmode requires 52 bits of
22621     accuracy.  Each pass at least doubles the accuracy, leading
22622     to the following.  */
22623  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22624  if (mode == DFmode || mode == V2DFmode)
22625    passes++;
22626
22627  int i;
22628  rtx mhalf;
22629  enum insn_code code = optab_handler (smul_optab, mode);
22630  insn_gen_fn gen_mul = GEN_FCN (code);
22631
22632  gcc_assert (code != CODE_FOR_nothing);
22633
22634  mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
22635
22636  /* e = rsqrt estimate */
22637  emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
22638					     UNSPEC_RSQRT)));
22639
22640  /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0).  */
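  /* The estimate e is +Inf when src == 0.0, so g = e * src below would be
     Inf * 0.0 = NaN; forcing e to zero instead makes sqrt(0.0) come out
     as 0.0.  */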
22641  if (!recip)
22642    {
22643      rtx zero = force_reg (mode, CONST0_RTX (mode));
22644
22645      if (mode == SFmode)
22646	{
22647	  rtx target = emit_conditional_move (e, { GT, src, zero, mode },
22648					      e, zero, mode, 0);
22649	  if (target != e)
22650	    emit_move_insn (e, target);
22651	}
22652      else
22653	{
22654	  rtx cond = gen_rtx_GT (VOIDmode, e, zero);
22655	  rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
22656	}
22657    }
22658
22659  /* g = sqrt estimate.  */
22660  emit_insn (gen_mul (g, e, src));
22661  /* h = 1/(2*sqrt) estimate.  */
22662  emit_insn (gen_mul (h, e, mhalf));
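
  /* Invariants of the Goldschmidt iteration below: g ~ sqrt(src) and
     h ~ 1/(2*sqrt(src)), so g * h ~ 1/2.  Each pass computes the residual
     t = 1/2 - g * h, then refines g1 = g + g * t and h1 = h + h * t,
     roughly doubling the number of correct bits each time.  */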
22663
22664  if (recip)
22665    {
22666      if (passes == 1)
22667	{
22668	  rtx t = gen_reg_rtx (mode);
22669	  rs6000_emit_nmsub (t, g, h, mhalf);
22670	  /* Apply correction directly to 1/rsqrt estimate.  */
22671	  rs6000_emit_madd (dst, e, t, e);
22672	}
22673      else
22674	{
22675	  for (i = 0; i < passes; i++)
22676	    {
22677	      rtx t1 = gen_reg_rtx (mode);
22678	      rtx g1 = gen_reg_rtx (mode);
22679	      rtx h1 = gen_reg_rtx (mode);
22680
22681	      rs6000_emit_nmsub (t1, g, h, mhalf);
22682	      rs6000_emit_madd (g1, g, t1, g);
22683	      rs6000_emit_madd (h1, h, t1, h);
22684
22685	      g = g1;
22686	      h = h1;
22687	    }
22688	  /* Multiply by 2 for 1/rsqrt.  */
22689	  emit_insn (gen_add3_insn (dst, h, h));
22690	}
22691    }
22692  else
22693    {
22694      rtx t = gen_reg_rtx (mode);
22695      rs6000_emit_nmsub (t, g, h, mhalf);
22696      rs6000_emit_madd (dst, g, t, g);
22697    }
22698
22699  return;
22700}
22701
22702/* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
22703   (Power7) targets.  DST is the target, and SRC is the argument operand.  */
22704
22705void
22706rs6000_emit_popcount (rtx dst, rtx src)
22707{
22708  machine_mode mode = GET_MODE (dst);
22709  rtx tmp1, tmp2;
22710
22711  /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can.  */
22712  if (TARGET_POPCNTD)
22713    {
22714      if (mode == SImode)
22715	emit_insn (gen_popcntdsi2 (dst, src));
22716      else
22717	emit_insn (gen_popcntddi2 (dst, src));
22718      return;
22719    }
22720
22721  tmp1 = gen_reg_rtx (mode);
22722
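  /* popcntb leaves the population count of each byte in that byte.
     Multiplying by 0x01010101 sums the byte counts into the most
     significant byte: e.g. 0x01020304 * 0x01010101 = 0x0a090704, whose
     top byte 0x0a = 1 + 2 + 3 + 4; the shift by 24 extracts it.  The
     DImode path does the same with eight bytes and a shift of 56.  */
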
22723  if (mode == SImode)
22724    {
22725      emit_insn (gen_popcntbsi2 (tmp1, src));
22726      tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
22727			   NULL_RTX, 0);
22728      tmp2 = force_reg (SImode, tmp2);
22729      emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
22730    }
22731  else
22732    {
22733      emit_insn (gen_popcntbdi2 (tmp1, src));
22734      tmp2 = expand_mult (DImode, tmp1,
22735			  GEN_INT ((HOST_WIDE_INT)
22736				   0x01010101 << 32 | 0x01010101),
22737			  NULL_RTX, 0);
22738      tmp2 = force_reg (DImode, tmp2);
22739      emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
22740    }
22741}
22742
22743
22744/* Emit parity intrinsic on TARGET_POPCNTB targets.  DST is the
22745   target, and SRC is the argument operand.  */
22746
22747void
22748rs6000_emit_parity (rtx dst, rtx src)
22749{
22750  machine_mode mode = GET_MODE (dst);
22751  rtx tmp;
22752
22753  tmp = gen_reg_rtx (mode);
22754
22755  /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can.  */
22756  if (TARGET_CMPB)
22757    {
22758      if (mode == SImode)
22759	{
22760	  emit_insn (gen_popcntbsi2 (tmp, src));
22761	  emit_insn (gen_paritysi2_cmpb (dst, tmp));
22762	}
22763      else
22764	{
22765	  emit_insn (gen_popcntbdi2 (tmp, src));
22766	  emit_insn (gen_paritydi2_cmpb (dst, tmp));
22767	}
22768      return;
22769    }
22770
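  /* Without prtyw/prtyd, fold with shifts and XORs: XORing the high half
     of the popcntb byte counts onto the low half preserves the total count
     modulo 2, so two folds (SImode) or three (DImode) leave the parity in
     the low bit of the low byte.  When the multiply is cheap enough, the
     low bit of the full popcount is used instead.  */
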
22771  if (mode == SImode)
22772    {
22773      /* Is mult+shift >= shift+xor+shift+xor?  */
22774      if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
22775	{
22776	  rtx tmp1, tmp2, tmp3, tmp4;
22777
22778	  tmp1 = gen_reg_rtx (SImode);
22779	  emit_insn (gen_popcntbsi2 (tmp1, src));
22780
22781	  tmp2 = gen_reg_rtx (SImode);
22782	  emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
22783	  tmp3 = gen_reg_rtx (SImode);
22784	  emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
22785
22786	  tmp4 = gen_reg_rtx (SImode);
22787	  emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
22788	  emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
22789	}
22790      else
22791	rs6000_emit_popcount (tmp, src);
22792      emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
22793    }
22794  else
22795    {
22796      /* Is mult+shift >= shift+xor+shift+xor+shift+xor?  */
22797      if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
22798	{
22799	  rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
22800
22801	  tmp1 = gen_reg_rtx (DImode);
22802	  emit_insn (gen_popcntbdi2 (tmp1, src));
22803
22804	  tmp2 = gen_reg_rtx (DImode);
22805	  emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
22806	  tmp3 = gen_reg_rtx (DImode);
22807	  emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
22808
22809	  tmp4 = gen_reg_rtx (DImode);
22810	  emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
22811	  tmp5 = gen_reg_rtx (DImode);
22812	  emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
22813
22814	  tmp6 = gen_reg_rtx (DImode);
22815	  emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
22816	  emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
22817	}
22818      else
22819        rs6000_emit_popcount (tmp, src);
22820      emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
22821    }
22822}
22823
22824/* Expand an Altivec constant permutation for little endian mode.
22825   OP0 and OP1 are the input vectors and TARGET is the output vector.
22826   SEL specifies the constant permutation vector.
22827
22828   There are two issues: First, the two input operands must be
22829   swapped so that together they form a double-wide array in LE
22830   order.  Second, the vperm instruction has surprising behavior
22831   in LE mode:  it interprets the elements of the source vectors
22832   in BE mode ("left to right") and interprets the elements of
22833   the destination vector in LE mode ("right to left").  To
22834   correct for this, we must subtract each element of the permute
22835   control vector from 31.
22836
22837   For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
22838   with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
22839   We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
22840   serve as the permute control vector.  Then, in BE mode,
22841
22842     vperm 9,10,11,12
22843
22844   places the desired result in vr9.  However, in LE mode the
22845   vector contents will be
22846
22847     vr10 = 00000003 00000002 00000001 00000000
22848     vr11 = 00000007 00000006 00000005 00000004
22849
22850   The result of the vperm using the same permute control vector is
22851
22852     vr9  = 05000000 07000000 01000000 03000000
22853
22854   That is, the leftmost 4 bytes of vr10 are interpreted as the
22855   source for the rightmost 4 bytes of vr9, and so on.
22856
22857   If we change the permute control vector to
22858
     vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
22860
22861   and issue
22862
22863     vperm 9,11,10,12
22864
22865   we get the desired
22866
22867   vr9  = 00000006 00000004 00000002 00000000.  */
22868
22869static void
22870altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
22871				  const vec_perm_indices &sel)
22872{
22873  unsigned int i;
22874  rtx perm[16];
22875  rtx constv, unspec;
22876
22877  /* Unpack and adjust the constant selector.  */
22878  for (i = 0; i < 16; ++i)
22879    {
22880      unsigned int elt = 31 - (sel[i] & 31);
22881      perm[i] = GEN_INT (elt);
22882    }
22883
22884  /* Expand to a permute, swapping the inputs and using the
22885     adjusted selector.  */
22886  if (!REG_P (op0))
22887    op0 = force_reg (V16QImode, op0);
22888  if (!REG_P (op1))
22889    op1 = force_reg (V16QImode, op1);
22890
22891  constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
22892  constv = force_reg (V16QImode, constv);
22893  unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
22894			   UNSPEC_VPERM);
22895  if (!REG_P (target))
22896    {
22897      rtx tmp = gen_reg_rtx (V16QImode);
22898      emit_move_insn (tmp, unspec);
22899      unspec = tmp;
22900    }
22901
22902  emit_move_insn (target, unspec);
22903}
22904
22905/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
22906   permute control vector.  But here it's not a constant, so we must
22907   generate a vector NAND or NOR to do the adjustment.  */
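
/* A bytewise NOT suffices because vperm reads only the low five bits of
   each selector byte: ~x == 255 - x, and 255 - x == 31 - x (mod 32), so
   NOT implements the "subtract from 31" adjustment.  */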
22908
22909void
22910altivec_expand_vec_perm_le (rtx operands[4])
22911{
22912  rtx notx, iorx, unspec;
22913  rtx target = operands[0];
22914  rtx op0 = operands[1];
22915  rtx op1 = operands[2];
22916  rtx sel = operands[3];
22917  rtx tmp = target;
22918  rtx norreg = gen_reg_rtx (V16QImode);
22919  machine_mode mode = GET_MODE (target);
22920
22921  /* Get everything in regs so the pattern matches.  */
22922  if (!REG_P (op0))
22923    op0 = force_reg (mode, op0);
22924  if (!REG_P (op1))
22925    op1 = force_reg (mode, op1);
22926  if (!REG_P (sel))
22927    sel = force_reg (V16QImode, sel);
22928  if (!REG_P (target))
22929    tmp = gen_reg_rtx (mode);
22930
22931  if (TARGET_P9_VECTOR)
22932    {
22933      unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
22934			       UNSPEC_VPERMR);
22935    }
22936  else
22937    {
22938      /* Invert the selector with a VNAND if available, else a VNOR.
22939	 The VNAND is preferred for future fusion opportunities.  */
22940      notx = gen_rtx_NOT (V16QImode, sel);
22941      iorx = (TARGET_P8_VECTOR
22942	      ? gen_rtx_IOR (V16QImode, notx, notx)
22943	      : gen_rtx_AND (V16QImode, notx, notx));
22944      emit_insn (gen_rtx_SET (norreg, iorx));
22945
22946      /* Permute with operands reversed and adjusted selector.  */
22947      unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
22948			       UNSPEC_VPERM);
22949    }
22950
22951  /* Copy into target, possibly by way of a register.  */
22952  if (!REG_P (target))
22953    {
22954      emit_move_insn (tmp, unspec);
22955      unspec = tmp;
22956    }
22957
22958  emit_move_insn (target, unspec);
22959}
22960
22961/* Expand an Altivec constant permutation.  Return true if we match
22962   an efficient implementation; false to fall back to VPERM.
22963
22964   OP0 and OP1 are the input vectors and TARGET is the output vector.
22965   SEL specifies the constant permutation vector.  */
22966
22967static bool
22968altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
22969			       const vec_perm_indices &sel)
22970{
22971  struct altivec_perm_insn {
22972    HOST_WIDE_INT mask;
22973    enum insn_code impl;
22974    unsigned char perm[16];
22975  };
22976  static const struct altivec_perm_insn patterns[] = {
22977    {OPTION_MASK_ALTIVEC,
22978     CODE_FOR_altivec_vpkuhum_direct,
22979     {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
22980    {OPTION_MASK_ALTIVEC,
22981     CODE_FOR_altivec_vpkuwum_direct,
22982     {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
22983    {OPTION_MASK_ALTIVEC,
22984     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
22985		      : CODE_FOR_altivec_vmrglb_direct,
22986     {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
22987    {OPTION_MASK_ALTIVEC,
22988     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
22989		      : CODE_FOR_altivec_vmrglh_direct,
22990     {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
22991    {OPTION_MASK_ALTIVEC,
22992     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si
22993		      : CODE_FOR_altivec_vmrglw_direct_v4si,
22994     {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
22995    {OPTION_MASK_ALTIVEC,
22996     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
22997		      : CODE_FOR_altivec_vmrghb_direct,
22998     {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
22999    {OPTION_MASK_ALTIVEC,
23000     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
23001		      : CODE_FOR_altivec_vmrghh_direct,
23002     {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
23003    {OPTION_MASK_ALTIVEC,
23004     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si
23005		      : CODE_FOR_altivec_vmrghw_direct_v4si,
23006     {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
23007    {OPTION_MASK_P8_VECTOR,
23008     BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
23009		      : CODE_FOR_p8_vmrgow_v4sf_direct,
23010     {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
23011    {OPTION_MASK_P8_VECTOR,
23012     BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
23013		      : CODE_FOR_p8_vmrgew_v4sf_direct,
23014     {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
23015    {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23016     {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
23017    {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23018     {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
23019    {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23020     {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
23021    {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23022     {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};
23023
23024  unsigned int i, j, elt, which;
23025  unsigned char perm[16];
23026  rtx x;
23027  bool one_vec;
23028
23029  /* Unpack the constant selector.  */
23030  for (i = which = 0; i < 16; ++i)
23031    {
23032      elt = sel[i] & 31;
23033      which |= (elt < 16 ? 1 : 2);
23034      perm[i] = elt;
23035    }
23036
23037  /* Simplify the constant selector based on operands.  */
23038  switch (which)
23039    {
23040    default:
23041      gcc_unreachable ();
23042
23043    case 3:
23044      one_vec = false;
23045      if (!rtx_equal_p (op0, op1))
23046	break;
23047      /* FALLTHRU */
23048
23049    case 2:
23050      for (i = 0; i < 16; ++i)
23051	perm[i] &= 15;
23052      op0 = op1;
23053      one_vec = true;
23054      break;
23055
23056    case 1:
23057      op1 = op0;
23058      one_vec = true;
23059      break;
23060    }
23061
23062  /* Look for splat patterns.  */
23063  if (one_vec)
23064    {
23065      elt = perm[0];
23066
23067      for (i = 0; i < 16; ++i)
23068	if (perm[i] != elt)
23069	  break;
23070      if (i == 16)
23071	{
23072          if (!BYTES_BIG_ENDIAN)
23073            elt = 15 - elt;
23074	  emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
23075	  return true;
23076	}
23077
23078      if (elt % 2 == 0)
23079	{
23080	  for (i = 0; i < 16; i += 2)
23081	    if (perm[i] != elt || perm[i + 1] != elt + 1)
23082	      break;
23083	  if (i == 16)
23084	    {
23085	      int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
23086	      x = gen_reg_rtx (V8HImode);
23087	      emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
23088						    GEN_INT (field)));
23089	      emit_move_insn (target, gen_lowpart (V16QImode, x));
23090	      return true;
23091	    }
23092	}
23093
23094      if (elt % 4 == 0)
23095	{
23096	  for (i = 0; i < 16; i += 4)
23097	    if (perm[i] != elt
23098		|| perm[i + 1] != elt + 1
23099		|| perm[i + 2] != elt + 2
23100		|| perm[i + 3] != elt + 3)
23101	      break;
23102	  if (i == 16)
23103	    {
23104	      int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
23105	      x = gen_reg_rtx (V4SImode);
23106	      emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
23107						    GEN_INT (field)));
23108	      emit_move_insn (target, gen_lowpart (V16QImode, x));
23109	      return true;
23110	    }
23111	}
23112    }
23113
23114  /* Look for merge and pack patterns.  */
23115  for (j = 0; j < ARRAY_SIZE (patterns); ++j)
23116    {
23117      bool swapped;
23118
23119      if ((patterns[j].mask & rs6000_isa_flags) == 0)
23120	continue;
23121
23122      elt = patterns[j].perm[0];
23123      if (perm[0] == elt)
23124	swapped = false;
23125      else if (perm[0] == elt + 16)
23126	swapped = true;
23127      else
23128	continue;
23129      for (i = 1; i < 16; ++i)
23130	{
23131	  elt = patterns[j].perm[i];
23132	  if (swapped)
23133	    elt = (elt >= 16 ? elt - 16 : elt + 16);
23134	  else if (one_vec && elt >= 16)
23135	    elt -= 16;
23136	  if (perm[i] != elt)
23137	    break;
23138	}
23139      if (i == 16)
23140	{
23141	  enum insn_code icode = patterns[j].impl;
23142	  machine_mode omode = insn_data[icode].operand[0].mode;
23143	  machine_mode imode = insn_data[icode].operand[1].mode;
23144
23145	  rtx perm_idx = GEN_INT (0);
23146	  if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23147	    {
23148	      int perm_val = 0;
23149	      if (one_vec)
23150		{
23151		  if (perm[0] == 8)
23152		    perm_val |= 2;
23153		  if (perm[8] == 8)
23154		    perm_val |= 1;
23155		}
23156	      else
23157		{
23158		  if (perm[0] != 0)
23159		    perm_val |= 2;
23160		  if (perm[8] != 16)
23161		    perm_val |= 1;
23162		}
23163	      perm_idx = GEN_INT (perm_val);
23164	    }
23165
23166	  /* For little-endian, don't use vpkuwum and vpkuhum if the
23167	     underlying vector type is not V4SI and V8HI, respectively.
23168	     For example, using vpkuwum with a V8HI picks up the even
23169	     halfwords (BE numbering) when the even halfwords (LE
23170	     numbering) are what we need.  */
23171	  if (!BYTES_BIG_ENDIAN
23172	      && icode == CODE_FOR_altivec_vpkuwum_direct
23173	      && ((REG_P (op0)
23174		   && GET_MODE (op0) != V4SImode)
23175		  || (SUBREG_P (op0)
23176		      && GET_MODE (XEXP (op0, 0)) != V4SImode)))
23177	    continue;
23178	  if (!BYTES_BIG_ENDIAN
23179	      && icode == CODE_FOR_altivec_vpkuhum_direct
23180	      && ((REG_P (op0)
23181		   && GET_MODE (op0) != V8HImode)
23182		  || (SUBREG_P (op0)
23183		      && GET_MODE (XEXP (op0, 0)) != V8HImode)))
23184	    continue;
23185
23186          /* For little-endian, the two input operands must be swapped
23187             (or swapped back) to ensure proper right-to-left numbering
23188             from 0 to 2N-1.  */
23189	  if (swapped ^ !BYTES_BIG_ENDIAN
23190	      && icode != CODE_FOR_vsx_xxpermdi_v16qi)
23191	    std::swap (op0, op1);
23192	  if (imode != V16QImode)
23193	    {
23194	      op0 = gen_lowpart (imode, op0);
23195	      op1 = gen_lowpart (imode, op1);
23196	    }
23197	  if (omode == V16QImode)
23198	    x = target;
23199	  else
23200	    x = gen_reg_rtx (omode);
23201	  if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23202	    emit_insn (GEN_FCN (icode) (x, op0, op1, perm_idx));
23203	  else
23204	    emit_insn (GEN_FCN (icode) (x, op0, op1));
23205	  if (omode != V16QImode)
23206	    emit_move_insn (target, gen_lowpart (V16QImode, x));
23207	  return true;
23208	}
23209    }
23210
23211  if (!BYTES_BIG_ENDIAN)
23212    {
23213      altivec_expand_vec_perm_const_le (target, op0, op1, sel);
23214      return true;
23215    }
23216
23217  return false;
23218}
23219
23220/* Expand a VSX Permute Doubleword constant permutation.
23221   Return true if we match an efficient implementation.  */
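
/* PERM0 and PERM1 pick the two doublewords of the result out of the four
   doublewords of OP0:OP1; values 0 and 1 select an element of OP0, 2 and 3
   an element of OP1.  E.g. {perm0, perm1} = {0, 3} concatenates element 0
   of OP0 with element 1 of OP1.  */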
23222
23223static bool
23224rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
23225				unsigned char perm0, unsigned char perm1)
23226{
23227  rtx x;
23228
23229  /* If both selectors come from the same operand, fold to single op.  */
23230  if ((perm0 & 2) == (perm1 & 2))
23231    {
23232      if (perm0 & 2)
23233	op0 = op1;
23234      else
23235	op1 = op0;
23236    }
23237  /* If both operands are equal, fold to simpler permutation.  */
23238  if (rtx_equal_p (op0, op1))
23239    {
23240      perm0 = perm0 & 1;
23241      perm1 = (perm1 & 1) + 2;
23242    }
23243  /* If the first selector comes from the second operand, swap.  */
23244  else if (perm0 & 2)
23245    {
23246      if (perm1 & 2)
23247	return false;
23248      perm0 -= 2;
23249      perm1 += 2;
23250      std::swap (op0, op1);
23251    }
23252  /* If the second selector does not come from the second operand, fail.  */
23253  else if ((perm1 & 2) == 0)
23254    return false;
23255
23256  /* Success! */
23257  if (target != NULL)
23258    {
23259      machine_mode vmode, dmode;
23260      rtvec v;
23261
23262      vmode = GET_MODE (target);
23263      gcc_assert (GET_MODE_NUNITS (vmode) == 2);
23264      dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
23265      x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
23266      v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
23267      x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
23268      emit_insn (gen_rtx_SET (target, x));
23269    }
23270  return true;
23271}
23272
23273/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */
23274
23275static bool
23276rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
23277				 rtx op1, const vec_perm_indices &sel)
23278{
23279  bool testing_p = !target;
23280
23281  /* AltiVec (and thus VSX) can handle arbitrary permutations.  */
23282  if (TARGET_ALTIVEC && testing_p)
23283    return true;
23284
23285  if (op0)
23286    {
23287      rtx nop0 = force_reg (vmode, op0);
23288      if (op0 == op1)
23289        op1 = nop0;
23290      op0 = nop0;
23291    }
23292  if (op1)
23293    op1 = force_reg (vmode, op1);
23294
23295  /* Check for ps_merge* or xxpermdi insns.  */
23296  if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
23297    {
23298      if (testing_p)
23299	{
23300	  op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
23301	  op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
23302	}
23303      if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
23304	return true;
23305    }
23306
23307  if (TARGET_ALTIVEC)
23308    {
23309      /* Force the target-independent code to lower to V16QImode.  */
23310      if (vmode != V16QImode)
23311	return false;
23312      if (altivec_expand_vec_perm_const (target, op0, op1, sel))
23313	return true;
23314    }
23315
23316  return false;
23317}
23318
23319/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
23320   OP0 and OP1 are the input vectors and TARGET is the output vector.
23321   PERM specifies the constant permutation vector.  */
23322
23323static void
23324rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
23325			   machine_mode vmode, const vec_perm_builder &perm)
23326{
23327  rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
23328  if (x != target)
23329    emit_move_insn (target, x);
23330}
23331
23332/* Expand an extract even operation.  */
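/* E.g. for a V4SI target the selector below is {0, 2, 4, 6}, picking the
   even-numbered elements of the OP0:OP1 concatenation.  */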
23333
23334void
23335rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
23336{
23337  machine_mode vmode = GET_MODE (target);
23338  unsigned i, nelt = GET_MODE_NUNITS (vmode);
23339  vec_perm_builder perm (nelt, nelt, 1);
23340
23341  for (i = 0; i < nelt; i++)
23342    perm.quick_push (i * 2);
23343
23344  rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23345}
23346
23347/* Expand a vector interleave operation.  */
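/* E.g. for V4SI with HIGHP set the selector below is {0, 4, 1, 5},
   interleaving the first halves of OP0 and OP1; with HIGHP clear it is
   {2, 6, 3, 7}.  */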
23348
23349void
23350rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
23351{
23352  machine_mode vmode = GET_MODE (target);
23353  unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
23354  vec_perm_builder perm (nelt, nelt, 1);
23355
23356  high = (highp ? 0 : nelt / 2);
23357  for (i = 0; i < nelt / 2; i++)
23358    {
23359      perm.quick_push (i + high);
23360      perm.quick_push (i + nelt + high);
23361    }
23362
23363  rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23364}
23365
23366/* Scale a V2DF vector SRC by two to the SCALE and place in TGT.  */
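/* E.g. rs6000_scale_v2df (tgt, src, 4) multiplies both elements of SRC
   by 2.0**4 = 16.0.  */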
23367void
23368rs6000_scale_v2df (rtx tgt, rtx src, int scale)
23369{
23370  HOST_WIDE_INT hwi_scale (scale);
23371  REAL_VALUE_TYPE r_pow;
23372  rtvec v = rtvec_alloc (2);
23373  rtx elt;
23374  rtx scale_vec = gen_reg_rtx (V2DFmode);
23375  (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
23376  elt = const_double_from_real_value (r_pow, DFmode);
23377  RTVEC_ELT (v, 0) = elt;
23378  RTVEC_ELT (v, 1) = elt;
23379  rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
23380  emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
23381}
23382
23383/* Return an RTX representing where to find the function value of a
23384   function returning MODE.  */
23385static rtx
23386rs6000_complex_function_value (machine_mode mode)
23387{
23388  unsigned int regno;
23389  rtx r1, r2;
23390  machine_mode inner = GET_MODE_INNER (mode);
23391  unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
23392
23393  if (TARGET_FLOAT128_TYPE
23394      && (mode == KCmode
23395	  || (mode == TCmode && TARGET_IEEEQUAD)))
23396    regno = ALTIVEC_ARG_RETURN;
23397
23398  else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23399    regno = FP_ARG_RETURN;
23400
23401  else
23402    {
23403      regno = GP_ARG_RETURN;
23404
23405      /* 32-bit is OK since it'll go in r3/r4.  */
23406      if (TARGET_32BIT && inner_bytes >= 4)
23407	return gen_rtx_REG (mode, regno);
23408    }
23409
23410  if (inner_bytes >= 8)
23411    return gen_rtx_REG (mode, regno);
23412
23413  r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
23414			  const0_rtx);
23415  r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
23416			  GEN_INT (inner_bytes));
23417  return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
23418}
23419
23420/* Return an rtx describing a return value of MODE as a PARALLEL
23421   in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23422   stride REG_STRIDE.  */
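
/* E.g. rs6000_parallel_return (DImode, 2, SImode, GP_ARG_RETURN, 1) builds
   roughly (parallel [(expr_list (reg:SI 3) (const_int 0))
		      (expr_list (reg:SI 4) (const_int 4))]),
   a DImode value split across r3 and r4.  */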
23423
23424static rtx
23425rs6000_parallel_return (machine_mode mode,
23426			int n_elts, machine_mode elt_mode,
23427			unsigned int regno, unsigned int reg_stride)
23428{
23429  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
23430
23431  int i;
23432  for (i = 0; i < n_elts; i++)
23433    {
23434      rtx r = gen_rtx_REG (elt_mode, regno);
23435      rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
23436      XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
23437      regno += reg_stride;
23438    }
23439
23440  return par;
23441}
23442
23443/* Target hook for TARGET_FUNCTION_VALUE.
23444
23445   An integer value is in r3 and a floating-point value is in fp1,
23446   unless -msoft-float.  */
23447
23448static rtx
23449rs6000_function_value (const_tree valtype,
23450		       const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
23451		       bool outgoing ATTRIBUTE_UNUSED)
23452{
23453  machine_mode mode;
23454  unsigned int regno;
23455  machine_mode elt_mode;
23456  int n_elts;
23457
23458  /* Special handling for structs in darwin64.  */
23459  if (TARGET_MACHO
23460      && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
23461    {
23462      CUMULATIVE_ARGS valcum;
23463      rtx valret;
23464
23465      valcum.words = 0;
23466      valcum.fregno = FP_ARG_MIN_REG;
23467      valcum.vregno = ALTIVEC_ARG_MIN_REG;
23468      /* Do a trial code generation as if this were going to be passed as
23469	 an argument; if any part goes in memory, we return NULL.  */
23470      valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
23471      if (valret)
23472	return valret;
23473      /* Otherwise fall through to standard ABI rules.  */
23474    }
23475
23476  mode = TYPE_MODE (valtype);
23477
23478  /* The ELFv2 ABI returns homogeneous VFP aggregates in registers.  */
23479  if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
23480    {
23481      int first_reg, n_regs;
23482
23483      if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
23484	{
23485	  /* _Decimal128 must use even/odd register pairs.  */
23486	  first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23487	  n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
23488	}
23489      else
23490	{
23491	  first_reg = ALTIVEC_ARG_RETURN;
23492	  n_regs = 1;
23493	}
23494
23495      return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
23496    }
23497
  /* Some return value types need to be split in the 32-bit ABI when using
     -mpowerpc64.  */
23499  if (TARGET_32BIT && TARGET_POWERPC64)
23500    switch (mode)
23501      {
23502      default:
23503	break;
23504      case E_DImode:
23505      case E_SCmode:
23506      case E_DCmode:
23507      case E_TCmode:
23508	int count = GET_MODE_SIZE (mode) / 4;
23509	return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
23510      }
23511
23512  if ((INTEGRAL_TYPE_P (valtype)
23513       && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
23514      || POINTER_TYPE_P (valtype))
23515    mode = TARGET_32BIT ? SImode : DImode;
23516
23517  if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23518    /* _Decimal128 must use an even/odd register pair.  */
23519    regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23520  else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
23521	   && !FLOAT128_VECTOR_P (mode))
23522    regno = FP_ARG_RETURN;
23523  else if (TREE_CODE (valtype) == COMPLEX_TYPE
23524	   && targetm.calls.split_complex_arg)
23525    return rs6000_complex_function_value (mode);
23526  /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
23527     return register is used in both cases, and we won't see V2DImode/V2DFmode
23528     for pure altivec, combine the two cases.  */
23529  else if ((TREE_CODE (valtype) == VECTOR_TYPE || VECTOR_ALIGNMENT_P (mode))
23530	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
23531	   && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
23532    regno = ALTIVEC_ARG_RETURN;
23533  else
23534    regno = GP_ARG_RETURN;
23535
23536  return gen_rtx_REG (mode, regno);
23537}
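
/* As a sketch of the common cases (assuming the usual assignments of r3
   for GP_ARG_RETURN, f1 for FP_ARG_RETURN and v2 for ALTIVEC_ARG_RETURN):

     long f (void);		returned in r3
     double g (void);		returned in f1 (hard float)
     vector int h (void);	returned in v2 (AltiVec ABI)

   and under ELFv2 a struct of four floats is a homogeneous aggregate,
   returned as a PARALLEL of f1..f4 built above.  */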
23538
23539/* Define how to find the value returned by a library function
23540   assuming the value has mode MODE.  */
23541rtx
23542rs6000_libcall_value (machine_mode mode)
23543{
23544  unsigned int regno;
23545
23546  /* A long long return value needs to be split in the -mpowerpc64, 32-bit ABI.  */
23547  if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
23548    return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
23549
23550  if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23551    /* _Decimal128 must use an even/odd register pair.  */
23552    regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23553  else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
23554    regno = FP_ARG_RETURN;
23555  /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
23556     return register is used in both cases, and we won't see V2DImode/V2DFmode
23557     for pure altivec, combine the two cases.  */
23558  else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
23559	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
23560    regno = ALTIVEC_ARG_RETURN;
23561  else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
23562    return rs6000_complex_function_value (mode);
23563  else
23564    regno = GP_ARG_RETURN;
23565
23566  return gen_rtx_REG (mode, regno);
23567}
23568
23569/* Compute register pressure classes.  We implement the target hook to avoid
23570   IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
23571   lead to incorrect estimates of the number of available registers and
23572   therefore to increased register pressure and spilling.  */
23573static int
23574rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
23575{
23576  int n;
23577
23578  n = 0;
23579  pressure_classes[n++] = GENERAL_REGS;
23580  if (TARGET_ALTIVEC)
23581    pressure_classes[n++] = ALTIVEC_REGS;
23582  if (TARGET_VSX)
23583    pressure_classes[n++] = VSX_REGS;
23584  else
23585    {
23586      if (TARGET_HARD_FLOAT)
23587	pressure_classes[n++] = FLOAT_REGS;
23588    }
23589  pressure_classes[n++] = CR_REGS;
23590  pressure_classes[n++] = SPECIAL_REGS;
23591
23592  return n;
23593}
23594
23595/* Given FROM and TO register numbers, say whether this elimination is allowed.
23596   Frame pointer elimination is automatically handled.
23597
23598   For the RS/6000, if frame pointer elimination is being done, we would like
23599   to convert ap into fp, not sp.
23600
23601   We need r30 if -mminimal-toc was specified and there are constant pool
23602   references.  */
23603
23604static bool
23605rs6000_can_eliminate (const int from, const int to)
23606{
23607  return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
23608	  ? ! frame_pointer_needed
23609	  : from == RS6000_PIC_OFFSET_TABLE_REGNUM
23610	    ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
23611		|| constant_pool_empty_p ()
23612	    : true);
23613}
23614
23615/* Define the offset between two registers, FROM to be eliminated and its
23616   replacement TO, at the start of a routine.  */
23617HOST_WIDE_INT
23618rs6000_initial_elimination_offset (int from, int to)
23619{
23620  rs6000_stack_t *info = rs6000_stack_info ();
23621  HOST_WIDE_INT offset;
23622
23623  if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23624    offset = info->push_p ? 0 : -info->total_size;
23625  else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23626    {
23627      offset = info->push_p ? 0 : -info->total_size;
23628      if (FRAME_GROWS_DOWNWARD)
23629	offset += info->fixed_size + info->vars_size + info->parm_size;
23630    }
23631  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23632    offset = FRAME_GROWS_DOWNWARD
23633	     ? info->fixed_size + info->vars_size + info->parm_size
23634	     : 0;
23635  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23636    offset = info->total_size;
23637  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23638    offset = info->push_p ? info->total_size : 0;
23639  else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
23640    offset = 0;
23641  else
23642    gcc_unreachable ();
23643
23644  return offset;
23645}
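
/* A concrete sketch: for a function that pushes a frame (info->push_p)
   with info->total_size == 128, eliminating the argument pointer to the
   stack pointer yields an offset of 128, while eliminating the hard frame
   pointer to the stack pointer yields 0.  */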
23646
23647/* Fill in sizes of registers used by unwinder.  */
23648
23649static void
23650rs6000_init_dwarf_reg_sizes_extra (tree address)
23651{
23652  if (TARGET_MACHO && ! TARGET_ALTIVEC)
23653    {
23654      int i;
23655      machine_mode mode = TYPE_MODE (char_type_node);
23656      rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
23657      rtx mem = gen_rtx_MEM (BLKmode, addr);
23658      rtx value = gen_int_mode (16, mode);
23659
23660      /* On Darwin, libgcc may be built to run on both G3 and G4/5.
23661	 The unwinder still needs to know the size of Altivec registers.  */
23662
23663      for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
23664	{
23665	  int column = DWARF_REG_TO_UNWIND_COLUMN
23666		(DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
23667	  HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
23668
23669	  emit_move_insn (adjust_address (mem, mode, offset), value);
23670	}
23671    }
23672}
23673
23674/* Map internal GCC register numbers to debug format register numbers.
23675   FORMAT specifies the type of debug register number to use:
23676     0 -- debug information, except for frame-related sections
23677     1 -- DWARF .debug_frame section
23678     2 -- DWARF .eh_frame section  */
23679
23680unsigned int
23681rs6000_dbx_register_number (unsigned int regno, unsigned int format)
23682{
23683  /* On some platforms, we use the standard DWARF register
23684     numbering for .debug_info and .debug_frame.  */
23685  if ((format == 0 && dwarf_debuginfo_p ()) || format == 1)
23686    {
23687#ifdef RS6000_USE_DWARF_NUMBERING
23688      if (regno <= 31)
23689	return regno;
23690      if (FP_REGNO_P (regno))
23691	return regno - FIRST_FPR_REGNO + 32;
23692      if (ALTIVEC_REGNO_P (regno))
23693	return regno - FIRST_ALTIVEC_REGNO + 1124;
23694      if (regno == LR_REGNO)
23695	return 108;
23696      if (regno == CTR_REGNO)
23697	return 109;
23698      if (regno == CA_REGNO)
23699	return 101;  /* XER */
23700      /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
23701	 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
23702	 The actual code emitted saves the whole of CR, so we map CR2_REGNO
23703	 to the DWARF reg for CR.  */
23704      if (format == 1 && regno == CR2_REGNO)
23705	return 64;
23706      if (CR_REGNO_P (regno))
23707	return regno - CR0_REGNO + 86;
23708      if (regno == VRSAVE_REGNO)
23709	return 356;
23710      if (regno == VSCR_REGNO)
23711	return 67;
23712
23713      /* These do not make much sense.  */
23714      if (regno == FRAME_POINTER_REGNUM)
23715	return 111;
23716      if (regno == ARG_POINTER_REGNUM)
23717	return 67;
23718      if (regno == 64)
23719	return 100;
23720
23721      gcc_unreachable ();
23722#endif
23723    }
23724
23725  /* We use the GCC 7 (and before) internal register numbers for non-DWARF
23726     debug information and also for .eh_frame, so translate the regnos to
23727     the numbers they had in GCC 7 (and before).  */
23728  if (regno <= 31)
23729    return regno;
23730  if (FP_REGNO_P (regno))
23731    return regno - FIRST_FPR_REGNO + 32;
23732  if (ALTIVEC_REGNO_P (regno))
23733    return regno - FIRST_ALTIVEC_REGNO + 77;
23734  if (regno == LR_REGNO)
23735    return 65;
23736  if (regno == CTR_REGNO)
23737    return 66;
23738  if (regno == CA_REGNO)
23739    return 76;  /* XER */
23740  if (CR_REGNO_P (regno))
23741    return regno - CR0_REGNO + 68;
23742  if (regno == VRSAVE_REGNO)
23743    return 109;
23744  if (regno == VSCR_REGNO)
23745    return 110;
23746
23747  if (regno == FRAME_POINTER_REGNUM)
23748    return 111;
23749  if (regno == ARG_POINTER_REGNUM)
23750    return 67;
23751  if (regno == 64)
23752    return 64;
23753
23754  gcc_unreachable ();
23755}
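
/* For instance, LR_REGNO maps to DWARF column 108 in .debug_frame but to
   the historical GCC number 65 in .eh_frame, while the GPRs and FPRs get
   the same numbers (0-31 and 32-63) under both numbering schemes.  */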
23756
23757/* Target hook for eh_return_filter_mode.  */
23758static scalar_int_mode
23759rs6000_eh_return_filter_mode (void)
23760{
23761  return TARGET_32BIT ? SImode : word_mode;
23762}
23763
23764/* Target hook for translate_mode_attribute.  */
23765static machine_mode
23766rs6000_translate_mode_attribute (machine_mode mode)
23767{
23768  if ((FLOAT128_IEEE_P (mode)
23769       && ieee128_float_type_node == long_double_type_node)
23770      || (FLOAT128_IBM_P (mode)
23771	  && ibm128_float_type_node == long_double_type_node))
23772    return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
23773  return mode;
23774}
23775
23776/* Target hook for scalar_mode_supported_p.  */
23777static bool
23778rs6000_scalar_mode_supported_p (scalar_mode mode)
23779{
23780  /* -m32 does not support TImode.  This is the default, from
23781     default_scalar_mode_supported_p.  For -m32 -mpowerpc64 we want the
23782     same ABI as for -m32.  But default_scalar_mode_supported_p allows
23783     integer modes of precision 2 * BITS_PER_WORD, which matches TImode
23784     for -mpowerpc64.  */
23785  if (TARGET_32BIT && mode == TImode)
23786    return false;
23787
23788  if (DECIMAL_FLOAT_MODE_P (mode))
23789    return default_decimal_float_supported_p ();
23790  else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
23791    return true;
23792  else
23793    return default_scalar_mode_supported_p (mode);
23794}
23795
23796/* Target hook for libgcc_floating_mode_supported_p.  */
23797
23798static bool
23799rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode)
23800{
23801  switch (mode)
23802    {
23803    case E_SFmode:
23804    case E_DFmode:
23805    case E_TFmode:
23806      return true;
23807
23808      /* We only return true for KFmode if IEEE 128-bit types are supported and
23809	 long double does not use the IEEE 128-bit format.  If long double
23810	 uses the IEEE 128-bit format, it will use TFmode and not KFmode;
23811	 since the code will not use KFmode in that case, returning true here
23812	 would cause aborts when KFmode is missing from the _FloatN types.  */
23813    case E_KFmode:
23814      return TARGET_FLOAT128_TYPE && !TARGET_IEEEQUAD;
23815
23816    default:
23817      return false;
23818    }
23819}
23820
23821/* Target hook for vector_mode_supported_p.  */
23822static bool
23823rs6000_vector_mode_supported_p (machine_mode mode)
23824{
23825  /* There is no vector form for IEEE 128-bit.  If we return true for IEEE
23826     128-bit, the compiler might try to widen IEEE 128-bit to IBM
23827     double-double.  */
23828  if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
23829    return true;
23830
23831  else
23832    return false;
23833}
23834
23835/* Target hook for floatn_mode.  */
23836static opt_scalar_float_mode
23837rs6000_floatn_mode (int n, bool extended)
23838{
23839  if (extended)
23840    {
23841      switch (n)
23842	{
23843	case 32:
23844	  return DFmode;
23845
23846	case 64:
23847	  if (TARGET_FLOAT128_TYPE)
23848	    return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23849	  else
23850	    return opt_scalar_float_mode ();
23851
23852	case 128:
23853	  return opt_scalar_float_mode ();
23854
23855	default:
23856	  /* Those are the only valid _FloatNx types.  */
23857	  gcc_unreachable ();
23858	}
23859    }
23860  else
23861    {
23862      switch (n)
23863	{
23864	case 32:
23865	  return SFmode;
23866
23867	case 64:
23868	  return DFmode;
23869
23870	case 128:
23871	  if (TARGET_FLOAT128_TYPE)
23872	    return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23873	  else
23874	    return opt_scalar_float_mode ();
23875
23876	default:
23877	  return opt_scalar_float_mode ();
23878	}
23879    }
23881}
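
/* Summarizing the cases above (assuming -mfloat128 is enabled): _Float32
   is SFmode; _Float64 and _Float32x are DFmode; _Float64x and _Float128
   are the IEEE 128-bit mode (TFmode if long double is IEEE 128-bit,
   otherwise KFmode); and _Float128x is not supported.  */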
23882
23883/* Target hook for c_mode_for_suffix.  */
23884static machine_mode
23885rs6000_c_mode_for_suffix (char suffix)
23886{
23887  if (TARGET_FLOAT128_TYPE)
23888    {
23889      if (suffix == 'q' || suffix == 'Q')
23890	return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23891
23892      /* At the moment, we are not defining a suffix for IBM extended double.
23893	 If/when the default for -mabi=ieeelongdouble is changed, and we want
23894	 to support __ibm128 constants in legacy library code, we may need to
23895	 re-evaluate this decision.  Currently, c-lex.cc only supports 'w' and
23896	 'q' as machine dependent suffixes.  The x86_64 port uses 'w' for
23897	 __float80 constants.  */
23898    }
23899
23900  return VOIDmode;
23901}
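
/* A hypothetical use that exercises this hook: with -mfloat128, a literal
   such as

     __float128 x = 1.5Q;

   is lexed with the machine-dependent 'Q' suffix and is given KFmode (or
   TFmode when long double is IEEE 128-bit).  */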
23902
23903/* Target hook for invalid_arg_for_unprototyped_fn.  */
23904static const char *
23905invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
23906{
23907  return (!rs6000_darwin64_abi
23908	  && typelist == 0
23909          && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
23910          && (funcdecl == NULL_TREE
23911              || (TREE_CODE (funcdecl) == FUNCTION_DECL
23912                  && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
23913	  ? N_("AltiVec argument passed to unprototyped function")
23914	  : NULL;
23915}
23916
23917/* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
23918   setup by using __stack_chk_fail_local hidden function instead of
23919   calling __stack_chk_fail directly.  Otherwise it is better to call
23920   __stack_chk_fail directly.  */
23921
23922static tree ATTRIBUTE_UNUSED
23923rs6000_stack_protect_fail (void)
23924{
23925  return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
23926	 ? default_hidden_stack_protect_fail ()
23927	 : default_external_stack_protect_fail ();
23928}
23929
23930/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
23931
23932#if TARGET_ELF
23933static unsigned HOST_WIDE_INT
23934rs6000_asan_shadow_offset (void)
23935{
23936  return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
23937}
23938#endif
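
/* With the default ASan mapping (shadow scale 3), this offset means a
   64-bit address ADDR is checked through the shadow byte at roughly

     (ADDR >> 3) + ((unsigned HOST_WIDE_INT) 1 << 41)

   a sketch of the usual shadow computation done by the sanitizer, not
   something implemented in this file.  */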
23939
23940/* Mask options that we want to support inside of attribute((target)) and
23941   #pragma GCC target operations.  Note, we do not include things like
23942   64/32-bit, endianness, hard/soft floating point, etc. that would have
23943   different calling sequences.  */
23944
23945struct rs6000_opt_mask {
23946  const char *name;		/* option name */
23947  HOST_WIDE_INT mask;		/* mask to set */
23948  bool invert;			/* invert sense of mask */
23949  bool valid_target;		/* option is a target option */
23950};
23951
23952static struct rs6000_opt_mask const rs6000_opt_masks[] =
23953{
23954  { "altivec",			OPTION_MASK_ALTIVEC,		false, true  },
23955  { "block-ops-unaligned-vsx",	OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
23956								false, true  },
23957  { "block-ops-vector-pair",	OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
23958								false, true  },
23959  { "cmpb",			OPTION_MASK_CMPB,		false, true  },
23960  { "crypto",			OPTION_MASK_CRYPTO,		false, true  },
23961  { "direct-move",		OPTION_MASK_DIRECT_MOVE,	false, true  },
23962  { "dlmzb",			OPTION_MASK_DLMZB,		false, true  },
23963  { "efficient-unaligned-vsx",	OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
23964								false, true  },
23965  { "float128",			OPTION_MASK_FLOAT128_KEYWORD,	false, true  },
23966  { "float128-hardware",	OPTION_MASK_FLOAT128_HW,	false, true  },
23967  { "fprnd",			OPTION_MASK_FPRND,		false, true  },
23968  { "power10",			OPTION_MASK_POWER10,		false, true  },
23969  { "hard-dfp",			OPTION_MASK_DFP,		false, true  },
23970  { "htm",			OPTION_MASK_HTM,		false, true  },
23971  { "isel",			OPTION_MASK_ISEL,		false, true  },
23972  { "mfcrf",			OPTION_MASK_MFCRF,		false, true  },
23973  { "mfpgpr",			0,				false, true  },
23974  { "mma",			OPTION_MASK_MMA,		false, true  },
23975  { "modulo",			OPTION_MASK_MODULO,		false, true  },
23976  { "mulhw",			OPTION_MASK_MULHW,		false, true  },
23977  { "multiple",			OPTION_MASK_MULTIPLE,		false, true  },
23978  { "pcrel",			OPTION_MASK_PCREL,		false, true  },
23979  { "pcrel-opt",		OPTION_MASK_PCREL_OPT,		false, true  },
23980  { "popcntb",			OPTION_MASK_POPCNTB,		false, true  },
23981  { "popcntd",			OPTION_MASK_POPCNTD,		false, true  },
23982  { "power8-fusion",		OPTION_MASK_P8_FUSION,		false, true  },
23983  { "power8-fusion-sign",	OPTION_MASK_P8_FUSION_SIGN,	false, true  },
23984  { "power8-vector",		OPTION_MASK_P8_VECTOR,		false, true  },
23985  { "power9-minmax",		OPTION_MASK_P9_MINMAX,		false, true  },
23986  { "power9-misc",		OPTION_MASK_P9_MISC,		false, true  },
23987  { "power9-vector",		OPTION_MASK_P9_VECTOR,		false, true  },
23988  { "power10-fusion",		OPTION_MASK_P10_FUSION,		false, true  },
23989  { "powerpc-gfxopt",		OPTION_MASK_PPC_GFXOPT,		false, true  },
23990  { "powerpc-gpopt",		OPTION_MASK_PPC_GPOPT,		false, true  },
23991  { "prefixed",			OPTION_MASK_PREFIXED,		false, true  },
23992  { "quad-memory",		OPTION_MASK_QUAD_MEMORY,	false, true  },
23993  { "quad-memory-atomic",	OPTION_MASK_QUAD_MEMORY_ATOMIC,	false, true  },
23994  { "recip-precision",		OPTION_MASK_RECIP_PRECISION,	false, true  },
23995  { "save-toc-indirect",	OPTION_MASK_SAVE_TOC_INDIRECT,	false, true  },
23996  { "string",			0,				false, true  },
23997  { "update",			OPTION_MASK_NO_UPDATE,		true , true  },
23998  { "vsx",			OPTION_MASK_VSX,		false, true  },
23999#ifdef OPTION_MASK_64BIT
24000#if TARGET_AIX_OS
24001  { "aix64",			OPTION_MASK_64BIT,		false, false },
24002  { "aix32",			OPTION_MASK_64BIT,		true,  false },
24003#else
24004  { "64",			OPTION_MASK_64BIT,		false, false },
24005  { "32",			OPTION_MASK_64BIT,		true,  false },
24006#endif
24007#endif
24008#ifdef OPTION_MASK_EABI
24009  { "eabi",			OPTION_MASK_EABI,		false, false },
24010#endif
24011#ifdef OPTION_MASK_LITTLE_ENDIAN
24012  { "little",			OPTION_MASK_LITTLE_ENDIAN,	false, false },
24013  { "big",			OPTION_MASK_LITTLE_ENDIAN,	true,  false },
24014#endif
24015#ifdef OPTION_MASK_RELOCATABLE
24016  { "relocatable",		OPTION_MASK_RELOCATABLE,	false, false },
24017#endif
24018#ifdef OPTION_MASK_STRICT_ALIGN
24019  { "strict-align",		OPTION_MASK_STRICT_ALIGN,	false, false },
24020#endif
24021  { "soft-float",		OPTION_MASK_SOFT_FLOAT,		false, false },
24022  { "string",			0,				false, false },
24023};
24024
24025/* Builtin mask mapping for printing the flags.  */
24026static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
24027{
24028  { "altivec",		 RS6000_BTM_ALTIVEC,	false, false },
24029  { "vsx",		 RS6000_BTM_VSX,	false, false },
24030  { "fre",		 RS6000_BTM_FRE,	false, false },
24031  { "fres",		 RS6000_BTM_FRES,	false, false },
24032  { "frsqrte",		 RS6000_BTM_FRSQRTE,	false, false },
24033  { "frsqrtes",		 RS6000_BTM_FRSQRTES,	false, false },
24034  { "popcntd",		 RS6000_BTM_POPCNTD,	false, false },
24035  { "cell",		 RS6000_BTM_CELL,	false, false },
24036  { "power8-vector",	 RS6000_BTM_P8_VECTOR,	false, false },
24037  { "power9-vector",	 RS6000_BTM_P9_VECTOR,	false, false },
24038  { "power9-misc",	 RS6000_BTM_P9_MISC,	false, false },
24039  { "crypto",		 RS6000_BTM_CRYPTO,	false, false },
24040  { "htm",		 RS6000_BTM_HTM,	false, false },
24041  { "hard-dfp",		 RS6000_BTM_DFP,	false, false },
24042  { "hard-float",	 RS6000_BTM_HARD_FLOAT,	false, false },
24043  { "long-double-128",	 RS6000_BTM_LDBL128,	false, false },
24044  { "powerpc64",	 RS6000_BTM_POWERPC64,  false, false },
24045  { "float128",		 RS6000_BTM_FLOAT128,   false, false },
24046  { "float128-hw",	 RS6000_BTM_FLOAT128_HW,false, false },
24047  { "mma",		 RS6000_BTM_MMA,	false, false },
24048  { "power10",		 RS6000_BTM_P10,	false, false },
24049};
24050
24051/* Option variables that we want to support inside attribute((target)) and
24052   #pragma GCC target operations.  */
24053
24054struct rs6000_opt_var {
24055  const char *name;		/* option name */
24056  size_t global_offset;		/* offset of the option in global_options.  */
24057  size_t target_offset;		/* offset of the option in target options.  */
24058};
24059
24060static struct rs6000_opt_var const rs6000_opt_vars[] =
24061{
24062  { "friz",
24063    offsetof (struct gcc_options, x_TARGET_FRIZ),
24064    offsetof (struct cl_target_option, x_TARGET_FRIZ), },
24065  { "avoid-indexed-addresses",
24066    offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
24067    offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
24068  { "longcall",
24069    offsetof (struct gcc_options, x_rs6000_default_long_calls),
24070    offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
24071  { "optimize-swaps",
24072    offsetof (struct gcc_options, x_rs6000_optimize_swaps),
24073    offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
24074  { "allow-movmisalign",
24075    offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
24076    offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
24077  { "sched-groups",
24078    offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
24079    offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
24080  { "always-hint",
24081    offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
24082    offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
24083  { "align-branch-targets",
24084    offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
24085    offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
24086  { "sched-prolog",
24087    offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24088    offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24089  { "sched-epilog",
24090    offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24091    offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24092  { "speculate-indirect-jumps",
24093    offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
24094    offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
24095};
24096
24097/* Inner function to handle attribute((target("..."))) and #pragma GCC target
24098   parsing.  Return true if there were no errors.  */
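/* For example, both of the following forms funnel their strings through
   this function:

     #pragma GCC target ("cpu=power9,no-vsx")
     int foo (void) __attribute__ ((target ("power8-vector,friz")));

   Each comma-separated token is "cpu=" or "tune=" followed by a CPU name,
   or an entry of rs6000_opt_masks/rs6000_opt_vars, optionally prefixed
   with "no-" to invert it.  */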
24099
24100static bool
24101rs6000_inner_target_options (tree args, bool attr_p)
24102{
24103  bool ret = true;
24104
24105  if (args == NULL_TREE)
24106    ;
24107
24108  else if (TREE_CODE (args) == STRING_CST)
24109    {
24110      char *p = ASTRDUP (TREE_STRING_POINTER (args));
24111      char *q;
24112
24113      while ((q = strtok (p, ",")) != NULL)
24114	{
24115	  bool error_p = false;
24116	  bool not_valid_p = false;
24117	  const char *cpu_opt = NULL;
24118
24119	  p = NULL;
24120	  if (startswith (q, "cpu="))
24121	    {
24122	      int cpu_index = rs6000_cpu_name_lookup (q+4);
24123	      if (cpu_index >= 0)
24124		rs6000_cpu_index = cpu_index;
24125	      else
24126		{
24127		  error_p = true;
24128		  cpu_opt = q+4;
24129		}
24130	    }
24131	  else if (startswith (q, "tune="))
24132	    {
24133	      int tune_index = rs6000_cpu_name_lookup (q+5);
24134	      if (tune_index >= 0)
24135		rs6000_tune_index = tune_index;
24136	      else
24137		{
24138		  error_p = true;
24139		  cpu_opt = q+5;
24140		}
24141	    }
24142	  else
24143	    {
24144	      size_t i;
24145	      bool invert = false;
24146	      char *r = q;
24147
24148	      error_p = true;
24149	      if (startswith (r, "no-"))
24150		{
24151		  invert = true;
24152		  r += 3;
24153		}
24154
24155	      for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
24156		if (strcmp (r, rs6000_opt_masks[i].name) == 0)
24157		  {
24158		    HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
24159
24160		    if (!rs6000_opt_masks[i].valid_target)
24161		      not_valid_p = true;
24162		    else
24163		      {
24164			error_p = false;
24165			rs6000_isa_flags_explicit |= mask;
24166
24167			/* VSX needs altivec, so -mvsx automagically sets
24168			   altivec and disables -mavoid-indexed-addresses.  */
24169			if (!invert)
24170			  {
24171			    if (mask == OPTION_MASK_VSX)
24172			      {
24173				mask |= OPTION_MASK_ALTIVEC;
24174				TARGET_AVOID_XFORM = 0;
24175			      }
24176			  }
24177
24178			if (rs6000_opt_masks[i].invert)
24179			  invert = !invert;
24180
24181			if (invert)
24182			  rs6000_isa_flags &= ~mask;
24183			else
24184			  rs6000_isa_flags |= mask;
24185		      }
24186		    break;
24187		  }
24188
24189	      if (error_p && !not_valid_p)
24190		{
24191		  for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
24192		    if (strcmp (r, rs6000_opt_vars[i].name) == 0)
24193		      {
24194			size_t j = rs6000_opt_vars[i].global_offset;
24195			*((int *) ((char *)&global_options + j)) = !invert;
24196			error_p = false;
24197			not_valid_p = false;
24198			break;
24199		      }
24200		}
24201	    }
24202
24203	  if (error_p)
24204	    {
24205	      const char *eprefix, *esuffix;
24206
24207	      ret = false;
24208	      if (attr_p)
24209		{
24210		  eprefix = "__attribute__((__target__(";
24211		  esuffix = ")))";
24212		}
24213	      else
24214		{
24215		  eprefix = "#pragma GCC target ";
24216		  esuffix = "";
24217		}
24218
24219	      if (cpu_opt)
24220		error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
24221		       q, esuffix);
24222	      else if (not_valid_p)
24223		error ("%s%qs%s is not allowed", eprefix, q, esuffix);
24224	      else
24225		error ("%s%qs%s is invalid", eprefix, q, esuffix);
24226	    }
24227	}
24228    }
24229
24230  else if (TREE_CODE (args) == TREE_LIST)
24231    {
24232      do
24233	{
24234	  tree value = TREE_VALUE (args);
24235	  if (value)
24236	    {
24237	      bool ret2 = rs6000_inner_target_options (value, attr_p);
24238	      if (!ret2)
24239		ret = false;
24240	    }
24241	  args = TREE_CHAIN (args);
24242	}
24243      while (args != NULL_TREE);
24244    }
24245
24246  else
24247    {
24248      error ("attribute %<target%> argument not a string");
24249      return false;
24250    }
24251
24252  return ret;
24253}
24254
24255/* Print out the target options as a list for -mdebug=target.  */
24256
24257static void
24258rs6000_debug_target_options (tree args, const char *prefix)
24259{
24260  if (args == NULL_TREE)
24261    fprintf (stderr, "%s<NULL>", prefix);
24262
24263  else if (TREE_CODE (args) == STRING_CST)
24264    {
24265      char *p = ASTRDUP (TREE_STRING_POINTER (args));
24266      char *q;
24267
24268      while ((q = strtok (p, ",")) != NULL)
24269	{
24270	  p = NULL;
24271	  fprintf (stderr, "%s\"%s\"", prefix, q);
24272	  prefix = ", ";
24273	}
24274    }
24275
24276  else if (TREE_CODE (args) == TREE_LIST)
24277    {
24278      do
24279	{
24280	  tree value = TREE_VALUE (args);
24281	  if (value)
24282	    {
24283	      rs6000_debug_target_options (value, prefix);
24284	      prefix = ", ";
24285	    }
24286	  args = TREE_CHAIN (args);
24287	}
24288      while (args != NULL_TREE);
24289    }
24290
24291  else
24292    gcc_unreachable ();
24293
24294  return;
24295}
24296
24297
24298/* Hook to validate attribute((target("..."))).  */
24299
24300static bool
24301rs6000_valid_attribute_p (tree fndecl,
24302			  tree ARG_UNUSED (name),
24303			  tree args,
24304			  int flags)
24305{
24306  struct cl_target_option cur_target;
24307  bool ret;
24308  tree old_optimize;
24309  tree new_target, new_optimize;
24310  tree func_optimize;
24311
24312  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
24313
24314  if (TARGET_DEBUG_TARGET)
24315    {
24316      tree tname = DECL_NAME (fndecl);
24317      fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
24318      if (tname)
24319	fprintf (stderr, "function: %.*s\n",
24320		 (int) IDENTIFIER_LENGTH (tname),
24321		 IDENTIFIER_POINTER (tname));
24322      else
24323	fprintf (stderr, "function: unknown\n");
24324
24325      fprintf (stderr, "args:");
24326      rs6000_debug_target_options (args, " ");
24327      fprintf (stderr, "\n");
24328
24329      if (flags)
24330	fprintf (stderr, "flags: 0x%x\n", flags);
24331
24332      fprintf (stderr, "--------------------\n");
24333    }
24334
24335  /* attribute((target("default"))) does nothing, beyond
24336     affecting multi-versioning.  */
24337  if (TREE_VALUE (args)
24338      && TREE_CODE (TREE_VALUE (args)) == STRING_CST
24339      && TREE_CHAIN (args) == NULL_TREE
24340      && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
24341    return true;
24342
24343  old_optimize = build_optimization_node (&global_options,
24344					  &global_options_set);
24345  func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
24346
24347  /* If the function changed the optimization levels as well as setting target
24348     options, start with the optimizations specified.  */
24349  if (func_optimize && func_optimize != old_optimize)
24350    cl_optimization_restore (&global_options, &global_options_set,
24351			     TREE_OPTIMIZATION (func_optimize));
24352
24353  /* The target attributes may also change some optimization flags, so update
24354     the optimization options if necessary.  */
24355  cl_target_option_save (&cur_target, &global_options, &global_options_set);
24356  rs6000_cpu_index = rs6000_tune_index = -1;
24357  ret = rs6000_inner_target_options (args, true);
24358
24359  /* Set up any additional state.  */
24360  if (ret)
24361    {
24362      ret = rs6000_option_override_internal (false);
24363      new_target = build_target_option_node (&global_options,
24364					     &global_options_set);
24365    }
24366  else
24367    new_target = NULL;
24368
24369  new_optimize = build_optimization_node (&global_options,
24370					  &global_options_set);
24371
24372  if (!new_target)
24373    ret = false;
24374
24375  else if (fndecl)
24376    {
24377      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
24378
24379      if (old_optimize != new_optimize)
24380	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
24381    }
24382
24383  cl_target_option_restore (&global_options, &global_options_set, &cur_target);
24384
24385  if (old_optimize != new_optimize)
24386    cl_optimization_restore (&global_options, &global_options_set,
24387			     TREE_OPTIMIZATION (old_optimize));
24388
24389  return ret;
24390}
24391
24392
24393/* Hook to validate the current #pragma GCC target and set the state, and
24394   update the macros based on what was changed.  If ARGS is NULL, then
24395   POP_TARGET is used to reset the options.  */
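/* A typical sequence that reaches this hook is sketched below:

     #pragma GCC push_options
     #pragma GCC target ("cpu=power10,mma")
     (code compiled with the modified ISA flags)
     #pragma GCC pop_options

   where the pop_options pragma re-enters this function with ARGS == NULL
   and POP_TARGET set to the previously saved option node.  */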
24396
24397bool
24398rs6000_pragma_target_parse (tree args, tree pop_target)
24399{
24400  tree prev_tree = build_target_option_node (&global_options,
24401					     &global_options_set);
24402  tree cur_tree;
24403  struct cl_target_option *prev_opt, *cur_opt;
24404  HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
24405  HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
24406
24407  if (TARGET_DEBUG_TARGET)
24408    {
24409      fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
24410      fprintf (stderr, "args:");
24411      rs6000_debug_target_options (args, " ");
24412      fprintf (stderr, "\n");
24413
24414      if (pop_target)
24415	{
24416	  fprintf (stderr, "pop_target:\n");
24417	  debug_tree (pop_target);
24418	}
24419      else
24420	fprintf (stderr, "pop_target: <NULL>\n");
24421
24422      fprintf (stderr, "--------------------\n");
24423    }
24424
24425  if (! args)
24426    {
24427      cur_tree = ((pop_target)
24428		  ? pop_target
24429		  : target_option_default_node);
24430      cl_target_option_restore (&global_options, &global_options_set,
24431				TREE_TARGET_OPTION (cur_tree));
24432    }
24433  else
24434    {
24435      rs6000_cpu_index = rs6000_tune_index = -1;
24436      if (!rs6000_inner_target_options (args, false)
24437	  || !rs6000_option_override_internal (false)
24438	  || (cur_tree = build_target_option_node (&global_options,
24439						   &global_options_set))
24440	     == NULL_TREE)
24441	{
24442	  if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
24443	    fprintf (stderr, "invalid pragma\n");
24444
24445	  return false;
24446	}
24447    }
24448
24449  target_option_current_node = cur_tree;
24450  rs6000_activate_target_options (target_option_current_node);
24451
24452  /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24453     change the macros that are defined.  */
24454  if (rs6000_target_modify_macros_ptr)
24455    {
24456      prev_opt    = TREE_TARGET_OPTION (prev_tree);
24457      prev_bumask = prev_opt->x_rs6000_builtin_mask;
24458      prev_flags  = prev_opt->x_rs6000_isa_flags;
24459
24460      cur_opt     = TREE_TARGET_OPTION (cur_tree);
24461      cur_flags   = cur_opt->x_rs6000_isa_flags;
24462      cur_bumask  = cur_opt->x_rs6000_builtin_mask;
24463
24464      diff_bumask = (prev_bumask ^ cur_bumask);
24465      diff_flags  = (prev_flags ^ cur_flags);
24466
24467      if ((diff_flags != 0) || (diff_bumask != 0))
24468	{
24469	  /* Delete old macros.  */
24470	  rs6000_target_modify_macros_ptr (false,
24471					   prev_flags & diff_flags,
24472					   prev_bumask & diff_bumask);
24473
24474	  /* Define new macros.  */
24475	  rs6000_target_modify_macros_ptr (true,
24476					   cur_flags & diff_flags,
24477					   cur_bumask & diff_bumask);
24478	}
24479    }
24480
24481  return true;
24482}
24483
24484
24485/* Remember the last target of rs6000_set_current_function.  */
24486static GTY(()) tree rs6000_previous_fndecl;
24487
24488/* Restore target's globals from NEW_TREE and invalidate the
24489   rs6000_previous_fndecl cache.  */
24490
24491void
24492rs6000_activate_target_options (tree new_tree)
24493{
24494  cl_target_option_restore (&global_options, &global_options_set,
24495			    TREE_TARGET_OPTION (new_tree));
24496  if (TREE_TARGET_GLOBALS (new_tree))
24497    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
24498  else if (new_tree == target_option_default_node)
24499    restore_target_globals (&default_target_globals);
24500  else
24501    TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
24502  rs6000_previous_fndecl = NULL_TREE;
24503}
24504
24505/* Establish appropriate back-end context for processing the function
24506   FNDECL.  The argument might be NULL to indicate processing at top
24507   level, outside of any function scope.  */
24508static void
24509rs6000_set_current_function (tree fndecl)
24510{
24511  if (TARGET_DEBUG_TARGET)
24512    {
24513      fprintf (stderr, "\n==================== rs6000_set_current_function");
24514
24515      if (fndecl)
24516	fprintf (stderr, ", fndecl %s (%p)",
24517		 (DECL_NAME (fndecl)
24518		  ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
24519		  : "<unknown>"), (void *)fndecl);
24520
24521      if (rs6000_previous_fndecl)
24522	fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
24523
24524      fprintf (stderr, "\n");
24525    }
24526
24527  /* Only change the context if the function changes.  This hook is called
24528     several times in the course of compiling a function, and we don't want to
24529     slow things down too much or call target_reinit when it isn't safe.  */
24530  if (fndecl == rs6000_previous_fndecl)
24531    return;
24532
24533  tree old_tree;
24534  if (rs6000_previous_fndecl == NULL_TREE)
24535    old_tree = target_option_current_node;
24536  else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
24537    old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
24538  else
24539    old_tree = target_option_default_node;
24540
24541  tree new_tree;
24542  if (fndecl == NULL_TREE)
24543    {
24544      if (old_tree != target_option_current_node)
24545	new_tree = target_option_current_node;
24546      else
24547	new_tree = NULL_TREE;
24548    }
24549  else
24550    {
24551      new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24552      if (new_tree == NULL_TREE)
24553	new_tree = target_option_default_node;
24554    }
24555
24556  if (TARGET_DEBUG_TARGET)
24557    {
24558      if (new_tree)
24559	{
24560	  fprintf (stderr, "\nnew fndecl target specific options:\n");
24561	  debug_tree (new_tree);
24562	}
24563
24564      if (old_tree)
24565	{
24566	  fprintf (stderr, "\nold fndecl target specific options:\n");
24567	  debug_tree (old_tree);
24568	}
24569
24570      if (old_tree != NULL_TREE || new_tree != NULL_TREE)
24571	fprintf (stderr, "--------------------\n");
24572    }
24573
24574  if (new_tree && old_tree != new_tree)
24575    rs6000_activate_target_options (new_tree);
24576
24577  if (fndecl)
24578    rs6000_previous_fndecl = fndecl;
24579}
24580
24581
24582/* Save the current options.  */
24583
24584static void
24585rs6000_function_specific_save (struct cl_target_option *ptr,
24586			       struct gcc_options *opts,
24587			       struct gcc_options */* opts_set */)
24588{
24589  ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
24590  ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
24591}
24592
24593/* Restore the current options.  */
24594
24595static void
24596rs6000_function_specific_restore (struct gcc_options *opts,
24597				  struct gcc_options */* opts_set */,
24598				  struct cl_target_option *ptr)
24599
24600{
24601  opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
24602  opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
24603  (void) rs6000_option_override_internal (false);
24604}
24605
24606/* Print the current options.  */
24607
24608static void
24609rs6000_function_specific_print (FILE *file, int indent,
24610				struct cl_target_option *ptr)
24611{
24612  rs6000_print_isa_options (file, indent, "Isa options set",
24613			    ptr->x_rs6000_isa_flags);
24614
24615  rs6000_print_isa_options (file, indent, "Isa options explicit",
24616			    ptr->x_rs6000_isa_flags_explicit);
24617}
24618
24619/* Helper function to print the current isa or misc options on a line.  */
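/* The output is along these lines (a sketch; the exact flags vary):

     Isa options set = 0x...: -maltivec, -mvsx, -mno-crypto, \
			      -mpower8-vector, ...

   Options whose mask bit is clear (or set, for inverted entries) get a
   "no-" prefix, and long lines wrap with a backslash at column 120.  */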
24620
24621static void
24622rs6000_print_options_internal (FILE *file,
24623			       int indent,
24624			       const char *string,
24625			       HOST_WIDE_INT flags,
24626			       const char *prefix,
24627			       const struct rs6000_opt_mask *opts,
24628			       size_t num_elements)
24629{
24630  size_t i;
24631  size_t start_column = 0;
24632  size_t cur_column;
24633  size_t max_column = 120;
24634  size_t prefix_len = strlen (prefix);
24635  size_t comma_len = 0;
24636  const char *comma = "";
24637
24638  if (indent)
24639    start_column += fprintf (file, "%*s", indent, "");
24640
24641  if (!flags)
24642    {
24643      fprintf (file, DEBUG_FMT_S, string, "<none>");
24644      return;
24645    }
24646
24647  start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
24648
24649  /* Print the various mask options.  */
24650  cur_column = start_column;
24651  for (i = 0; i < num_elements; i++)
24652    {
24653      bool invert = opts[i].invert;
24654      const char *name = opts[i].name;
24655      const char *no_str = "";
24656      HOST_WIDE_INT mask = opts[i].mask;
24657      size_t len = comma_len + prefix_len + strlen (name);
24658
24659      if (!invert)
24660	{
24661	  if ((flags & mask) == 0)
24662	    {
24663	      no_str = "no-";
24664	      len += strlen ("no-");
24665	    }
24666
24667	  flags &= ~mask;
24668	}
24669
24670      else
24671	{
24672	  if ((flags & mask) != 0)
24673	    {
24674	      no_str = "no-";
24675	      len += strlen ("no-");
24676	    }
24677
24678	  flags |= mask;
24679	}
24680
24681      cur_column += len;
24682      if (cur_column > max_column)
24683	{
24684	  fprintf (file, ", \\\n%*s", (int)start_column, "");
24685	  cur_column = start_column + len;
24686	  comma = "";
24687	}
24688
24689      fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
24690      comma = ", ";
24691      comma_len = strlen (", ");
24692    }
24693
24694  fputs ("\n", file);
24695}
24696
24697/* Helper function to print the current isa options on a line.  */
24698
24699static void
24700rs6000_print_isa_options (FILE *file, int indent, const char *string,
24701			  HOST_WIDE_INT flags)
24702{
24703  rs6000_print_options_internal (file, indent, string, flags, "-m",
24704				 &rs6000_opt_masks[0],
24705				 ARRAY_SIZE (rs6000_opt_masks));
24706}
24707
24708static void
24709rs6000_print_builtin_options (FILE *file, int indent, const char *string,
24710			      HOST_WIDE_INT flags)
24711{
24712  rs6000_print_options_internal (file, indent, string, flags, "",
24713				 &rs6000_builtin_mask_names[0],
24714				 ARRAY_SIZE (rs6000_builtin_mask_names));
24715}
24716
24717/* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
24718   2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
24719   -mupper-regs-df, etc.).
24720
24721   If the user used -mno-power8-vector, we need to turn off all of the implicit
24722   ISA 2.07 and 3.0 options that relate to the vector unit.
24723
24724   If the user used -mno-power9-vector, we need to turn off all of the implicit
24725   ISA 3.0 options that relate to the vector unit.
24726
24727   This function does not handle explicit options such as the user specifying
24728   -mdirect-move.  These are handled in rs6000_option_override_internal, and
24729   the appropriate error is given if needed.
24730
24731   We return a mask of all of the implicit options that should not be enabled
24732   by default.  */
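/* For example, "-mcpu=power9 -mno-vsx" silently drops the vector options
   that VSX would have implied, while an explicit "-mno-vsx
   -mpower9-vector" combination is diagnosed below with
   "-mno-vsx turns off -mpower9-vector".  */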
24733
24734static HOST_WIDE_INT
24735rs6000_disable_incompatible_switches (void)
24736{
24737  HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
24738  size_t i, j;
24739
24740  static const struct {
24741    const HOST_WIDE_INT no_flag;	/* flag explicitly turned off.  */
24742    const HOST_WIDE_INT dep_flags;	/* flags that depend on this option.  */
24743    const char *const name;		/* name of the switch.  */
24744  } flags[] = {
24745    { OPTION_MASK_P9_VECTOR,	OTHER_P9_VECTOR_MASKS,	"power9-vector"	},
24746    { OPTION_MASK_P8_VECTOR,	OTHER_P8_VECTOR_MASKS,	"power8-vector"	},
24747    { OPTION_MASK_VSX,		OTHER_VSX_VECTOR_MASKS,	"vsx"		},
24748    { OPTION_MASK_ALTIVEC,	OTHER_ALTIVEC_MASKS,	"altivec"	},
24749  };
24750
24751  for (i = 0; i < ARRAY_SIZE (flags); i++)
24752    {
24753      HOST_WIDE_INT no_flag = flags[i].no_flag;
24754
24755      if ((rs6000_isa_flags & no_flag) == 0
24756	  && (rs6000_isa_flags_explicit & no_flag) != 0)
24757	{
24758	  HOST_WIDE_INT dep_flags = flags[i].dep_flags;
24759	  HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
24760				     & rs6000_isa_flags
24761				     & dep_flags);
24762
24763	  if (set_flags)
24764	    {
24765	      for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
24766		if ((set_flags & rs6000_opt_masks[j].mask) != 0)
24767		  {
24768		    set_flags &= ~rs6000_opt_masks[j].mask;
24769		    error ("%<-mno-%s%> turns off %<-m%s%>",
24770			   flags[i].name,
24771			   rs6000_opt_masks[j].name);
24772		  }
24773
24774	      gcc_assert (!set_flags);
24775	    }
24776
24777	  rs6000_isa_flags &= ~dep_flags;
24778	  ignore_masks |= no_flag | dep_flags;
24779	}
24780    }
24781
24782  return ignore_masks;
24783}
24784
24785
24786/* Helper function for printing the function name when debugging.  */
24787
24788static const char *
24789get_decl_name (tree fn)
24790{
24791  tree name;
24792
24793  if (!fn)
24794    return "<null>";
24795
24796  name = DECL_NAME (fn);
24797  if (!name)
24798    return "<no-name>";
24799
24800  return IDENTIFIER_POINTER (name);
24801}
24802
24803/* Return the clone id of the target we are compiling code for in a target
24804   clone.  The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
24805   the priority list for the target clones (ordered from lowest to
24806   highest).  */
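/* A hypothetical declaration that ends up here (each clone version
   carries a synthesized "target" attribute):

     __attribute__ ((target_clones ("cpu=power9", "default")))
     long func (long a, long b);

   The "default" version gets CLONE_DEFAULT (0); the power9 version gets
   the highest CLONE_* index whose isa_mask its target flags enable.  */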
24807
24808static int
24809rs6000_clone_priority (tree fndecl)
24810{
24811  tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24812  HOST_WIDE_INT isa_masks;
24813  int ret = CLONE_DEFAULT;
24814  tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
24815  const char *attrs_str = NULL;
24816
24817  attrs = TREE_VALUE (TREE_VALUE (attrs));
24818  attrs_str = TREE_STRING_POINTER (attrs);
24819
24820  /* Return priority zero for the default function.  Otherwise return the
24821     priority of the ISA needed by the function.  */
24822  if (strcmp (attrs_str, "default") != 0)
24823    {
24824      if (fn_opts == NULL_TREE)
24825	fn_opts = target_option_default_node;
24826
24827      if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
24828	isa_masks = rs6000_isa_flags;
24829      else
24830	isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
24831
24832      for (ret = CLONE_MAX - 1; ret != 0; ret--)
24833	if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
24834	  break;
24835    }
24836
24837  if (TARGET_DEBUG_TARGET)
24838    fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
24839	     get_decl_name (fndecl), ret);
24840
24841  return ret;
24842}
24843
24844/* This compares the priority of target features in function DECL1 and DECL2.
24845   It returns positive value if DECL1 is higher priority, negative value if
24846   DECL2 is higher priority and 0 if they are the same.  Note, priorities are
24847   ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0).  */
24848
24849static int
24850rs6000_compare_version_priority (tree decl1, tree decl2)
24851{
24852  int priority1 = rs6000_clone_priority (decl1);
24853  int priority2 = rs6000_clone_priority (decl2);
24854  int ret = priority1 - priority2;
24855
24856  if (TARGET_DEBUG_TARGET)
24857    fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
24858	     get_decl_name (decl1), get_decl_name (decl2), ret);
24859
24860  return ret;
24861}
24862
24863/* Make a dispatcher declaration for the multi-versioned function DECL.
24864   Calls to the DECL function will be replaced with calls to the dispatcher
24865   by the front-end.  Returns the decl of the dispatcher function.  */
24866
24867static tree
24868rs6000_get_function_versions_dispatcher (void *decl)
24869{
24870  tree fn = (tree) decl;
24871  struct cgraph_node *node = NULL;
24872  struct cgraph_node *default_node = NULL;
24873  struct cgraph_function_version_info *node_v = NULL;
24874  struct cgraph_function_version_info *first_v = NULL;
24875
24876  tree dispatch_decl = NULL;
24877
24878  struct cgraph_function_version_info *default_version_info = NULL;
24879  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
24880
24881  if (TARGET_DEBUG_TARGET)
24882    fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
24883	     get_decl_name (fn));
24884
24885  node = cgraph_node::get (fn);
24886  gcc_assert (node != NULL);
24887
24888  node_v = node->function_version ();
24889  gcc_assert (node_v != NULL);
24890
24891  if (node_v->dispatcher_resolver != NULL)
24892    return node_v->dispatcher_resolver;
24893
24894  /* Find the default version and make it the first node.  */
24895  first_v = node_v;
24896  /* Go to the beginning of the chain.  */
24897  while (first_v->prev != NULL)
24898    first_v = first_v->prev;
24899
24900  default_version_info = first_v;
24901  while (default_version_info != NULL)
24902    {
24903      const tree decl2 = default_version_info->this_node->decl;
24904      if (is_function_default_version (decl2))
24905        break;
24906      default_version_info = default_version_info->next;
24907    }
24908
24909  /* If there is no default node, just return NULL.  */
24910  if (default_version_info == NULL)
24911    return NULL;
24912
24913  /* Make default info the first node.  */
24914  if (first_v != default_version_info)
24915    {
24916      default_version_info->prev->next = default_version_info->next;
24917      if (default_version_info->next)
24918        default_version_info->next->prev = default_version_info->prev;
24919      first_v->prev = default_version_info;
24920      default_version_info->next = first_v;
24921      default_version_info->prev = NULL;
24922    }
24923
24924  default_node = default_version_info->this_node;
24925
24926#ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
24927  error_at (DECL_SOURCE_LOCATION (default_node->decl),
24928	    "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
24929	    "exports hardware capability bits");
24930#else
24931
24932  if (targetm.has_ifunc_p ())
24933    {
24934      struct cgraph_function_version_info *it_v = NULL;
24935      struct cgraph_node *dispatcher_node = NULL;
24936      struct cgraph_function_version_info *dispatcher_version_info = NULL;
24937
24938      /* Right now, the dispatching is done via ifunc.  */
24939      dispatch_decl = make_dispatcher_decl (default_node->decl);
24940
24941      dispatcher_node = cgraph_node::get_create (dispatch_decl);
24942      gcc_assert (dispatcher_node != NULL);
24943      dispatcher_node->dispatcher_function = 1;
24944      dispatcher_version_info
24945	= dispatcher_node->insert_new_function_version ();
24946      dispatcher_version_info->next = default_version_info;
24947      dispatcher_node->definition = 1;
24948
24949      /* Set the dispatcher for all the versions.  */
24950      it_v = default_version_info;
24951      while (it_v != NULL)
24952	{
24953	  it_v->dispatcher_resolver = dispatch_decl;
24954	  it_v = it_v->next;
24955	}
24956    }
24957  else
24958    {
24959      error_at (DECL_SOURCE_LOCATION (default_node->decl),
24960		"multiversioning needs %<ifunc%> which is not supported "
24961		"on this target");
24962    }
24963#endif
24964
24965  return dispatch_decl;
24966}
24967
24968/* Make the resolver function decl to dispatch the versions of a multi-
24969   versioned function, DEFAULT_DECL.  Create an empty basic block in the
24970   resolver and store the pointer in EMPTY_BB.  Return the decl of the resolver
24971   function.  */
24972
24973static tree
24974make_resolver_func (const tree default_decl,
24975		    const tree dispatch_decl,
24976		    basic_block *empty_bb)
24977{
24978  /* Make the resolver function static.  The resolver function returns
24979     void *.  */
24980  tree decl_name = clone_function_name (default_decl, "resolver");
24981  const char *resolver_name = IDENTIFIER_POINTER (decl_name);
24982  tree type = build_function_type_list (ptr_type_node, NULL_TREE);
24983  tree decl = build_fn_decl (resolver_name, type);
24984  SET_DECL_ASSEMBLER_NAME (decl, decl_name);
24985
24986  DECL_NAME (decl) = decl_name;
24987  TREE_USED (decl) = 1;
24988  DECL_ARTIFICIAL (decl) = 1;
24989  DECL_IGNORED_P (decl) = 0;
24990  TREE_PUBLIC (decl) = 0;
24991  DECL_UNINLINABLE (decl) = 1;
24992
24993  /* Resolver is not external, body is generated.  */
24994  DECL_EXTERNAL (decl) = 0;
24995  DECL_EXTERNAL (dispatch_decl) = 0;
24996
24997  DECL_CONTEXT (decl) = NULL_TREE;
24998  DECL_INITIAL (decl) = make_node (BLOCK);
24999  DECL_STATIC_CONSTRUCTOR (decl) = 0;
25000
25001  if (DECL_COMDAT_GROUP (default_decl)
25002      || TREE_PUBLIC (default_decl))
25003    {
25004      /* In this case, each translation unit with a call to this
25005	 versioned function will put out a resolver.  Ensure it
25006	 is comdat to keep just one copy.  */
25007      DECL_COMDAT (decl) = 1;
25008      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
25009    }
25010  else
25011    TREE_PUBLIC (dispatch_decl) = 0;
25012
25013  /* Build result decl and add to function_decl.  */
25014  tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
25015  DECL_CONTEXT (t) = decl;
25016  DECL_ARTIFICIAL (t) = 1;
25017  DECL_IGNORED_P (t) = 1;
25018  DECL_RESULT (decl) = t;
25019
25020  gimplify_function_tree (decl);
25021  push_cfun (DECL_STRUCT_FUNCTION (decl));
25022  *empty_bb = init_lowered_empty_function (decl, false,
25023					   profile_count::uninitialized ());
25024
25025  cgraph_node::add_new_function (decl, true);
25026  symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
25027
25028  pop_cfun ();
25029
25030  /* Mark dispatch_decl as "ifunc" with resolver as resolver_name.  */
25031  DECL_ATTRIBUTES (dispatch_decl)
25032    = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
25033
25034  cgraph_node::create_same_body_alias (dispatch_decl, decl);
25035
25036  return decl;
25037}
25038
25039/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
25040   return a pointer to VERSION_DECL if we are running on a machine that
25041   supports the hardware architecture bits indexed by CLONE_ISA.  This function will
25042   be called during version dispatch to decide which function version to
25043   execute.  It returns the basic block at the end, to which more conditions
25044   can be added.  */
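/* In sketch form, the GIMPLE appended for a non-default clone is:

     cond_var = __builtin_cpu_supports ("arch_3_00");
     if (cond_var != 0)
       return (void *) &VERSION_DECL;

   where "arch_3_00" stands in for rs6000_clone_map[clone_isa].name, and
   the false edge falls through to the block testing the next clone.  */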
25045
25046static basic_block
25047add_condition_to_bb (tree function_decl, tree version_decl,
25048		     int clone_isa, basic_block new_bb)
25049{
25050  push_cfun (DECL_STRUCT_FUNCTION (function_decl));
25051
25052  gcc_assert (new_bb != NULL);
25053  gimple_seq gseq = bb_seq (new_bb);
25054
25056  tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
25057			      build_fold_addr_expr (version_decl));
25058  tree result_var = create_tmp_var (ptr_type_node);
25059  gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
25060  gimple *return_stmt = gimple_build_return (result_var);
25061
25062  if (clone_isa == CLONE_DEFAULT)
25063    {
25064      gimple_seq_add_stmt (&gseq, convert_stmt);
25065      gimple_seq_add_stmt (&gseq, return_stmt);
25066      set_bb_seq (new_bb, gseq);
25067      gimple_set_bb (convert_stmt, new_bb);
25068      gimple_set_bb (return_stmt, new_bb);
25069      pop_cfun ();
25070      return new_bb;
25071    }
25072
25073  tree bool_zero = build_int_cst (bool_int_type_node, 0);
25074  tree cond_var = create_tmp_var (bool_int_type_node);
25075  tree predicate_decl = rs6000_builtin_decls[(int) RS6000_BIF_CPU_SUPPORTS];
25076  const char *arg_str = rs6000_clone_map[clone_isa].name;
25077  tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
25078  gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
25079  gimple_call_set_lhs (call_cond_stmt, cond_var);
25080
25081  gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
25082  gimple_set_bb (call_cond_stmt, new_bb);
25083  gimple_seq_add_stmt (&gseq, call_cond_stmt);
25084
25085  gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
25086					    NULL_TREE, NULL_TREE);
25087  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
25088  gimple_set_bb (if_else_stmt, new_bb);
25089  gimple_seq_add_stmt (&gseq, if_else_stmt);
25090
25091  gimple_seq_add_stmt (&gseq, convert_stmt);
25092  gimple_seq_add_stmt (&gseq, return_stmt);
25093  set_bb_seq (new_bb, gseq);
25094
25095  basic_block bb1 = new_bb;
25096  edge e12 = split_block (bb1, if_else_stmt);
25097  basic_block bb2 = e12->dest;
25098  e12->flags &= ~EDGE_FALLTHRU;
25099  e12->flags |= EDGE_TRUE_VALUE;
25100
25101  edge e23 = split_block (bb2, return_stmt);
25102  gimple_set_bb (convert_stmt, bb2);
25103  gimple_set_bb (return_stmt, bb2);
25104
25105  basic_block bb3 = e23->dest;
25106  make_edge (bb1, bb3, EDGE_FALSE_VALUE);
25107
25108  remove_edge (e23);
25109  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
25110
25111  pop_cfun ();
25112  return bb3;
25113}
25114
25115/* This function generates the dispatch function for multi-versioned functions.
25116   DISPATCH_DECL is the function which will contain the dispatch logic.
25117   FNDECLS is a vector of the candidate function versions to dispatch among.
25118   EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
25119   code is generated.  */
25120
25121static int
25122dispatch_function_versions (tree dispatch_decl,
25123			    void *fndecls_p,
25124			    basic_block *empty_bb)
25125{
25126  int ix;
25127  tree ele;
25128  vec<tree> *fndecls;
25129  tree clones[CLONE_MAX];
25130
25131  if (TARGET_DEBUG_TARGET)
25132    fputs ("dispatch_function_versions, top\n", stderr);
25133
25134  gcc_assert (dispatch_decl != NULL
25135	      && fndecls_p != NULL
25136	      && empty_bb != NULL);
25137
25138  /* fndecls_p is actually a vector.  */
25139  fndecls = static_cast<vec<tree> *> (fndecls_p);
25140
25141  /* At least one more version other than the default.  */
25142  gcc_assert (fndecls->length () >= 2);
25143
25144  /* The first version in the vector is the default decl.  */
25145  memset ((void *) clones, '\0', sizeof (clones));
25146  clones[CLONE_DEFAULT] = (*fndecls)[0];
25147
  /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
     on the PowerPC (on x86_64, it is not a NOP).  The builtin function
     __builtin_cpu_supports ensures that the TOC fields are set up by requiring
     a recent glibc.  If we ever need to call __builtin_cpu_init, we would need
     to insert the code here to do the call.  */
25153
25154  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
25155    {
25156      int priority = rs6000_clone_priority (ele);
25157      if (!clones[priority])
25158	clones[priority] = ele;
25159    }
25160
25161  for (ix = CLONE_MAX - 1; ix >= 0; ix--)
25162    if (clones[ix])
25163      {
25164	if (TARGET_DEBUG_TARGET)
25165	  fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
25166		   ix, get_decl_name (clones[ix]));
25167
25168	*empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
25169					 *empty_bb);
25170      }
25171
25172  return 0;
25173}
25174
25175/* Generate the dispatching code body to dispatch multi-versioned function
25176   DECL.  The target hook is called to process the "target" attributes and
25177   provide the code to dispatch the right function at run-time.  NODE points
25178   to the dispatcher decl whose body will be created.  */
25179
25180static tree
25181rs6000_generate_version_dispatcher_body (void *node_p)
25182{
25183  tree resolver;
25184  basic_block empty_bb;
25185  struct cgraph_node *node = (cgraph_node *) node_p;
25186  struct cgraph_function_version_info *ninfo = node->function_version ();
25187
25188  if (ninfo->dispatcher_resolver)
25189    return ninfo->dispatcher_resolver;
25190
25191  /* node is going to be an alias, so remove the finalized bit.  */
25192  node->definition = false;
25193
25194  /* The first version in the chain corresponds to the default version.  */
25195  ninfo->dispatcher_resolver = resolver
25196    = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
25197
25198  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_generate_version_dispatcher_body, %s\n",
25200	     get_decl_name (resolver));
25201
25202  push_cfun (DECL_STRUCT_FUNCTION (resolver));
25203  auto_vec<tree, 2> fn_ver_vec;
25204
25205  for (struct cgraph_function_version_info *vinfo = ninfo->next;
25206       vinfo;
25207       vinfo = vinfo->next)
25208    {
25209      struct cgraph_node *version = vinfo->this_node;
25210      /* Check for virtual functions here again, as by this time it should
25211	 have been determined if this function needs a vtable index or
25212	 not.  This happens for methods in derived classes that override
25213	 virtual methods in base classes but are not explicitly marked as
25214	 virtual.  */
      if (DECL_VINDEX (version->decl))
	sorry ("virtual function multiversioning not supported");
25217
25218      fn_ver_vec.safe_push (version->decl);
25219    }
25220
25221  dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
25222  cgraph_edge::rebuild_edges ();
25223  pop_cfun ();
25224  return resolver;
25225}
25226
25227/* Hook to decide if we need to scan function gimple statements to
25228   collect target specific information for inlining, and update the
25229   corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
25230   to predict which ISA feature is used at this time.  Return true
25231   if we need to scan, otherwise return false.  */
25232
25233static bool
25234rs6000_need_ipa_fn_target_info (const_tree decl,
25235				unsigned int &info ATTRIBUTE_UNUSED)
25236{
25237  tree target = DECL_FUNCTION_SPECIFIC_TARGET (decl);
25238  if (!target)
25239    target = target_option_default_node;
25240  struct cl_target_option *opts = TREE_TARGET_OPTION (target);
25241
  /* See PR102059; we only handle HTM for now, so only do the subsequent
     scanning when the HTM feature is enabled.  */
  if (opts->x_rs6000_isa_flags & OPTION_MASK_HTM)
    return true;
25246
25247  return false;
25248}
25249
25250/* Hook to update target specific information INFO for inlining by
25251   checking the given STMT.  Return false if we don't need to scan
25252   any more, otherwise return true.  */
25253
25254static bool
25255rs6000_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt)
25256{
25257  /* Assume inline asm can use any instruction features.  */
25258  if (gimple_code (stmt) == GIMPLE_ASM)
25259    {
      /* We should set any bits we are concerned with here; for now
	 OPTION_MASK_HTM is the only bit we care about.  */
25262      info |= RS6000_FN_TARGET_INFO_HTM;
25263      return false;
25264    }
25265  else if (gimple_code (stmt) == GIMPLE_CALL)
25266    {
25267      tree fndecl = gimple_call_fndecl (stmt);
25268      if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD))
25269	{
25270	  enum rs6000_gen_builtins fcode
25271	    = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
25272	  /* HTM bifs definitely exploit HTM insns.  */
25273	  if (bif_is_htm (rs6000_builtin_info[fcode]))
25274	    {
25275	      info |= RS6000_FN_TARGET_INFO_HTM;
25276	      return false;
25277	    }
25278	}
25279    }
25280
25281  return true;
25282}
25283
25284/* Hook to determine if one function can safely inline another.  */
25285
25286static bool
25287rs6000_can_inline_p (tree caller, tree callee)
25288{
25289  bool ret = false;
25290  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
25291  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
25292
25293  /* If the callee has no option attributes, then it is ok to inline.  */
25294  if (!callee_tree)
25295    ret = true;
25296
25297  else
25298    {
25299      HOST_WIDE_INT caller_isa;
25300      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25301      HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
25302      HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
25303
25304      /* If the caller has option attributes, then use them.
25305	 Otherwise, use the command line options.  */
25306      if (caller_tree)
25307	caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
25308      else
25309	caller_isa = rs6000_isa_flags;
25310
25311      cgraph_node *callee_node = cgraph_node::get (callee);
25312      if (ipa_fn_summaries && ipa_fn_summaries->get (callee_node) != NULL)
25313	{
25314	  unsigned int info = ipa_fn_summaries->get (callee_node)->target_info;
25315	  if ((info & RS6000_FN_TARGET_INFO_HTM) == 0)
25316	    {
25317	      callee_isa &= ~OPTION_MASK_HTM;
25318	      explicit_isa &= ~OPTION_MASK_HTM;
25319	    }
25320	}
25321
25322      /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
25323	 purposes.  */
25324      callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25325      explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25326
25327      /* The callee's options must be a subset of the caller's options, i.e.
25328	 a vsx function may inline an altivec function, but a no-vsx function
25329	 must not inline a vsx function.  However, for those options that the
25330	 callee has explicitly enabled or disabled, then we must enforce that
25331	 the callee's and caller's options match exactly; see PR70010.  */
25332      if (((caller_isa & callee_isa) == callee_isa)
25333	  && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
25334	ret = true;
25335    }
25336
25337  if (TARGET_DEBUG_TARGET)
25338    fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
25339	     get_decl_name (caller), get_decl_name (callee),
25340	     (ret ? "can" : "cannot"));
25341
25342  return ret;
25343}
25344
/* Allocate a stack temp and fix up the address so it meets the particular
   memory requirements (either offsettable or REG+REG addressing).  */
25347
25348rtx
25349rs6000_allocate_stack_temp (machine_mode mode,
25350			    bool offsettable_p,
25351			    bool reg_reg_p)
25352{
25353  rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
25354  rtx addr = XEXP (stack, 0);
25355  int strict_p = reload_completed;
25356
25357  if (!legitimate_indirect_address_p (addr, strict_p))
25358    {
25359      if (offsettable_p
25360	  && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
25361	stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25362
25363      else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
25364	stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25365    }
25366
25367  return stack;
25368}
25369
25370/* Given a memory reference, if it is not a reg or reg+reg addressing,
25371   convert to such a form to deal with memory reference instructions
25372   like STFIWX and LDBRX that only take reg+reg addressing.  */
25373
25374rtx
25375rs6000_force_indexed_or_indirect_mem (rtx x)
25376{
25377  machine_mode mode = GET_MODE (x);
25378
25379  gcc_assert (MEM_P (x));
25380  if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
25381    {
25382      rtx addr = XEXP (x, 0);
25383      if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
25384	{
25385	  rtx reg = XEXP (addr, 0);
25386	  HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
25387	  rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
25388	  gcc_assert (REG_P (reg));
25389	  emit_insn (gen_add3_insn (reg, reg, size_rtx));
25390	  addr = reg;
25391	}
25392      else if (GET_CODE (addr) == PRE_MODIFY)
25393	{
25394	  rtx reg = XEXP (addr, 0);
25395	  rtx expr = XEXP (addr, 1);
25396	  gcc_assert (REG_P (reg));
25397	  gcc_assert (GET_CODE (expr) == PLUS);
25398	  emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
25399	  addr = reg;
25400	}
25401
25402      if (GET_CODE (addr) == PLUS)
25403	{
25404	  rtx op0 = XEXP (addr, 0);
25405	  rtx op1 = XEXP (addr, 1);
25406	  op0 = force_reg (Pmode, op0);
25407	  op1 = force_reg (Pmode, op1);
25408	  x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
25409	}
25410      else
25411	x = replace_equiv_address (x, force_reg (Pmode, addr));
25412    }
25413
25414  return x;
25415}
25416
25417/* Implement TARGET_LEGITIMATE_CONSTANT_P.
25418
   On the RS/6000, all integer constants are acceptable, although most won't
   be valid for particular insns.  Only easy FP constants are acceptable.  */
25421
25422static bool
25423rs6000_legitimate_constant_p (machine_mode mode, rtx x)
25424{
25425  if (TARGET_ELF && tls_referenced_p (x))
25426    return false;
25427
25428  if (CONST_DOUBLE_P (x))
25429    return easy_fp_constant (x, mode);
25430
25431  if (GET_CODE (x) == CONST_VECTOR)
25432    return easy_vector_constant (x, mode);
25433
25434  return true;
25435}
25436
25437#if TARGET_AIX_OS
25438/* Implement TARGET_PRECOMPUTE_TLS_P.
25439
   On AIX, TLS symbols are in the TOC, which is maintained in the
25441   constant pool.  AIX TOC TLS symbols need to be pre-computed, but
25442   must be considered legitimate constants.  */
25443
25444static bool
25445rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
25446{
25447  return tls_referenced_p (x);
25448}
25449#endif
25450
25451
25452/* Return TRUE iff the sequence ending in LAST sets the static chain.  */
25453
25454static bool
25455chain_already_loaded (rtx_insn *last)
25456{
25457  for (; last != NULL; last = PREV_INSN (last))
25458    {
25459      if (NONJUMP_INSN_P (last))
25460	{
25461	  rtx patt = PATTERN (last);
25462
25463	  if (GET_CODE (patt) == SET)
25464	    {
25465	      rtx lhs = XEXP (patt, 0);
25466
25467	      if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
25468		return true;
25469	    }
25470	}
25471    }
25472  return false;
25473}
25474
25475/* Expand code to perform a call under the AIX or ELFv2 ABI.  */
25476
25477void
25478rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25479{
25480  rtx func = func_desc;
25481  rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
25482  rtx toc_load = NULL_RTX;
25483  rtx toc_restore = NULL_RTX;
25484  rtx func_addr;
25485  rtx abi_reg = NULL_RTX;
25486  rtx call[5];
25487  int n_call;
25488  rtx insn;
25489  bool is_pltseq_longcall;
25490
25491  if (global_tlsarg)
25492    tlsarg = global_tlsarg;
25493
25494  /* Handle longcall attributes.  */
25495  is_pltseq_longcall = false;
25496  if ((INTVAL (cookie) & CALL_LONG) != 0
25497      && GET_CODE (func_desc) == SYMBOL_REF)
25498    {
25499      func = rs6000_longcall_ref (func_desc, tlsarg);
25500      if (TARGET_PLTSEQ)
25501	is_pltseq_longcall = true;
25502    }
25503
25504  /* Handle indirect calls.  */
25505  if (!SYMBOL_REF_P (func)
25506      || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
25507    {
25508      if (!rs6000_pcrel_p ())
25509	{
25510	  /* Save the TOC into its reserved slot before the call,
25511	     and prepare to restore it after the call.  */
25512	  rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
25513	  rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
25514						 gen_rtvec (1, stack_toc_offset),
25515						 UNSPEC_TOCSLOT);
25516	  toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
25517
25518	  /* Can we optimize saving the TOC in the prologue or
25519	     do we need to do it at every call?  */
25520	  if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
25521	    cfun->machine->save_toc_in_prologue = true;
25522	  else
25523	    {
25524	      rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25525	      rtx stack_toc_mem = gen_frame_mem (Pmode,
25526						 gen_rtx_PLUS (Pmode, stack_ptr,
25527							       stack_toc_offset));
25528	      MEM_VOLATILE_P (stack_toc_mem) = 1;
25529	      if (is_pltseq_longcall)
25530		{
25531		  rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
25532		  rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25533		  emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
25534		}
25535	      else
25536		emit_move_insn (stack_toc_mem, toc_reg);
25537	    }
25538	}
25539
25540      if (DEFAULT_ABI == ABI_ELFv2)
25541	{
25542	  /* A function pointer in the ELFv2 ABI is just a plain address, but
25543	     the ABI requires it to be loaded into r12 before the call.  */
25544	  func_addr = gen_rtx_REG (Pmode, 12);
25545	  emit_move_insn (func_addr, func);
25546	  abi_reg = func_addr;
25547	  /* Indirect calls via CTR are strongly preferred over indirect
25548	     calls via LR, so move the address there.  Needed to mark
25549	     this insn for linker plt sequence editing too.  */
25550	  func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25551	  if (is_pltseq_longcall)
25552	    {
25553	      rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
25554	      rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25555	      emit_insn (gen_rtx_SET (func_addr, mark_func));
25556	      v = gen_rtvec (2, func_addr, func_desc);
25557	      func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25558	    }
25559	  else
25560	    emit_move_insn (func_addr, abi_reg);
25561	}
25562      else
25563	{
25564	  /* A function pointer under AIX is a pointer to a data area whose
25565	     first word contains the actual address of the function, whose
25566	     second word contains a pointer to its TOC, and whose third word
25567	     contains a value to place in the static chain register (r11).
25568	     Note that if we load the static chain, our "trampoline" need
25569	     not have any executable code.  */
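	  /* Concretely, for 64-bit Pmode the descriptor words read below sit
	     at offsets 0 (entry address), 8 (TOC pointer) and 16 (static
	     chain); for 32-bit they sit at offsets 0, 4 and 8.  */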
25570
25571	  /* Load up address of the actual function.  */
25572	  func = force_reg (Pmode, func);
25573	  func_addr = gen_reg_rtx (Pmode);
25574	  emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
25575
25576	  /* Indirect calls via CTR are strongly preferred over indirect
25577	     calls via LR, so move the address there.  */
25578	  rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
25579	  emit_move_insn (ctr_reg, func_addr);
25580	  func_addr = ctr_reg;
25581
25582	  /* Prepare to load the TOC of the called function.  Note that the
25583	     TOC load must happen immediately before the actual call so
25584	     that unwinding the TOC registers works correctly.  See the
25585	     comment in frob_update_context.  */
25586	  rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
25587	  rtx func_toc_mem = gen_rtx_MEM (Pmode,
25588					  gen_rtx_PLUS (Pmode, func,
25589							func_toc_offset));
25590	  toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
25591
25592	  /* If we have a static chain, load it up.  But, if the call was
25593	     originally direct, the 3rd word has not been written since no
25594	     trampoline has been built, so we ought not to load it, lest we
25595	     override a static chain value.  */
25596	  if (!(GET_CODE (func_desc) == SYMBOL_REF
25597		&& SYMBOL_REF_FUNCTION_P (func_desc))
25598	      && TARGET_POINTERS_TO_NESTED_FUNCTIONS
25599	      && !chain_already_loaded (get_current_sequence ()->next->last))
25600	    {
25601	      rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
25602	      rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
25603	      rtx func_sc_mem = gen_rtx_MEM (Pmode,
25604					     gen_rtx_PLUS (Pmode, func,
25605							   func_sc_offset));
25606	      emit_move_insn (sc_reg, func_sc_mem);
25607	      abi_reg = sc_reg;
25608	    }
25609	}
25610    }
25611  else
25612    {
25613      /* No TOC register needed for calls from PC-relative callers.  */
25614      if (!rs6000_pcrel_p ())
25615	/* Direct calls use the TOC: for local calls, the callee will
25616	   assume the TOC register is set; for non-local calls, the
25617	   PLT stub needs the TOC register.  */
25618	abi_reg = toc_reg;
25619      func_addr = func;
25620    }
25621
25622  /* Create the call.  */
25623  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25624  if (value != NULL_RTX)
25625    call[0] = gen_rtx_SET (value, call[0]);
25626  call[1] = gen_rtx_USE (VOIDmode, cookie);
25627  n_call = 2;
25628
25629  if (toc_load)
25630    call[n_call++] = toc_load;
25631  if (toc_restore)
25632    call[n_call++] = toc_restore;
25633
25634  call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25635
25636  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
25637  insn = emit_call_insn (insn);
25638
25639  /* Mention all registers defined by the ABI to hold information
25640     as uses in CALL_INSN_FUNCTION_USAGE.  */
25641  if (abi_reg)
25642    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25643}
25644
25645/* Expand code to perform a sibling call under the AIX or ELFv2 ABI.  */
25646
25647void
25648rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25649{
25650  rtx call[2];
25651  rtx insn;
25652  rtx r12 = NULL_RTX;
25653  rtx func_addr = func_desc;
25654
25655  if (global_tlsarg)
25656    tlsarg = global_tlsarg;
25657
25658  /* Handle longcall attributes.  */
25659  if (INTVAL (cookie) & CALL_LONG && SYMBOL_REF_P (func_desc))
25660    {
25661      /* PCREL can do a sibling call to a longcall function
25662	 because we don't need to restore the TOC register.  */
25663      gcc_assert (rs6000_pcrel_p ());
25664      func_desc = rs6000_longcall_ref (func_desc, tlsarg);
25665    }
25666  else
25667    gcc_assert (INTVAL (cookie) == 0);
25668
25669  /* For ELFv2, r12 and CTR need to hold the function address
25670     for an indirect call.  */
25671  if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
25672    {
25673      r12 = gen_rtx_REG (Pmode, 12);
25674      emit_move_insn (r12, func_desc);
25675      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25676      emit_move_insn (func_addr, r12);
25677    }
25678
25679  /* Create the call.  */
25680  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25681  if (value != NULL_RTX)
25682    call[0] = gen_rtx_SET (value, call[0]);
25683
25684  call[1] = simple_return_rtx;
25685
25686  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
25687  insn = emit_call_insn (insn);
25688
25689  /* Note use of the TOC register.  */
25690  if (!rs6000_pcrel_p ())
25691    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
25692	     gen_rtx_REG (Pmode, TOC_REGNUM));
25693
25694  /* Note use of r12.  */
25695  if (r12)
25696    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
25697}
25698
25699/* Expand code to perform a call under the SYSV4 ABI.  */
25700
25701void
25702rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25703{
25704  rtx func = func_desc;
25705  rtx func_addr;
25706  rtx call[4];
25707  rtx insn;
25708  rtx abi_reg = NULL_RTX;
25709  int n;
25710
25711  if (global_tlsarg)
25712    tlsarg = global_tlsarg;
25713
25714  /* Handle longcall attributes.  */
25715  if ((INTVAL (cookie) & CALL_LONG) != 0
25716      && GET_CODE (func_desc) == SYMBOL_REF)
25717    {
25718      func = rs6000_longcall_ref (func_desc, tlsarg);
25719      /* If the longcall was implemented as an inline PLT call using
25720	 PLT unspecs then func will be REG:r11.  If not, func will be
25721	 a pseudo reg.  The inline PLT call sequence supports lazy
25722	 linking (and longcalls to functions in dlopen'd libraries).
25723	 The other style of longcalls don't.  The lazy linking entry
25724	 to the dynamic symbol resolver requires r11 be the function
25725	 address (as it is for linker generated PLT stubs).  Ensure
25726	 r11 stays valid to the bctrl by marking r11 used by the call.  */
25727      if (TARGET_PLTSEQ)
25728	abi_reg = func;
25729    }
25730
25731  /* Handle indirect calls.  */
25732  if (GET_CODE (func) != SYMBOL_REF)
25733    {
25734      func = force_reg (Pmode, func);
25735
25736      /* Indirect calls via CTR are strongly preferred over indirect
25737	 calls via LR, so move the address there.  That can't be left
25738	 to reload because we want to mark every instruction in an
25739	 inline PLT call sequence with a reloc, enabling the linker to
25740	 edit the sequence back to a direct call when that makes sense.  */
25741      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25742      if (abi_reg)
25743	{
25744	  rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25745	  rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25746	  emit_insn (gen_rtx_SET (func_addr, mark_func));
25747	  v = gen_rtvec (2, func_addr, func_desc);
25748	  func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25749	}
25750      else
25751	emit_move_insn (func_addr, func);
25752    }
25753  else
25754    func_addr = func;
25755
25756  /* Create the call.  */
25757  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25758  if (value != NULL_RTX)
25759    call[0] = gen_rtx_SET (value, call[0]);
25760
25761  call[1] = gen_rtx_USE (VOIDmode, cookie);
25762  n = 2;
25763  if (TARGET_SECURE_PLT
25764      && flag_pic
25765      && GET_CODE (func_addr) == SYMBOL_REF
25766      && !SYMBOL_REF_LOCAL_P (func_addr))
25767    call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
25768
25769  call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25770
25771  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
25772  insn = emit_call_insn (insn);
25773  if (abi_reg)
25774    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25775}
25776
25777/* Expand code to perform a sibling call under the SysV4 ABI.  */
25778
25779void
25780rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25781{
25782  rtx func = func_desc;
25783  rtx func_addr;
25784  rtx call[3];
25785  rtx insn;
25786  rtx abi_reg = NULL_RTX;
25787
25788  if (global_tlsarg)
25789    tlsarg = global_tlsarg;
25790
25791  /* Handle longcall attributes.  */
25792  if ((INTVAL (cookie) & CALL_LONG) != 0
25793      && GET_CODE (func_desc) == SYMBOL_REF)
25794    {
25795      func = rs6000_longcall_ref (func_desc, tlsarg);
25796      /* If the longcall was implemented as an inline PLT call using
25797	 PLT unspecs then func will be REG:r11.  If not, func will be
25798	 a pseudo reg.  The inline PLT call sequence supports lazy
25799	 linking (and longcalls to functions in dlopen'd libraries).
25800	 The other style of longcalls don't.  The lazy linking entry
25801	 to the dynamic symbol resolver requires r11 be the function
25802	 address (as it is for linker generated PLT stubs).  Ensure
25803	 r11 stays valid to the bctr by marking r11 used by the call.  */
25804      if (TARGET_PLTSEQ)
25805	abi_reg = func;
25806    }
25807
25808  /* Handle indirect calls.  */
25809  if (GET_CODE (func) != SYMBOL_REF)
25810    {
25811      func = force_reg (Pmode, func);
25812
25813      /* Indirect sibcalls must go via CTR.  That can't be left to
25814	 reload because we want to mark every instruction in an inline
25815	 PLT call sequence with a reloc, enabling the linker to edit
25816	 the sequence back to a direct call when that makes sense.  */
25817      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25818      if (abi_reg)
25819	{
25820	  rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25821	  rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25822	  emit_insn (gen_rtx_SET (func_addr, mark_func));
25823	  v = gen_rtvec (2, func_addr, func_desc);
25824	  func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25825	}
25826      else
25827	emit_move_insn (func_addr, func);
25828    }
25829  else
25830    func_addr = func;
25831
25832  /* Create the call.  */
25833  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25834  if (value != NULL_RTX)
25835    call[0] = gen_rtx_SET (value, call[0]);
25836
25837  call[1] = gen_rtx_USE (VOIDmode, cookie);
25838  call[2] = simple_return_rtx;
25839
25840  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25841  insn = emit_call_insn (insn);
25842  if (abi_reg)
25843    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25844}
25845
25846#if TARGET_MACHO
25847
/* Expand code to perform a call under the Darwin ABI.
   Modulo handling of mlongcall, this is much the same as sysv.
   If/when the longcall optimisation is removed, we could drop this
   code and use the sysv case (taking care to avoid the tls stuff).

   We can use this for sibcalls too, if needed.  */
25854
25855void
25856rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
25857		      rtx cookie, bool sibcall)
25858{
25859  rtx func = func_desc;
25860  rtx func_addr;
25861  rtx call[3];
25862  rtx insn;
25863  int cookie_val = INTVAL (cookie);
25864  bool make_island = false;
25865
  /* Handle longcall attributes.  There are two cases for Darwin:
     1) Newer linkers are capable of synthesising any branch islands needed.
     2) We need a helper branch island synthesised by the compiler.
     The second case has mostly been retired and we don't use it for m64.
     In fact, it is an optimisation; we could just indirect as sysv does...
     ... however, keep it for backwards compatibility for now.
     If we're going to use this, then we need to keep the CALL_LONG bit set,
     so that we can pick up the special insn form later.  */
25874  if ((cookie_val & CALL_LONG) != 0
25875      && GET_CODE (func_desc) == SYMBOL_REF)
25876    {
25877      /* FIXME: the longcall opt should not hang off this flag, it is most
25878	 likely incorrect for kernel-mode code-generation.  */
25879      if (darwin_symbol_stubs && TARGET_32BIT)
25880	make_island = true; /* Do nothing yet, retain the CALL_LONG flag.  */
25881      else
25882	{
25883	  /* The linker is capable of doing this, but the user explicitly
25884	     asked for -mlongcall, so we'll do the 'normal' version.  */
25885	  func = rs6000_longcall_ref (func_desc, NULL_RTX);
25886	  cookie_val &= ~CALL_LONG; /* Handled, zap it.  */
25887	}
25888    }
25889
25890  /* Handle indirect calls.  */
25891  if (GET_CODE (func) != SYMBOL_REF)
25892    {
25893      func = force_reg (Pmode, func);
25894
25895      /* Indirect calls via CTR are strongly preferred over indirect
25896	 calls via LR, and are required for indirect sibcalls, so move
25897	 the address there.   */
25898      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25899      emit_move_insn (func_addr, func);
25900    }
25901  else
25902    func_addr = func;
25903
25904  /* Create the call.  */
25905  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25906  if (value != NULL_RTX)
25907    call[0] = gen_rtx_SET (value, call[0]);
25908
25909  call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
25910
25911  if (sibcall)
25912    call[2] = simple_return_rtx;
25913  else
25914    call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25915
25916  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25917  insn = emit_call_insn (insn);
25918  /* Now we have the debug info in the insn, we can set up the branch island
25919     if we're using one.  */
25920  if (make_island)
25921    {
25922      tree funname = get_identifier (XSTR (func_desc, 0));
25923
25924      if (no_previous_def (funname))
25925	{
25926	  rtx label_rtx = gen_label_rtx ();
25927	  char *label_buf, temp_buf[256];
25928	  ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
25929				       CODE_LABEL_NUMBER (label_rtx));
25930	  label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
25931	  tree labelname = get_identifier (label_buf);
25932	  add_compiler_branch_island (labelname, funname,
25933				     insn_line ((const rtx_insn*)insn));
25934	}
25935     }
25936}
25937#endif
25938
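/* Expand code to perform a call under the Darwin ABI; a thin wrapper that
   forwards to rs6000_call_darwin_1 on Mach-O targets and is unreachable
   elsewhere.  */
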
25939void
25940rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25941		    rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25942{
25943#if TARGET_MACHO
25944  rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
25945#else
25946  gcc_unreachable();
25947#endif
25948}
25949
25950
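/* Expand code to perform a sibling call under the Darwin ABI; as above, but
   passing SIBCALL as true to rs6000_call_darwin_1.  */
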
25951void
25952rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25953		       rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25954{
25955#if TARGET_MACHO
25956  rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
25957#else
25958  gcc_unreachable();
25959#endif
25960}
25961
25962/* Return whether we should generate PC-relative code for FNDECL.  */
25963bool
25964rs6000_fndecl_pcrel_p (const_tree fndecl)
25965{
25966  if (DEFAULT_ABI != ABI_ELFv2)
25967    return false;
25968
25969  struct cl_target_option *opts = target_opts_for_fn (fndecl);
25970
25971  return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25972	  && TARGET_CMODEL == CMODEL_MEDIUM);
25973}
25974
25975/* Return whether we should generate PC-relative code for *FN.  */
25976bool
25977rs6000_function_pcrel_p (struct function *fn)
25978{
25979  if (DEFAULT_ABI != ABI_ELFv2)
25980    return false;
25981
25982  /* Optimize usual case.  */
25983  if (fn == cfun)
25984    return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25985	    && TARGET_CMODEL == CMODEL_MEDIUM);
25986
25987  return rs6000_fndecl_pcrel_p (fn->decl);
25988}
25989
25990/* Return whether we should generate PC-relative code for the current
25991   function.  */
25992bool
25993rs6000_pcrel_p ()
25994{
25995  return (DEFAULT_ABI == ABI_ELFv2
25996	  && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25997	  && TARGET_CMODEL == CMODEL_MEDIUM);
25998}
25999
26000
26001/* Given an address (ADDR), a mode (MODE), and what the format of the
26002   non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
26003   for the address.  */
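
/* As illustrative examples of the classification (register numbers chosen
   arbitrarily):

     (reg 3)                          -> INSN_FORM_BASE_REG
     (plus (reg 3) (reg 4))           -> INSN_FORM_X
     (plus (reg 3) (const_int 8))     -> INSN_FORM_D, _DS or _DQ, depending
                                         on MODE and NON_PREFIXED_FORMAT
     (plus (reg 3) (const_int 70000)) -> INSN_FORM_PREFIXED_NUMERIC, when
                                         prefixed insns are available.  */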
26004
26005enum insn_form
26006address_to_insn_form (rtx addr,
26007		      machine_mode mode,
26008		      enum non_prefixed_form non_prefixed_format)
26009{
26010  /* Single register is easy.  */
26011  if (REG_P (addr) || SUBREG_P (addr))
26012    return INSN_FORM_BASE_REG;
26013
26014  /* If the non prefixed instruction format doesn't support offset addressing,
26015     make sure only indexed addressing is allowed.
26016
26017     We special case SDmode so that the register allocator does not try to move
26018     SDmode through GPR registers, but instead uses the 32-bit integer load and
26019     store instructions for the floating point registers.  */
26020  if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
26021    {
26022      if (GET_CODE (addr) != PLUS)
26023	return INSN_FORM_BAD;
26024
26025      rtx op0 = XEXP (addr, 0);
26026      rtx op1 = XEXP (addr, 1);
26027      if (!REG_P (op0) && !SUBREG_P (op0))
26028	return INSN_FORM_BAD;
26029
26030      if (!REG_P (op1) && !SUBREG_P (op1))
26031	return INSN_FORM_BAD;
26032
26033      return INSN_FORM_X;
26034    }
26035
26036  /* Deal with update forms.  */
26037  if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
26038    return INSN_FORM_UPDATE;
26039
26040  /* Handle PC-relative symbols and labels.  Check for both local and
26041     external symbols.  Assume labels are always local.  TLS symbols
26042     are not PC-relative for rs6000.  */
26043  if (TARGET_PCREL)
26044    {
26045      if (LABEL_REF_P (addr))
26046	return INSN_FORM_PCREL_LOCAL;
26047
26048      if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
26049	{
26050	  if (!SYMBOL_REF_LOCAL_P (addr))
26051	    return INSN_FORM_PCREL_EXTERNAL;
26052	  else
26053	    return INSN_FORM_PCREL_LOCAL;
26054	}
26055    }
26056
26057  if (GET_CODE (addr) == CONST)
26058    addr = XEXP (addr, 0);
26059
26060  /* Recognize LO_SUM addresses used with TOC and 32-bit addressing.  */
26061  if (GET_CODE (addr) == LO_SUM)
26062    return INSN_FORM_LO_SUM;
26063
26064  /* Everything below must be an offset address of some form.  */
26065  if (GET_CODE (addr) != PLUS)
26066    return INSN_FORM_BAD;
26067
26068  rtx op0 = XEXP (addr, 0);
26069  rtx op1 = XEXP (addr, 1);
26070
26071  /* Check for indexed addresses.  */
26072  if (REG_P (op1) || SUBREG_P (op1))
26073    {
26074      if (REG_P (op0) || SUBREG_P (op0))
26075	return INSN_FORM_X;
26076
26077      return INSN_FORM_BAD;
26078    }
26079
26080  if (!CONST_INT_P (op1))
26081    return INSN_FORM_BAD;
26082
26083  HOST_WIDE_INT offset = INTVAL (op1);
26084  if (!SIGNED_INTEGER_34BIT_P (offset))
26085    return INSN_FORM_BAD;
26086
26087  /* Check for local and external PC-relative addresses.  Labels are always
26088     local.  TLS symbols are not PC-relative for rs6000.  */
26089  if (TARGET_PCREL)
26090    {
26091      if (LABEL_REF_P (op0))
26092	return INSN_FORM_PCREL_LOCAL;
26093
26094      if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
26095	{
26096	  if (!SYMBOL_REF_LOCAL_P (op0))
26097	    return INSN_FORM_PCREL_EXTERNAL;
26098	  else
26099	    return INSN_FORM_PCREL_LOCAL;
26100	}
26101    }
26102
26103  /* If it isn't PC-relative, the address must use a base register.  */
26104  if (!REG_P (op0) && !SUBREG_P (op0))
26105    return INSN_FORM_BAD;
26106
26107  /* Large offsets must be prefixed.  */
26108  if (!SIGNED_INTEGER_16BIT_P (offset))
26109    {
26110      if (TARGET_PREFIXED)
26111	return INSN_FORM_PREFIXED_NUMERIC;
26112
26113      return INSN_FORM_BAD;
26114    }
26115
26116  /* We have a 16-bit offset, see what default instruction format to use.  */
26117  if (non_prefixed_format == NON_PREFIXED_DEFAULT)
26118    {
26119      unsigned size = GET_MODE_SIZE (mode);
26120
      /* On 64-bit systems, assume 64-bit integers need to use DS form
	 addresses (for LD/STD).  VSX vectors need to use DQ form addresses
	 (for LXV and STXV).  TImode is problematic in that its normal usage
	 is expected to be GPRs where it wants a DS instruction format, but if
	 it goes into the vector registers, it wants a DQ instruction
	 format.  */
26127      if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
26128	non_prefixed_format = NON_PREFIXED_DS;
26129
26130      else if (TARGET_VSX && size >= 16
26131	       && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
26132	non_prefixed_format = NON_PREFIXED_DQ;
26133
26134      else
26135	non_prefixed_format = NON_PREFIXED_D;
26136    }
26137
26138  /* Classify the D/DS/DQ-form addresses.  */
26139  switch (non_prefixed_format)
26140    {
26141      /* Instruction format D, all 16 bits are valid.  */
26142    case NON_PREFIXED_D:
26143      return INSN_FORM_D;
26144
26145      /* Instruction format DS, bottom 2 bits must be 0.  */
26146    case NON_PREFIXED_DS:
26147      if ((offset & 3) == 0)
26148	return INSN_FORM_DS;
26149
26150      else if (TARGET_PREFIXED)
26151	return INSN_FORM_PREFIXED_NUMERIC;
26152
26153      else
26154	return INSN_FORM_BAD;
26155
26156      /* Instruction format DQ, bottom 4 bits must be 0.  */
26157    case NON_PREFIXED_DQ:
26158      if ((offset & 15) == 0)
26159	return INSN_FORM_DQ;
26160
26161      else if (TARGET_PREFIXED)
26162	return INSN_FORM_PREFIXED_NUMERIC;
26163
26164      else
26165	return INSN_FORM_BAD;
26166
26167    default:
26168      break;
26169    }
26170
26171  return INSN_FORM_BAD;
26172}
26173
26174/* Given address rtx ADDR for a load of MODE, is this legitimate for a
26175   non-prefixed D-form or X-form instruction?  NON_PREFIXED_FORMAT is
26176   given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
26177   a D-form or DS-form instruction.  X-form and base_reg are always
26178   allowed.  */
26179bool
26180address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
26181			   enum non_prefixed_form non_prefixed_format)
26182{
26183  enum insn_form result_form;
26184
26185  result_form = address_to_insn_form (addr, mode, non_prefixed_format);
26186
26187  switch (non_prefixed_format)
26188    {
26189    case NON_PREFIXED_D:
26190      switch (result_form)
26191	{
26192	case INSN_FORM_X:
26193	case INSN_FORM_D:
26194	case INSN_FORM_DS:
26195	case INSN_FORM_BASE_REG:
26196	  return true;
26197	default:
26198	  return false;
26199	}
26200      break;
26201    case NON_PREFIXED_DS:
26202      switch (result_form)
26203	{
26204	case INSN_FORM_X:
26205	case INSN_FORM_DS:
26206	case INSN_FORM_BASE_REG:
26207	  return true;
26208	default:
26209	  return false;
26210	}
26211      break;
26212    default:
26213      break;
26214    }
26215  return false;
26216}
26217
/* Return true if a REG with a given MODE is loaded from or stored into a MEM
   location that uses a non-prefixed D/DS/DQ-form address.  This is used to
   validate the load or store with the PCREL_OPT optimization to make sure it
   is an instruction that can be optimized.

   We need to specify the MODE separately from the REG to allow for loads that
   include zero/sign/float extension.  */
26225
26226bool
26227pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
26228{
  /* If the instruction is indexed-only, like LFIWAX/LXSIWAX, we cannot do
     the PCREL_OPT optimization.  */
26231  enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
26232  if (non_prefixed == NON_PREFIXED_X)
26233    return false;
26234
26235  /* Check if this is a non-prefixed D/DS/DQ-form instruction.  */
26236  rtx addr = XEXP (mem, 0);
26237  enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
26238  return (iform == INSN_FORM_BASE_REG
26239	  || iform == INSN_FORM_D
26240	  || iform == INSN_FORM_DS
26241	  || iform == INSN_FORM_DQ);
26242}
26243
26244/* Helper function to see if we're potentially looking at lfs/stfs.
26245   - PARALLEL containing a SET and a CLOBBER
26246   - stfs:
26247    - SET is from UNSPEC_SI_FROM_SF to MEM:SI
26248    - CLOBBER is a V4SF
26249   - lfs:
26250    - SET is from UNSPEC_SF_FROM_SI to REG:SF
26251    - CLOBBER is a DI
26252 */
26253
26254static bool
26255is_lfs_stfs_insn (rtx_insn *insn)
26256{
26257  rtx pattern = PATTERN (insn);
26258  if (GET_CODE (pattern) != PARALLEL)
26259    return false;
26260
26261  /* This should be a parallel with exactly one set and one clobber.  */
26262  if (XVECLEN (pattern, 0) != 2)
26263    return false;
26264
26265  rtx set = XVECEXP (pattern, 0, 0);
26266  if (GET_CODE (set) != SET)
26267    return false;
26268
26269  rtx clobber = XVECEXP (pattern, 0, 1);
26270  if (GET_CODE (clobber) != CLOBBER)
26271    return false;
26272
  /* For the stfs case, all we care about is that the destination of the SET
     is a mem:SI, the source is an UNSPEC_SI_FROM_SF, and the clobber is a
     scratch:V4SF; the lfs case is checked analogously below.  */
26276
26277  rtx dest = SET_DEST (set);
26278  rtx src = SET_SRC (set);
26279  rtx scratch = SET_DEST (clobber);
26280
26281  if (GET_CODE (src) != UNSPEC)
26282    return false;
26283
26284  /* stfs case.  */
26285  if (XINT (src, 1) == UNSPEC_SI_FROM_SF
26286      && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
26287      && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
26288    return true;
26289
26290  /* lfs case.  */
26291  if (XINT (src, 1) == UNSPEC_SF_FROM_SI
26292      && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
26293      && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
26294    return true;
26295
26296  return false;
26297}
26298
26299/* Helper function to take a REG and a MODE and turn it into the non-prefixed
26300   instruction format (D/DS/DQ) used for offset memory.  */
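
/* For example, an illustrative sampling of the mapping below, assuming a
   64-bit power9 or later target:

     GPR hard reg, DImode        -> NON_PREFIXED_DS  (ld/std)
     FPR hard reg, DFmode        -> NON_PREFIXED_D   (lfd/stfd)
     Altivec hard reg, V2DImode  -> NON_PREFIXED_DQ  (lxv/stxv)
     pseudo register             -> NON_PREFIXED_DEFAULT.  */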
26301
26302enum non_prefixed_form
26303reg_to_non_prefixed (rtx reg, machine_mode mode)
26304{
26305  /* If it isn't a register, use the defaults.  */
26306  if (!REG_P (reg) && !SUBREG_P (reg))
26307    return NON_PREFIXED_DEFAULT;
26308
26309  unsigned int r = reg_or_subregno (reg);
26310
26311  /* If we have a pseudo, use the default instruction format.  */
26312  if (!HARD_REGISTER_NUM_P (r))
26313    return NON_PREFIXED_DEFAULT;
26314
26315  unsigned size = GET_MODE_SIZE (mode);
26316
  /* FPR registers use D-form addressing for scalars, and DQ-form for vectors,
     IEEE 128-bit floating point, and 128-bit integers.  Before power9, only
     indexed addressing was available for vectors.  */
26320  if (FP_REGNO_P (r))
26321    {
26322      if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26323	return NON_PREFIXED_D;
26324
26325      else if (size < 8)
26326	return NON_PREFIXED_X;
26327
26328      else if (TARGET_VSX && size >= 16
26329	       && (VECTOR_MODE_P (mode)
26330		   || VECTOR_ALIGNMENT_P (mode)
26331		   || mode == TImode || mode == CTImode))
26332	return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
26333
26334      else
26335	return NON_PREFIXED_DEFAULT;
26336    }
26337
  /* Altivec registers use DS-form addressing for scalars, and DQ-form for
     vectors, IEEE 128-bit floating point, and 128-bit integers.  Before
     power9, only indexed addressing was available.  */
26341  else if (ALTIVEC_REGNO_P (r))
26342    {
26343      if (!TARGET_P9_VECTOR)
26344	return NON_PREFIXED_X;
26345
26346      if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26347	return NON_PREFIXED_DS;
26348
26349      else if (size < 8)
26350	return NON_PREFIXED_X;
26351
26352      else if (TARGET_VSX && size >= 16
26353	       && (VECTOR_MODE_P (mode)
26354		   || VECTOR_ALIGNMENT_P (mode)
26355		   || mode == TImode || mode == CTImode))
26356	return NON_PREFIXED_DQ;
26357
26358      else
26359	return NON_PREFIXED_DEFAULT;
26360    }
26361
  /* GPR registers use DS-form addressing for 64-bit items on 64-bit systems,
     and D-form otherwise.  Assume that any other register, such as LR, CRs,
     etc. will go through the GPR registers for memory operations.  */
26365  else if (TARGET_POWERPC64 && size >= 8)
26366    return NON_PREFIXED_DS;
26367
26368  return NON_PREFIXED_D;
26369}
26370
26371
26372/* Whether a load instruction is a prefixed instruction.  This is called from
26373   the prefixed attribute processing.  */
26374
26375bool
26376prefixed_load_p (rtx_insn *insn)
26377{
26378  /* Validate the insn to make sure it is a normal load insn.  */
26379  extract_insn_cached (insn);
26380  if (recog_data.n_operands < 2)
26381    return false;
26382
26383  rtx reg = recog_data.operand[0];
26384  rtx mem = recog_data.operand[1];
26385
26386  if (!REG_P (reg) && !SUBREG_P (reg))
26387    return false;
26388
26389  if (!MEM_P (mem))
26390    return false;
26391
26392  /* Prefixed load instructions do not support update or indexed forms.  */
26393  if (get_attr_indexed (insn) == INDEXED_YES
26394      || get_attr_update (insn) == UPDATE_YES)
26395    return false;
26396
26397  /* LWA uses the DS format instead of the D format that LWZ uses.  */
26398  enum non_prefixed_form non_prefixed;
26399  machine_mode reg_mode = GET_MODE (reg);
26400  machine_mode mem_mode = GET_MODE (mem);
26401
26402  if (mem_mode == SImode && reg_mode == DImode
26403      && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
26404    non_prefixed = NON_PREFIXED_DS;
26405
26406  else
26407    non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26408
26409  if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26410    return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
26411  else
26412    return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
26413}
26414
26415/* Whether a store instruction is a prefixed instruction.  This is called from
26416   the prefixed attribute processing.  */
26417
26418bool
26419prefixed_store_p (rtx_insn *insn)
26420{
26421  /* Validate the insn to make sure it is a normal store insn.  */
26422  extract_insn_cached (insn);
26423  if (recog_data.n_operands < 2)
26424    return false;
26425
26426  rtx mem = recog_data.operand[0];
26427  rtx reg = recog_data.operand[1];
26428
26429  if (!REG_P (reg) && !SUBREG_P (reg))
26430    return false;
26431
26432  if (!MEM_P (mem))
26433    return false;
26434
26435  /* Prefixed store instructions do not support update or indexed forms.  */
26436  if (get_attr_indexed (insn) == INDEXED_YES
26437      || get_attr_update (insn) == UPDATE_YES)
26438    return false;
26439
26440  machine_mode mem_mode = GET_MODE (mem);
26441  rtx addr = XEXP (mem, 0);
26442  enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26443
  /* Need to make sure we aren't looking at a stfs, which doesn't look like
     the other things reg_to_non_prefixed/address_is_prefixed
     look for.  */
26447  if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26448    return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
26449  else
26450    return address_is_prefixed (addr, mem_mode, non_prefixed);
26451}
26452
26453/* Whether a load immediate or add instruction is a prefixed instruction.  This
26454   is called from the prefixed attribute processing.  */
26455
26456bool
26457prefixed_paddi_p (rtx_insn *insn)
26458{
26459  rtx set = single_set (insn);
26460  if (!set)
26461    return false;
26462
26463  rtx dest = SET_DEST (set);
26464  rtx src = SET_SRC (set);
26465
26466  if (!REG_P (dest) && !SUBREG_P (dest))
26467    return false;
26468
26469  /* Is this a load immediate that can't be done with a simple ADDI or
26470     ADDIS?  */
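  /* For example, "li" covers constraint I (a signed 16-bit constant) and
     "lis" covers constraint L (a signed 16-bit constant shifted left 16
     bits); a constant such as 0x12345678 satisfies neither, and needs the
     prefixed "pli" (constraint eI, a 34-bit constant).  */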
26471  if (CONST_INT_P (src))
26472    return (satisfies_constraint_eI (src)
26473	    && !satisfies_constraint_I (src)
26474	    && !satisfies_constraint_L (src));
26475
26476  /* Is this a PADDI instruction that can't be done with a simple ADDI or
26477     ADDIS?  */
26478  if (GET_CODE (src) == PLUS)
26479    {
26480      rtx op1 = XEXP (src, 1);
26481
26482      return (CONST_INT_P (op1)
26483	      && satisfies_constraint_eI (op1)
26484	      && !satisfies_constraint_I (op1)
26485	      && !satisfies_constraint_L (op1));
26486    }
26487
26488  /* If not, is it a load of a PC-relative address?  */
26489  if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
26490    return false;
26491
26492  if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
26493    return false;
26494
26495  enum insn_form iform = address_to_insn_form (src, Pmode,
26496					       NON_PREFIXED_DEFAULT);
26497
26498  return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
26499}
26500
26501/* Whether the next instruction needs a 'p' prefix issued before the
26502   instruction is printed out.  */
26503static bool prepend_p_to_next_insn;
26504
26505/* Define FINAL_PRESCAN_INSN if some processing needs to be done before
26506   outputting the assembler code.  On the PowerPC, we remember if the current
26507   insn is a prefixed insn where we need to emit a 'p' before the insn.
26508
26509   In addition, if the insn is part of a PC-relative reference to an external
26510   label optimization, this is recorded also.  */
26511void
26512rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
26513{
26514  prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
26515			    == MAYBE_PREFIXED_YES
26516			    && get_attr_prefixed (insn) == PREFIXED_YES);
26517  return;
26518}
26519
/* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
   We use it to emit a 'p' for prefixed insns, based on the flag set in
   FINAL_PRESCAN_INSN.  */
26523void
26524rs6000_asm_output_opcode (FILE *stream)
26525{
26526  if (prepend_p_to_next_insn)
26527    {
26528      fprintf (stream, "p");
26529
      /* Reset the flag in the case where there are separate insn lines in the
	 sequence, so the 'p' is only emitted for the first line.  This shows
	 up when we are doing the PCREL_OPT optimization, where otherwise the
	 label created with %r<n> would get a leading 'p' printed.  */
26534      prepend_p_to_next_insn = false;
26535    }
26536
26537  return;
26538}
26539
26540/* Emit the relocation to tie the next instruction to a previous instruction
26541   that loads up an external address.  This is used to do the PCREL_OPT
26542   optimization.  Note, the label is generated after the PLD of the got
26543   pc-relative address to allow for the assembler to insert NOPs before the PLD
26544   instruction.  The operand is a constant integer that is the label
26545   number.  */
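
/* A sketch of the sequence being tied together (the assembler output is
   illustrative only, and "ext_sym" is a placeholder):

	pld 9,ext_sym@got@pcrel
   .Lpcrel0:
	...
	.reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
	lwz 3,0(9)

   where the linker may then rewrite the PLD and the dependent load into a
   single direct PC-relative load when the optimization applies.  */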
26546
26547void
26548output_pcrel_opt_reloc (rtx label_num)
26549{
26550  rtx operands[1] = { label_num };
26551  output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
26552		   operands);
26553}
26554
/* Adjust the length of an INSN.  LENGTH is the currently-computed length and
   should be adjusted to reflect any required changes.  This macro is used when
   there is some systematic length adjustment required that would be difficult
   to express in the length attribute.

   On the PowerPC, we use this to adjust the length of an instruction if one or
   more prefixed instructions are generated, using the attribute
   num_prefixed_insns.  A prefixed instruction is 8 bytes instead of 4, but the
   hardware requires that a prefixed instruction does not cross a 64-byte
   boundary.  This means the compiler has to assume the length of the first
   prefixed instruction is 12 bytes instead of 8 bytes.  Since the length is
   already set for the non-prefixed instruction, we just need to update for the
   difference.  */
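
/* For example (illustrative arithmetic): an insn whose non-prefixed length is
   4 and whose max_prefixed_insns attribute is 1 ends up with length
   4 + 4 * (1 + 1) = 12, i.e. 8 bytes for the prefixed instruction plus 4
   bytes of slack for a possible alignment NOP.  */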
26568
26569int
26570rs6000_adjust_insn_length (rtx_insn *insn, int length)
26571{
26572  if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
26573    {
26574      rtx pattern = PATTERN (insn);
26575      if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
26576	  && get_attr_prefixed (insn) == PREFIXED_YES)
26577	{
26578	  int num_prefixed = get_attr_max_prefixed_insns (insn);
26579	  length += 4 * (num_prefixed + 1);
26580	}
26581    }
26582
26583  return length;
26584}
26585
26586
26587#ifdef HAVE_GAS_HIDDEN
26588# define USE_HIDDEN_LINKONCE 1
26589#else
26590# define USE_HIDDEN_LINKONCE 0
26591#endif
26592
26593/* Fills in the label name that should be used for a 476 link stack thunk.  */
26594
26595void
26596get_ppc476_thunk_name (char name[32])
26597{
26598  gcc_assert (TARGET_LINK_STACK);
26599
26600  if (USE_HIDDEN_LINKONCE)
26601    sprintf (name, "__ppc476.get_thunk");
26602  else
26603    ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
26604}
26605
26606/* This function emits the simple thunk routine that is used to preserve
26607   the link stack on the 476 cpu.  */
26608
26609static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
26610static void
26611rs6000_code_end (void)
26612{
26613  char name[32];
26614  tree decl;
26615
26616  if (!TARGET_LINK_STACK)
26617    return;
26618
26619  get_ppc476_thunk_name (name);
26620
26621  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
26622		     build_function_type_list (void_type_node, NULL_TREE));
26623  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
26624				   NULL_TREE, void_type_node);
26625  TREE_PUBLIC (decl) = 1;
26626  TREE_STATIC (decl) = 1;
26627
26628#if RS6000_WEAK
26629  if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
26630    {
26631      cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
26632      targetm.asm_out.unique_section (decl, 0);
26633      switch_to_section (get_named_section (decl, NULL, 0));
26634      DECL_WEAK (decl) = 1;
26635      ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
26636      targetm.asm_out.globalize_label (asm_out_file, name);
26637      targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
26638      ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
26639    }
26640  else
26641#endif
26642    {
26643      switch_to_section (text_section);
26644      ASM_OUTPUT_LABEL (asm_out_file, name);
26645    }
26646
26647  DECL_INITIAL (decl) = make_node (BLOCK);
26648  current_function_decl = decl;
26649  allocate_struct_function (decl, false);
26650  init_function_start (decl);
26651  first_function_block_is_cold = false;
26652  /* Make sure unwind info is emitted for the thunk if needed.  */
26653  final_start_function (emit_barrier (), asm_out_file, 1);
26654
26655  fputs ("\tblr\n", asm_out_file);
26656
26657  final_end_function ();
26658  init_insn_lengths ();
26659  free_after_compilation (cfun);
26660  set_cfun (NULL);
26661  current_function_decl = NULL;
26662}
26663
26664/* Add r30 to hard reg set if the prologue sets it up and it is not
26665   pic_offset_table_rtx.  */
26666
26667static void
26668rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
26669{
26670  if (!TARGET_SINGLE_PIC_BASE
26671      && TARGET_TOC
26672      && TARGET_MINIMAL_TOC
26673      && !constant_pool_empty_p ())
26674    add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
26675  if (cfun->machine->split_stack_argp_used)
26676    add_to_hard_reg_set (&set->set, Pmode, 12);
26677
26678  /* Make sure the hard reg set doesn't include r2, which was possibly added
26679     via PIC_OFFSET_TABLE_REGNUM.  */
26680  if (TARGET_TOC)
26681    remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
26682}
26683
26684
26685/* Helper function for rs6000_split_logical to emit a logical instruction after
   splitting the operation into single GPR registers.
26687
26688   DEST is the destination register.
26689   OP1 and OP2 are the input source registers.
26690   CODE is the base operation (AND, IOR, XOR, NOT).
26691   MODE is the machine mode.
26692   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26693   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26694   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */
26695
26696static void
26697rs6000_split_logical_inner (rtx dest,
26698			    rtx op1,
26699			    rtx op2,
26700			    enum rtx_code code,
26701			    machine_mode mode,
26702			    bool complement_final_p,
26703			    bool complement_op1_p,
26704			    bool complement_op2_p)
26705{
26706  rtx bool_rtx;
26707
26708  /* Optimize AND of 0/0xffffffff and IOR/XOR of 0.  */
26709  if (op2 && CONST_INT_P (op2)
26710      && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
26711      && !complement_final_p && !complement_op1_p && !complement_op2_p)
26712    {
26713      HOST_WIDE_INT mask = GET_MODE_MASK (mode);
26714      HOST_WIDE_INT value = INTVAL (op2) & mask;
26715
26716      /* Optimize AND of 0 to just set 0.  Optimize AND of -1 to be a move.  */
26717      if (code == AND)
26718	{
26719	  if (value == 0)
26720	    {
26721	      emit_insn (gen_rtx_SET (dest, const0_rtx));
26722	      return;
26723	    }
26724
26725	  else if (value == mask)
26726	    {
26727	      if (!rtx_equal_p (dest, op1))
26728		emit_insn (gen_rtx_SET (dest, op1));
26729	      return;
26730	    }
26731	}
26732
      /* Optimize IOR/XOR of 0 to be a simple move.  Split large operations
	 into separate ORI/ORIS or XORI/XORIS instructions.  */
26735      else if (code == IOR || code == XOR)
26736	{
26737	  if (value == 0)
26738	    {
26739	      if (!rtx_equal_p (dest, op1))
26740		emit_insn (gen_rtx_SET (dest, op1));
26741	      return;
26742	    }
26743	}
26744    }
26745
26746  if (code == AND && mode == SImode
26747      && !complement_final_p && !complement_op1_p && !complement_op2_p)
26748    {
26749      emit_insn (gen_andsi3 (dest, op1, op2));
26750      return;
26751    }
26752
26753  if (complement_op1_p)
26754    op1 = gen_rtx_NOT (mode, op1);
26755
26756  if (complement_op2_p)
26757    op2 = gen_rtx_NOT (mode, op2);
26758
26759  /* For canonical RTL, if only one arm is inverted it is the first.  */
26760  if (!complement_op1_p && complement_op2_p)
26761    std::swap (op1, op2);
26762
26763  bool_rtx = ((code == NOT)
26764	      ? gen_rtx_NOT (mode, op1)
26765	      : gen_rtx_fmt_ee (code, mode, op1, op2));
26766
26767  if (complement_final_p)
26768    bool_rtx = gen_rtx_NOT (mode, bool_rtx);
26769
26770  emit_insn (gen_rtx_SET (dest, bool_rtx));
26771}
26772
26773/* Split a DImode AND/IOR/XOR with a constant on a 32-bit system.  These
26774   operations are split immediately during RTL generation to allow for more
26775   optimizations of the AND/IOR/XOR.
26776
   OPERANDS is an array containing the destination and two input operands.
   CODE is the base operation (AND, IOR, XOR, NOT).
   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */
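/* For example (a sketch of the intent, not a compiler dump): on a 32-bit
   target, a DImode IOR with the constant 0x12345678 leaves the high word as
   a simple move (IOR with 0), while the low word is split into two
   operations that map onto ORIS and ORI:

	tmp    = src_lo | 0x12340000
	dst_lo = tmp | 0x5678  */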
26785
26786static void
26787rs6000_split_logical_di (rtx operands[3],
26788			 enum rtx_code code,
26789			 bool complement_final_p,
26790			 bool complement_op1_p,
26791			 bool complement_op2_p)
26792{
26793  const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
26794  const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
26795  const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
26796  enum hi_lo { hi = 0, lo = 1 };
26797  rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
26798  size_t i;
26799
26800  op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
26801  op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
26802  op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
26803  op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
26804
26805  if (code == NOT)
26806    op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
26807  else
26808    {
26809      if (!CONST_INT_P (operands[2]))
26810	{
26811	  op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
26812	  op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
26813	}
26814      else
26815	{
26816	  HOST_WIDE_INT value = INTVAL (operands[2]);
26817	  HOST_WIDE_INT value_hi_lo[2];
26818
26819	  gcc_assert (!complement_final_p);
26820	  gcc_assert (!complement_op1_p);
26821	  gcc_assert (!complement_op2_p);
26822
26823	  value_hi_lo[hi] = value >> 32;
26824	  value_hi_lo[lo] = value & lower_32bits;
26825
26826	  for (i = 0; i < 2; i++)
26827	    {
26828	      HOST_WIDE_INT sub_value = value_hi_lo[i];
26829
26830	      if (sub_value & sign_bit)
26831		sub_value |= upper_32bits;
26832
26833	      op2_hi_lo[i] = GEN_INT (sub_value);
26834
26835	      /* If this is an AND instruction, check to see if we need to load
26836		 the value in a register.  */
26837	      if (code == AND && sub_value != -1 && sub_value != 0
26838		  && !and_operand (op2_hi_lo[i], SImode))
26839		op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
26840	    }
26841	}
26842    }
26843
26844  for (i = 0; i < 2; i++)
26845    {
26846      /* Split large IOR/XOR operations.  */
26847      if ((code == IOR || code == XOR)
26848	  && CONST_INT_P (op2_hi_lo[i])
26849	  && !complement_final_p
26850	  && !complement_op1_p
26851	  && !complement_op2_p
26852	  && !logical_const_operand (op2_hi_lo[i], SImode))
26853	{
26854	  HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
26855	  HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
26856	  HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
26857	  rtx tmp = gen_reg_rtx (SImode);
26858
26859	  /* Make sure the constant is sign extended.  */
26860	  if ((hi_16bits & sign_bit) != 0)
26861	    hi_16bits |= upper_32bits;
26862
26863	  rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
26864				      code, SImode, false, false, false);
26865
26866	  rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
26867				      code, SImode, false, false, false);
26868	}
26869      else
26870	rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
26871				    code, SImode, complement_final_p,
26872				    complement_op1_p, complement_op2_p);
26873    }
26874
26875  return;
26876}
26877
/* Split the insns that make up boolean operations operating on multiple GPR
   registers.  The boolean MD patterns ensure that the inputs are either
   exactly the same as the output registers, or that there is no overlap.
26881
26882   OPERANDS is an array containing the destination and two input operands.
26883   CODE is the base operation (AND, IOR, XOR, NOT).
26884   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26885   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26886   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */
26887
26888void
26889rs6000_split_logical (rtx operands[3],
26890		      enum rtx_code code,
26891		      bool complement_final_p,
26892		      bool complement_op1_p,
26893		      bool complement_op2_p)
26894{
26895  machine_mode mode = GET_MODE (operands[0]);
26896  machine_mode sub_mode;
26897  rtx op0, op1, op2;
26898  int sub_size, regno0, regno1, nregs, i;
26899
26900  /* If this is DImode, use the specialized version that can run before
26901     register allocation.  */
26902  if (mode == DImode && !TARGET_POWERPC64)
26903    {
26904      rs6000_split_logical_di (operands, code, complement_final_p,
26905			       complement_op1_p, complement_op2_p);
26906      return;
26907    }
26908
26909  op0 = operands[0];
26910  op1 = operands[1];
26911  op2 = (code == NOT) ? NULL_RTX : operands[2];
26912  sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
26913  sub_size = GET_MODE_SIZE (sub_mode);
26914  regno0 = REGNO (op0);
26915  regno1 = REGNO (op1);
26916
26917  gcc_assert (reload_completed);
26918  gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
26919  gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
26920
26921  nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
26922  gcc_assert (nregs > 1);
26923
26924  if (op2 && REG_P (op2))
26925    gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
26926
26927  for (i = 0; i < nregs; i++)
26928    {
26929      int offset = i * sub_size;
26930      rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
26931      rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
26932      rtx sub_op2 = ((code == NOT)
26933		     ? NULL_RTX
26934		     : simplify_subreg (sub_mode, op2, mode, offset));
26935
26936      rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
26937				  complement_final_p, complement_op1_p,
26938				  complement_op2_p);
26939    }
26940
26941  return;
26942}
26943
26944/* Emit instructions to move SRC to DST.  Called by splitters for
26945   multi-register moves.  It will emit at most one instruction for
26946   each register that is accessed; that is, it won't emit li/lis pairs
26947   (or equivalent for 64-bit code).  One of SRC or DST must be a hard
26948   register.  */
26949
26950void
26951rs6000_split_multireg_move (rtx dst, rtx src)
26952{
26953  /* The register number of the first register being moved.  */
26954  int reg;
26955  /* The mode that is to be moved.  */
26956  machine_mode mode;
26957  /* The mode that the move is being done in, and its size.  */
26958  machine_mode reg_mode;
26959  int reg_mode_size;
26960  /* The number of registers that will be moved.  */
26961  int nregs;
26962
26963  reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
26964  mode = GET_MODE (dst);
26965  nregs = hard_regno_nregs (reg, mode);
26966
26967  /* If we have a vector quad register for MMA, and this is a load or store,
26968     see if we can use vector paired load/stores.  */
26969  if (mode == XOmode && TARGET_MMA
26970      && (MEM_P (dst) || MEM_P (src)))
26971    {
26972      reg_mode = OOmode;
26973      nregs /= 2;
26974    }
26975  /* If we have a vector pair/quad mode, split it into two/four separate
26976     vectors.  */
26977  else if (mode == OOmode || mode == XOmode)
26978    reg_mode = V1TImode;
26979  else if (FP_REGNO_P (reg))
26980    reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
26981	(TARGET_HARD_FLOAT ? DFmode : SFmode);
26982  else if (ALTIVEC_REGNO_P (reg))
26983    reg_mode = V16QImode;
26984  else
26985    reg_mode = word_mode;
26986  reg_mode_size = GET_MODE_SIZE (reg_mode);
26987
26988  gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
26989
26990  /* TDmode residing in FP registers is special, since the ISA requires that
26991     the lower-numbered word of a register pair is always the most significant
26992     word, even in little-endian mode.  This does not match the usual subreg
     semantics, so we cannot use simplify_gen_subreg in those cases.  Access
26994     the appropriate constituent registers "by hand" in little-endian mode.
26995
26996     Note we do not need to check for destructive overlap here since TDmode
26997     can only reside in even/odd register pairs.  */
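  /* E.g. (a sketch): a TDmode copy from the f10/f11 pair to the f4/f5 pair
     in little-endian mode emits (set f5 f11) and then (set f4 f10), so the
     lower-numbered register of each pair keeps the most significant word.  */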
26998  if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
26999    {
27000      rtx p_src, p_dst;
27001      int i;
27002
27003      for (i = 0; i < nregs; i++)
27004	{
27005	  if (REG_P (src) && FP_REGNO_P (REGNO (src)))
27006	    p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
27007	  else
27008	    p_src = simplify_gen_subreg (reg_mode, src, mode,
27009					 i * reg_mode_size);
27010
27011	  if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
27012	    p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
27013	  else
27014	    p_dst = simplify_gen_subreg (reg_mode, dst, mode,
27015					 i * reg_mode_size);
27016
27017	  emit_insn (gen_rtx_SET (p_dst, p_src));
27018	}
27019
27020      return;
27021    }
27022
  /* The __vector_pair and __vector_quad modes are multi-register
     modes, so when we load or store the registers below, we have to be
     careful to swap them properly in little-endian mode: the last
     register gets the first memory location.  We also need to be careful
     to use the right register numbers if we are splitting XO to OO.  */
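  /* E.g. (a sketch): storing an OOmode value held in vs0/vs1 on a
     little-endian system stores vs1 to the first 16 bytes of the MEM and
     vs0 to the next 16 bytes.  */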
27029  if (mode == OOmode || mode == XOmode)
27030    {
27031      nregs = hard_regno_nregs (reg, mode);
27032      int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
27033      if (MEM_P (dst))
27034	{
27035	  unsigned offset = 0;
27036	  unsigned size = GET_MODE_SIZE (reg_mode);
27037
27038	  /* If we are reading an accumulator register, we have to
27039	     deprime it before we can access it.  */
27040	  if (TARGET_MMA
27041	      && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27042	    emit_insn (gen_mma_xxmfacc (src, src));
27043
27044	  for (int i = 0; i < nregs; i += reg_mode_nregs)
27045	    {
27046	      unsigned subreg
27047		= WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27048	      rtx dst2 = adjust_address (dst, reg_mode, offset);
27049	      rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
27050	      offset += size;
27051	      emit_insn (gen_rtx_SET (dst2, src2));
27052	    }
27053
27054	  return;
27055	}
27056
27057      if (MEM_P (src))
27058	{
27059	  unsigned offset = 0;
27060	  unsigned size = GET_MODE_SIZE (reg_mode);
27061
27062	  for (int i = 0; i < nregs; i += reg_mode_nregs)
27063	    {
27064	      unsigned subreg
27065		= WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27066	      rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
27067	      rtx src2 = adjust_address (src, reg_mode, offset);
27068	      offset += size;
27069	      emit_insn (gen_rtx_SET (dst2, src2));
27070	    }
27071
27072	  /* If we are writing an accumulator register, we have to
27073	     prime it after we've written it.  */
27074	  if (TARGET_MMA
27075	      && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27076	    emit_insn (gen_mma_xxmtacc (dst, dst));
27077
27078	  return;
27079	}
27080
27081      if (GET_CODE (src) == UNSPEC
27082	  || GET_CODE (src) == UNSPEC_VOLATILE)
27083	{
27084	  gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE
27085		      || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
27086	  gcc_assert (REG_P (dst));
27087	  if (GET_MODE (src) == XOmode)
27088	    gcc_assert (FP_REGNO_P (REGNO (dst)));
27089	  if (GET_MODE (src) == OOmode)
27090	    gcc_assert (VSX_REGNO_P (REGNO (dst)));
27091
27092	  int nvecs = XVECLEN (src, 0);
27093	  for (int i = 0; i < nvecs; i++)
27094	    {
27095	      rtx op;
27096	      int regno = reg + i;
27097
27098	      if (WORDS_BIG_ENDIAN)
27099		{
27100		  op = XVECEXP (src, 0, i);
27101
27102		  /* If we are loading an even VSX register and the memory location
27103		     is adjacent to the next register's memory location (if any),
27104		     then we can load them both with one LXVP instruction.  */
27105		  if ((regno & 1) == 0)
27106		    {
27107		      rtx op2 = XVECEXP (src, 0, i + 1);
27108		      if (adjacent_mem_locations (op, op2) == op)
27109			{
27110			  op = adjust_address (op, OOmode, 0);
27111			  /* Skip the next register, since we're going to
27112			     load it together with this register.  */
27113			  i++;
27114			}
27115		    }
27116		}
27117	      else
27118		{
27119		  op = XVECEXP (src, 0, nvecs - i - 1);
27120
27121		  /* If we are loading an even VSX register and the memory location
27122		     is adjacent to the next register's memory location (if any),
27123		     then we can load them both with one LXVP instruction.  */
27124		  if ((regno & 1) == 0)
27125		    {
27126			  rtx op2 = XVECEXP (src, 0, nvecs - i - 2);
27127			  if (adjacent_mem_locations (op2, op) == op2)
27128			    {
27129			      op = adjust_address (op2, OOmode, 0);
27130			      /* Skip the next register, since we're going to
27131				 load it together with this register.  */
27132			      i++;
27133			    }
27134		    }
27135		}
27136
27137	      rtx dst_i = gen_rtx_REG (GET_MODE (op), regno);
27138	      emit_insn (gen_rtx_SET (dst_i, op));
27139	    }
27140
27141	  /* We are writing an accumulator register, so we have to
27142	     prime it after we've written it.  */
27143	  if (GET_MODE (src) == XOmode)
27144	    emit_insn (gen_mma_xxmtacc (dst, dst));
27145
27146	  return;
27147	}
27148
27149      /* Register -> register moves can use common code.  */
27150    }
27151
27152  if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
27153    {
27154      /* If we are reading an accumulator register, we have to
27155	 deprime it before we can access it.  */
27156      if (TARGET_MMA
27157	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27158	emit_insn (gen_mma_xxmfacc (src, src));
27159
      /* Move the register range backwards, in case there is destructive
	 overlap.  */
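      /* E.g. (a sketch): copying {r5,r6} to {r6,r7} must emit the r6->r7
	 move first; emitting r6 = r5 first would corrupt the source of the
	 second move.  */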
27162      int i;
      /* XO/OO modes are opaque, so we cannot use subregs.  */
      if (mode == OOmode || mode == XOmode)
27165	{
27166	  for (i = nregs - 1; i >= 0; i--)
27167	    {
27168	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
27169	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
27170	      emit_insn (gen_rtx_SET (dst_i, src_i));
27171	    }
27172	}
27173      else
27174	{
27175	  for (i = nregs - 1; i >= 0; i--)
27176	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27177							 i * reg_mode_size),
27178				    simplify_gen_subreg (reg_mode, src, mode,
27179							 i * reg_mode_size)));
27180	}
27181
27182      /* If we are writing an accumulator register, we have to
27183	 prime it after we've written it.  */
27184      if (TARGET_MMA
27185	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27186	emit_insn (gen_mma_xxmtacc (dst, dst));
27187    }
27188  else
27189    {
27190      int i;
27191      int j = -1;
27192      bool used_update = false;
27193      rtx restore_basereg = NULL_RTX;
27194
27195      if (MEM_P (src) && INT_REGNO_P (reg))
27196	{
27197	  rtx breg;
27198
27199	  if (GET_CODE (XEXP (src, 0)) == PRE_INC
27200	      || GET_CODE (XEXP (src, 0)) == PRE_DEC)
27201	    {
27202	      rtx delta_rtx;
27203	      breg = XEXP (XEXP (src, 0), 0);
27204	      delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
27205			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
27206			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
27207	      emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27208	      src = replace_equiv_address (src, breg);
27209	    }
27210	  else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
27211	    {
27212	      if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
27213		{
27214		  rtx basereg = XEXP (XEXP (src, 0), 0);
27215		  if (TARGET_UPDATE)
27216		    {
27217		      rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
27218		      emit_insn (gen_rtx_SET (ndst,
27219					      gen_rtx_MEM (reg_mode,
27220							   XEXP (src, 0))));
27221		      used_update = true;
27222		    }
27223		  else
27224		    emit_insn (gen_rtx_SET (basereg,
27225					    XEXP (XEXP (src, 0), 1)));
27226		  src = replace_equiv_address (src, basereg);
27227		}
27228	      else
27229		{
27230		  rtx basereg = gen_rtx_REG (Pmode, reg);
27231		  emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
27232		  src = replace_equiv_address (src, basereg);
27233		}
27234	    }
27235
27236	  breg = XEXP (src, 0);
27237	  if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
27238	    breg = XEXP (breg, 0);
27239
27240	  /* If the base register we are using to address memory is
27241	     also a destination reg, then change that register last.  */
27242	  if (REG_P (breg)
27243	      && REGNO (breg) >= REGNO (dst)
27244	      && REGNO (breg) < REGNO (dst) + nregs)
27245	    j = REGNO (breg) - REGNO (dst);
27246	}
27247      else if (MEM_P (dst) && INT_REGNO_P (reg))
27248	{
27249	  rtx breg;
27250
27251	  if (GET_CODE (XEXP (dst, 0)) == PRE_INC
27252	      || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
27253	    {
27254	      rtx delta_rtx;
27255	      breg = XEXP (XEXP (dst, 0), 0);
27256	      delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
27257			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
27258			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
27259
27260	      /* We have to update the breg before doing the store.
27261		 Use store with update, if available.  */
27262
27263	      if (TARGET_UPDATE)
27264		{
27265		  rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27266		  emit_insn (TARGET_32BIT
27267			     ? (TARGET_POWERPC64
27268				? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
27269				: gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
27270			     : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
27271		  used_update = true;
27272		}
27273	      else
27274		emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27275	      dst = replace_equiv_address (dst, breg);
27276	    }
27277	  else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
27278		   && GET_CODE (XEXP (dst, 0)) != LO_SUM)
27279	    {
27280	      if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
27281		{
27282		  rtx basereg = XEXP (XEXP (dst, 0), 0);
27283		  if (TARGET_UPDATE)
27284		    {
27285		      rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27286		      emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
27287							   XEXP (dst, 0)),
27288					      nsrc));
27289		      used_update = true;
27290		    }
27291		  else
27292		    emit_insn (gen_rtx_SET (basereg,
27293					    XEXP (XEXP (dst, 0), 1)));
27294		  dst = replace_equiv_address (dst, basereg);
27295		}
27296	      else
27297		{
27298		  rtx basereg = XEXP (XEXP (dst, 0), 0);
27299		  rtx offsetreg = XEXP (XEXP (dst, 0), 1);
27300		  gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
27301			      && REG_P (basereg)
27302			      && REG_P (offsetreg)
27303			      && REGNO (basereg) != REGNO (offsetreg));
27304		  if (REGNO (basereg) == 0)
27305		    {
27306		      rtx tmp = offsetreg;
27307		      offsetreg = basereg;
27308		      basereg = tmp;
27309		    }
27310		  emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
27311		  restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
27312		  dst = replace_equiv_address (dst, basereg);
27313		}
27314	    }
27315	  else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
27316	    gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
27317	}
27318
27319      /* If we are reading an accumulator register, we have to
27320	 deprime it before we can access it.  */
27321      if (TARGET_MMA && REG_P (src)
27322	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27323	emit_insn (gen_mma_xxmfacc (src, src));
27324
27325      for (i = 0; i < nregs; i++)
27326	{
27327	  /* Calculate index to next subword.  */
27328	  ++j;
27329	  if (j == nregs)
27330	    j = 0;
27331
27332	  /* If compiler already emitted move of first word by
27333	     store with update, no need to do anything.  */
27334	  if (j == 0 && used_update)
27335	    continue;
27336
	  /* XO/OO modes are opaque, so we cannot use subregs.  */
	  if (mode == OOmode || mode == XOmode)
27339	    {
27340	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
27341	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
27342	      emit_insn (gen_rtx_SET (dst_i, src_i));
27343	    }
27344	  else
27345	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27346							 j * reg_mode_size),
27347				    simplify_gen_subreg (reg_mode, src, mode,
27348							 j * reg_mode_size)));
27349	}
27350
27351      /* If we are writing an accumulator register, we have to
27352	 prime it after we've written it.  */
27353      if (TARGET_MMA && REG_P (dst)
27354	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27355	emit_insn (gen_mma_xxmtacc (dst, dst));
27356
27357      if (restore_basereg != NULL_RTX)
27358	emit_insn (restore_basereg);
27359    }
27360}
27361
27362/* Return true if the peephole2 can combine a load involving a combination of
27363   an addis instruction and a load with an offset that can be fused together on
27364   a power8.  */
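/* As an illustrative example (hand-written assembly, not compiler output),
   the peephole looks for a pair such as

	addis 9,2,.LC0@toc@ha
	lwz 3,.LC0@toc@l(9)

   where register 9 is dead afterwards, so both instructions can be rewritten
   to use the target register and become a power8 fused sequence.  */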
27365
27366bool
27367fusion_gpr_load_p (rtx addis_reg,	/* register set via addis.  */
27368		   rtx addis_value,	/* addis value.  */
27369		   rtx target,		/* target register that is loaded.  */
27370		   rtx mem)		/* bottom part of the memory addr.  */
27371{
27372  rtx addr;
27373  rtx base_reg;
27374
27375  /* Validate arguments.  */
27376  if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
27377    return false;
27378
27379  if (!base_reg_operand (target, GET_MODE (target)))
27380    return false;
27381
27382  if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
27383    return false;
27384
27385  /* Allow sign/zero extension.  */
27386  if (GET_CODE (mem) == ZERO_EXTEND
27387      || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
27388    mem = XEXP (mem, 0);
27389
27390  if (!MEM_P (mem))
27391    return false;
27392
27393  if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
27394    return false;
27395
27396  addr = XEXP (mem, 0);			/* either PLUS or LO_SUM.  */
27397  if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
27398    return false;
27399
  /* Validate that the register used to load the high value is either the
     register being loaded, or a register whose use we can safely replace.

     This function is only called from the peephole2 pass, and we assume that
     there are two instructions in the peephole (addis and load), so we check
     that the target register is not used in the memory address and that the
     register holding the addis result is dead after the peephole.  */
27407  if (REGNO (addis_reg) != REGNO (target))
27408    {
27409      if (reg_mentioned_p (target, mem))
27410	return false;
27411
27412      if (!peep2_reg_dead_p (2, addis_reg))
27413	return false;
27414
27415      /* If the target register being loaded is the stack pointer, we must
27416         avoid loading any other value into it, even temporarily.  */
27417      if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
27418	return false;
27419    }
27420
27421  base_reg = XEXP (addr, 0);
27422  return REGNO (addis_reg) == REGNO (base_reg);
27423}
27424
27425/* During the peephole2 pass, adjust and expand the insns for a load fusion
27426   sequence.  We adjust the addis register to use the target register.  If the
27427   load sign extends, we adjust the code to do the zero extending load, and an
27428   explicit sign extension later since the fusion only covers zero extending
27429   loads.
27430
27431   The operands are:
27432	operands[0]	register set with addis (to be replaced with target)
27433	operands[1]	value set via addis
27434	operands[2]	target register being loaded
27435	operands[3]	D-form memory reference using operands[0].  */
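/* E.g. (a sketch): for a sign-extending SImode load fused into a DImode
   target, the expansion conceptually becomes

	addis target,base,hi16
	lwz target,lo16(target)
	extsw target,target

   because the power8 fusion only covers the zero-extending load.  */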
27436
27437void
27438expand_fusion_gpr_load (rtx *operands)
27439{
27440  rtx addis_value = operands[1];
27441  rtx target = operands[2];
27442  rtx orig_mem = operands[3];
  rtx new_addr, new_mem, orig_addr, offset;
27444  enum rtx_code plus_or_lo_sum;
27445  machine_mode target_mode = GET_MODE (target);
27446  machine_mode extend_mode = target_mode;
27447  machine_mode ptr_mode = Pmode;
27448  enum rtx_code extend = UNKNOWN;
27449
27450  if (GET_CODE (orig_mem) == ZERO_EXTEND
27451      || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
27452    {
27453      extend = GET_CODE (orig_mem);
27454      orig_mem = XEXP (orig_mem, 0);
27455      target_mode = GET_MODE (orig_mem);
27456    }
27457
27458  gcc_assert (MEM_P (orig_mem));
27459
27460  orig_addr = XEXP (orig_mem, 0);
27461  plus_or_lo_sum = GET_CODE (orig_addr);
27462  gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
27463
27464  offset = XEXP (orig_addr, 1);
27465  new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
27466  new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
27467
27468  if (extend != UNKNOWN)
27469    new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
27470
27471  new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
27472			    UNSPEC_FUSION_GPR);
27473  emit_insn (gen_rtx_SET (target, new_mem));
27474
27475  if (extend == SIGN_EXTEND)
27476    {
27477      int sub_off = ((BYTES_BIG_ENDIAN)
27478		     ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
27479		     : 0);
27480      rtx sign_reg
27481	= simplify_subreg (target_mode, target, extend_mode, sub_off);
27482
27483      emit_insn (gen_rtx_SET (target,
27484			      gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
27485    }
27486
27487  return;
27488}
27489
27490/* Emit the addis instruction that will be part of a fused instruction
27491   sequence.  */
27492
27493void
27494emit_fusion_addis (rtx target, rtx addis_value)
27495{
27496  rtx fuse_ops[10];
27497  const char *addis_str = NULL;
27498
27499  /* Emit the addis instruction.  */
27500  fuse_ops[0] = target;
27501  if (satisfies_constraint_L (addis_value))
27502    {
27503      fuse_ops[1] = addis_value;
27504      addis_str = "lis %0,%v1";
27505    }
27506
27507  else if (GET_CODE (addis_value) == PLUS)
27508    {
27509      rtx op0 = XEXP (addis_value, 0);
27510      rtx op1 = XEXP (addis_value, 1);
27511
27512      if (REG_P (op0) && CONST_INT_P (op1)
27513	  && satisfies_constraint_L (op1))
27514	{
27515	  fuse_ops[1] = op0;
27516	  fuse_ops[2] = op1;
27517	  addis_str = "addis %0,%1,%v2";
27518	}
27519    }
27520
27521  else if (GET_CODE (addis_value) == HIGH)
27522    {
27523      rtx value = XEXP (addis_value, 0);
27524      if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
27525	{
27526	  fuse_ops[1] = XVECEXP (value, 0, 0);		/* symbol ref.  */
27527	  fuse_ops[2] = XVECEXP (value, 0, 1);		/* TOC register.  */
27528	  if (TARGET_ELF)
27529	    addis_str = "addis %0,%2,%1@toc@ha";
27530
27531	  else if (TARGET_XCOFF)
27532	    addis_str = "addis %0,%1@u(%2)";
27533
27534	  else
27535	    gcc_unreachable ();
27536	}
27537
27538      else if (GET_CODE (value) == PLUS)
27539	{
27540	  rtx op0 = XEXP (value, 0);
27541	  rtx op1 = XEXP (value, 1);
27542
27543	  if (GET_CODE (op0) == UNSPEC
27544	      && XINT (op0, 1) == UNSPEC_TOCREL
27545	      && CONST_INT_P (op1))
27546	    {
27547	      fuse_ops[1] = XVECEXP (op0, 0, 0);	/* symbol ref.  */
27548	      fuse_ops[2] = XVECEXP (op0, 0, 1);	/* TOC register.  */
27549	      fuse_ops[3] = op1;
27550	      if (TARGET_ELF)
27551		addis_str = "addis %0,%2,%1+%3@toc@ha";
27552
27553	      else if (TARGET_XCOFF)
27554		addis_str = "addis %0,%1+%3@u(%2)";
27555
27556	      else
27557		gcc_unreachable ();
27558	    }
27559	}
27560
27561      else if (satisfies_constraint_L (value))
27562	{
27563	  fuse_ops[1] = value;
27564	  addis_str = "lis %0,%v1";
27565	}
27566
27567      else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
27568	{
27569	  fuse_ops[1] = value;
27570	  addis_str = "lis %0,%1@ha";
27571	}
27572    }
27573
27574  if (!addis_str)
27575    fatal_insn ("Could not generate addis value for fusion", addis_value);
27576
27577  output_asm_insn (addis_str, fuse_ops);
27578}
27579
27580/* Emit a D-form load or store instruction that is the second instruction
27581   of a fusion sequence.  */
27582
27583static void
27584emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
27585{
27586  rtx fuse_ops[10];
27587  char insn_template[80];
27588
27589  fuse_ops[0] = load_reg;
27590  fuse_ops[1] = addis_reg;
27591
27592  if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
27593    {
27594      sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
27595      fuse_ops[2] = offset;
27596      output_asm_insn (insn_template, fuse_ops);
27597    }
27598
27599  else if (GET_CODE (offset) == UNSPEC
27600	   && XINT (offset, 1) == UNSPEC_TOCREL)
27601    {
27602      if (TARGET_ELF)
27603	sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
27604
27605      else if (TARGET_XCOFF)
27606	sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
27607
27608      else
27609	gcc_unreachable ();
27610
27611      fuse_ops[2] = XVECEXP (offset, 0, 0);
27612      output_asm_insn (insn_template, fuse_ops);
27613    }
27614
27615  else if (GET_CODE (offset) == PLUS
27616	   && GET_CODE (XEXP (offset, 0)) == UNSPEC
27617	   && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
27618	   && CONST_INT_P (XEXP (offset, 1)))
27619    {
27620      rtx tocrel_unspec = XEXP (offset, 0);
27621      if (TARGET_ELF)
27622	sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
27623
27624      else if (TARGET_XCOFF)
27625	sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
27626
27627      else
27628	gcc_unreachable ();
27629
27630      fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
27631      fuse_ops[3] = XEXP (offset, 1);
27632      output_asm_insn (insn_template, fuse_ops);
27633    }
27634
27635  else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
27636    {
27637      sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
27638
27639      fuse_ops[2] = offset;
27640      output_asm_insn (insn_template, fuse_ops);
27641    }
27642
27643  else
27644    fatal_insn ("Unable to generate load/store offset for fusion", offset);
27645
27646  return;
27647}
27648
27649/* Given an address, convert it into the addis and load offset parts.  Addresses
27650   created during the peephole2 process look like:
27651	(lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
27652		(unspec [(...)] UNSPEC_TOCREL))  */
27653
27654static void
27655fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
27656{
27657  rtx hi, lo;
27658
27659  if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
27660    {
27661      hi = XEXP (addr, 0);
27662      lo = XEXP (addr, 1);
27663    }
27664  else
27665    gcc_unreachable ();
27666
27667  *p_hi = hi;
27668  *p_lo = lo;
27669}
27670
/* Return a string to fuse an addis instruction with a GPR load into the same
   register that the addis instruction set.  The address used is the logical
   address that was formed during peephole2:
27674	(lo_sum (high) (low-part))
27675
27676   The code is complicated, so we call output_asm_insn directly, and just
27677   return "".  */
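/* E.g. (illustrative only): for a SImode TOC-relative load, the two
   instructions printed are

	addis 9,2,sym@toc@ha
	lwz 9,sym@toc@l(9)

   with the addis result and the loaded value sharing the one register.  */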
27678
27679const char *
27680emit_fusion_gpr_load (rtx target, rtx mem)
27681{
27682  rtx addis_value;
27683  rtx addr;
27684  rtx load_offset;
27685  const char *load_str = NULL;
27686  machine_mode mode;
27687
27688  if (GET_CODE (mem) == ZERO_EXTEND)
27689    mem = XEXP (mem, 0);
27690
27691  gcc_assert (REG_P (target) && MEM_P (mem));
27692
27693  addr = XEXP (mem, 0);
27694  fusion_split_address (addr, &addis_value, &load_offset);
27695
27696  /* Now emit the load instruction to the same register.  */
27697  mode = GET_MODE (mem);
27698  switch (mode)
27699    {
27700    case E_QImode:
27701      load_str = "lbz";
27702      break;
27703
27704    case E_HImode:
27705      load_str = "lhz";
27706      break;
27707
27708    case E_SImode:
27709    case E_SFmode:
27710      load_str = "lwz";
27711      break;
27712
27713    case E_DImode:
27714    case E_DFmode:
27715      gcc_assert (TARGET_POWERPC64);
27716      load_str = "ld";
27717      break;
27718
27719    default:
27720      fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
27721    }
27722
27723  /* Emit the addis instruction.  */
27724  emit_fusion_addis (target, addis_value);
27725
27726  /* Emit the D-form load instruction.  */
27727  emit_fusion_load (target, target, load_offset, load_str);
27728
27729  return "";
27730}
27731
27732/* This is not inside an  #ifdef RS6000_GLIBC_ATOMIC_FENV  because gengtype
27733   ignores it then.  */
27734static GTY(()) tree atomic_hold_decl;
27735static GTY(()) tree atomic_clear_decl;
27736static GTY(()) tree atomic_update_decl;
27737
27738/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */
27739static void
27740rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
27741{
27742  if (!TARGET_HARD_FLOAT)
27743    {
27744#ifdef RS6000_GLIBC_ATOMIC_FENV
27745      if (atomic_hold_decl == NULL_TREE)
27746	{
27747	  atomic_hold_decl
27748	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27749			  get_identifier ("__atomic_feholdexcept"),
27750			  build_function_type_list (void_type_node,
27751						    double_ptr_type_node,
27752						    NULL_TREE));
27753	  TREE_PUBLIC (atomic_hold_decl) = 1;
27754	  DECL_EXTERNAL (atomic_hold_decl) = 1;
27755	}
27756
27757      if (atomic_clear_decl == NULL_TREE)
27758	{
27759	  atomic_clear_decl
27760	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27761			  get_identifier ("__atomic_feclearexcept"),
27762			  build_function_type_list (void_type_node,
27763						    NULL_TREE));
27764	  TREE_PUBLIC (atomic_clear_decl) = 1;
27765	  DECL_EXTERNAL (atomic_clear_decl) = 1;
27766	}
27767
27768      tree const_double = build_qualified_type (double_type_node,
27769						TYPE_QUAL_CONST);
27770      tree const_double_ptr = build_pointer_type (const_double);
27771      if (atomic_update_decl == NULL_TREE)
27772	{
27773	  atomic_update_decl
27774	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27775			  get_identifier ("__atomic_feupdateenv"),
27776			  build_function_type_list (void_type_node,
27777						    const_double_ptr,
27778						    NULL_TREE));
27779	  TREE_PUBLIC (atomic_update_decl) = 1;
27780	  DECL_EXTERNAL (atomic_update_decl) = 1;
27781	}
27782
27783      tree fenv_var = create_tmp_var_raw (double_type_node);
27784      TREE_ADDRESSABLE (fenv_var) = 1;
27785      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
27786			       build4 (TARGET_EXPR, double_type_node, fenv_var,
27787				       void_node, NULL_TREE, NULL_TREE));
27788
27789      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
27790      *clear = build_call_expr (atomic_clear_decl, 0);
27791      *update = build_call_expr (atomic_update_decl, 1,
27792				 fold_convert (const_double_ptr, fenv_addr));
27793#endif
27794      return;
27795    }
27796
27797  tree mffs = rs6000_builtin_decls[RS6000_BIF_MFFS];
27798  tree mtfsf = rs6000_builtin_decls[RS6000_BIF_MTFSF];
27799  tree call_mffs = build_call_expr (mffs, 0);
27800
  /* Generates the equivalent of feholdexcept (&fenv_var):
27802
27803     *fenv_var = __builtin_mffs ();
27804     double fenv_hold;
27805     *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
27806     __builtin_mtfsf (0xff, fenv_hold);  */
27807
27808  /* Mask to clear everything except for the rounding modes and non-IEEE
27809     arithmetic flag.  */
27810  const unsigned HOST_WIDE_INT hold_exception_mask
27811    = HOST_WIDE_INT_C (0xffffffff00000007);
27812
27813  tree fenv_var = create_tmp_var_raw (double_type_node);
27814
27815  tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
27816			   NULL_TREE, NULL_TREE);
27817
27818  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
27819  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
27820			      build_int_cst (uint64_type_node,
27821					     hold_exception_mask));
27822
27823  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
27824				 fenv_llu_and);
27825
27826  tree hold_mtfsf = build_call_expr (mtfsf, 2,
27827				     build_int_cst (unsigned_type_node, 0xff),
27828				     fenv_hold_mtfsf);
27829
27830  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
27831
  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     double fenv_clear = __builtin_mffs ();
     *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
     __builtin_mtfsf (0xff, fenv_clear);  */
27837
  /* Mask to clear the entire FPSCR image in the low word, including all of
     the exception bits.  */
27840  const unsigned HOST_WIDE_INT clear_exception_mask
27841    = HOST_WIDE_INT_C (0xffffffff00000000);
27842
27843  tree fenv_clear = create_tmp_var_raw (double_type_node);
27844
27845  tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
27846			    call_mffs, NULL_TREE, NULL_TREE);
27847
27848  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
27849  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
27850				    fenv_clean_llu,
27851				    build_int_cst (uint64_type_node,
27852						   clear_exception_mask));
27853
27854  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
27855				  fenv_clear_llu_and);
27856
27857  tree clear_mtfsf = build_call_expr (mtfsf, 2,
27858				      build_int_cst (unsigned_type_node, 0xff),
27859				      fenv_clear_mtfsf);
27860
27861  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
27862
  /* Generates the equivalent of feupdateenv (&fenv_var):

     double old_fenv = __builtin_mffs ();
     double fenv_update;
     *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
                                (*(uint64_t*)fenv_var & 0x1ff80fff);
     __builtin_mtfsf (0xff, fenv_update);  */
27870
27871  const unsigned HOST_WIDE_INT update_exception_mask
27872    = HOST_WIDE_INT_C (0xffffffff1fffff00);
27873  const unsigned HOST_WIDE_INT new_exception_mask
27874    = HOST_WIDE_INT_C (0x1ff80fff);
27875
27876  tree old_fenv = create_tmp_var_raw (double_type_node);
27877  tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
27878			     call_mffs, NULL_TREE, NULL_TREE);
27879
27880  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
27881  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
27882			     build_int_cst (uint64_type_node,
27883					    update_exception_mask));
27884
27885  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
27886			     build_int_cst (uint64_type_node,
27887					    new_exception_mask));
27888
27889  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
27890			      old_llu_and, new_llu_and);
27891
27892  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
27893				   new_llu_mask);
27894
27895  tree update_mtfsf = build_call_expr (mtfsf, 2,
27896				       build_int_cst (unsigned_type_node, 0xff),
27897				       fenv_update_mtfsf);
27898
27899  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
27900}
27901
27902void
27903rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
27904{
27905  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
27906
27907  rtx_tmp0 = gen_reg_rtx (V2DFmode);
27908  rtx_tmp1 = gen_reg_rtx (V2DFmode);
27909
  /* The destination of the vmrgew instruction layout is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
     vmrgew instruction will be correct.  */
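  /* With the permutes and merges below, DST should end up as
     { (float) src1[0], (float) src1[1], (float) src2[0], (float) src2[1] }
     (a sketch of the intended layout, spelled out here for clarity).  */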
27914  if (BYTES_BIG_ENDIAN)
27915    {
27916       emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
27917					    GEN_INT (0)));
27918       emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
27919					    GEN_INT (3)));
27920    }
27921  else
27922    {
27923       emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
27924       emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
27925    }
27926
27927  rtx_tmp2 = gen_reg_rtx (V4SFmode);
27928  rtx_tmp3 = gen_reg_rtx (V4SFmode);
27929
27930  emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
27931  emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
27932
27933  if (BYTES_BIG_ENDIAN)
27934    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
27935  else
27936    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
27937}
27938
27939void
27940rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
27941{
27942  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
27943
27944  rtx_tmp0 = gen_reg_rtx (V2DImode);
27945  rtx_tmp1 = gen_reg_rtx (V2DImode);
27946
  /* The destination of the vmrgew instruction layout is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
     vmrgew instruction will be correct.  */
27951  if (BYTES_BIG_ENDIAN)
27952    {
27953      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
27954      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
27955    }
27956  else
27957    {
27958      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
27959      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
27960    }
27961
27962  rtx_tmp2 = gen_reg_rtx (V4SFmode);
27963  rtx_tmp3 = gen_reg_rtx (V4SFmode);
27964
27965  if (signed_convert)
27966    {
27967      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
27968      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
27969    }
27970  else
27971    {
27972       emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
27973       emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
27974    }
27975
27976  if (BYTES_BIG_ENDIAN)
27977    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
27978  else
27979    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
27980}
27981
27982void
27983rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
27984			       rtx src2)
27985{
27986  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
27987
27988  rtx_tmp0 = gen_reg_rtx (V2DFmode);
27989  rtx_tmp1 = gen_reg_rtx (V2DFmode);
27990
27991  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
27992  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
27993
27994  rtx_tmp2 = gen_reg_rtx (V4SImode);
27995  rtx_tmp3 = gen_reg_rtx (V4SImode);
27996
27997  if (signed_convert)
27998    {
27999      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
28000      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
28001    }
28002  else
28003    {
28004      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
28005      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
28006    }
28007
28008  emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
28009}
28010
28011/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */
28012
28013static bool
28014rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
28015			  optimization_type opt_type)
28016{
28017  switch (op)
28018    {
28019    case rsqrt_optab:
28020      return (opt_type == OPTIMIZE_FOR_SPEED
28021	      && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
28022
28023    default:
28024      return true;
28025    }
28026}
28027
28028/* Implement TARGET_CONSTANT_ALIGNMENT.  */
28029
28030static HOST_WIDE_INT
28031rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
28032{
28033  if (TREE_CODE (exp) == STRING_CST
28034      && (STRICT_ALIGNMENT || !optimize_size))
28035    return MAX (align, BITS_PER_WORD);
28036  return align;
28037}
28038
28039/* Implement TARGET_STARTING_FRAME_OFFSET.  */
28040
28041static HOST_WIDE_INT
28042rs6000_starting_frame_offset (void)
28043{
28044  if (FRAME_GROWS_DOWNWARD)
28045    return 0;
28046  return RS6000_STARTING_FRAME_OFFSET;
28047}
28048
28049/* Internal function to return the built-in function id for the complex
28050   multiply operation for a given mode.  */
28051
28052static inline built_in_function
28053complex_multiply_builtin_code (machine_mode mode)
28054{
28055  gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
28056  int func = BUILT_IN_COMPLEX_MUL_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
28057  return (built_in_function) func;
28058}
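
/* For instance, complex_multiply_builtin_code (KCmode) returns the built-in
   id that rs6000_mangle_decl_assembler_name below maps to __mulkc3.  */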
28059
28060/* Internal function to return the built-in function id for the complex divide
28061   operation for a given mode.  */
28062
28063static inline built_in_function
28064complex_divide_builtin_code (machine_mode mode)
28065{
28066  gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
28067  int func = BUILT_IN_COMPLEX_DIV_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
28068  return (built_in_function) func;
28069}
28070
/* On 64-bit Linux and FreeBSD systems, possibly switch the long double library
28072   function names from <foo>l to <foo>f128 if the default long double type is
28073   IEEE 128-bit.  Typically, with the C and C++ languages, the standard math.h
28074   include file switches the names on systems that support long double as IEEE
28075   128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
28076   In the future, glibc will export names like __ieee128_sinf128 and we can
28077   switch to using those instead of using sinf128, which pollutes the user's
28078   namespace.
28079
28080   This will switch the names for Fortran math functions as well (which doesn't
28081   use math.h).  However, Fortran needs other changes to the compiler and
28082   library before you can switch the real*16 type at compile time.
28083
28084   We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name.  We
28085   only do this transformation if the __float128 type is enabled.  This
28086   prevents us from doing the transformation on older 32-bit ports that might
28087   have enabled using IEEE 128-bit floating point as the default long double
28088   type.
28089
28090   We also use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change the
28091   function names used for complex multiply and divide to the appropriate
28092   names.  */
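/* For example (with the defaults described above), a call to __builtin_sinl
   when long double is IEEE 128-bit is renamed here so that it resolves to
   __sinieee128 instead of sinl.  */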
28093
28094static tree
28095rs6000_mangle_decl_assembler_name (tree decl, tree id)
28096{
28097  /* Handle complex multiply/divide.  For IEEE 128-bit, use __mulkc3 or
28098     __divkc3 and for IBM 128-bit use __multc3 and __divtc3.  */
28099  if (TARGET_FLOAT128_TYPE
28100      && TREE_CODE (decl) == FUNCTION_DECL
28101      && DECL_IS_UNDECLARED_BUILTIN (decl)
28102      && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
28103    {
28104      built_in_function id = DECL_FUNCTION_CODE (decl);
28105      const char *newname = NULL;
28106
28107      if (id == complex_multiply_builtin_code (KCmode))
28108	newname = "__mulkc3";
28109
28110      else if (id == complex_multiply_builtin_code (ICmode))
28111	newname = "__multc3";
28112
28113      else if (id == complex_multiply_builtin_code (TCmode))
28114	newname = (TARGET_IEEEQUAD) ? "__mulkc3" : "__multc3";
28115
28116      else if (id == complex_divide_builtin_code (KCmode))
28117	newname = "__divkc3";
28118
28119      else if (id == complex_divide_builtin_code (ICmode))
28120	newname = "__divtc3";
28121
28122      else if (id == complex_divide_builtin_code (TCmode))
28123	newname = (TARGET_IEEEQUAD) ? "__divkc3" : "__divtc3";
28124
28125      if (newname)
28126	{
28127	  if (TARGET_DEBUG_BUILTIN)
28128	    fprintf (stderr, "Map complex mul/div => %s\n", newname);
28129
28130	  return get_identifier (newname);
28131	}
28132    }
28133
28134  /* Map long double built-in functions if long double is IEEE 128-bit.  */
28135  if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
28136      && TREE_CODE (decl) == FUNCTION_DECL
28137      && DECL_IS_UNDECLARED_BUILTIN (decl)
28138      && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
28139    {
28140      size_t len = IDENTIFIER_LENGTH (id);
28141      const char *name = IDENTIFIER_POINTER (id);
28142      char *newname = NULL;
28143
28144      /* See if it is one of the built-in functions with an unusual name.  */
28145      switch (DECL_FUNCTION_CODE (decl))
28146	{
28147	case BUILT_IN_DREML:
28148	  newname = xstrdup ("__remainderieee128");
28149	  break;
28150
28151	case BUILT_IN_GAMMAL:
28152	  newname = xstrdup ("__lgammaieee128");
28153	  break;
28154
28155	case BUILT_IN_GAMMAL_R:
28156	case BUILT_IN_LGAMMAL_R:
28157	  newname = xstrdup ("__lgammaieee128_r");
28158	  break;
28159
28160	case BUILT_IN_NEXTTOWARD:
28161	  newname = xstrdup ("__nexttoward_to_ieee128");
28162	  break;
28163
28164	case BUILT_IN_NEXTTOWARDF:
28165	  newname = xstrdup ("__nexttowardf_to_ieee128");
28166	  break;
28167
28168	case BUILT_IN_NEXTTOWARDL:
28169	  newname = xstrdup ("__nexttowardieee128");
28170	  break;
28171
28172	case BUILT_IN_POW10L:
28173	  newname = xstrdup ("__exp10ieee128");
28174	  break;
28175
28176	case BUILT_IN_SCALBL:
28177	  newname = xstrdup ("__scalbieee128");
28178	  break;
28179
28180	case BUILT_IN_SIGNIFICANDL:
28181	  newname = xstrdup ("__significandieee128");
28182	  break;
28183
28184	case BUILT_IN_SINCOSL:
28185	  newname = xstrdup ("__sincosieee128");
28186	  break;
28187
28188	default:
28189	  break;
28190	}
28191
28192      /* Update the __builtin_*printf and __builtin_*scanf functions.  */
28193      if (!newname)
28194	{
28195	  size_t printf_len = strlen ("printf");
28196	  size_t scanf_len = strlen ("scanf");
28197	  size_t printf_chk_len = strlen ("printf_chk");
28198
28199	  if (len >= printf_len
28200	      && strcmp (name + len - printf_len, "printf") == 0)
28201	    newname = xasprintf ("__%sieee128", name);
28202
28203	  else if (len >= scanf_len
28204		   && strcmp (name + len - scanf_len, "scanf") == 0)
28205	    newname = xasprintf ("__isoc99_%sieee128", name);
28206
28207	  else if (len >= printf_chk_len
28208		   && strcmp (name + len - printf_chk_len, "printf_chk") == 0)
28209	    newname = xasprintf ("%sieee128", name);
28210
28211	  else if (name[len - 1] == 'l')
28212	    {
28213	      bool uses_ieee128_p = false;
28214	      tree type = TREE_TYPE (decl);
28215	      machine_mode ret_mode = TYPE_MODE (type);
28216
	      /* See if the function returns an IEEE 128-bit floating point
		 type or complex type.  */
28219	      if (ret_mode == TFmode || ret_mode == TCmode)
28220		uses_ieee128_p = true;
28221	      else
28222		{
28223		  function_args_iterator args_iter;
28224		  tree arg;
28225
		  /* See if the function passes an IEEE 128-bit floating
		     point type or complex type.  */
28228		  FOREACH_FUNCTION_ARGS (type, arg, args_iter)
28229		    {
28230		      machine_mode arg_mode = TYPE_MODE (arg);
28231		      if (arg_mode == TFmode || arg_mode == TCmode)
28232			{
28233			  uses_ieee128_p = true;
28234			  break;
28235			}
28236		    }
28237		}
28238
28239	      /* If we passed or returned an IEEE 128-bit floating point type,
28240		 change the name.  Use __<name>ieee128, instead of <name>l.  */
28241	      if (uses_ieee128_p)
28242		newname = xasprintf ("__%.*sieee128", (int)(len - 1), name);
28243	    }
28244	}
28245
28246      if (newname)
28247	{
28248	  if (TARGET_DEBUG_BUILTIN)
28249	    fprintf (stderr, "Map %s => %s\n", name, newname);
28250
28251	  id = get_identifier (newname);
28252	  free (newname);
28253	}
28254    }
28255
28256  return id;
28257}
28258
28259/* Predict whether the given loop in gimple will be transformed in the RTL
28260   doloop_optimize pass.  */
28261
28262static bool
28263rs6000_predict_doloop_p (struct loop *loop)
28264{
28265  gcc_assert (loop);
28266
28267  /* On rs6000, targetm.can_use_doloop_p is actually
28268     can_use_doloop_if_innermost.  Just ensure the loop is innermost.  */
28269  if (loop->inner != NULL)
28270    {
28271      if (dump_file && (dump_flags & TDF_DETAILS))
28272	fprintf (dump_file, "Predict doloop failure due to"
28273			    " loop nesting.\n");
28274      return false;
28275    }
28276
28277  return true;
28278}
28279
28280/* Implement TARGET_PREFERRED_DOLOOP_MODE. */
28281
28282static machine_mode
28283rs6000_preferred_doloop_mode (machine_mode)
28284{
28285  return word_mode;
28286}
28287
28288/* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P.  */
28289
28290static bool
28291rs6000_cannot_substitute_mem_equiv_p (rtx mem)
28292{
28293  gcc_assert (MEM_P (mem));
28294
  /* curr_insn_transform()'s handling of subregs cannot handle Altivec
     AND-style addresses (an AND at the top level of the address), so don't
     allow MEMs with those address types to be substituted as an equivalent
     expression.  See PR93974 for details.  */
28298  if (GET_CODE (XEXP (mem, 0)) == AND)
28299    return true;
28300
28301  return false;
28302}
28303
28304/* Implement TARGET_INVALID_CONVERSION.  */
28305
28306static const char *
28307rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
28308{
28309  /* Make sure we're working with the canonical types.  */
28310  if (TYPE_CANONICAL (fromtype) != NULL_TREE)
28311    fromtype = TYPE_CANONICAL (fromtype);
28312  if (TYPE_CANONICAL (totype) != NULL_TREE)
28313    totype = TYPE_CANONICAL (totype);
28314
28315  machine_mode frommode = TYPE_MODE (fromtype);
28316  machine_mode tomode = TYPE_MODE (totype);
28317
28318  if (frommode != tomode)
28319    {
28320      /* Do not allow conversions to/from XOmode and OOmode types.  */
28321      if (frommode == XOmode)
28322	return N_("invalid conversion from type %<__vector_quad%>");
28323      if (tomode == XOmode)
28324	return N_("invalid conversion to type %<__vector_quad%>");
28325      if (frommode == OOmode)
28326	return N_("invalid conversion from type %<__vector_pair%>");
28327      if (tomode == OOmode)
28328	return N_("invalid conversion to type %<__vector_pair%>");
28329    }
28330
28331  /* Conversion allowed.  */
28332  return NULL;
28333}
28334
28335/* Convert a SFmode constant to the integer bit pattern.  */
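/* For example, the SFmode constant 1.0 yields the bit pattern 0x3f800000.  */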
28336
28337long
28338rs6000_const_f32_to_i32 (rtx operand)
28339{
28340  long value;
28341  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);
28342
28343  gcc_assert (GET_MODE (operand) == SFmode);
28344  REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
28345  return value;
28346}
28347
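/* Emit the XXSPLTIDP instruction to splat the SFmode bit pattern VALUE into
   the V2DF register DST, warning first when the value is subnormal (where
   the instruction's result is undefined).  */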
28348void
28349rs6000_emit_xxspltidp_v2df (rtx dst, long value)
28350{
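  /* A zero exponent field with a non-zero mantissa is a subnormal SFmode
     value, which XXSPLTIDP does not handle.  */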
28351  if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
28352    inform (input_location,
28353	    "the result for the xxspltidp instruction "
28354	    "is undefined for subnormal input values");
  emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
28356}
28357
28358/* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC.  */
28359
28360static bool
28361rs6000_gen_pic_addr_diff_vec (void)
28362{
28363  return rs6000_relative_jumptables;
28364}
28365
28366void
28367rs6000_output_addr_vec_elt (FILE *file, int value)
28368{
28369  const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
28370  char buf[100];
28371
28372  fprintf (file, "%s", directive);
28373  ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
28374  assemble_name (file, buf);
28375  fprintf (file, "\n");
28376}


/* Copy an integer constant to the vector constant structure.  */

static void
constant_int_to_128bit_vector (rtx op,
                               machine_mode mode,
                               size_t byte_num,
                               vec_const_128bit_type *info)
{
  unsigned HOST_WIDE_INT uvalue = UINTVAL (op);
  unsigned bitsize = GET_MODE_BITSIZE (mode);

  for (int shift = bitsize - 8; shift >= 0; shift -= 8)
    info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
}
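
/* For example (illustrative): the DImode constant 0x0102030405060708 is
   written out most significant byte first, so bytes[0..7] become 0x01,
   0x02, ..., 0x08; the info->bytes array is always kept in big endian
   order.  */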

/* Copy a floating point constant to the vector constant structure.  */

static void
constant_fp_to_128bit_vector (rtx op,
                              machine_mode mode,
                              size_t byte_num,
                              vec_const_128bit_type *info)
{
  unsigned bitsize = GET_MODE_BITSIZE (mode);
  unsigned num_words = bitsize / 32;
  const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
  long real_words[VECTOR_128BIT_WORDS];

  /* Make sure we don't overflow the real_words array and that it is
     filled completely.  */
  gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);

  real_to_target (real_words, rtype, mode);

  /* Iterate over each 32-bit word in the floating point constant.  The
     real_to_target function writes out the words in target endian order,
     so rearrange them here so that the bytes are stored in big endian
     order.  */
  for (unsigned num = 0; num < num_words; num++)
    {
      unsigned endian_num = (BYTES_BIG_ENDIAN
                             ? num
                             : num_words - 1 - num);

      unsigned uvalue = real_words[endian_num];
      for (int shift = 32 - 8; shift >= 0; shift -= 8)
        info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
    }

  /* Mark that this constant involves floating point.  */
  info->fp_constant_p = true;
}
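
/* Worked example (illustrative): the DFmode constant 1.0 has the 64-bit
   pattern 0x3FF0000000000000, i.e. the 32-bit words 0x3FF00000 and
   0x00000000.  After the loop above, bytes[0..7] hold 0x3F 0xF0 0x00 ...
   0x00 regardless of the target's endianness.  */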

/* Convert a vector constant OP with mode MODE to the vector 128-bit constant
   structure INFO.

   Break the constant out into bytes, half words, words, and double words.
   Return true if we have successfully converted the constant.

   We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
   constants.  Integer and floating point scalar constants are splatted to
   fill out the vector.  */
bool
vec_const_128bit_to_bytes (rtx op,
                           machine_mode mode,
                           vec_const_128bit_type *info)
{
  /* Initialize the constant structure.  */
  memset ((void *)info, 0, sizeof (vec_const_128bit_type));

  /* Assume CONST_INTs are DImode.  */
  if (mode == VOIDmode)
    mode = CONST_INT_P (op) ? DImode : GET_MODE (op);

  if (mode == VOIDmode)
    return false;

  unsigned size = GET_MODE_SIZE (mode);
  bool splat_p = false;

  if (size > VECTOR_128BIT_BYTES)
    return false;

  /* Set up the bits.  */
  switch (GET_CODE (op))
    {
      /* Integer constants, default to double word.  */
    case CONST_INT:
      {
        constant_int_to_128bit_vector (op, mode, 0, info);
        splat_p = true;
        break;
      }

      /* Floating point constants.  */
    case CONST_DOUBLE:
      {
        /* Fail if the floating point constant is the wrong mode.  */
        if (GET_MODE (op) != mode)
          return false;

        /* SFmode stored as scalars are stored in DFmode format.  */
        if (mode == SFmode)
          {
            mode = DFmode;
            size = GET_MODE_SIZE (DFmode);
          }

        constant_fp_to_128bit_vector (op, mode, 0, info);
        splat_p = true;
        break;
      }

      /* Vector constants, iterate over each element.  On little endian
         systems, we have to reverse the element numbers.  */
    case CONST_VECTOR:
      {
        /* Fail if the vector constant is the wrong mode or size.  */
        if (GET_MODE (op) != mode
            || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
          return false;

        machine_mode ele_mode = GET_MODE_INNER (mode);
        size_t ele_size = GET_MODE_SIZE (ele_mode);
        size_t nunits = GET_MODE_NUNITS (mode);

        for (size_t num = 0; num < nunits; num++)
          {
            rtx ele = CONST_VECTOR_ELT (op, num);
            size_t byte_num = (BYTES_BIG_ENDIAN
                               ? num
                               : nunits - 1 - num) * ele_size;

            if (CONST_INT_P (ele))
              constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
            else if (CONST_DOUBLE_P (ele))
              constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
            else
              return false;
          }

        break;
      }

      /* Treat VEC_DUPLICATE of a constant just like a vector constant.
         Since we are duplicating the element, we don't have to worry about
         endian issues.  */
    case VEC_DUPLICATE:
      {
        /* Fail if the vector duplicate is the wrong mode or size.  */
        if (GET_MODE (op) != mode
            || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
          return false;

        machine_mode ele_mode = GET_MODE_INNER (mode);
        size_t ele_size = GET_MODE_SIZE (ele_mode);
        rtx ele = XEXP (op, 0);
        size_t nunits = GET_MODE_NUNITS (mode);

        if (!CONST_INT_P (ele) && !CONST_DOUBLE_P (ele))
          return false;

        for (size_t num = 0; num < nunits; num++)
          {
            size_t byte_num = num * ele_size;

            if (CONST_INT_P (ele))
              constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
            else
              constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
          }

        break;
      }

      /* Anything else, just return failure.  */
    default:
      return false;
    }

  /* Splat the constant to fill 128 bits if desired.  */
  if (splat_p && size < VECTOR_128BIT_BYTES)
    {
      if ((VECTOR_128BIT_BYTES % size) != 0)
        return false;

      for (size_t offset = size;
           offset < VECTOR_128BIT_BYTES;
           offset += size)
        memcpy ((void *) &info->bytes[offset],
                (void *) &info->bytes[0],
                size);
    }

  /* Remember original size.  */
  info->original_size = size;

  /* Determine if the bytes are all the same.  */
  unsigned char first_byte = info->bytes[0];
  info->all_bytes_same = true;
  for (size_t i = 1; i < VECTOR_128BIT_BYTES; i++)
    if (first_byte != info->bytes[i])
      {
        info->all_bytes_same = false;
        break;
      }

  /* Pack half words together & determine if all of the half words are the
     same.  */
  for (size_t i = 0; i < VECTOR_128BIT_HALF_WORDS; i++)
    info->half_words[i] = ((info->bytes[i * 2] << 8)
                           | info->bytes[(i * 2) + 1]);

  unsigned short first_hword = info->half_words[0];
  info->all_half_words_same = true;
  for (size_t i = 1; i < VECTOR_128BIT_HALF_WORDS; i++)
    if (first_hword != info->half_words[i])
      {
        info->all_half_words_same = false;
        break;
      }

  /* Pack words together & determine if all of the words are the same.  */
  for (size_t i = 0; i < VECTOR_128BIT_WORDS; i++)
    info->words[i] = ((info->bytes[i * 4] << 24)
                      | (info->bytes[(i * 4) + 1] << 16)
                      | (info->bytes[(i * 4) + 2] << 8)
                      | info->bytes[(i * 4) + 3]);

  info->all_words_same
    = (info->words[0] == info->words[1]
       && info->words[0] == info->words[2]
       && info->words[0] == info->words[3]);

  /* Pack double words together & determine if all of the double words are the
     same.  */
  for (size_t i = 0; i < VECTOR_128BIT_DOUBLE_WORDS; i++)
    {
      unsigned HOST_WIDE_INT d_word = 0;
      for (size_t j = 0; j < 8; j++)
        d_word = (d_word << 8) | info->bytes[(i * 8) + j];

      info->double_words[i] = d_word;
    }

  info->all_double_words_same
    = (info->double_words[0] == info->double_words[1]);

  return true;
}
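
/* Worked example (illustrative): for the V4SImode constant
   { 0x12345678, 0x12345678, 0x12345678, 0x12345678 } this function sets
   all_words_same (every 32-bit word is 0x12345678) but neither
   all_bytes_same nor all_half_words_same, which is exactly the situation
   constant_generates_xxspltiw below looks for.  */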

/* Determine if an IEEE 128-bit constant can be loaded with LXVKQ.  Return zero
   if the LXVKQ instruction cannot be used.  Otherwise return the immediate
   value to be used with the LXVKQ instruction.  */

unsigned
constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
{
  /* The instruction is only supported when generating power10 code with both
     IEEE 128-bit floating point hardware and VSX registers available.  */
  if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
      || !TARGET_VSX)
    return 0;

  /* All of the constants that can be generated by LXVKQ have the bottom 3
     words equal to 0.  */
  if (vsx_const->words[1] != 0
      || vsx_const->words[2] != 0
      || vsx_const->words[3] != 0)
    return 0;

  /* See if we have a match for the first word.  */
  switch (vsx_const->words[0])
    {
    case 0x3FFF0000U: return 1;		/* IEEE 128-bit +1.0.  */
    case 0x40000000U: return 2;		/* IEEE 128-bit +2.0.  */
    case 0x40008000U: return 3;		/* IEEE 128-bit +3.0.  */
    case 0x40010000U: return 4;		/* IEEE 128-bit +4.0.  */
    case 0x40014000U: return 5;		/* IEEE 128-bit +5.0.  */
    case 0x40018000U: return 6;		/* IEEE 128-bit +6.0.  */
    case 0x4001C000U: return 7;		/* IEEE 128-bit +7.0.  */
    case 0x7FFF0000U: return 8;		/* IEEE 128-bit +Infinity.  */
    case 0x7FFF8000U: return 9;		/* IEEE 128-bit quiet NaN.  */
    case 0x80000000U: return 16;	/* IEEE 128-bit -0.0.  */
    case 0xBFFF0000U: return 17;	/* IEEE 128-bit -1.0.  */
    case 0xC0000000U: return 18;	/* IEEE 128-bit -2.0.  */
    case 0xC0008000U: return 19;	/* IEEE 128-bit -3.0.  */
    case 0xC0010000U: return 20;	/* IEEE 128-bit -4.0.  */
    case 0xC0014000U: return 21;	/* IEEE 128-bit -5.0.  */
    case 0xC0018000U: return 22;	/* IEEE 128-bit -6.0.  */
    case 0xC001C000U: return 23;	/* IEEE 128-bit -7.0.  */
    case 0xFFFF0000U: return 24;	/* IEEE 128-bit -Infinity.  */

      /* Anything else cannot be loaded.  */
    default:
      break;
    }

  return 0;
}
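
/* Worked example (illustrative): IEEE 128-bit +1.0 has sign 0, biased
   exponent 0x3FFF and an all-zero fraction, so its top word is 0x3FFF0000
   and the remaining words are 0; the table above maps that pattern to the
   LXVKQ immediate 1, avoiding a load from the constant pool.  */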

/* Determine if a vector constant can be loaded with XXSPLTIW.  Return zero if
   the XXSPLTIW instruction cannot be used.  Otherwise return the immediate
   value to be used with the XXSPLTIW instruction.  */

unsigned
constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
{
  if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
    return 0;

  if (!vsx_const->all_words_same)
    return 0;

  /* If we can use XXSPLTIB, don't generate XXSPLTIW.  */
  if (vsx_const->all_bytes_same)
    return 0;

  /* See if we can use VSPLTISH or VSPLTISW.  */
  if (vsx_const->all_half_words_same)
    {
      unsigned short h_word = vsx_const->half_words[0];
      short sign_h_word = ((h_word & 0xffff) ^ 0x8000) - 0x8000;
      if (EASY_VECTOR_15 (sign_h_word))
        return 0;
    }

  unsigned int word = vsx_const->words[0];
  int sign_word = ((word & 0xffffffff) ^ 0x80000000) - 0x80000000;
  if (EASY_VECTOR_15 (sign_word))
    return 0;

  return vsx_const->words[0];
}
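
/* For example (illustrative): a V4SImode splat of 0x12345678 passes every
   check above and returns 0x12345678 as the XXSPLTIW immediate, whereas a
   splat of 7 is rejected because EASY_VECTOR_15 accepts it, letting the
   shorter VSPLTISW instruction be used instead.  Note the XOR/subtract
   idiom sign-extends the low 32 (or 16) bits for that test.  */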

/* Determine if a vector constant can be loaded with XXSPLTIDP.  Return zero if
   the XXSPLTIDP instruction cannot be used.  Otherwise return the immediate
   value to be used with the XXSPLTIDP instruction.  */

unsigned
constant_generates_xxspltidp (vec_const_128bit_type *vsx_const)
{
  if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
    return 0;

  /* Reject if the two 64-bit segments are not the same.  */
  if (!vsx_const->all_double_words_same)
    return 0;

  /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
     Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW).  */
  if (vsx_const->all_bytes_same
      || vsx_const->all_half_words_same
      || vsx_const->all_words_same)
    return 0;

  unsigned HOST_WIDE_INT value = vsx_const->double_words[0];

  /* Avoid values that look like DFmode NaNs, except for the normal NaN bit
     pattern and the signalling NaN bit pattern.  Recognize infinity and
     negative infinity.  */

  /* Bit representation of DFmode normal quiet NaN.  */
#define RS6000_CONST_DF_NAN	HOST_WIDE_INT_UC (0x7ff8000000000000)

  /* Bit representation of DFmode normal signaling NaN.  */
#define RS6000_CONST_DF_NANS	HOST_WIDE_INT_UC (0x7ff4000000000000)

  /* Bit representation of DFmode positive infinity.  */
#define RS6000_CONST_DF_INF	HOST_WIDE_INT_UC (0x7ff0000000000000)

  /* Bit representation of DFmode negative infinity.  */
#define RS6000_CONST_DF_NEG_INF	HOST_WIDE_INT_UC (0xfff0000000000000)

  if (value != RS6000_CONST_DF_NAN
      && value != RS6000_CONST_DF_NANS
      && value != RS6000_CONST_DF_INF
      && value != RS6000_CONST_DF_NEG_INF)
    {
      /* The IEEE 754 64-bit floating format has 1 bit for the sign, 11 bits
	 for the exponent, and 52 bits for the mantissa (not counting the
	 hidden bit used for normal numbers).  NaN values have the exponent
	 set to all 1 bits, and the mantissa non-zero (mantissa == 0 is
	 infinity).  */

      int df_exponent = (value >> 52) & 0x7ff;
      unsigned HOST_WIDE_INT
	df_mantissa = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U);

      if (df_exponent == 0x7ff && df_mantissa != 0)	/* Other NaNs.  */
	return 0;

      /* Avoid values that are DFmode subnormal values.  Subnormal numbers
	 have the exponent all 0 bits, and the mantissa non-zero.  If the
	 value is subnormal, then the hidden bit in the mantissa is not
	 set.  */
      if (df_exponent == 0 && df_mantissa != 0)		/* Subnormal.  */
	return 0;
    }

  /* Change the representation to a DFmode constant.  */
  long df_words[2] = { vsx_const->words[0], vsx_const->words[1] };

  /* real_from_target takes the target words in target order.  */
  if (!BYTES_BIG_ENDIAN)
    std::swap (df_words[0], df_words[1]);

  REAL_VALUE_TYPE rv_type;
  real_from_target (&rv_type, df_words, DFmode);

  const REAL_VALUE_TYPE *rv = &rv_type;

  /* Validate that the number can be stored as an SFmode value.  */
  if (!exact_real_truncate (SFmode, rv))
    return 0;

  /* Validate that the number is not an SFmode subnormal value (exponent is 0,
     mantissa field is non-zero), which is undefined for the XXSPLTIDP
     instruction.  */
  long sf_value;
  real_to_target (&sf_value, rv, SFmode);

  /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
     and 23 bits for the mantissa.  Subnormal numbers have the exponent all
     0 bits, and the mantissa non-zero.  */
  long sf_exponent = (sf_value >> 23) & 0xFF;
  long sf_mantissa = sf_value & 0x7FFFFF;

  if (sf_exponent == 0 && sf_mantissa != 0)
    return 0;

  /* Return the immediate to be used.  */
  return sf_value;
}
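
/* Worked example (illustrative): a V2DFmode splat of 2.0 has double word
   0x4000000000000000.  It is neither a NaN nor a subnormal, it truncates
   exactly to SFmode, and the resulting single-precision pattern 0x40000000
   is the immediate that XXSPLTIDP expands back to a DFmode 2.0 in each
   vector element.  */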

/* We have only two opaque types for now: the __vector_quad and
   __vector_pair built-in types.  They are target specific and only
   available when MMA is supported.  With MMA supported, the function below
   simply returns false since all uses are valid; otherwise it checks
   whether the given gimple STMT is an assignment, asm or call stmt that
   uses either of these two opaque types unexpectedly.  If so, it raises
   an error message and returns true, otherwise it returns false.  */

bool
rs6000_opaque_type_invalid_use_p (gimple *stmt)
{
  if (TARGET_MMA)
    return false;

  /* If the given TYPE is one of the MMA opaque types, emit the corresponding
     error message and return true, otherwise return false.  */
  auto check_and_error_invalid_use = [](tree type)
  {
    tree mv = TYPE_MAIN_VARIANT (type);
    if (mv == vector_quad_type_node)
      {
	error ("type %<__vector_quad%> requires the %qs option", "-mmma");
	return true;
      }
    else if (mv == vector_pair_type_node)
      {
	error ("type %<__vector_pair%> requires the %qs option", "-mmma");
	return true;
      }
    return false;
  };

  if (stmt)
    {
      /* The usage of MMA opaque types is very limited for now, so checking
	 gassign, gasm and gcall is enough so far.  */
      if (gassign *ga = dyn_cast<gassign *> (stmt))
	{
	  tree lhs = gimple_assign_lhs (ga);
	  tree type = TREE_TYPE (lhs);
	  if (check_and_error_invalid_use (type))
	    return true;
	}
      else if (gasm *gs = dyn_cast<gasm *> (stmt))
	{
	  unsigned ninputs = gimple_asm_ninputs (gs);
	  for (unsigned i = 0; i < ninputs; i++)
	    {
	      tree op = gimple_asm_input_op (gs, i);
	      tree val = TREE_VALUE (op);
	      tree type = TREE_TYPE (val);
	      if (check_and_error_invalid_use (type))
		return true;
	    }
	  unsigned noutputs = gimple_asm_noutputs (gs);
	  for (unsigned i = 0; i < noutputs; i++)
	    {
	      tree op = gimple_asm_output_op (gs, i);
	      tree val = TREE_VALUE (op);
	      tree type = TREE_TYPE (val);
	      if (check_and_error_invalid_use (type))
		return true;
	    }
	}
      else if (gcall *gc = dyn_cast<gcall *> (stmt))
	{
	  unsigned nargs = gimple_call_num_args (gc);
	  for (unsigned i = 0; i < nargs; i++)
	    {
	      tree arg = gimple_call_arg (gc, i);
	      tree type = TREE_TYPE (arg);
	      if (check_and_error_invalid_use (type))
		return true;
	    }
	}
    }

  return false;
}
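
/* Illustrative example (not from the original source): when compiled
   without -mmma, a statement such as

       void copy (__vector_quad *dst, __vector_quad *src) { *dst = *src; }

   reaches this hook through the gassign check on the left-hand side's
   type and is diagnosed with "type '__vector_quad' requires the '-mmma'
   option".  */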

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"