/* Subroutines used for code generation on IBM S/390 and zSeries
   Copyright (C) 1999-2020 Free Software Foundation, Inc.
   Contributed by Hartmut Penner (hpenner@de.ibm.com) and
                  Ulrich Weigand (uweigand@de.ibm.com) and
                  Andreas Krebbel (Andreas.Krebbel@de.ibm.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "target-globals.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "diagnostic.h"
#include "alias.h"
#include "fold-const.h"
#include "print-tree.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "dojump.h"
#include "explow.h"
#include "stmt.h"
#include "expr.h"
#include "reload.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "debug.h"
#include "langhooks.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "intl.h"
#include "tm-constrs.h"
#include "tree-vrp.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "sched-int.h"

/* This file should be included last.  */
#include "target-def.h"

static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);

/* Remember the last target of s390_set_current_function.  */
static GTY(()) tree s390_previous_fndecl;

/* Define the specific costs for a given cpu.  */

struct processor_costs
{
  /* multiplication */
  const int m;        /* cost of an M instruction.  */
  const int mghi;     /* cost of an MGHI instruction.  */
  const int mh;       /* cost of an MH instruction.  */
  const int mhi;      /* cost of an MHI instruction.  */
  const int ml;       /* cost of an ML instruction.  */
  const int mr;       /* cost of an MR instruction.  */
  const int ms;       /* cost of an MS instruction.  */
  const int msg;      /* cost of an MSG instruction.  */
  const int msgf;     /* cost of an MSGF instruction.  */
  const int msgfr;    /* cost of an MSGFR instruction.  */
  const int msgr;     /* cost of an MSGR instruction.  */
  const int msr;      /* cost of an MSR instruction.  */
  const int mult_df;  /* cost of multiplication in DFmode.  */
  const int mxbr;
  /* square root */
  const int sqxbr;    /* cost of square root in TFmode.  */
  const int sqdbr;    /* cost of square root in DFmode.  */
  const int sqebr;    /* cost of square root in SFmode.  */
  /* multiply and add */
  const int madbr;    /* cost of multiply and add in DFmode.  */
  const int maebr;    /* cost of multiply and add in SFmode.  */
  /* division */
  const int dxbr;
  const int ddbr;
  const int debr;
  const int dlgr;
  const int dlr;
  const int dr;
  const int dsgfr;
  const int dsgr;
};

#define s390_cost ((const struct processor_costs *)(s390_cost_pointer))

static const
struct processor_costs z900_cost =
{
  COSTS_N_INSNS (5),     /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (4),     /* MHI   */
  COSTS_N_INSNS (5),     /* ML    */
  COSTS_N_INSNS (5),     /* MR    */
  COSTS_N_INSNS (4),     /* MS    */
  COSTS_N_INSNS (15),    /* MSG   */
  COSTS_N_INSNS (7),     /* MSGF  */
  COSTS_N_INSNS (7),     /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (7),     /* multiplication in DFmode */
  COSTS_N_INSNS (13),    /* MXBR */
  COSTS_N_INSNS (136),   /* SQXBR */
  COSTS_N_INSNS (44),    /* SQDBR */
  COSTS_N_INSNS (35),    /* SQEBR */
  COSTS_N_INSNS (18),    /* MADBR */
  COSTS_N_INSNS (13),    /* MAEBR */
  COSTS_N_INSNS (134),   /* DXBR */
  COSTS_N_INSNS (30),    /* DDBR */
  COSTS_N_INSNS (27),    /* DEBR */
  COSTS_N_INSNS (220),   /* DLGR */
  COSTS_N_INSNS (34),    /* DLR */
  COSTS_N_INSNS (34),    /* DR */
  COSTS_N_INSNS (32),    /* DSGFR */
  COSTS_N_INSNS (32),    /* DSGR */
};

static const
struct processor_costs z990_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR */
  COSTS_N_INSNS (40),    /* DDBR */
  COSTS_N_INSNS (26),    /* DEBR */
  COSTS_N_INSNS (176),   /* DLGR */
  COSTS_N_INSNS (31),    /* DLR */
  COSTS_N_INSNS (31),    /* DR */
  COSTS_N_INSNS (31),    /* DSGFR */
  COSTS_N_INSNS (31),    /* DSGR */
};

static const
struct processor_costs z9_109_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR */
  COSTS_N_INSNS (40),    /* DDBR */
  COSTS_N_INSNS (26),    /* DEBR */
  COSTS_N_INSNS (30),    /* DLGR */
  COSTS_N_INSNS (23),    /* DLR */
  COSTS_N_INSNS (23),    /* DR */
  COSTS_N_INSNS (24),    /* DSGFR */
  COSTS_N_INSNS (24),    /* DSGR */
};

static const
struct processor_costs z10_cost =
{
  COSTS_N_INSNS (10),    /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (10),    /* MH    */
  COSTS_N_INSNS (10),    /* MHI   */
  COSTS_N_INSNS (10),    /* ML    */
  COSTS_N_INSNS (10),    /* MR    */
  COSTS_N_INSNS (10),    /* MS    */
  COSTS_N_INSNS (10),    /* MSG   */
  COSTS_N_INSNS (10),    /* MSGF  */
  COSTS_N_INSNS (10),    /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (10),    /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (50),    /* MXBR */
  COSTS_N_INSNS (120),   /* SQXBR */
  COSTS_N_INSNS (52),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (111),   /* DXBR */
  COSTS_N_INSNS (39),    /* DDBR */
  COSTS_N_INSNS (32),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR */
  COSTS_N_INSNS (71),    /* DLR */
  COSTS_N_INSNS (71),    /* DR */
  COSTS_N_INSNS (71),    /* DSGFR */
  COSTS_N_INSNS (71),    /* DSGR */
};

static const
struct processor_costs z196_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (101),   /* DXBR B+101 */
  COSTS_N_INSNS (29),    /* DDBR */
  COSTS_N_INSNS (22),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};

static const
struct processor_costs zEC12_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (131),   /* DXBR B+131 */
  COSTS_N_INSNS (29),    /* DDBR */
  COSTS_N_INSNS (22),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};

const struct s390_processor processor_table[] =
{
  { "z900",   "z900",   PROCESSOR_2064_Z900,   &z900_cost,   5  },
  { "z990",   "z990",   PROCESSOR_2084_Z990,   &z990_cost,   6  },
  { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost, 7  },
  { "z9-ec",  "z9-ec",  PROCESSOR_2094_Z9_EC,  &z9_109_cost, 7  },
  { "z10",    "z10",    PROCESSOR_2097_Z10,    &z10_cost,    8  },
  { "z196",   "z196",   PROCESSOR_2817_Z196,   &z196_cost,   9  },
  { "zEC12",  "zEC12",  PROCESSOR_2827_ZEC12,  &zEC12_cost,  10 },
  { "z13",    "z13",    PROCESSOR_2964_Z13,    &zEC12_cost,  11 },
  { "z14",    "arch12", PROCESSOR_3906_Z14,    &zEC12_cost,  12 },
  { "z15",    "arch13", PROCESSOR_8561_Z15,    &zEC12_cost,  13 },
  { "native", "",       PROCESSOR_NATIVE,      NULL,         0  }
};
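
/* For example (reading the table above; not part of the original
   comments): -march=z13 selects PROCESSOR_2964_Z13 but currently reuses
   the zEC12 cost table, and the "native" entry is only a placeholder
   that gets resolved to one of the real entries for -march=native.  */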

extern int reload_completed;

/* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
static rtx_insn *last_scheduled_insn;
#define NUM_SIDES 2

#define MAX_SCHED_UNITS 4
static int last_scheduled_unit_distance[MAX_SCHED_UNITS][NUM_SIDES];

/* Estimate of the number of cycles a long-running insn occupies an
   execution unit.  */
static int fxd_longrunning[NUM_SIDES];
static int fpd_longrunning[NUM_SIDES];

/* The maximum score added for an instruction whose unit hasn't been
   in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
   give instruction mix scheduling more priority over instruction
   grouping.  */
#define MAX_SCHED_MIX_SCORE      2

/* The maximum distance up to which individual scores will be
   calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
   Increase this with the OOO window size of the machine.  */
#define MAX_SCHED_MIX_DISTANCE 70

/* Structure used to hold the components of a S/390 memory
   address.  A legitimate address on S/390 is of the general
   form
          base + index + displacement
   where any of the components is optional.

   base and index are registers of the class ADDR_REGS,
   displacement is an unsigned 12-bit immediate constant.  */

/* The maximum number of insns in backend-generated memset/memcpy/memcmp
   loops.  This value is used in the unroll adjust hook to detect such
   loops.  The current maximum is 9, coming from the memcmp loop.  */
#define BLOCK_MEM_OPS_LOOP_INSNS 9

struct s390_address
{
  rtx base;
  rtx indx;
  rtx disp;
  bool pointer;
  bool literal_pool;
};
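
/* Illustrative example (not part of the original comments): the memory
   operand of an instruction like "l %r1,100(%r2,%r3)" decomposes into
   base = (reg %r3), indx = (reg %r2) and disp = (const_int 100), with
   any of the three components possibly missing.  */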

/* A few accessor macros for struct cfun->machine->s390_frame_layout.  */

#define cfun_frame_layout (cfun->machine->frame_layout)
#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
#define cfun_save_arg_fprs_p (!!(TARGET_64BIT				\
				 ? cfun_frame_layout.fpr_bitmap & 0x0f	\
				 : cfun_frame_layout.fpr_bitmap & 0x03))
#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
  cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |=    \
  (1 << (REGNO - FPR0_REGNUM)))
#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap &    \
  (1 << (REGNO - FPR0_REGNUM))))
#define cfun_gpr_save_slot(REGNO) \
  cfun->machine->frame_layout.gpr_save_slots[REGNO]
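
/* Example (illustrative only): cfun_set_fpr_save (FPR0_REGNUM + 4) sets
   bit 4 of the fpr_bitmap, so cfun_fpr_save_p (FPR0_REGNUM + 4)
   subsequently returns true; cfun_save_arg_fprs_p tests the low-order
   bits that correspond to the FPRs used for argument passing.  */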

/* Number of GPRs and FPRs used for argument passing.  */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
#define VEC_ARG_NUM_REG 8

/* A couple of shortcuts.  */
#define CONST_OK_FOR_J(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
#define CONST_OK_FOR_K(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
#define CONST_OK_FOR_Os(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
#define CONST_OK_FOR_Op(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
#define CONST_OK_FOR_On(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")

#define REGNO_PAIR_OK(REGNO, MODE)                               \
  (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
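
/* Illustrative note (not from the original comments): a multiword value
   such as TImode in GPRs on a 64-bit target occupies two registers, so
   REGNO_PAIR_OK only accepts an even starting register for it, while a
   single-register mode is accepted at any register number.  */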

/* The read-ahead distance of the dynamic branch prediction unit, in
   bytes, on a z10 (or higher) CPU.  */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)

/* Masks per jump target register indicating which thunks need to be
   generated.  */
static GTY(()) int indirect_branch_prez10thunk_mask = 0;
static GTY(()) int indirect_branch_z10thunk_mask = 0;

#define INDIRECT_BRANCH_NUM_OPTIONS 4

enum s390_indirect_branch_option
  {
    s390_opt_indirect_branch_jump = 0,
    s390_opt_indirect_branch_call,
    s390_opt_function_return_reg,
    s390_opt_function_return_mem
  };

static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
  { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] =	\
  { ".s390_indirect_jump", ".s390_indirect_call",
    ".s390_return_reg", ".s390_return_mem" };

bool
s390_return_addr_from_memory ()
{
  return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
}

/* Indicate which ABI has been used for passing vector args.
   0 - no vector type arguments have been passed where the ABI is relevant
   1 - the old ABI has been used
   2 - a vector type argument has been passed either in a vector register
       or on the stack by value  */
static int s390_vector_abi = 0;

/* Set the vector ABI marker if TYPE is subject to the vector ABI
   switch.  The vector ABI affects only vector data types.  There are
   two aspects of the vector ABI relevant here:

   1. vectors >= 16 bytes have an alignment of 8 bytes with the new
   ABI and natural alignment with the old.

   2. vectors <= 16 bytes are passed in VRs or by value on the stack
   with the new ABI but by reference on the stack with the old.

   If ARG_P is true TYPE is used for a function argument or return
   value.  The ABI marker then is set for all vector data types.  If
   ARG_P is false only type 1 vectors are being checked.  */

static void
s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
{
  static hash_set<const_tree> visited_types_hash;

  if (s390_vector_abi)
    return;

  if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
    return;

  if (visited_types_hash.contains (type))
    return;

  visited_types_hash.add (type);

  if (VECTOR_TYPE_P (type))
    {
      int type_size = int_size_in_bytes (type);

      /* Outside of arguments only the alignment changes, and this
	 only happens for vector types >= 16 bytes.  */
      if (!arg_p && type_size < 16)
	return;

      /* As arguments, vector types > 16 bytes are passed as before (GCC
	 never enforced the bigger alignment for arguments which was
	 required by the old vector ABI).  However, they might still be
	 ABI relevant due to the changed alignment if they are struct
	 members.  */
      if (arg_p && type_size > 16 && !in_struct_p)
	return;

      s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
    }
  else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
    {
      /* ARRAY_TYPE: Since neither of the ABIs imposes more than
	 natural alignment, there will never be ABI-dependent padding
	 in an array type.  That's why we do not set in_struct_p to
	 true here.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
    }
  else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree arg_chain;

      /* Check the return type.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);

      for (arg_chain = TYPE_ARG_TYPES (type);
	   arg_chain;
	   arg_chain = TREE_CHAIN (arg_chain))
	s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
    }
  else if (RECORD_OR_UNION_TYPE_P (type))
    {
      tree field;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
	}
    }
}
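
/* Example (a sketch, not from the original sources): checking the
   prototype of a function such as
     vector int foo (vector int x);
   finds a 16-byte vector argument and therefore sets s390_vector_abi to
   2 when the new vector ABI (TARGET_VX_ABI) is in effect and to 1
   otherwise.  */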


/* System z builtins.  */

#include "s390-builtins.h"

const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int
bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
#include "s390-builtins.def"
    0
  };

const unsigned int
opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
#include "s390-builtins.def"
    0
  };

tree s390_builtin_types[BT_MAX];
tree s390_builtin_fn_types[BT_FN_MAX];
tree s390_builtin_decls[S390_BUILTIN_MAX +
			S390_OVERLOADED_BUILTIN_MAX +
			S390_OVERLOADED_BUILTIN_VAR_MAX];

static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
#define OB_DEF(...)
#define OB_DEF_VAR(...)

#include "s390-builtins.def"
  CODE_FOR_nothing
};
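
/* As an illustration of the X-macro tables above (the entry shown is
   hypothetical): a line like
     B_DEF (s390_foo, foov16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI)
   in s390-builtins.def would contribute CODE_FOR_foov16qi to this table,
   while the flag tables above pick up its B_VX and operand flags.  */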

static void
s390_init_builtins (void)
{
  /* These definitions are being used in s390-builtins.def.  */
  tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
				       NULL, NULL);
  tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
  tree c_uint64_type_node;

  /* The uint64_type_node from tree.c is not compatible with the C99
     uint64_t data type.  What we want is c_uint64_type_node from
     c-common.c.  But since backend code is not supposed to interface
     with the frontend, we recreate it here.  */
  if (TARGET_64BIT)
    c_uint64_type_node = long_unsigned_type_node;
  else
    c_uint64_type_node = long_long_unsigned_type_node;

#undef DEF_TYPE
#define DEF_TYPE(INDEX, NODE, CONST_P)			\
  if (s390_builtin_types[INDEX] == NULL)		\
    s390_builtin_types[INDEX] = (!CONST_P) ?		\
      (NODE) : build_type_variant ((NODE), 1, 0);

#undef DEF_POINTER_TYPE
#define DEF_POINTER_TYPE(INDEX, INDEX_BASE)				\
  if (s390_builtin_types[INDEX] == NULL)				\
    s390_builtin_types[INDEX] =						\
      build_pointer_type (s390_builtin_types[INDEX_BASE]);

#undef DEF_DISTINCT_TYPE
#define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE)				\
  if (s390_builtin_types[INDEX] == NULL)				\
    s390_builtin_types[INDEX] =						\
      build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);

#undef DEF_VECTOR_TYPE
#define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)			\
  if (s390_builtin_types[INDEX] == NULL)				\
    s390_builtin_types[INDEX] =						\
      build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_OPAQUE_VECTOR_TYPE
#define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)		\
  if (s390_builtin_types[INDEX] == NULL)				\
    s390_builtin_types[INDEX] =						\
      build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_FN_TYPE
#define DEF_FN_TYPE(INDEX, args...)				\
  if (s390_builtin_fn_types[INDEX] == NULL)			\
    s390_builtin_fn_types[INDEX] =				\
      build_function_type_list (args, NULL_TREE);
#undef DEF_OV_TYPE
#define DEF_OV_TYPE(...)
#include "s390-builtin-types.def"

#undef B_DEF
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE)		\
  if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL)			\
    s390_builtin_decls[S390_BUILTIN_##NAME] =				\
      add_builtin_function ("__builtin_" #NAME,				\
			    s390_builtin_fn_types[FNTYPE],		\
			    S390_BUILTIN_##NAME,			\
			    BUILT_IN_MD,				\
			    NULL,					\
			    ATTRS);
#undef OB_DEF
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE)	\
  if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
      == NULL)								\
    s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
      add_builtin_function ("__builtin_" #NAME,				\
			    s390_builtin_fn_types[FNTYPE],		\
			    S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
			    BUILT_IN_MD,				\
			    NULL,					\
			    0);
#undef OB_DEF_VAR
#define OB_DEF_VAR(...)
#include "s390-builtins.def"

}

/* Return true if ARG is appropriate as argument number ARGNUM of
   builtin DECL.  The operand flags from s390-builtins.def have to be
   passed as OP_FLAGS.  */
bool
s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
{
  if (O_UIMM_P (op_flags))
    {
      int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_U1];

      if (!tree_fits_uhwi_p (arg)
	  || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
	{
	  error ("constant argument %d for builtin %qF is out of range "
		 "(0..%wu)", argnum, decl,
		 (HOST_WIDE_INT_1U << bitwidth) - 1);
	  return false;
	}
    }

  if (O_SIMM_P (op_flags))
    {
      int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_S2];

      if (!tree_fits_shwi_p (arg)
	  || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
	  || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
	{
	  error ("constant argument %d for builtin %qF is out of range "
		 "(%wd..%wd)", argnum, decl,
		 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
		 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
	  return false;
	}
    }
  return true;
}
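
/* For instance (derived from the tables above): an operand flagged O_U4
   accepts the unsigned range 0..15 and an operand flagged O_S8 accepts
   the signed range -128..127; values outside these ranges trigger the
   out-of-range diagnostics above.  */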

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
#define MAX_ARGS 6

  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
  enum insn_code icode;
  rtx op[MAX_ARGS], pat;
  int arity;
  bool nonvoid;
  tree arg;
  call_expr_arg_iterator iter;
  unsigned int all_op_flags = opflags_for_builtin (fcode);
  machine_mode last_vec_mode = VOIDmode;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
	       (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
	       bflags_for_builtin (fcode));
    }

  if (S390_USE_TARGET_ATTRIBUTE)
    {
      unsigned int bflags;

      bflags = bflags_for_builtin (fcode);
      if ((bflags & B_HTM) && !TARGET_HTM)
	{
	  error ("builtin %qF is not supported without %<-mhtm%> "
		 "(default with %<-march=zEC12%> and higher).", fndecl);
	  return const0_rtx;
	}
      if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
	{
	  error ("builtin %qF requires %<-mvx%> "
		 "(default with %<-march=z13%> and higher).", fndecl);
	  return const0_rtx;
	}

      if ((bflags & B_VXE) && !TARGET_VXE)
	{
	  error ("builtin %qF requires z14 or higher.", fndecl);
	  return const0_rtx;
	}

      if ((bflags & B_VXE2) && !TARGET_VXE2)
	{
	  error ("builtin %qF requires z15 or higher.", fndecl);
	  return const0_rtx;
	}
    }
  if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
      && fcode < S390_ALL_BUILTIN_MAX)
    {
      gcc_unreachable ();
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
    {
      icode = code_for_builtin[fcode];
      /* Set a flag in the machine specific cfun part in order to support
	 saving/restoring of FPRs.  */
      if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
	cfun->machine->tbegin_p = true;
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
    {
      error ("unresolved overloaded builtin");
      return const0_rtx;
    }
  else
    internal_error ("bad builtin fcode");

  if (icode == 0)
    internal_error ("bad builtin icode");

  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;

  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);

      /* There are builtins (e.g. vec_promote) with no vector
	 arguments but an element selector.  So we have to also look
	 at the vector return type when emitting the modulo
	 operation.  */
      if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
	last_vec_mode = insn_data[icode].operand[0].mode;
    }

  arity = 0;
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      rtx tmp_rtx;
      const struct insn_operand_data *insn_op;
      unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);

      all_op_flags = all_op_flags >> O_SHIFT;

      if (arg == error_mark_node)
	return NULL_RTX;
      if (arity >= MAX_ARGS)
	return NULL_RTX;

      if (O_IMM_P (op_flags)
	  && TREE_CODE (arg) != INTEGER_CST)
	{
	  error ("constant value required for builtin %qF argument %d",
		 fndecl, arity + 1);
	  return const0_rtx;
	}

      if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
	return const0_rtx;

      insn_op = &insn_data[icode].operand[arity + nonvoid];
      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);

      /* expand_expr truncates constants to the target mode only if it
	 is "convenient".  However, our checks below rely on this
	 being done.  */
      if (CONST_INT_P (op[arity])
	  && SCALAR_INT_MODE_P (insn_op->mode)
	  && GET_MODE (op[arity]) != insn_op->mode)
	op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
						 insn_op->mode));

      /* Wrap the expanded RTX for pointer types into a MEM expr with
	 the proper mode.  This allows us to use e.g. (match_operand
	 "memory_operand" ...) in the insn patterns instead of (mem
	 (match_operand "address_operand" ...)).  This is helpful for
	 patterns not just accepting MEMs.  */
      if (POINTER_TYPE_P (TREE_TYPE (arg))
	  && insn_op->predicate != address_operand)
	op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);

      /* Expand the modulo operation required on element selectors.  */
      if (op_flags == O_ELEM)
	{
	  gcc_assert (last_vec_mode != VOIDmode);
	  op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
					     op[arity],
					     GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
					     NULL_RTX, 1, OPTAB_DIRECT);
	}

      /* Record the vector mode used for an element selector.  This assumes:
	 1. There is no builtin with two different vector modes and an element selector
	 2. The element selector comes after the vector type it is referring to.
	 This is currently true for all the builtins, but FIXME: we
	 should better check for that.  */
      if (VECTOR_MODE_P (insn_op->mode))
	last_vec_mode = insn_op->mode;

      if (insn_op->predicate (op[arity], insn_op->mode))
	{
	  arity++;
	  continue;
	}

      /* A memory operand is rejected by the memory_operand predicate.
	 Try making the address legal by copying it into a register.  */
      if (MEM_P (op[arity])
	  && insn_op->predicate == memory_operand
	  && (GET_MODE (XEXP (op[arity], 0)) == Pmode
	      || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
	{
	  op[arity] = replace_equiv_address (op[arity],
					     copy_to_mode_reg (Pmode,
					       XEXP (op[arity], 0)));
	}
      /* Some of the builtins require different modes/types than the
	 pattern in order to implement a specific API.  Instead of
	 adding many expanders which do the mode change we do it here.
	 E.g. s390_vec_add_u128, which is required to have vector unsigned
	 char arguments, is mapped to addti3.  */
      else if (insn_op->mode != VOIDmode
	       && GET_MODE (op[arity]) != VOIDmode
	       && GET_MODE (op[arity]) != insn_op->mode
	       && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
						   GET_MODE (op[arity]), 0))
		   != NULL_RTX))
	{
	  op[arity] = tmp_rtx;
	}

      /* The predicate rejects the operand although the mode is fine.
	 Copy the operand to a register.  */
      if (!insn_op->predicate (op[arity], insn_op->mode)
	  && (GET_MODE (op[arity]) == insn_op->mode
	      || GET_MODE (op[arity]) == VOIDmode
	      || (insn_op->predicate == address_operand
		  && GET_MODE (op[arity]) == Pmode)))
	{
	  /* An address_operand usually has VOIDmode in the expander
	     so we cannot use this.  */
	  machine_mode target_mode =
	    (insn_op->predicate == address_operand
	     ? (machine_mode) Pmode : insn_op->mode);
	  op[arity] = copy_to_mode_reg (target_mode, op[arity]);
	}

      if (!insn_op->predicate (op[arity], insn_op->mode))
	{
	  error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
	  return const0_rtx;
	}
      arity++;
    }

  switch (arity)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0]);
      else
	pat = GEN_FCN (icode) (op[0]);
      break;
    case 2:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1]);
      else
	pat = GEN_FCN (icode) (op[0], op[1]);
      break;
    case 3:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 4:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    case 5:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;
    case 6:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;
    default:
      gcc_unreachable ();
    }
  if (!pat)
    return NULL_RTX;
  emit_insn (pat);

  if (nonvoid)
    return target;
  else
    return const0_rtx;
}


static const int s390_hotpatch_hw_max = 1000000;
static int s390_hotpatch_hw_before_label = 0;
static int s390_hotpatch_hw_after_label = 0;

/* Check whether the hotpatch attribute is applied to a function and, if it
   has arguments, whether the arguments are valid.  */

static tree
s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree expr;
  tree expr2;
  int err;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  if (args != NULL && TREE_CHAIN (args) != NULL)
    {
      expr = TREE_VALUE (args);
      expr2 = TREE_VALUE (TREE_CHAIN (args));
    }
  if (args == NULL || TREE_CHAIN (args) == NULL)
    err = 1;
  else if (TREE_CODE (expr) != INTEGER_CST
	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
	   || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
    err = 1;
  else if (TREE_CODE (expr2) != INTEGER_CST
	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
	   || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
    err = 1;
  else
    err = 0;
  if (err)
    {
      error ("requested %qE attribute is not a comma separated pair of"
	     " non-negative integer constants or too large (max. %d)", name,
	     s390_hotpatch_hw_max);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
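
/* Typical use (illustrative, not from the original sources):
     void f (void) __attribute__ ((hotpatch (1, 2)));
   requests one halfword of hotpatch space before the function label and
   two halfwords after it; the checks above reject missing, negative or
   overly large arguments.  */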

/* Expand the s390_vector_bool type attribute.  */

static tree
s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;

  while (POINTER_TYPE_P (type)
	 || TREE_CODE (type) == FUNCTION_TYPE
	 || TREE_CODE (type) == METHOD_TYPE
	 || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);
  switch (mode)
    {
    case E_DImode: case E_V2DImode:
      result = s390_builtin_types[BT_BV2DI];
      break;
    case E_SImode: case E_V4SImode:
      result = s390_builtin_types[BT_BV4SI];
      break;
    case E_HImode: case E_V8HImode:
      result = s390_builtin_types[BT_BV8HI];
      break;
    case E_QImode: case E_V16QImode:
      result = s390_builtin_types[BT_BV16QI];
      break;
    default:
      break;
    }

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}

/* Check syntax of function decl attributes having a string type value.  */

static tree
s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED,
			      bool *no_add_attrs)
{
  tree cst;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  cst = TREE_VALUE (args);

  if (TREE_CODE (cst) != STRING_CST)
    {
      warning (OPT_Wattributes,
	       "%qE attribute requires a string constant argument",
	       name);
      *no_add_attrs = true;
    }

  if (is_attribute_p ("indirect_branch", name)
      || is_attribute_p ("indirect_branch_call", name)
      || is_attribute_p ("function_return", name)
      || is_attribute_p ("function_return_reg", name)
      || is_attribute_p ("function_return_mem", name))
    {
      if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
	  && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
	  && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
      {
	warning (OPT_Wattributes,
		 "argument to %qE attribute is not "
		 "(keep|thunk|thunk-extern)", name);
	*no_add_attrs = true;
      }
    }

  if (is_attribute_p ("indirect_branch_jump", name)
      && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
    {
      warning (OPT_Wattributes,
	       "argument to %qE attribute is not "
	       "(keep|thunk|thunk-inline|thunk-extern)", name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
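
/* Typical uses (illustrative, not from the original sources):
     void f (void) __attribute__ ((indirect_branch ("thunk")));
     void g (void) __attribute__ ((function_return ("keep")));
   Any string other than the ones checked above is rejected with a
   warning and the attribute is dropped.  */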

static const struct attribute_spec s390_attribute_table[] = {
  { "hotpatch", 2, 2, true, false, false, false,
    s390_handle_hotpatch_attribute, NULL },
  { "s390_vector_bool", 0, 0, false, true, false, true,
    s390_handle_vectorbool_attribute, NULL },
  { "indirect_branch", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "indirect_branch_jump", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "indirect_branch_call", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return_reg", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return_mem", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },

  /* End element.  */
  { NULL,        0, 0, false, false, false, false, NULL, NULL }
};

/* Return the alignment for LABEL.  We default to the -falign-labels
   value except for the literal pool base label.  */
int
s390_label_align (rtx_insn *label)
{
  rtx_insn *prev_insn = prev_active_insn (label);
  rtx set, src;

  if (prev_insn == NULL_RTX)
    goto old;

  set = single_set (prev_insn);

  if (set == NULL_RTX)
    goto old;

  src = SET_SRC (set);

  /* Don't align literal pool base labels.  */
  if (GET_CODE (src) == UNSPEC
      && XINT (src, 1) == UNSPEC_MAIN_BASE)
    return 0;

 old:
  return align_labels.levels[0].log;
}

static GTY(()) rtx got_symbol;

/* Return the GOT table symbol.  The symbol will be created when the
   function is invoked for the first time.  */

static rtx
s390_got_symbol (void)
{
  if (!got_symbol)
    {
      got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
      SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
    }

  return got_symbol;
}

static scalar_int_mode
s390_libgcc_cmp_return_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static scalar_int_mode
s390_libgcc_shift_count_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static scalar_int_mode
s390_unwind_word_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

/* Return true if the back end supports mode MODE.  */
static bool
s390_scalar_mode_supported_p (scalar_mode mode)
{
  /* In contrast to the default implementation, reject TImode constants on
     31-bit TARGET_ZARCH for ABI compliance.  */
  if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();

  return default_scalar_mode_supported_p (mode);
}

/* Return true if the back end supports vector mode MODE.  */
static bool
s390_vector_mode_supported_p (machine_mode mode)
{
  machine_mode inner;

  if (!VECTOR_MODE_P (mode)
      || !TARGET_VX
      || GET_MODE_SIZE (mode) > 16)
    return false;

  inner = GET_MODE_INNER (mode);

  switch (inner)
    {
    case E_QImode:
    case E_HImode:
    case E_SImode:
    case E_DImode:
    case E_TImode:
    case E_SFmode:
    case E_DFmode:
    case E_TFmode:
      return true;
    default:
      return false;
    }
}
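
/* For example (following the checks above): with vector support enabled,
   16-byte modes such as V16QImode, V4SImode and V2DFmode are supported,
   while a 32-byte mode like V4DImode is rejected by the size check.  */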

/* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */

void
s390_set_has_landing_pad_p (bool value)
{
  cfun->machine->has_landing_pad_p = value;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  if (m1 == m2)
    return m1;

  switch (m1)
    {
    case E_CCZmode:
      if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
	  || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
	return m2;
      return VOIDmode;

    case E_CCSmode:
    case E_CCUmode:
    case E_CCTmode:
    case E_CCSRmode:
    case E_CCURmode:
    case E_CCZ1mode:
      if (m2 == CCZmode)
	return m1;

      return VOIDmode;

    default:
      return VOIDmode;
    }
  return VOIDmode;
}

/* Return true if SET either doesn't set the CC register, or else
   the source and destination have matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.  */

static bool
s390_match_ccmode_set (rtx set, machine_mode req_mode)
{
  machine_mode set_mode;

  gcc_assert (GET_CODE (set) == SET);

  /* These modes are supposed to be used only in CC consumer
     patterns.  */
  gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
	      && req_mode != CCVFALLmode && req_mode != CCVFANYmode);

  if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
    return 1;

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case E_CCZ1mode:
    case E_CCSmode:
    case E_CCSRmode:
    case E_CCSFPSmode:
    case E_CCUmode:
    case E_CCURmode:
    case E_CCOmode:
    case E_CCLmode:
    case E_CCL1mode:
    case E_CCL2mode:
    case E_CCL3mode:
    case E_CCT1mode:
    case E_CCT2mode:
    case E_CCT3mode:
    case E_CCVEQmode:
    case E_CCVIHmode:
    case E_CCVIHUmode:
    case E_CCVFHmode:
    case E_CCVFHEmode:
      if (req_mode != set_mode)
	return 0;
      break;

    case E_CCZmode:
      if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
	  && req_mode != CCSRmode && req_mode != CCURmode
	  && req_mode != CCZ1mode)
	return 0;
      break;

    case E_CCAPmode:
    case E_CCANmode:
      if (req_mode != CCAmode)
	return 0;
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Return true if every SET in INSN that sets the CC register
   has source and destination with matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.
   If REQ_MODE is VOIDmode, always return false.  */

bool
s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
{
  int i;

  /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
  if (req_mode == VOIDmode)
    return false;

  if (GET_CODE (PATTERN (insn)) == SET)
    return s390_match_ccmode_set (PATTERN (insn), req_mode);

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
      for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
	{
	  rtx set = XVECEXP (PATTERN (insn), 0, i);
	  if (GET_CODE (set) == SET)
	    if (!s390_match_ccmode_set (set, req_mode))
	      return false;
	}

  return true;
}

/* If a test-under-mask instruction can be used to implement
   (compare (and ... OP1) OP2), return the CC mode required
   to do that.  Otherwise, return VOIDmode.
   MIXED is true if the instruction can distinguish between
   CC1 and CC2 for mixed selected bits (TMxx); it is false
   if the instruction cannot (TM).  */

machine_mode
s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
{
  int bit0, bit1;

  /* ??? Fixme: should work on CONST_WIDE_INT as well.  */
  if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
    return VOIDmode;

  /* Selected bits all zero: CC0.
     e.g.: int a; if ((a & (16 + 128)) == 0) */
  if (INTVAL (op2) == 0)
    return CCTmode;

  /* Selected bits all one: CC3.
     e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
  if (INTVAL (op2) == INTVAL (op1))
    return CCT3mode;

  /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
     int a;
     if ((a & (16 + 128)) == 16)         -> CCT1
     if ((a & (16 + 128)) == 128)        -> CCT2  */
  if (mixed)
    {
      bit1 = exact_log2 (INTVAL (op2));
      bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
      if (bit0 != -1 && bit1 != -1)
	return bit0 > bit1 ? CCT1mode : CCT2mode;
    }

  return VOIDmode;
}

/* Given a comparison code OP (EQ, NE, etc.) and the operands
   OP0 and OP1 of a COMPARE, return the mode to be used for the
   comparison.  */

machine_mode
s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
{
  switch (code)
    {
      case EQ:
      case NE:
	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	  return CCAPmode;
	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
	    && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
	  return CCAPmode;
	if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
	     || GET_CODE (op1) == NEG)
	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	  return CCLmode;

	if (GET_CODE (op0) == AND)
	  {
	    /* Check whether we can potentially do it via TM.  */
	    machine_mode ccmode;
	    ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
	    if (ccmode != VOIDmode)
	      {
		/* Relax CCTmode to CCZmode to allow fall-back to AND
		   if that turns out to be beneficial.  */
		return ccmode == CCTmode ? CCZmode : ccmode;
	      }
	  }

	if (register_operand (op0, HImode)
	    && GET_CODE (op1) == CONST_INT
	    && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
	  return CCT3mode;
	if (register_operand (op0, QImode)
	    && GET_CODE (op1) == CONST_INT
	    && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
	  return CCT3mode;

	return CCZmode;

      case LE:
      case LT:
      case GE:
      case GT:
	/* The only overflow condition of NEG and ABS happens when
	   INT_MIN is used as parameter: the expected positive result
	   cannot be represented and stays negative, i.e. we have an
	   overflow from a positive value to a negative one.  Using
	   CCAP mode the resulting cc can be used for comparisons.  */
	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	  return CCAPmode;

	/* If constants are involved in an add instruction, it is possible
	   to use the resulting cc for comparisons with zero.  Knowing the
	   sign of the constant, the overflow behavior becomes predictable.
	   E.g.:
	     int a, b; if ((b = a + c) > 0)
	   with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
1557	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1558	    && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1559		|| (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1560		    /* Avoid INT32_MIN on 32 bit.  */
1561		    && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1562	  {
1563	    if (INTVAL (XEXP((op0), 1)) < 0)
1564	      return CCANmode;
1565	    else
1566	      return CCAPmode;
1567	  }
1568
1569	/* Fall through.  */
1570      case LTGT:
1571	if (HONOR_NANS (op0) || HONOR_NANS (op1))
1572	  return CCSFPSmode;
1573
1574	/* Fall through.  */
1575      case UNORDERED:
1576      case ORDERED:
1577      case UNEQ:
1578      case UNLE:
1579      case UNLT:
1580      case UNGE:
1581      case UNGT:
1582	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1583	    && GET_CODE (op1) != CONST_INT)
1584	  return CCSRmode;
1585	return CCSmode;
1586
1587      case LTU:
1588      case GEU:
1589	if (GET_CODE (op0) == PLUS
1590	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1591	  return CCL1mode;
1592
1593	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1594	    && GET_CODE (op1) != CONST_INT)
1595	  return CCURmode;
1596	return CCUmode;
1597
1598      case LEU:
1599      case GTU:
1600	if (GET_CODE (op0) == MINUS
1601	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1602	  return CCL2mode;
1603
1604	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1605	    && GET_CODE (op1) != CONST_INT)
1606	  return CCURmode;
1607	return CCUmode;
1608
1609      default:
1610	gcc_unreachable ();
1611    }
1612}
1613
1614/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1615   that we can implement more efficiently.  */
1616
1617static void
1618s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1619			      bool op0_preserve_value)
1620{
1621  if (op0_preserve_value)
1622    return;
1623
1624  /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
1625  if ((*code == EQ || *code == NE)
1626      && *op1 == const0_rtx
1627      && GET_CODE (*op0) == ZERO_EXTRACT
1628      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1629      && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1630      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1631    {
1632      rtx inner = XEXP (*op0, 0);
1633      HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1634      HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1635      HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1636
1637      if (len > 0 && len < modesize
1638	  && pos >= 0 && pos + len <= modesize
1639	  && modesize <= HOST_BITS_PER_WIDE_INT)
1640	{
1641	  unsigned HOST_WIDE_INT block;
1642	  block = (HOST_WIDE_INT_1U << len) - 1;
1643	  block <<= modesize - pos - len;
1644
1645	  *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1646			      gen_int_mode (block, GET_MODE (inner)));
1647	}
1648    }
1649
1650  /* Narrow AND of memory against immediate to enable TM.  */
1651  if ((*code == EQ || *code == NE)
1652      && *op1 == const0_rtx
1653      && GET_CODE (*op0) == AND
1654      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1655      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1656    {
1657      rtx inner = XEXP (*op0, 0);
1658      rtx mask = XEXP (*op0, 1);
1659
1660      /* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
1661      if (GET_CODE (inner) == SUBREG
1662	  && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1663	  && (GET_MODE_SIZE (GET_MODE (inner))
1664	      >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1665	  && ((INTVAL (mask)
1666	       & GET_MODE_MASK (GET_MODE (inner))
1667	       & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1668	      == 0))
1669	inner = SUBREG_REG (inner);
1670
1671      /* Do not change volatile MEMs.  */
1672      if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1673	{
1674	  int part = s390_single_part (XEXP (*op0, 1),
1675				       GET_MODE (inner), QImode, 0);
1676	  if (part >= 0)
1677	    {
1678	      mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1679	      inner = adjust_address_nv (inner, QImode, part);
1680	      *op0 = gen_rtx_AND (QImode, inner, mask);
1681	    }
1682	}
1683    }
1684
1685  /* Narrow comparisons against 0xffff to HImode if possible.  */
1686  if ((*code == EQ || *code == NE)
1687      && GET_CODE (*op1) == CONST_INT
1688      && INTVAL (*op1) == 0xffff
1689      && SCALAR_INT_MODE_P (GET_MODE (*op0))
1690      && (nonzero_bits (*op0, GET_MODE (*op0))
1691	  & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1692    {
1693      *op0 = gen_lowpart (HImode, *op0);
1694      *op1 = constm1_rtx;
1695    }
1696
1697  /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
1698  if (GET_CODE (*op0) == UNSPEC
1699      && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1700      && XVECLEN (*op0, 0) == 1
1701      && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1702      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1703      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1704      && *op1 == const0_rtx)
1705    {
1706      enum rtx_code new_code = UNKNOWN;
1707      switch (*code)
1708	{
1709	  case EQ: new_code = EQ;  break;
1710	  case NE: new_code = NE;  break;
1711	  case LT: new_code = GTU; break;
1712	  case GT: new_code = LTU; break;
1713	  case LE: new_code = GEU; break;
1714	  case GE: new_code = LEU; break;
1715	  default: break;
1716	}
1717
1718      if (new_code != UNKNOWN)
1719	{
1720	  *op0 = XVECEXP (*op0, 0, 0);
1721	  *code = new_code;
1722	}
1723    }
1724
1725  /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
1726  if (GET_CODE (*op0) == UNSPEC
1727      && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1728      && XVECLEN (*op0, 0) == 1
1729      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1730      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1731      && CONST_INT_P (*op1))
1732    {
1733      enum rtx_code new_code = UNKNOWN;
1734      switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1735	{
1736	case E_CCZmode:
1737	case E_CCRAWmode:
1738	  switch (*code)
1739	    {
1740	    case EQ: new_code = EQ;  break;
1741	    case NE: new_code = NE;  break;
1742	    default: break;
1743	    }
1744	  break;
1745	default: break;
1746	}
1747
1748      if (new_code != UNKNOWN)
1749	{
1750	  /* For CCRAWmode put the required cc mask into the second
1751	     operand.  */
1752	  if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1753	      && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1754	    *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));

1755	  *op0 = XVECEXP (*op0, 0, 0);
1756	  *code = new_code;
1757	}
1758    }
1759
1760  /* Simplify cascaded EQ, NE with const0_rtx.  */
1761  if ((*code == NE || *code == EQ)
1762      && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1763      && GET_MODE (*op0) == SImode
1764      && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1765      && REG_P (XEXP (*op0, 0))
1766      && XEXP (*op0, 1) == const0_rtx
1767      && *op1 == const0_rtx)
1768    {
1769      if ((*code == EQ && GET_CODE (*op0) == NE)
1770	  || (*code == NE && GET_CODE (*op0) == EQ))
1771	*code = EQ;
1772      else
1773	*code = NE;
1774      *op0 = XEXP (*op0, 0);
1775    }
1776
1777  /* Prefer register over memory as first operand.  */
1778  if (MEM_P (*op0) && REG_P (*op1))
1779    {
1780      rtx tem = *op0; *op0 = *op1; *op1 = tem;
1781      *code = (int)swap_condition ((enum rtx_code)*code);
1782    }
1783
1784  /* A comparison result is compared against zero.  Replace it with
1785     the (perhaps inverted) original comparison.
1786     This probably should be done by simplify_relational_operation.  */
1787  if ((*code == EQ || *code == NE)
1788      && *op1 == const0_rtx
1789      && COMPARISON_P (*op0)
1790      && CC_REG_P (XEXP (*op0, 0)))
1791    {
1792      enum rtx_code new_code;
1793
1794      if (*code == EQ)
1795	new_code = reversed_comparison_code_parts (GET_CODE (*op0),
1796						   XEXP (*op0, 0),
1797						   XEXP (*op0, 1), NULL);
1798      else
1799	new_code = GET_CODE (*op0);
1800
1801      if (new_code != UNKNOWN)
1802	{
1803	  *code = new_code;
1804	  *op1 = XEXP (*op0, 1);
1805	  *op0 = XEXP (*op0, 0);
1806	}
1807    }
1808
1809  /* ~a==b -> ~(a^b)==0   ~a!=b -> ~(a^b)!=0 */
1810  if (TARGET_Z15
1811      && (*code == EQ || *code == NE)
1812      && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
1813      && GET_CODE (*op0) == NOT)
1814    {
1815      machine_mode mode = GET_MODE (*op0);
1816      *op0 = gen_rtx_XOR (mode, XEXP (*op0, 0), *op1);
1817      *op0 = gen_rtx_NOT (mode, *op0);
1818      *op1 = const0_rtx;
1819    }
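  /* Example of the transformation above (illustrative): with A and B in
     DImode, (ne (not:DI A) B) becomes (ne (not:DI (xor:DI A B)) 0), a
     form that combine can typically match with the z15 NOT-XOR
     (nxgrk-style) patterns.  */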
1820
1821  /* a&b == -1 -> ~a|~b == 0    a|b == -1 -> ~a&~b == 0  */
1822  if (TARGET_Z15
1823      && (*code == EQ || *code == NE)
1824      && (GET_CODE (*op0) == AND || GET_CODE (*op0) == IOR)
1825      && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
1826      && CONST_INT_P (*op1)
1827      && *op1 == constm1_rtx)
1828    {
1829      machine_mode mode = GET_MODE (*op0);
1830      rtx op00 = gen_rtx_NOT (mode, XEXP (*op0, 0));
1831      rtx op01 = gen_rtx_NOT (mode, XEXP (*op0, 1));
1832
1833      if (GET_CODE (*op0) == AND)
1834	*op0 = gen_rtx_IOR (mode, op00, op01);
1835      else
1836	*op0 = gen_rtx_AND (mode, op00, op01);
1837
1838      *op1 = const0_rtx;
1839    }
1840}
1841
1842
1843/* Emit a compare instruction suitable to implement the comparison
1844   OP0 CODE OP1.  Return the correct condition RTL to be placed in
1845   the IF_THEN_ELSE of the conditional branch testing the result.  */
1846
1847rtx
1848s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1849{
1850  machine_mode mode = s390_select_ccmode (code, op0, op1);
1851  rtx cc;
1852
1853  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1854    {
1855      /* Do not output a redundant compare instruction if a
1856	 compare_and_swap pattern already computed the result and the
1857	 machine modes are compatible.  */
1858      gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1859		  == GET_MODE (op0));
1860      cc = op0;
1861    }
1862  else
1863    {
1864      cc = gen_rtx_REG (mode, CC_REGNUM);
1865      emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1866    }
1867
1868  return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1869}
1870
1871/* If MEM is not a legitimate compare-and-swap memory operand, return a new
1872   MEM, whose address is a pseudo containing the original MEM's address.  */
1873
1874static rtx
1875s390_legitimize_cs_operand (rtx mem)
1876{
1877  rtx tmp;
1878
1879  if (!contains_symbol_ref_p (mem))
1880    return mem;
1881  tmp = gen_reg_rtx (Pmode);
1882  emit_move_insn (tmp, copy_rtx (XEXP (mem, 0)));
1883  return change_address (mem, VOIDmode, tmp);
1884}
1885
1886/* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1887   matches CMP.
1888   Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1889   conditional branch testing the result.  */
1890
1891static rtx
1892s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1893			    rtx cmp, rtx new_rtx, machine_mode ccmode)
1894{
1895  rtx cc;
1896
1897  mem = s390_legitimize_cs_operand (mem);
1898  cc = gen_rtx_REG (ccmode, CC_REGNUM);
1899  switch (GET_MODE (mem))
1900    {
1901    case E_SImode:
1902      emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
1903							 new_rtx, cc));
1904      break;
1905    case E_DImode:
1906      emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
1907							 new_rtx, cc));
1908      break;
1909    case E_TImode:
1910      emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
1911							 new_rtx, cc));
1912      break;
1913    case E_QImode:
1914    case E_HImode:
1915    default:
1916      gcc_unreachable ();
1917    }
1918  return s390_emit_compare (code, cc, const0_rtx);
1919}
1920
1921/* Emit a jump instruction to TARGET and return it.  If COND is
1922   NULL_RTX, emit an unconditional jump, else a conditional jump under
1923   condition COND.  */
1924
1925rtx_insn *
1926s390_emit_jump (rtx target, rtx cond)
1927{
1928  rtx insn;
1929
1930  target = gen_rtx_LABEL_REF (VOIDmode, target);
1931  if (cond)
1932    target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1933
1934  insn = gen_rtx_SET (pc_rtx, target);
1935  return emit_jump_insn (insn);
1936}
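/* A typical (illustrative) use of the two helpers above when expanding a
   conditional branch:

     rtx cond = s390_emit_compare (GT, op0, op1);
     s390_emit_jump (label, cond);

   This emits the compare insn and a conditional jump to LABEL that is
   taken when OP0 > OP1.  */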
1937
1938/* Return branch condition mask to implement a branch
1939   specified by CODE.  Return -1 for invalid comparisons.  */
1940
1941int
1942s390_branch_condition_mask (rtx code)
1943{
1944  const int CC0 = 1 << 3;
1945  const int CC1 = 1 << 2;
1946  const int CC2 = 1 << 1;
1947  const int CC3 = 1 << 0;
1948
1949  gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1950  gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1951  gcc_assert (XEXP (code, 1) == const0_rtx
1952	      || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1953		  && CONST_INT_P (XEXP (code, 1))));
1954
1955
1956  switch (GET_MODE (XEXP (code, 0)))
1957    {
1958    case E_CCZmode:
1959    case E_CCZ1mode:
1960      switch (GET_CODE (code))
1961	{
1962	case EQ:	return CC0;
1963	case NE:	return CC1 | CC2 | CC3;
1964	default:	return -1;
1965	}
1966      break;
1967
1968    case E_CCT1mode:
1969      switch (GET_CODE (code))
1970	{
1971	case EQ:	return CC1;
1972	case NE:	return CC0 | CC2 | CC3;
1973	default:	return -1;
1974	}
1975      break;
1976
1977    case E_CCT2mode:
1978      switch (GET_CODE (code))
1979	{
1980	case EQ:	return CC2;
1981	case NE:	return CC0 | CC1 | CC3;
1982	default:	return -1;
1983	}
1984      break;
1985
1986    case E_CCT3mode:
1987      switch (GET_CODE (code))
1988	{
1989	case EQ:	return CC3;
1990	case NE:	return CC0 | CC1 | CC2;
1991	default:	return -1;
1992	}
1993      break;
1994
1995    case E_CCLmode:
1996      switch (GET_CODE (code))
1997	{
1998	case EQ:	return CC0 | CC2;
1999	case NE:	return CC1 | CC3;
2000	default:	return -1;
2001	}
2002      break;
2003
2004    case E_CCL1mode:
2005      switch (GET_CODE (code))
2006	{
2007	case LTU:	return CC2 | CC3;  /* carry */
2008	case GEU:	return CC0 | CC1;  /* no carry */
2009	default:	return -1;
2010	}
2011      break;
2012
2013    case E_CCL2mode:
2014      switch (GET_CODE (code))
2015	{
2016	case GTU:	return CC0 | CC1;  /* borrow */
2017	case LEU:	return CC2 | CC3;  /* no borrow */
2018	default:	return -1;
2019	}
2020      break;
2021
2022    case E_CCL3mode:
2023      switch (GET_CODE (code))
2024	{
2025	case EQ:	return CC0 | CC2;
2026	case NE:	return CC1 | CC3;
2027	case LTU:	return CC1;
2028	case GTU:	return CC3;
2029	case LEU:	return CC1 | CC2;
2030	case GEU:	return CC2 | CC3;
2031	default:	return -1;
2032	}
2033
2034    case E_CCUmode:
2035      switch (GET_CODE (code))
2036	{
2037	case EQ:	return CC0;
2038	case NE:	return CC1 | CC2 | CC3;
2039	case LTU:	return CC1;
2040	case GTU:	return CC2;
2041	case LEU:	return CC0 | CC1;
2042	case GEU:	return CC0 | CC2;
2043	default:	return -1;
2044	}
2045      break;
2046
2047    case E_CCURmode:
2048      switch (GET_CODE (code))
2049	{
2050	case EQ:	return CC0;
2051	case NE:	return CC2 | CC1 | CC3;
2052	case LTU:	return CC2;
2053	case GTU:	return CC1;
2054	case LEU:	return CC0 | CC2;
2055	case GEU:	return CC0 | CC1;
2056	default:	return -1;
2057	}
2058      break;
2059
2060    case E_CCAPmode:
2061      switch (GET_CODE (code))
2062	{
2063	case EQ:	return CC0;
2064	case NE:	return CC1 | CC2 | CC3;
2065	case LT:	return CC1 | CC3;
2066	case GT:	return CC2;
2067	case LE:	return CC0 | CC1 | CC3;
2068	case GE:	return CC0 | CC2;
2069	default:	return -1;
2070	}
2071      break;
2072
2073    case E_CCANmode:
2074      switch (GET_CODE (code))
2075	{
2076	case EQ:	return CC0;
2077	case NE:	return CC1 | CC2 | CC3;
2078	case LT:	return CC1;
2079	case GT:	return CC2 | CC3;
2080	case LE:	return CC0 | CC1;
2081	case GE:	return CC0 | CC2 | CC3;
2082	default:	return -1;
2083	}
2084      break;
2085
2086    case E_CCOmode:
2087      switch (GET_CODE (code))
2088	{
2089	case EQ:	return CC0 | CC1 | CC2;
2090	case NE:	return CC3;
2091	default:	return -1;
2092	}
2093      break;
2094
2095    case E_CCSmode:
2096    case E_CCSFPSmode:
2097      switch (GET_CODE (code))
2098	{
2099	case EQ:	return CC0;
2100	case NE:	return CC1 | CC2 | CC3;
2101	case LT:	return CC1;
2102	case GT:	return CC2;
2103	case LE:	return CC0 | CC1;
2104	case GE:	return CC0 | CC2;
2105	case UNORDERED:	return CC3;
2106	case ORDERED:	return CC0 | CC1 | CC2;
2107	case UNEQ:	return CC0 | CC3;
2108	case UNLT:	return CC1 | CC3;
2109	case UNGT:	return CC2 | CC3;
2110	case UNLE:	return CC0 | CC1 | CC3;
2111	case UNGE:	return CC0 | CC2 | CC3;
2112	case LTGT:	return CC1 | CC2;
2113	default:	return -1;
2114	}
2115      break;
2116
2117    case E_CCSRmode:
2118      switch (GET_CODE (code))
2119	{
2120	case EQ:	return CC0;
2121	case NE:	return CC2 | CC1 | CC3;
2122	case LT:	return CC2;
2123	case GT:	return CC1;
2124	case LE:	return CC0 | CC2;
2125	case GE:	return CC0 | CC1;
2126	case UNORDERED:	return CC3;
2127	case ORDERED:	return CC0 | CC2 | CC1;
2128	case UNEQ:	return CC0 | CC3;
2129	case UNLT:	return CC2 | CC3;
2130	case UNGT:	return CC1 | CC3;
2131	case UNLE:	return CC0 | CC2 | CC3;
2132	case UNGE:	return CC0 | CC1 | CC3;
2133	case LTGT:	return CC2 | CC1;
2134	default:	return -1;
2135	}
2136      break;
2137
2138      /* Vector comparison modes.  */
2139      /* CC2 will never be set.  It is, however, part of the negated
2140	 masks.  */
2141    case E_CCVIALLmode:
2142      switch (GET_CODE (code))
2143	{
2144	case EQ:
2145	case GTU:
2146	case GT:
2147	case GE:        return CC0;
2148	  /* The inverted modes are in fact *any* modes.  */
2149	case NE:
2150	case LEU:
2151	case LE:
2152	case LT:        return CC3 | CC1 | CC2;
2153	default:        return -1;
2154	}
2155
2156    case E_CCVIANYmode:
2157      switch (GET_CODE (code))
2158	{
2159	case EQ:
2160	case GTU:
2161	case GT:
2162	case GE:        return CC0 | CC1;
2163	  /* The inverted modes are in fact *all* modes.  */
2164	case NE:
2165	case LEU:
2166	case LE:
2167	case LT:        return CC3 | CC2;
2168	default:        return -1;
2169	}
2170    case E_CCVFALLmode:
2171      switch (GET_CODE (code))
2172	{
2173	case EQ:
2174	case GT:
2175	case GE:        return CC0;
2176	  /* The inverted modes are in fact *any* modes.  */
2177	case NE:
2178	case UNLE:
2179	case UNLT:      return CC3 | CC1 | CC2;
2180	default:        return -1;
2181	}
2182
2183    case E_CCVFANYmode:
2184      switch (GET_CODE (code))
2185	{
2186	case EQ:
2187	case GT:
2188	case GE:        return CC0 | CC1;
2189	  /* The inverted modes are in fact *all* modes.  */
2190	case NE:
2191	case UNLE:
2192	case UNLT:      return CC3 | CC2;
2193	default:        return -1;
2194	}
2195
2196    case E_CCRAWmode:
2197      switch (GET_CODE (code))
2198	{
2199	case EQ:
2200	  return INTVAL (XEXP (code, 1));
2201	case NE:
2202	  return (INTVAL (XEXP (code, 1))) ^ 0xf;
2203	default:
2204	  gcc_unreachable ();
2205	}
2206
2207    default:
2208      return -1;
2209    }
2210}
2211
2212
2213/* Return branch condition mask to implement a compare and branch
2214   specified by CODE.  Return -1 for invalid comparisons.  */
2215
2216int
2217s390_compare_and_branch_condition_mask (rtx code)
2218{
2219  const int CC0 = 1 << 3;
2220  const int CC1 = 1 << 2;
2221  const int CC2 = 1 << 1;
2222
2223  switch (GET_CODE (code))
2224    {
2225    case EQ:
2226      return CC0;
2227    case NE:
2228      return CC1 | CC2;
2229    case LT:
2230    case LTU:
2231      return CC1;
2232    case GT:
2233    case GTU:
2234      return CC2;
2235    case LE:
2236    case LEU:
2237      return CC0 | CC1;
2238    case GE:
2239    case GEU:
2240      return CC0 | CC2;
2241    default:
2242      gcc_unreachable ();
2243    }
2244  return -1;
2245}
2246
2247/* If INV is false, return assembler mnemonic string to implement
2248   a branch specified by CODE.  If INV is true, return mnemonic
2249   for the corresponding inverted branch.  */
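/* For example (derived from the mask table below): a GEU test in CCUmode
   has the branch mask CC0 | CC2 = 10, so the mnemonic is "he" ("high or
   equal"); the inverted variant uses 10 ^ 15 = 5, i.e. "nhe".  */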
2250
2251static const char *
2252s390_branch_condition_mnemonic (rtx code, int inv)
2253{
2254  int mask;
2255
2256  static const char *const mnemonic[16] =
2257    {
2258      NULL, "o", "h", "nle",
2259      "l", "nhe", "lh", "ne",
2260      "e", "nlh", "he", "nl",
2261      "le", "nh", "no", NULL
2262    };
2263
2264  if (GET_CODE (XEXP (code, 0)) == REG
2265      && REGNO (XEXP (code, 0)) == CC_REGNUM
2266      && (XEXP (code, 1) == const0_rtx
2267	  || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2268	      && CONST_INT_P (XEXP (code, 1)))))
2269    mask = s390_branch_condition_mask (code);
2270  else
2271    mask = s390_compare_and_branch_condition_mask (code);
2272
2273  gcc_assert (mask >= 0);
2274
2275  if (inv)
2276    mask ^= 15;
2277
2278  gcc_assert (mask >= 1 && mask <= 14);
2279
2280  return mnemonic[mask];
2281}
2282
2283/* Return the part of OP which has a value different from DEF.
2284   The size of the part is determined by MODE.
2285   Use this function only if you already know that OP really
2286   contains such a part.  */
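/* For instance (illustrative), s390_extract_part (GEN_INT (0x00ff0000),
   HImode, 0) scans the 16-bit parts starting from the least significant
   one and returns 0x00ff, the first part that differs from DEF.  */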
2287
2288unsigned HOST_WIDE_INT
2289s390_extract_part (rtx op, machine_mode mode, int def)
2290{
2291  unsigned HOST_WIDE_INT value = 0;
2292  int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2293  int part_bits = GET_MODE_BITSIZE (mode);
2294  unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
2295  int i;
2296
2297  for (i = 0; i < max_parts; i++)
2298    {
2299      if (i == 0)
2300	value = UINTVAL (op);
2301      else
2302	value >>= part_bits;
2303
2304      if ((value & part_mask) != (def & part_mask))
2305	return value & part_mask;
2306    }
2307
2308  gcc_unreachable ();
2309}
2310
2311/* If OP is an integer constant of mode MODE with exactly one
2312   part of mode PART_MODE unequal to DEF, return the number of that
2313   part. Otherwise, return -1.  */
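/* For example (illustrative), s390_single_part (GEN_INT (0xff00), SImode,
   QImode, 0) returns 2: only the byte 0xff differs from DEF, and the
   result is counted from the most significant part
   (n_parts - 1 - part).  */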
2314
2315int
2316s390_single_part (rtx op,
2317		  machine_mode mode,
2318		  machine_mode part_mode,
2319		  int def)
2320{
2321  unsigned HOST_WIDE_INT value = 0;
2322  int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2323  unsigned HOST_WIDE_INT part_mask
2324    = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
2325  int i, part = -1;
2326
2327  if (GET_CODE (op) != CONST_INT)
2328    return -1;
2329
2330  for (i = 0; i < n_parts; i++)
2331    {
2332      if (i == 0)
2333	value = UINTVAL (op);
2334      else
2335	value >>= GET_MODE_BITSIZE (part_mode);
2336
2337      if ((value & part_mask) != (def & part_mask))
2338	{
2339	  if (part != -1)
2340	    return -1;
2341	  else
2342	    part = i;
2343	}
2344    }
2345  return part == -1 ? -1 : n_parts - 1 - part;
2346}
2347
2348/* Return true if IN contains a contiguous bitfield in the lower SIZE
2349   bits and no other bits are set in (the lower SIZE bits of) IN.
2350
2351   PSTART and PEND can be used to obtain the start and end
2352   position (inclusive) of the bitfield relative to 64
2353   bits. *PSTART / *PEND gives the position of the first/last bit
2354   of the bitfield counting from the highest order bit starting
2355   with zero.  */
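/* As an illustration: for IN = 0xff00 and SIZE = 32 the bitfield occupies
   bits 48 .. 55 counted from the most significant bit of the 64-bit
   value, so *PSTART = 48, *PEND = 55 and the function returns true.  */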
2356
2357bool
2358s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2359				  int *pstart, int *pend)
2360{
2361  int start;
2362  int end = -1;
2363  int lowbit = HOST_BITS_PER_WIDE_INT - 1;
2364  int highbit = HOST_BITS_PER_WIDE_INT - size;
2365  unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
2366
2367  gcc_assert (!!pstart == !!pend);
2368  for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2369    if (end == -1)
2370      {
2371	/* Look for the rightmost bit of a contiguous range of ones.  */
2372	if (bitmask & in)
2373	  /* Found it.  */
2374	  end = start;
2375      }
2376    else
2377      {
2378	/* Look for the first zero bit after the range of ones.  */
2379	if (! (bitmask & in))
2380	  /* Found it.  */
2381	  break;
2382      }
2383  /* We're one past the last one-bit.  */
2384  start++;
2385
2386  if (end == -1)
2387    /* No one bits found.  */
2388    return false;
2389
2390  if (start > highbit)
2391    {
2392      unsigned HOST_WIDE_INT mask;
2393
2394      /* Calculate a mask for all bits beyond the contiguous bits.  */
2395      mask = ((~HOST_WIDE_INT_0U >> highbit)
2396	      & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
2397      if (mask & in)
2398	/* There are more bits set beyond the first range of one bits.  */
2399	return false;
2400    }
2401
2402  if (pstart)
2403    {
2404      *pstart = start;
2405      *pend = end;
2406    }
2407
2408  return true;
2409}
2410
2411/* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2412   if ~IN contains a contiguous bitfield.  In that case, *END is <
2413   *START.
2414
2415   If WRAP_P is true, a bitmask that wraps around is also tested.
2416   When a wraparound occurs *START is greater than *END (in
2417   non-null pointers), and the uppermost (64 - SIZE) bits are thus
2418   part of the range.  If WRAP_P is false, no wraparound is
2419   tested.  */
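/* Illustrative example: IN = 0xf00000000000000f with SIZE = 64 is not
   contiguous itself, but ~IN is, so with WRAP_P the function returns
   true with *START = 60 and *END = 3 (*START > *END indicating the
   wraparound).  */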
2420
2421bool
2422s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2423			   int size, int *start, int *end)
2424{
2425  int bs = HOST_BITS_PER_WIDE_INT;
2426  bool b;
2427
2428  gcc_assert (!!start == !!end);
2429  if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
2430    /* This cannot be expressed as a contiguous bitmask.  Exit early because
2431       the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2432       a valid bitmask.  */
2433    return false;
2434  b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2435  if (b)
2436    return true;
2437  if (! wrap_p)
2438    return false;
2439  b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2440  if (b && start)
2441    {
2442      int s = *start;
2443      int e = *end;
2444
2445      gcc_assert (s >= 1);
2446      *start = ((e + 1) & (bs - 1));
2447      *end = ((s - 1 + bs) & (bs - 1));
2448    }
2449
2450  return b;
2451}
2452
2453/* Return true if OP contains the same contiguous bitfield in *all*
2454   its elements.  START and END can be used to obtain the start and
2455   end position of the bitfield.
2456
2457   START/END give the position of the first/last bit of the bitfield
2458   counting from the lowest order bit starting with zero.  In order to
2459   use these values for S/390 instructions this has to be converted to
2460   "bits big endian" style.  */
2461
2462bool
2463s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2464{
2465  unsigned HOST_WIDE_INT mask;
2466  int size;
2467  rtx elt;
2468  bool b;
2469
2470  gcc_assert (!!start == !!end);
2471  if (!const_vec_duplicate_p (op, &elt)
2472      || !CONST_INT_P (elt))
2473    return false;
2474
2475  size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2476
2477  /* We cannot deal with V1TI/V1TF. This would require a vgmq.  */
2478  if (size > 64)
2479    return false;
2480
2481  mask = UINTVAL (elt);
2482
2483  b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2484  if (b)
2485    {
2486      if (start)
2487	{
2488	  *start -= (HOST_BITS_PER_WIDE_INT - size);
2489	  *end -= (HOST_BITS_PER_WIDE_INT - size);
2490	}
2491      return true;
2492    }
2493  else
2494    return false;
2495}
2496
2497/* Return true if OP consists only of byte chunks that are either 0 or
2498   0xff.  If MASK is != NULL, a byte mask is generated which is
2499   appropriate for the vector generate byte mask instruction.  */
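/* For example (illustrative): a V4SImode constant vector
   { 0xffffffff, 0, 0, 0xffffffff } is accepted and yields the byte mask
   0xf00f, with the most significant mask bit corresponding to the first
   byte of the vector.  */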
2500
2501bool
2502s390_bytemask_vector_p (rtx op, unsigned *mask)
2503{
2504  int i;
2505  unsigned tmp_mask = 0;
2506  int nunit, unit_size;
2507
2508  if (!VECTOR_MODE_P (GET_MODE (op))
2509      || GET_CODE (op) != CONST_VECTOR
2510      || !CONST_INT_P (XVECEXP (op, 0, 0)))
2511    return false;
2512
2513  nunit = GET_MODE_NUNITS (GET_MODE (op));
2514  unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2515
2516  for (i = 0; i < nunit; i++)
2517    {
2518      unsigned HOST_WIDE_INT c;
2519      int j;
2520
2521      if (!CONST_INT_P (XVECEXP (op, 0, i)))
2522	return false;
2523
2524      c = UINTVAL (XVECEXP (op, 0, i));
2525      for (j = 0; j < unit_size; j++)
2526	{
2527	  if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2528	    return false;
2529	  tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2530	  c = c >> BITS_PER_UNIT;
2531	}
2532    }
2533
2534  if (mask != NULL)
2535    *mask = tmp_mask;
2536
2537  return true;
2538}
2539
2540/* Check whether a rotate of ROTL followed by an AND of CONTIG is
2541   equivalent to a shift followed by the AND.  In particular, CONTIG
2542   should not overlap the (rotated) bit 0/bit 63 gap.  Negative values
2543   for ROTL indicate a rotate to the right.  */
2544
2545bool
2546s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2547{
2548  int start, end;
2549  bool ok;
2550
2551  ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2552  gcc_assert (ok);
2553
2554  if (rotl >= 0)
2555    return (64 - end >= rotl);
2556  else
2557    {
2558      /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2559	 DImode.  */
2560      rotl = -rotl + (64 - bitsize);
2561      return (start >= rotl);
2562    }
2563}
2564
2565/* Check whether we can (and want to) split a double-word
2566   move in mode MODE from SRC to DST into two single-word
2567   moves, moving the subword FIRST_SUBWORD first.  */
2568
2569bool
2570s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2571{
2572  /* Floating point and vector registers cannot be split.  */
2573  if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2574    return false;
2575
2576  /* Non-offsettable memory references cannot be split.  */
2577  if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2578      || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2579    return false;
2580
2581  /* Moving the first subword must not clobber a register
2582     needed to move the second subword.  */
2583  if (register_operand (dst, mode))
2584    {
2585      rtx subreg = operand_subword (dst, first_subword, 0, mode);
2586      if (reg_overlap_mentioned_p (subreg, src))
2587	return false;
2588    }
2589
2590  return true;
2591}
2592
2593/* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2594   and [MEM2, MEM2 + SIZE] do overlap and false
2595   otherwise.  */
2596
2597bool
2598s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2599{
2600  rtx addr1, addr2, addr_delta;
2601  HOST_WIDE_INT delta;
2602
2603  if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2604    return true;
2605
2606  if (size == 0)
2607    return false;
2608
2609  addr1 = XEXP (mem1, 0);
2610  addr2 = XEXP (mem2, 0);
2611
2612  addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2613
2614  /* This overlapping check is used by peepholes merging memory block operations.
2615     Overlapping operations would otherwise be recognized by the S/390 hardware
2616     and would fall back to a slower implementation. Allowing overlapping
2617     operations would lead to slow code but not to wrong code. Therefore we are
2618     somewhat optimistic if we cannot prove that the memory blocks are
2619     overlapping.
2620     That's why we return false here although this may accept operations on
2621     overlapping memory areas.  */
2622  if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2623    return false;
2624
2625  delta = INTVAL (addr_delta);
2626
2627  if (delta == 0
2628      || (delta > 0 && delta < size)
2629      || (delta < 0 && -delta < size))
2630    return true;
2631
2632  return false;
2633}
2634
2635/* Check whether the address of memory reference MEM2 equals exactly
2636   the address of memory reference MEM1 plus DELTA.  Return true if
2637   we can prove this to be the case, false otherwise.  */
2638
2639bool
2640s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2641{
2642  rtx addr1, addr2, addr_delta;
2643
2644  if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2645    return false;
2646
2647  addr1 = XEXP (mem1, 0);
2648  addr2 = XEXP (mem2, 0);
2649
2650  addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2651  if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2652    return false;
2653
2654  return true;
2655}
2656
2657/* Expand logical operator CODE in mode MODE with operands OPERANDS.  */
2658
2659void
2660s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2661			      rtx *operands)
2662{
2663  machine_mode wmode = mode;
2664  rtx dst = operands[0];
2665  rtx src1 = operands[1];
2666  rtx src2 = operands[2];
2667  rtx op, clob, tem;
2668
2669  /* If we cannot handle the operation directly, use a temp register.  */
2670  if (!s390_logical_operator_ok_p (operands))
2671    dst = gen_reg_rtx (mode);
2672
2673  /* QImode and HImode patterns make sense only if we have a destination
2674     in memory.  Otherwise perform the operation in SImode.  */
2675  if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2676    wmode = SImode;
2677
2678  /* Widen operands if required.  */
2679  if (mode != wmode)
2680    {
2681      if (GET_CODE (dst) == SUBREG
2682	  && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2683	dst = tem;
2684      else if (REG_P (dst))
2685	dst = gen_rtx_SUBREG (wmode, dst, 0);
2686      else
2687	dst = gen_reg_rtx (wmode);
2688
2689      if (GET_CODE (src1) == SUBREG
2690	  && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2691	src1 = tem;
2692      else if (GET_MODE (src1) != VOIDmode)
2693	src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2694
2695      if (GET_CODE (src2) == SUBREG
2696	  && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2697	src2 = tem;
2698      else if (GET_MODE (src2) != VOIDmode)
2699	src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2700    }
2701
2702  /* Emit the instruction.  */
2703  op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2704  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2705  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2706
2707  /* Fix up the destination if needed.  */
2708  if (dst != operands[0])
2709    emit_move_insn (operands[0], gen_lowpart (mode, dst));
2710}
2711
2712/* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR).  */
2713
2714bool
2715s390_logical_operator_ok_p (rtx *operands)
2716{
2717  /* If the destination operand is in memory, it needs to coincide
2718     with one of the source operands.  After reload, it has to be
2719     the first source operand.  */
2720  if (GET_CODE (operands[0]) == MEM)
2721    return rtx_equal_p (operands[0], operands[1])
2722	   || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2723
2724  return true;
2725}
2726
2727/* Narrow logical operation CODE of memory operand MEMOP with immediate
2728   operand IMMOP to switch from SS to SI type instructions.  */
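/* An illustrative example: for an SImode AND of a memory operand with the
   immediate 0xffffff0f, only the least significant byte differs from the
   AND default of -1, so the operation is narrowed to a QImode AND of the
   byte at offset 3 with the mask 0x0f (an NI-style instruction).  */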
2729
2730void
2731s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2732{
2733  int def = code == AND ? -1 : 0;
2734  HOST_WIDE_INT mask;
2735  int part;
2736
2737  gcc_assert (GET_CODE (*memop) == MEM);
2738  gcc_assert (!MEM_VOLATILE_P (*memop));
2739
2740  mask = s390_extract_part (*immop, QImode, def);
2741  part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2742  gcc_assert (part >= 0);
2743
2744  *memop = adjust_address (*memop, QImode, part);
2745  *immop = gen_int_mode (mask, QImode);
2746}
2747
2748
2749/* How to allocate a 'struct machine_function'.  */
2750
2751static struct machine_function *
2752s390_init_machine_status (void)
2753{
2754  return ggc_cleared_alloc<machine_function> ();
2755}
2756
2757/* Map for smallest class containing reg regno.  */
2758
2759const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2760{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  0 */
2761  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  4 */
2762  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  8 */
2763  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /* 12 */
2764  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 16 */
2765  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 20 */
2766  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 24 */
2767  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 28 */
2768  ADDR_REGS,    CC_REGS,   ADDR_REGS, ADDR_REGS,  /* 32 */
2769  ACCESS_REGS,	ACCESS_REGS, VEC_REGS, VEC_REGS,  /* 36 */
2770  VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 40 */
2771  VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 44 */
2772  VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 48 */
2773  VEC_REGS, VEC_REGS                              /* 52 */
2774};
2775
2776/* Return attribute type of insn.  */
2777
2778static enum attr_type
2779s390_safe_attr_type (rtx_insn *insn)
2780{
2781  if (recog_memoized (insn) >= 0)
2782    return get_attr_type (insn);
2783  else
2784    return TYPE_NONE;
2785}
2786
2787/* Return attribute relative_long of insn.  */
2788
2789static bool
2790s390_safe_relative_long_p (rtx_insn *insn)
2791{
2792  if (recog_memoized (insn) >= 0)
2793    return get_attr_relative_long (insn) == RELATIVE_LONG_YES;
2794  else
2795    return false;
2796}
2797
2798/* Return true if DISP is a valid short displacement.  */
2799
2800static bool
2801s390_short_displacement (rtx disp)
2802{
2803  /* No displacement is OK.  */
2804  if (!disp)
2805    return true;
2806
2807  /* Without the long displacement facility we don't need to
2808     distinguish between long and short displacements.  */
2809  if (!TARGET_LONG_DISPLACEMENT)
2810    return true;
2811
2812  /* Integer displacement in range.  */
2813  if (GET_CODE (disp) == CONST_INT)
2814    return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2815
2816  /* GOT offset is not OK, the GOT can be large.  */
2817  if (GET_CODE (disp) == CONST
2818      && GET_CODE (XEXP (disp, 0)) == UNSPEC
2819      && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2820	  || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2821    return false;
2822
2823  /* All other symbolic constants are literal pool references,
2824     which are OK as the literal pool must be small.  */
2825  if (GET_CODE (disp) == CONST)
2826    return true;
2827
2828  return false;
2829}
2830
2831/* Attempts to split `ref', which should be UNSPEC_LTREF, into (base + `disp').
2832   If successful, also determines the following characteristics of `ref':
2833   `is_ptr' - whether it can be an LA argument, `is_base_ptr' - whether the
2834   resulting base is a well-known base register (stack/frame pointer, etc),
2835   `is_pool_ptr' - whether it is considered a literal pool pointer for
2836   purposes of avoiding two different literal pool pointers per insn during
2837   or after reload (`B' constraint).  */
2838static bool
2839s390_decompose_constant_pool_ref (rtx *ref, rtx *disp, bool *is_ptr,
2840				  bool *is_base_ptr, bool *is_pool_ptr)
2841{
2842  if (!*ref)
2843    return true;
2844
2845  if (GET_CODE (*ref) == UNSPEC)
2846    switch (XINT (*ref, 1))
2847      {
2848      case UNSPEC_LTREF:
2849	if (!*disp)
2850	  *disp = gen_rtx_UNSPEC (Pmode,
2851				  gen_rtvec (1, XVECEXP (*ref, 0, 0)),
2852				  UNSPEC_LTREL_OFFSET);
2853	else
2854	  return false;
2855
2856	*ref = XVECEXP (*ref, 0, 1);
2857	break;
2858
2859      default:
2860	return false;
2861      }
2862
2863  if (!REG_P (*ref) || GET_MODE (*ref) != Pmode)
2864    return false;
2865
2866  if (REGNO (*ref) == STACK_POINTER_REGNUM
2867      || REGNO (*ref) == FRAME_POINTER_REGNUM
2868      || ((reload_completed || reload_in_progress)
2869	  && frame_pointer_needed
2870	  && REGNO (*ref) == HARD_FRAME_POINTER_REGNUM)
2871      || REGNO (*ref) == ARG_POINTER_REGNUM
2872      || (flag_pic
2873	  && REGNO (*ref) == PIC_OFFSET_TABLE_REGNUM))
2874    *is_ptr = *is_base_ptr = true;
2875
2876  if ((reload_completed || reload_in_progress)
2877      && *ref == cfun->machine->base_reg)
2878    *is_ptr = *is_base_ptr = *is_pool_ptr = true;
2879
2880  return true;
2881}
2882
2883/* Decompose a RTL expression ADDR for a memory address into
2884   its components, returned in OUT.
2885
2886   Returns false if ADDR is not a valid memory address, true
2887   otherwise.  If OUT is NULL, don't return the components,
2888   but check for validity only.
2889
2890   Note: Only addresses in canonical form are recognized.
2891   LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2892   canonical form so that they will be recognized.  */
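/* A small illustrative example: the canonical address
   (plus (plus (reg r1) (reg r2)) (const_int 12)) is decomposed into
   indx = r1, base = r2 and an integer displacement of 12 (subject to the
   pointer-preference swap and displacement checks below); the register
   names are placeholders only.  */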
2893
2894static int
2895s390_decompose_address (rtx addr, struct s390_address *out)
2896{
2897  HOST_WIDE_INT offset = 0;
2898  rtx base = NULL_RTX;
2899  rtx indx = NULL_RTX;
2900  rtx disp = NULL_RTX;
2901  rtx orig_disp;
2902  bool pointer = false;
2903  bool base_ptr = false;
2904  bool indx_ptr = false;
2905  bool literal_pool = false;
2906
2907  /* We may need to substitute the literal pool base register into the address
2908     below.  However, at this point we do not know which register is going to
2909     be used as base, so we substitute the arg pointer register.  This is going
2910     to be treated as holding a pointer below -- it shouldn't be used for any
2911     other purpose.  */
2912  rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2913
2914  /* Decompose address into base + index + displacement.  */
2915
2916  if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2917    base = addr;
2918
2919  else if (GET_CODE (addr) == PLUS)
2920    {
2921      rtx op0 = XEXP (addr, 0);
2922      rtx op1 = XEXP (addr, 1);
2923      enum rtx_code code0 = GET_CODE (op0);
2924      enum rtx_code code1 = GET_CODE (op1);
2925
2926      if (code0 == REG || code0 == UNSPEC)
2927	{
2928	  if (code1 == REG || code1 == UNSPEC)
2929	    {
2930	      indx = op0;	/* index + base */
2931	      base = op1;
2932	    }
2933
2934	  else
2935	    {
2936	      base = op0;	/* base + displacement */
2937	      disp = op1;
2938	    }
2939	}
2940
2941      else if (code0 == PLUS)
2942	{
2943	  indx = XEXP (op0, 0);	/* index + base + disp */
2944	  base = XEXP (op0, 1);
2945	  disp = op1;
2946	}
2947
2948      else
2949	{
2950	  return false;
2951	}
2952    }
2953
2954  else
2955    disp = addr;		/* displacement */
2956
2957  /* Extract integer part of displacement.  */
2958  orig_disp = disp;
2959  if (disp)
2960    {
2961      if (GET_CODE (disp) == CONST_INT)
2962	{
2963	  offset = INTVAL (disp);
2964	  disp = NULL_RTX;
2965	}
2966      else if (GET_CODE (disp) == CONST
2967	       && GET_CODE (XEXP (disp, 0)) == PLUS
2968	       && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2969	{
2970	  offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2971	  disp = XEXP (XEXP (disp, 0), 0);
2972	}
2973    }
2974
2975  /* Strip off CONST here to avoid special case tests later.  */
2976  if (disp && GET_CODE (disp) == CONST)
2977    disp = XEXP (disp, 0);
2978
2979  /* We can convert literal pool addresses to
2980     displacements by basing them off the base register.  */
2981  if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2982    {
2983      if (base || indx)
2984	return false;
2985
2986      base = fake_pool_base, literal_pool = true;
2987
2988      /* Mark up the displacement.  */
2989      disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2990			     UNSPEC_LTREL_OFFSET);
2991    }
2992
2993  /* Validate base register.  */
2994  if (!s390_decompose_constant_pool_ref (&base, &disp, &pointer, &base_ptr,
2995					 &literal_pool))
2996    return false;
2997
2998  /* Validate index register.  */
2999  if (!s390_decompose_constant_pool_ref (&indx, &disp, &pointer, &indx_ptr,
3000					 &literal_pool))
3001    return false;
3002
3003  /* Prefer to use pointer as base, not index.  */
3004  if (base && indx && !base_ptr
3005      && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
3006    {
3007      rtx tmp = base;
3008      base = indx;
3009      indx = tmp;
3010    }
3011
3012  /* Validate displacement.  */
3013  if (!disp)
3014    {
3015      /* If virtual registers are involved, the displacement will change later
3016	 anyway as the virtual registers get eliminated.  This could make a
3017	 valid displacement invalid, but it is more likely to make an invalid
3018	 displacement valid, because we sometimes access the register save area
3019	 via negative offsets to one of those registers.
3020	 Thus we don't check the displacement for validity here.  If after
3021	 elimination the displacement turns out to be invalid after all,
3022	 this is fixed up by reload in any case.  */
3023      /* LRA always keeps displacements up to date, and we need to
3024	 know that the displacement is right throughout LRA, not only at
3025	 the final elimination.  */
3026      if (lra_in_progress
3027	  || (base != arg_pointer_rtx
3028	      && indx != arg_pointer_rtx
3029	      && base != return_address_pointer_rtx
3030	      && indx != return_address_pointer_rtx
3031	      && base != frame_pointer_rtx
3032	      && indx != frame_pointer_rtx
3033	      && base != virtual_stack_vars_rtx
3034	      && indx != virtual_stack_vars_rtx))
3035	if (!DISP_IN_RANGE (offset))
3036	  return false;
3037    }
3038  else
3039    {
3040      /* All the special cases are pointers.  */
3041      pointer = true;
3042
3043      /* In the small-PIC case, the linker converts @GOT
3044	 and @GOTNTPOFF offsets to possible displacements.  */
3045      if (GET_CODE (disp) == UNSPEC
3046	  && (XINT (disp, 1) == UNSPEC_GOT
3047	      || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
3048	  && flag_pic == 1)
3049	{
3050	  ;
3051	}
3052
3053      /* Accept pool label offsets.  */
3054      else if (GET_CODE (disp) == UNSPEC
3055	       && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
3056	;
3057
3058      /* Accept literal pool references.  */
3059      else if (GET_CODE (disp) == UNSPEC
3060	       && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
3061	{
3062	  /* In case CSE pulled a non literal pool reference out of
3063	     the pool we have to reject the address.  This is
3064	     especially important when loading the GOT pointer on non
3065	     zarch CPUs.  In this case the literal pool contains an lt
3066	     relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3067	     will most likely exceed the displacement.  */
3068	  if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3069	      || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
3070	    return false;
3071
3072	  orig_disp = gen_rtx_CONST (Pmode, disp);
3073	  if (offset)
3074	    {
3075	      /* If we have an offset, make sure it does not
3076		 exceed the size of the constant pool entry.
3077		 Otherwise we might generate an out-of-range
3078		 displacement for the base register form.  */
3079	      rtx sym = XVECEXP (disp, 0, 0);
3080	      if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
3081		return false;
3082
3083	      orig_disp = plus_constant (Pmode, orig_disp, offset);
3084	    }
3085	}
3086
3087      else
3088	return false;
3089    }
3090
3091  if (!base && !indx)
3092    pointer = true;
3093
3094  if (out)
3095    {
3096      out->base = base;
3097      out->indx = indx;
3098      out->disp = orig_disp;
3099      out->pointer = pointer;
3100      out->literal_pool = literal_pool;
3101    }
3102
3103  return true;
3104}
3105
3106/* Decompose a RTL expression OP for an address style operand into its
3107   components, and return the base register in BASE and the offset in
3108   OFFSET.  While OP looks like an address it is never supposed to be
3109   used as such.
3110
3111   Return true if OP is a valid address operand, false if not.  */
3112
3113bool
3114s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3115					HOST_WIDE_INT *offset)
3116{
3117  rtx off = NULL_RTX;
3118
3119  /* We can have an integer constant, an address register,
3120     or a sum of the two.  */
3121  if (CONST_SCALAR_INT_P (op))
3122    {
3123      off = op;
3124      op = NULL_RTX;
3125    }
3126  if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3127    {
3128      off = XEXP (op, 1);
3129      op = XEXP (op, 0);
3130    }
3131  while (op && GET_CODE (op) == SUBREG)
3132    op = SUBREG_REG (op);
3133
3134  if (op && GET_CODE (op) != REG)
3135    return false;
3136
3137  if (offset)
3138    {
3139      if (off == NULL_RTX)
3140	*offset = 0;
3141      else if (CONST_INT_P (off))
3142	*offset = INTVAL (off);
3143      else if (CONST_WIDE_INT_P (off))
3144	/* The offset will be cut down to 12 bits anyway, so just take
3145	   the lowest-order chunk of the wide int.  */
3146	*offset = CONST_WIDE_INT_ELT (off, 0);
3147      else
3148	gcc_unreachable ();
3149    }
3150  if (base)
3151    *base = op;
3152
3153   return true;
3154}
3155
3156/*  Check that OP is a valid shift count operand.
3157    It should be of the following structure:
3158      (subreg (and (plus (reg imm_op)) 2^k-1) 7)
3159    where the subreg, the and, and the plus are optional.
3160
3161    If IMPLICIT_MASK is > 0 and OP contains an
3162      (AND ... immediate)
3163    it is checked whether IMPLICIT_MASK and the immediate match.
3164    Otherwise, no checking is performed.
3165  */
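/* Illustrative examples: a plain (reg:SI n), (plus:SI (reg:SI n)
   (const_int 3)), or (and:SI (reg:SI n) (const_int 63)) are all accepted;
   with IMPLICIT_MASK == 63 the AND form is accepted only if its immediate
   has all bits of that mask set.  */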
3166bool
3167s390_valid_shift_count (rtx op, HOST_WIDE_INT implicit_mask)
3168{
3169  /* Strip subreg.  */
3170  while (GET_CODE (op) == SUBREG && subreg_lowpart_p (op))
3171    op = XEXP (op, 0);
3172
3173  /* Check for an and with proper constant.  */
3174  if (GET_CODE (op) == AND)
3175    {
3176      rtx op1 = XEXP (op, 0);
3177      rtx imm = XEXP (op, 1);
3178
3179      if (GET_CODE (op1) == SUBREG && subreg_lowpart_p (op1))
3180	op1 = XEXP (op1, 0);
3181
3182      if (!(register_operand (op1, GET_MODE (op1)) || GET_CODE (op1) == PLUS))
3183	return false;
3184
3185      if (!immediate_operand (imm, GET_MODE (imm)))
3186	return false;
3187
3188      HOST_WIDE_INT val = INTVAL (imm);
3189      if (implicit_mask > 0
3190	  && (val & implicit_mask) != implicit_mask)
3191	return false;
3192
3193      op = op1;
3194    }
3195
3196  /* Check the rest.  */
3197  return s390_decompose_addrstyle_without_index (op, NULL, NULL);
3198}
3199
3200/* Return true if OP is a valid address without index.  */
3201
3202bool
3203s390_legitimate_address_without_index_p (rtx op)
3204{
3205  struct s390_address addr;
3206
3207  if (!s390_decompose_address (XEXP (op, 0), &addr))
3208    return false;
3209  if (addr.indx)
3210    return false;
3211
3212  return true;
3213}
3214
3215
3216/* Return TRUE if ADDR is an operand valid for a load/store relative
3217   instruction.  Be aware that the alignment of the operand needs to
3218   be checked separately.
3219   Valid addresses are single references or a sum of a reference and a
3220   constant integer. Return these parts in SYMREF and ADDEND.  You can
3221   pass NULL in SYMREF and/or ADDEND if you are not interested in these
3222   values.  */
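/* For instance (illustrative): (const (plus (symbol_ref "sym")
   (const_int 8))) is accepted, returning the symbol_ref in SYMREF and 8
   in ADDEND.  */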
3223
3224static bool
3225s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3226{
3227  HOST_WIDE_INT tmpaddend = 0;
3228
3229  if (GET_CODE (addr) == CONST)
3230    addr = XEXP (addr, 0);
3231
3232  if (GET_CODE (addr) == PLUS)
3233    {
3234      if (!CONST_INT_P (XEXP (addr, 1)))
3235	return false;
3236
3237      tmpaddend = INTVAL (XEXP (addr, 1));
3238      addr = XEXP (addr, 0);
3239    }
3240
3241  if (GET_CODE (addr) == SYMBOL_REF
3242      || (GET_CODE (addr) == UNSPEC
3243	  && (XINT (addr, 1) == UNSPEC_GOTENT
3244	      || XINT (addr, 1) == UNSPEC_PLT)))
3245    {
3246      if (symref)
3247	*symref = addr;
3248      if (addend)
3249	*addend = tmpaddend;
3250
3251      return true;
3252    }
3253  return false;
3254}
3255
3256/* Return true if the address in OP is valid for constraint letter C
3257   if wrapped in a MEM rtx.  Set LIT_POOL_OK to true if literal
3258   pool MEMs should be accepted.  Only the Q, R, S, T constraint
3259   letters are allowed for C.  */
3260
3261static int
3262s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3263{
3264  rtx symref;
3265  struct s390_address addr;
3266  bool decomposed = false;
3267
3268  if (!address_operand (op, GET_MODE (op)))
3269    return 0;
3270
3271  /* This check makes sure that no symbolic addresses (except literal
3272     pool references) are accepted by the R or T constraints.  */
3273  if (s390_loadrelative_operand_p (op, &symref, NULL)
3274      && (!lit_pool_ok
3275          || !SYMBOL_REF_P (symref)
3276          || !CONSTANT_POOL_ADDRESS_P (symref)))
3277    return 0;
3278
3279  /* Ensure literal pool references are only accepted if LIT_POOL_OK.  */
3280  if (!lit_pool_ok)
3281    {
3282      if (!s390_decompose_address (op, &addr))
3283	return 0;
3284      if (addr.literal_pool)
3285	return 0;
3286      decomposed = true;
3287    }
3288
3289  /* With reload, we sometimes get intermediate address forms that are
3290     actually invalid as-is, but we need to accept them in the most
3291     generic cases below ('R' or 'T'), since reload will in fact fix
3292     them up.  LRA behaves differently here; we never see such forms,
3293     but on the other hand, we need to strictly reject every invalid
3294     address form.  After both reload and LRA invalid address forms
3295     must be rejected, because nothing will fix them up later.  Perform
3296     this check right up front.  */
3297  if (lra_in_progress || reload_completed)
3298    {
3299      if (!decomposed && !s390_decompose_address (op, &addr))
3300	return 0;
3301      decomposed = true;
3302    }
3303
3304  switch (c)
3305    {
3306    case 'Q': /* no index short displacement */
3307      if (!decomposed && !s390_decompose_address (op, &addr))
3308	return 0;
3309      if (addr.indx)
3310	return 0;
3311      if (!s390_short_displacement (addr.disp))
3312	return 0;
3313      break;
3314
3315    case 'R': /* with index short displacement */
3316      if (TARGET_LONG_DISPLACEMENT)
3317	{
3318	  if (!decomposed && !s390_decompose_address (op, &addr))
3319	    return 0;
3320	  if (!s390_short_displacement (addr.disp))
3321	    return 0;
3322	}
3323      /* Any invalid address here will be fixed up by reload,
3324	 so accept it for the most generic constraint.  */
3325      break;
3326
3327    case 'S': /* no index long displacement */
3328      if (!decomposed && !s390_decompose_address (op, &addr))
3329	return 0;
3330      if (addr.indx)
3331	return 0;
3332      break;
3333
3334    case 'T': /* with index long displacement */
3335      /* Any invalid address here will be fixed up by reload,
3336	 so accept it for the most generic constraint.  */
3337      break;
3338
3339    default:
3340      return 0;
3341    }
3342  return 1;
3343}
3344
3345
3346/* Evaluates constraint strings described by the regular expression
3347   ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3348   the constraint given in STR, and 0 otherwise.  */
3349
3350int
3351s390_mem_constraint (const char *str, rtx op)
3352{
3353  char c = str[0];
3354
3355  switch (c)
3356    {
3357    case 'A':
3358      /* Check for offsettable variants of memory constraints.  */
3359      if (!MEM_P (op) || MEM_VOLATILE_P (op))
3360	return 0;
3361      if ((reload_completed || reload_in_progress)
3362	  ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3363	return 0;
3364      return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3365    case 'B':
3366      /* Check for non-literal-pool variants of memory constraints.  */
3367      if (!MEM_P (op))
3368	return 0;
3369      return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3370    case 'Q':
3371    case 'R':
3372    case 'S':
3373    case 'T':
3374      if (GET_CODE (op) != MEM)
3375	return 0;
3376      return s390_check_qrst_address (c, XEXP (op, 0), true);
3377    case 'Y':
3378      /* Simply check for the basic form of a shift count.  Reload will
3379	 take care of making sure we have a proper base register.  */
3380      if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3381	return 0;
3382      break;
3383    case 'Z':
3384      return s390_check_qrst_address (str[1], op, true);
3385    default:
3386      return 0;
3387    }
3388  return 1;
3389}
3390
3391
3392/* Evaluates constraint strings starting with letter O.  Input
3393   parameter C is the letter following the "O" in the constraint
3394   string. Returns 1 if VALUE meets the respective constraint and 0
3395   otherwise.  */
3396
3397int
3398s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3399{
3400  if (!TARGET_EXTIMM)
3401    return 0;
3402
3403  switch (c)
3404    {
3405    case 's':
3406      return trunc_int_for_mode (value, SImode) == value;
3407
3408    case 'p':
3409      return value == 0
3410	|| s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3411
3412    case 'n':
3413      return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3414
3415    default:
3416      gcc_unreachable ();
3417    }
3418}
3419
3420
3421/* Evaluates constraint strings starting with letter N.  Parameter STR
3422   contains the letters following letter "N" in the constraint string.
3423   Returns true if VALUE matches the constraint.  */
3424
3425int
3426s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3427{
3428  machine_mode mode, part_mode;
3429  int def;
3430  int part, part_goal;
3431
3432
3433  if (str[0] == 'x')
3434    part_goal = -1;
3435  else
3436    part_goal = str[0] - '0';
3437
3438  switch (str[1])
3439    {
3440    case 'Q':
3441      part_mode = QImode;
3442      break;
3443    case 'H':
3444      part_mode = HImode;
3445      break;
3446    case 'S':
3447      part_mode = SImode;
3448      break;
3449    default:
3450      return 0;
3451    }
3452
3453  switch (str[2])
3454    {
3455    case 'H':
3456      mode = HImode;
3457      break;
3458    case 'S':
3459      mode = SImode;
3460      break;
3461    case 'D':
3462      mode = DImode;
3463      break;
3464    default:
3465      return 0;
3466    }
3467
3468  switch (str[3])
3469    {
3470    case '0':
3471      def = 0;
3472      break;
3473    case 'F':
3474      def = -1;
3475      break;
3476    default:
3477      return 0;
3478    }
3479
3480  if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3481    return 0;
3482
3483  part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3484  if (part < 0)
3485    return 0;
3486  if (part_goal != -1 && part_goal != part)
3487    return 0;
3488
3489  return 1;
3490}
3491
3492
3493/* Returns true if the input parameter VALUE is a float zero.  */
3494
3495int
3496s390_float_const_zero_p (rtx value)
3497{
3498  return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3499	  && value == CONST0_RTX (GET_MODE (value)));
3500}
3501
3502/* Implement TARGET_REGISTER_MOVE_COST.  */
3503
3504static int
3505s390_register_move_cost (machine_mode mode,
3506			 reg_class_t from, reg_class_t to)
3507{
3508  /* On s390, copy between fprs and gprs is expensive.  */
3509
3510  /* It becomes somewhat faster once ldgr/lgdr are available.  */
3511  if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3512    {
3513      /* ldgr is single cycle. */
3514      if (reg_classes_intersect_p (from, GENERAL_REGS)
3515	  && reg_classes_intersect_p (to, FP_REGS))
3516	return 1;
3517      /* lgdr needs 3 cycles. */
3518      if (reg_classes_intersect_p (to, GENERAL_REGS)
3519	  && reg_classes_intersect_p (from, FP_REGS))
3520	return 3;
3521    }
3522
3523  /* Otherwise copying is done via memory.  */
3524  if ((reg_classes_intersect_p (from, GENERAL_REGS)
3525       && reg_classes_intersect_p (to, FP_REGS))
3526      || (reg_classes_intersect_p (from, FP_REGS)
3527	  && reg_classes_intersect_p (to, GENERAL_REGS)))
3528    return 10;
3529
3530  /* We usually do not want to copy via CC.  */
3531  if (reg_classes_intersect_p (from, CC_REGS)
3532       || reg_classes_intersect_p (to, CC_REGS))
3533    return 5;
3534
3535  return 1;
3536}
3537
3538/* Implement TARGET_MEMORY_MOVE_COST.  */
3539
3540static int
3541s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3542		       reg_class_t rclass ATTRIBUTE_UNUSED,
3543		       bool in ATTRIBUTE_UNUSED)
3544{
3545  return 2;
3546}
3547
3548/* Compute a (partial) cost for rtx X.  Return true if the complete
3549   cost has been computed, and false if subexpressions should be
3550   scanned.  In either case, *TOTAL contains the cost result.  The
3551   initial value of *TOTAL is the default value computed by
3552   rtx_cost.  It may be left unmodified.  OUTER_CODE contains the
3553   code of the superexpression of x.  */
3554
3555static bool
3556s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3557		int opno ATTRIBUTE_UNUSED,
3558		int *total, bool speed ATTRIBUTE_UNUSED)
3559{
3560  int code = GET_CODE (x);
3561  switch (code)
3562    {
3563    case CONST:
3564    case CONST_INT:
3565    case LABEL_REF:
3566    case SYMBOL_REF:
3567    case CONST_DOUBLE:
3568    case CONST_WIDE_INT:
3569    case MEM:
3570      *total = 0;
3571      return true;
3572
3573    case SET:
3574      {
3575	/* Without this a conditional move instruction would be
3576	   accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3577	   comparison operator).  That's a bit pessimistic.  */
3578
3579	if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
3580	  return false;
3581
3582	rtx cond = XEXP (SET_SRC (x), 0);
3583
3584	if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
3585	  return false;
3586
3587	/* It is going to be a load/store on condition.  Make it
3588	   slightly more expensive than a normal load.  */
3589	*total = COSTS_N_INSNS (1) + 1;
3590
3591	rtx dst = SET_DEST (x);
3592	rtx then = XEXP (SET_SRC (x), 1);
3593	rtx els = XEXP (SET_SRC (x), 2);
3594
3595	/* It is a real IF-THEN-ELSE.  An additional move will be
3596	   needed to implement that.  */
3597	if (!TARGET_Z15
3598	    && reload_completed
3599	    && !rtx_equal_p (dst, then)
3600	    && !rtx_equal_p (dst, els))
3601	  *total += COSTS_N_INSNS (1) / 2;
3602
3603	/* A minor penalty for constants we cannot directly handle.  */
3604	if ((CONST_INT_P (then) || CONST_INT_P (els))
3605	    && (!TARGET_Z13 || MEM_P (dst)
3606		|| (CONST_INT_P (then) && !satisfies_constraint_K (then))
3607		|| (CONST_INT_P (els) && !satisfies_constraint_K (els))))
3608	  *total += COSTS_N_INSNS (1) / 2;
3609
3610	/* A store on condition can only handle register src operands.  */
3611	if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
3612	  *total += COSTS_N_INSNS (1) / 2;
3613
3614	return true;
3615      }
3616    case IOR:
3617
3618      /* nnrk, nngrk: (~a) | (~b) is ~(a & b), i.e. a NAND.  */
3619      if (TARGET_Z15
3620	  && (mode == SImode || mode == DImode)
3621	  && GET_CODE (XEXP (x, 0)) == NOT
3622	  && GET_CODE (XEXP (x, 1)) == NOT)
3623	{
3624	  *total = COSTS_N_INSNS (1);
3625	  if (!REG_P (XEXP (XEXP (x, 0), 0)))
3626	    *total += 1;
3627	  if (!REG_P (XEXP (XEXP (x, 1), 0)))
3628	    *total += 1;
3629	  return true;
3630	}
3631
3632      /* risbg */
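      /* Matches (a & (2^n - 1)) | (b << n), i.e. inserting B above the
	 low n bits of A -- a candidate for risbg.  */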
3633      if (GET_CODE (XEXP (x, 0)) == AND
3634	  && GET_CODE (XEXP (x, 1)) == ASHIFT
3635	  && REG_P (XEXP (XEXP (x, 0), 0))
3636	  && REG_P (XEXP (XEXP (x, 1), 0))
3637	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3638	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3639	  && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3640	      (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3641	{
3642	  *total = COSTS_N_INSNS (2);
3643	  return true;
3644	}
3645
3646      /* (~a) | (~b) on a 128-bit mode, i.e. a NAND.  With VXE this can
3647	 be done with a single vector instruction.  */
3648      if (TARGET_VXE
3649	  && GET_CODE (XEXP (x, 0)) == NOT
3650	  && GET_CODE (XEXP (x, 1)) == NOT
3651	  && REG_P (XEXP (XEXP (x, 0), 0))
3652	  && REG_P (XEXP (XEXP (x, 1), 0))
3653	  && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3654	  && s390_hard_regno_mode_ok (VR0_REGNUM,
3655				      GET_MODE (XEXP (XEXP (x, 0), 0))))
3656	{
3657	  *total = COSTS_N_INSNS (1);
3658	  return true;
3659	}
3660
3661      *total = COSTS_N_INSNS (1);
3662      return false;
3663
3664    case AND:
3665      /* nork, nogrk: (~a) & (~b) is ~(a | b), i.e. a NOR.  */
3666      if (TARGET_Z15
3667	  && (mode == SImode || mode == DImode)
3668	  && GET_CODE (XEXP (x, 0)) == NOT
3669	  && GET_CODE (XEXP (x, 1)) == NOT)
3670	{
3671	  *total = COSTS_N_INSNS (1);
3672	  if (!REG_P (XEXP (XEXP (x, 0), 0)))
3673	    *total += 1;
3674	  if (!REG_P (XEXP (XEXP (x, 1), 0)))
3675	    *total += 1;
3676	  return true;
3677	}
3678      /* fallthrough */
3679    case ASHIFT:
3680    case ASHIFTRT:
3681    case LSHIFTRT:
3682    case ROTATE:
3683    case ROTATERT:
3684    case XOR:
3685    case NEG:
3686    case NOT:
3687    case PLUS:
3688    case MINUS:
3689      *total = COSTS_N_INSNS (1);
3690      return false;
3691
3692    case MULT:
3693      switch (mode)
3694	{
3695	case E_SImode:
3696	  {
3697	    rtx left = XEXP (x, 0);
3698	    rtx right = XEXP (x, 1);
3699	    if (GET_CODE (right) == CONST_INT
3700		&& CONST_OK_FOR_K (INTVAL (right)))
3701	      *total = s390_cost->mhi;
3702	    else if (GET_CODE (left) == SIGN_EXTEND)
3703	      *total = s390_cost->mh;
3704	    else
3705	      *total = s390_cost->ms;  /* msr, ms, msy */
3706	    break;
3707	  }
3708	case E_DImode:
3709	  {
3710	    rtx left = XEXP (x, 0);
3711	    rtx right = XEXP (x, 1);
3712	    if (TARGET_ZARCH)
3713	      {
3714		if (GET_CODE (right) == CONST_INT
3715		    && CONST_OK_FOR_K (INTVAL (right)))
3716		  *total = s390_cost->mghi;
3717		else if (GET_CODE (left) == SIGN_EXTEND)
3718		  *total = s390_cost->msgf;
3719		else
3720		  *total = s390_cost->msg;  /* msgr, msg */
3721	      }
3722	    else /* TARGET_31BIT */
3723	      {
3724		if (GET_CODE (left) == SIGN_EXTEND
3725		    && GET_CODE (right) == SIGN_EXTEND)
3726		  /* mulsidi case: mr, m */
3727		  *total = s390_cost->m;
3728		else if (GET_CODE (left) == ZERO_EXTEND
3729			 && GET_CODE (right) == ZERO_EXTEND)
3730		  /* umulsidi case: ml, mlr */
3731		  *total = s390_cost->ml;
3732		else
3733		  /* Complex calculation is required.  */
3734		  *total = COSTS_N_INSNS (40);
3735	      }
3736	    break;
3737	  }
3738	case E_SFmode:
3739	case E_DFmode:
3740	  *total = s390_cost->mult_df;
3741	  break;
3742	case E_TFmode:
3743	  *total = s390_cost->mxbr;
3744	  break;
3745	default:
3746	  return false;
3747	}
3748      return false;
3749
3750    case FMA:
3751      switch (mode)
3752	{
3753	case E_DFmode:
3754	  *total = s390_cost->madbr;
3755	  break;
3756	case E_SFmode:
3757	  *total = s390_cost->maebr;
3758	  break;
3759	default:
3760	  return false;
3761	}
3762      /* Negate in the third argument is free: FMSUB.  */
3763      if (GET_CODE (XEXP (x, 2)) == NEG)
3764	{
3765	  *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3766		     + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3767		     + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3768	  return true;
3769	}
3770      return false;
3771
3772    case UDIV:
3773    case UMOD:
3774      if (mode == TImode)	       /* 128 bit division */
3775	*total = s390_cost->dlgr;
3776      else if (mode == DImode)
3777	{
3778	  rtx right = XEXP (x, 1);
3779	  if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3780	    *total = s390_cost->dlr;
3781	  else				       /* 64 by 64 bit division */
3782	    *total = s390_cost->dlgr;
3783	}
3784      else if (mode == SImode)         /* 32 bit division */
3785	*total = s390_cost->dlr;
3786      return false;
3787
3788    case DIV:
3789    case MOD:
3790      if (mode == DImode)
3791	{
3792	  rtx right = XEXP (x, 1);
3793	  if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3794	    if (TARGET_ZARCH)
3795	      *total = s390_cost->dsgfr;
3796	    else
3797	      *total = s390_cost->dr;
3798	  else				       /* 64 by 64 bit division */
3799	    *total = s390_cost->dsgr;
3800	}
3801      else if (mode == SImode)         /* 32 bit division */
3802	*total = s390_cost->dlr;
3803      else if (mode == SFmode)
3804	{
3805	  *total = s390_cost->debr;
3806	}
3807      else if (mode == DFmode)
3808	{
3809	  *total = s390_cost->ddbr;
3810	}
3811      else if (mode == TFmode)
3812	{
3813	  *total = s390_cost->dxbr;
3814	}
3815      return false;
3816
3817    case SQRT:
3818      if (mode == SFmode)
3819	*total = s390_cost->sqebr;
3820      else if (mode == DFmode)
3821	*total = s390_cost->sqdbr;
3822      else /* TFmode */
3823	*total = s390_cost->sqxbr;
3824      return false;
3825
3826    case SIGN_EXTEND:
3827    case ZERO_EXTEND:
3828      if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3829	  || outer_code == PLUS || outer_code == MINUS
3830	  || outer_code == COMPARE)
3831	*total = 0;
3832      return false;
3833
3834    case COMPARE:
3835      *total = COSTS_N_INSNS (1);
3836
3837      /* nxrk, nxgrk ~(a^b)==0 */
3838      if (TARGET_Z15
3839	  && GET_CODE (XEXP (x, 0)) == NOT
3840	  && XEXP (x, 1) == const0_rtx
3841	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3842	  && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3843	  && mode == CCZmode)
3844	{
3845	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3846	    *total += 1;
3847	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3848	    *total += 1;
3849	  return true;
3850	}
3851
3852      /* nnrk, nngrk, nork, nogrk */
3853      if (TARGET_Z15
3854	  && (GET_CODE (XEXP (x, 0)) == AND || GET_CODE (XEXP (x, 0)) == IOR)
3855	  && XEXP (x, 1) == const0_rtx
3856	  && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3857	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == NOT
3858	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == NOT
3859	  && mode == CCZmode)
3860	{
3861	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3862	    *total += 1;
3863	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 1), 0)))
3864	    *total += 1;
3865	  return true;
3866	}
3867
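      /* (compare (and X (const_int M)) (const_int C)) can often be handled
	 by a single TEST UNDER MASK instruction; see s390_tm_ccmode.  */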
3868      if (GET_CODE (XEXP (x, 0)) == AND
3869	  && GET_CODE (XEXP (x, 1)) == CONST_INT
3870	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3871	{
3872	  rtx op0 = XEXP (XEXP (x, 0), 0);
3873	  rtx op1 = XEXP (XEXP (x, 0), 1);
3874	  rtx op2 = XEXP (x, 1);
3875
3876	  if (memory_operand (op0, GET_MODE (op0))
3877	      && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3878	    return true;
3879	  if (register_operand (op0, GET_MODE (op0))
3880	      && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3881	    return true;
3882	}
3883      return false;
3884
3885    default:
3886      return false;
3887    }
3888}
3889
3890/* Return the cost of an address rtx ADDR.  */
3891
3892static int
3893s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3894		   addr_space_t as ATTRIBUTE_UNUSED,
3895		   bool speed ATTRIBUTE_UNUSED)
3896{
3897  struct s390_address ad;
3898  if (!s390_decompose_address (addr, &ad))
3899    return 1000;
3900
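  /* An address with an index register (base + index + displacement) is
     slightly more expensive than a plain base + displacement address.  */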
3901  return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3902}
3903
3904/* Implement targetm.vectorize.builtin_vectorization_cost.  */
3905static int
3906s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3907				 tree vectype,
3908				 int misalign ATTRIBUTE_UNUSED)
3909{
3910  switch (type_of_cost)
3911    {
3912      case scalar_stmt:
3913      case scalar_load:
3914      case scalar_store:
3915      case vector_stmt:
3916      case vector_load:
3917      case vector_store:
3918      case vector_gather_load:
3919      case vector_scatter_store:
3920      case vec_to_scalar:
3921      case scalar_to_vec:
3922      case cond_branch_not_taken:
3923      case vec_perm:
3924      case vec_promote_demote:
3925      case unaligned_load:
3926      case unaligned_store:
3927	return 1;
3928
3929      case cond_branch_taken:
3930	return 3;
3931
3932      case vec_construct:
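	/* Building an N-element vector is accounted as N - 1 operations,
	   e.g. 3 for V4SI.  */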
3933	return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3934
3935      default:
3936	gcc_unreachable ();
3937    }
3938}
3939
3940/* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3941   otherwise return 0.  */
3942
3943int
3944tls_symbolic_operand (rtx op)
3945{
3946  if (GET_CODE (op) != SYMBOL_REF)
3947    return 0;
3948  return SYMBOL_REF_TLS_MODEL (op);
3949}
3950
3951/* Split DImode access register reference REG (on 64-bit) into its constituent
3952   low and high parts, and store them into LO and HI.  Note that gen_lowpart/
3953   gen_highpart cannot be used as they assume all registers are word-sized,
3954   while our access registers have only half that size.  */
3955
3956void
3957s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3958{
3959  gcc_assert (TARGET_64BIT);
3960  gcc_assert (ACCESS_REG_P (reg));
3961  gcc_assert (GET_MODE (reg) == DImode);
3962  gcc_assert (!(REGNO (reg) & 1));
3963
3964  *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3965  *hi = gen_rtx_REG (SImode, REGNO (reg));
3966}
3967
3968/* Return true if OP contains a symbol reference.  */
3969
3970bool
3971symbolic_reference_mentioned_p (rtx op)
3972{
3973  const char *fmt;
3974  int i;
3975
3976  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3977    return 1;
3978
3979  fmt = GET_RTX_FORMAT (GET_CODE (op));
3980  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3981    {
3982      if (fmt[i] == 'E')
3983	{
3984	  int j;
3985
3986	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3987	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3988	      return 1;
3989	}
3990
3991      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3992	return 1;
3993    }
3994
3995  return 0;
3996}
3997
3998/* Return true if OP contains a reference to a thread-local symbol.  */
3999
4000bool
4001tls_symbolic_reference_mentioned_p (rtx op)
4002{
4003  const char *fmt;
4004  int i;
4005
4006  if (GET_CODE (op) == SYMBOL_REF)
4007    return tls_symbolic_operand (op);
4008
4009  fmt = GET_RTX_FORMAT (GET_CODE (op));
4010  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4011    {
4012      if (fmt[i] == 'E')
4013	{
4014	  int j;
4015
4016	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4017	    if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4018	      return true;
4019	}
4020
4021      else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
4022	return true;
4023    }
4024
4025  return false;
4026}
4027
4028
4029/* Return true if OP is a legitimate general operand when
4030   generating PIC code.  It is given that flag_pic is on
4031   and that OP satisfies CONSTANT_P.  */
4032
4033int
4034legitimate_pic_operand_p (rtx op)
4035{
4036  /* Accept all non-symbolic constants.  */
4037  if (!SYMBOLIC_CONST (op))
4038    return 1;
4039
4040  /* Accept addresses that can be expressed relative to (pc).  */
4041  if (larl_operand (op, VOIDmode))
4042    return 1;
4043
4044  /* Reject everything else; must be handled
4045     via emit_symbolic_move.  */
4046  return 0;
4047}
4048
4049/* Returns true if the constant value OP is a legitimate general operand.
4050   It is given that OP satisfies CONSTANT_P.  */
4051
4052static bool
4053s390_legitimate_constant_p (machine_mode mode, rtx op)
4054{
4055  if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
4056    {
4057      if (GET_MODE_SIZE (mode) != 16)
4058	return 0;
4059
4060      if (!satisfies_constraint_j00 (op)
4061	  && !satisfies_constraint_jm1 (op)
4062	  && !satisfies_constraint_jKK (op)
4063	  && !satisfies_constraint_jxx (op)
4064	  && !satisfies_constraint_jyy (op))
4065	return 0;
4066    }
4067
4068  /* Accept all non-symbolic constants.  */
4069  if (!SYMBOLIC_CONST (op))
4070    return 1;
4071
4072  /* Accept immediate LARL operands.  */
4073  if (larl_operand (op, mode))
4074    return 1;
4075
4076  /* Thread-local symbols are never legal constants.  This is
4077     so that emit_call knows that computing such addresses
4078     might require a function call.  */
4079  if (TLS_SYMBOLIC_CONST (op))
4080    return 0;
4081
4082  /* In the PIC case, symbolic constants must *not* be
4083     forced into the literal pool.  We accept them here,
4084     so that they will be handled by emit_symbolic_move.  */
4085  if (flag_pic)
4086    return 1;
4087
4088  /* All remaining non-PIC symbolic constants are
4089     forced into the literal pool.  */
4090  return 0;
4091}
4092
4093/* Determine if it's legal to put X into the constant pool.  This
4094   is not possible if X contains the address of a symbol that is
4095   not constant (TLS) or not known at final link time (PIC).  */
4096
4097static bool
4098s390_cannot_force_const_mem (machine_mode mode, rtx x)
4099{
4100  switch (GET_CODE (x))
4101    {
4102    case CONST_INT:
4103    case CONST_DOUBLE:
4104    case CONST_WIDE_INT:
4105    case CONST_VECTOR:
4106      /* Accept all non-symbolic constants.  */
4107      return false;
4108
4109    case LABEL_REF:
4110      /* Labels are OK iff we are non-PIC.  */
4111      return flag_pic != 0;
4112
4113    case SYMBOL_REF:
4114      /* 'Naked' TLS symbol references are never OK,
4115	 non-TLS symbols are OK iff we are non-PIC.  */
4116      if (tls_symbolic_operand (x))
4117	return true;
4118      else
4119	return flag_pic != 0;
4120
4121    case CONST:
4122      return s390_cannot_force_const_mem (mode, XEXP (x, 0));
4123    case PLUS:
4124    case MINUS:
4125      return s390_cannot_force_const_mem (mode, XEXP (x, 0))
4126	     || s390_cannot_force_const_mem (mode, XEXP (x, 1));
4127
4128    case UNSPEC:
4129      switch (XINT (x, 1))
4130	{
4131	/* Only lt-relative or GOT-relative UNSPECs are OK.  */
4132	case UNSPEC_LTREL_OFFSET:
4133	case UNSPEC_GOT:
4134	case UNSPEC_GOTOFF:
4135	case UNSPEC_PLTOFF:
4136	case UNSPEC_TLSGD:
4137	case UNSPEC_TLSLDM:
4138	case UNSPEC_NTPOFF:
4139	case UNSPEC_DTPOFF:
4140	case UNSPEC_GOTNTPOFF:
4141	case UNSPEC_INDNTPOFF:
4142	  return false;
4143
4144	/* If the literal pool shares the code section, execute template
4145	   placeholders may be put into the pool as well.  */
4146	case UNSPEC_INSN:
4147	default:
4148	  return true;
4149	}
4150      break;
4151
4152    default:
4153      gcc_unreachable ();
4154    }
4155}
4156
4157/* Returns true if the constant value OP is a legitimate general
4158   operand during and after reload.  The difference to
4159   legitimate_constant_p is that this function will not accept
4160   a constant that would need to be forced to the literal pool
4161   before it can be used as operand.
4162   This function accepts all constants which can be loaded directly
4163   into a GPR.  */
4164
4165bool
4166legitimate_reload_constant_p (rtx op)
4167{
4168  /* Accept la(y) operands.  */
4169  if (GET_CODE (op) == CONST_INT
4170      && DISP_IN_RANGE (INTVAL (op)))
4171    return true;
4172
4173  /* Accept l(g)hi/l(g)fi operands.  */
4174  if (GET_CODE (op) == CONST_INT
4175      && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
4176    return true;
4177
4178  /* Accept lliXX operands.  */
4179  if (TARGET_ZARCH
4180      && GET_CODE (op) == CONST_INT
4181      && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4182      && s390_single_part (op, word_mode, HImode, 0) >= 0)
4183    return true;
4184
4185  if (TARGET_EXTIMM
4186      && GET_CODE (op) == CONST_INT
4187      && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4188      && s390_single_part (op, word_mode, SImode, 0) >= 0)
4189    return true;
4190
4191  /* Accept larl operands.  */
4192  if (larl_operand (op, VOIDmode))
4193    return true;
4194
4195  /* Accept floating-point zero operands that fit into a single GPR.  */
4196  if (GET_CODE (op) == CONST_DOUBLE
4197      && s390_float_const_zero_p (op)
4198      && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
4199    return true;
4200
4201  /* Accept double-word operands that can be split.  */
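  /* E.g. on 31 bit a DImode constant is split into two SImode words, each
     of which must itself be a legitimate reload constant.  */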
4202  if (GET_CODE (op) == CONST_WIDE_INT
4203      || (GET_CODE (op) == CONST_INT
4204	  && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
4205    {
4206      machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
4207      rtx hi = operand_subword (op, 0, 0, dword_mode);
4208      rtx lo = operand_subword (op, 1, 0, dword_mode);
4209      return legitimate_reload_constant_p (hi)
4210	     && legitimate_reload_constant_p (lo);
4211    }
4212
4213  /* Everything else cannot be handled without reload.  */
4214  return false;
4215}
4216
4217/* Returns true if the constant value OP is a legitimate fp operand
4218   during and after reload.
4219   This function accepts all constants which can be loaded directly
4220   into an FPR.  */
4221
4222static bool
4223legitimate_reload_fp_constant_p (rtx op)
4224{
4225  /* Accept floating-point zero operands if the load zero instruction
4226     can be used.  Prior to z196 the load fp zero instruction caused a
4227     performance penalty if the result is used as a BFP number.  */
4228  if (TARGET_Z196
4229      && GET_CODE (op) == CONST_DOUBLE
4230      && s390_float_const_zero_p (op))
4231    return true;
4232
4233  return false;
4234}
4235
4236/* Returns true if the constant value OP is a legitimate vector operand
4237   during and after reload.
4238   This function accepts all constants which can be loaded directly
4239   into a VR.  */
4240
4241static bool
4242legitimate_reload_vector_constant_p (rtx op)
4243{
4244  if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4245      && (satisfies_constraint_j00 (op)
4246	  || satisfies_constraint_jm1 (op)
4247	  || satisfies_constraint_jKK (op)
4248	  || satisfies_constraint_jxx (op)
4249	  || satisfies_constraint_jyy (op)))
4250    return true;
4251
4252  return false;
4253}
4254
4255/* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4256   return the class of reg to actually use.  */
4257
4258static reg_class_t
4259s390_preferred_reload_class (rtx op, reg_class_t rclass)
4260{
4261  switch (GET_CODE (op))
4262    {
4263      /* Constants we cannot reload into general registers
4264	 must be forced into the literal pool.  */
4265      case CONST_VECTOR:
4266      case CONST_DOUBLE:
4267      case CONST_INT:
4268      case CONST_WIDE_INT:
4269	if (reg_class_subset_p (GENERAL_REGS, rclass)
4270	    && legitimate_reload_constant_p (op))
4271	  return GENERAL_REGS;
4272	else if (reg_class_subset_p (ADDR_REGS, rclass)
4273		 && legitimate_reload_constant_p (op))
4274	  return ADDR_REGS;
4275	else if (reg_class_subset_p (FP_REGS, rclass)
4276		 && legitimate_reload_fp_constant_p (op))
4277	  return FP_REGS;
4278	else if (reg_class_subset_p (VEC_REGS, rclass)
4279		 && legitimate_reload_vector_constant_p (op))
4280	  return VEC_REGS;
4281
4282	return NO_REGS;
4283
4284      /* If a symbolic constant or a PLUS is reloaded,
4285	 it is most likely being used as an address, so
4286	 prefer ADDR_REGS.  If 'class' is not a superset
4287	 of ADDR_REGS, e.g. FP_REGS, reject this reload.  */
4288      case CONST:
4289	/* Symrefs cannot be pushed into the literal pool with -fPIC
4290	   so we *MUST NOT* return NO_REGS for these cases
4291	   (s390_cannot_force_const_mem will return true).
4292
4293	   On the other hand we MUST return NO_REGS for symrefs with
4294	   invalid addend which might have been pushed to the literal
4295	   pool (no -fPIC).  Usually we would expect them to be
4296	   handled via secondary reload but this does not happen if
4297	   they are used as literal pool slot replacement in reload
4298	   inheritance (see emit_input_reload_insns).  */
4299	if (GET_CODE (XEXP (op, 0)) == PLUS
4300	    && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF
4301	    && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
4302	  {
4303	    if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4304	      return ADDR_REGS;
4305	    else
4306	      return NO_REGS;
4307	  }
4308	/* fallthrough */
4309      case LABEL_REF:
4310      case SYMBOL_REF:
4311	if (!legitimate_reload_constant_p (op))
4312	  return NO_REGS;
4313	/* fallthrough */
4314      case PLUS:
4315	/* load address will be used.  */
4316	if (reg_class_subset_p (ADDR_REGS, rclass))
4317	  return ADDR_REGS;
4318	else
4319	  return NO_REGS;
4320
4321      default:
4322	break;
4323    }
4324
4325  return rclass;
4326}
4327
4328/* Return true if ADDR is SYMBOL_REF + addend with addend being a
4329   multiple of ALIGNMENT and the SYMBOL_REF being naturally
4330   aligned.  */
4331
4332bool
4333s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4334{
4335  HOST_WIDE_INT addend;
4336  rtx symref;
4337
4338  /* The "required alignment" might be 0 (e.g. for certain structs
4339     accessed via BLKmode).  Early abort in this case, as well as when
4340     an alignment > 8 is required.  */
4341  if (alignment < 2 || alignment > 8)
4342    return false;
4343
4344  if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4345    return false;
4346
4347  if (addend & (alignment - 1))
4348    return false;
4349
4350  if (GET_CODE (symref) == SYMBOL_REF)
4351    {
4352      /* s390_encode_section_info is not called for anchors, since they don't
4353	 have corresponding VAR_DECLs.  Therefore, we cannot rely on
4354	 SYMBOL_FLAG_NOTALIGN{2,4,8}_P returning useful information.  */
4355      if (SYMBOL_REF_ANCHOR_P (symref))
4356	{
4357	  HOST_WIDE_INT block_offset = SYMBOL_REF_BLOCK_OFFSET (symref);
4358	  unsigned int block_alignment = (SYMBOL_REF_BLOCK (symref)->alignment
4359					  / BITS_PER_UNIT);
4360
4361	  gcc_assert (block_offset >= 0);
4362	  return ((block_offset & (alignment - 1)) == 0
4363		  && block_alignment >= alignment);
4364	}
4365
4366      /* We have load-relative instructions for 2-byte, 4-byte, and
4367	 8-byte alignment so allow only these.  */
4368      switch (alignment)
4369	{
4370	case 8:	return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4371	case 4:	return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4372	case 2:	return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4373	default: return false;
4374	}
4375    }
4376
4377  if (GET_CODE (symref) == UNSPEC
4378      && alignment <= UNITS_PER_LONG)
4379    return true;
4380
4381  return false;
4382}
4383
4384/* ADDR is moved into REG using larl.  If ADDR isn't a valid larl
4385   operand, SCRATCH is used to load the even part of the address, and
4386   one is then added via la.  */
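/* For example, for an odd addend such as ADDR = sym + 5 the code below
   emits roughly (register choice aside):

     larl	%scratch, sym+4
     la		%reg, 1(%scratch)

   This is only a sketch of the intent; the actual insns are created by
   emit_move_insn and s390_load_address.  */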
4387
4388void
4389s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4390{
4391  HOST_WIDE_INT addend;
4392  rtx symref;
4393
4394  if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4395    gcc_unreachable ();
4396
4397  if (!(addend & 1))
4398    /* Easy case.  The addend is even so larl will do fine.  */
4399    emit_move_insn (reg, addr);
4400  else
4401    {
4402      /* We can leave the scratch register untouched if the target
4403	 register is a valid base register.  */
4404      if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4405	  && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4406	scratch = reg;
4407
4408      gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4409      gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4410
4411      if (addend != 1)
4412	emit_move_insn (scratch,
4413			gen_rtx_CONST (Pmode,
4414				       gen_rtx_PLUS (Pmode, symref,
4415						     GEN_INT (addend - 1))));
4416      else
4417	emit_move_insn (scratch, symref);
4418
4419      /* Increment the address using la in order to avoid clobbering cc.  */
4420      s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4421    }
4422}
4423
4424/* Generate what is necessary to move between REG and MEM using
4425   SCRATCH.  The direction is given by TOMEM.  */
4426
4427void
4428s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4429{
4430  /* Reload might have pulled a constant out of the literal pool.
4431     Force it back in.  */
4432  if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4433      || GET_CODE (mem) == CONST_WIDE_INT
4434      || GET_CODE (mem) == CONST_VECTOR
4435      || GET_CODE (mem) == CONST)
4436    mem = force_const_mem (GET_MODE (reg), mem);
4437
4438  gcc_assert (MEM_P (mem));
4439
4440  /* For a load from memory we can leave the scratch register
4441     untouched if the target register is a valid base register.  */
4442  if (!tomem
4443      && REGNO (reg) < FIRST_PSEUDO_REGISTER
4444      && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4445      && GET_MODE (reg) == GET_MODE (scratch))
4446    scratch = reg;
4447
4448  /* Load address into scratch register.  Since we can't have a
4449     secondary reload for a secondary reload we have to cover the case
4450     where larl would need a secondary reload here as well.  */
4451  s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4452
4453  /* Now we can use a standard load/store to do the move.  */
4454  if (tomem)
4455    emit_move_insn (replace_equiv_address (mem, scratch), reg);
4456  else
4457    emit_move_insn (reg, replace_equiv_address (mem, scratch));
4458}
4459
4460/* Inform reload about cases where moving X with a mode MODE to a register in
4461   RCLASS requires an extra scratch or immediate register.  Return the class
4462   needed for the immediate register.  */
4463
4464static reg_class_t
4465s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4466		       machine_mode mode, secondary_reload_info *sri)
4467{
4468  enum reg_class rclass = (enum reg_class) rclass_i;
4469
4470  /* Intermediate register needed.  */
4471  if (reg_classes_intersect_p (CC_REGS, rclass))
4472    return GENERAL_REGS;
4473
4474  if (TARGET_VX)
4475    {
4476      /* The vst/vl vector move instructions allow only for short
4477	 displacements.  */
4478      if (MEM_P (x)
4479	  && GET_CODE (XEXP (x, 0)) == PLUS
4480	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4481	  && !SHORT_DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1)))
4482	  && reg_class_subset_p (rclass, VEC_REGS)
4483	  && (!reg_class_subset_p (rclass, FP_REGS)
4484	      || (GET_MODE_SIZE (mode) > 8
4485		  && s390_class_max_nregs (FP_REGS, mode) == 1)))
4486	{
4487	  if (in_p)
4488	    sri->icode = (TARGET_64BIT ?
4489			  CODE_FOR_reloaddi_la_in :
4490			  CODE_FOR_reloadsi_la_in);
4491	  else
4492	    sri->icode = (TARGET_64BIT ?
4493			  CODE_FOR_reloaddi_la_out :
4494			  CODE_FOR_reloadsi_la_out);
4495	}
4496    }
4497
4498  if (TARGET_Z10)
4499    {
4500      HOST_WIDE_INT offset;
4501      rtx symref;
4502
4503      /* On z10 several optimizer steps may generate larl operands with
4504	 an odd addend.  */
4505      if (in_p
4506	  && s390_loadrelative_operand_p (x, &symref, &offset)
4507	  && mode == Pmode
4508	  && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4509	  && (offset & 1) == 1)
4510	sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4511		      : CODE_FOR_reloadsi_larl_odd_addend_z10);
4512
4513      /* Handle all the (mem (symref)) accesses we cannot use the z10
4514	 instructions for.  */
4515      if (MEM_P (x)
4516	  && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4517	  && (mode == QImode
4518	      || !reg_class_subset_p (rclass, GENERAL_REGS)
4519	      || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4520	      || !s390_check_symref_alignment (XEXP (x, 0),
4521					       GET_MODE_SIZE (mode))))
4522	{
4523#define __SECONDARY_RELOAD_CASE(M,m)					\
4524	  case E_##M##mode:						\
4525	    if (TARGET_64BIT)						\
4526	      sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 :	\
4527				  CODE_FOR_reload##m##di_tomem_z10;	\
4528	    else							\
4529	      sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 :	\
4530				  CODE_FOR_reload##m##si_tomem_z10;	\
4531	  break;
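	  /* For example, __SECONDARY_RELOAD_CASE (DI, di) expands (on 64 bit)
	     to setting sri->icode to CODE_FOR_reloaddidi_toreg_z10 or
	     CODE_FOR_reloaddidi_tomem_z10, depending on IN_P.  */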
4532
4533	  switch (GET_MODE (x))
4534	    {
4535	      __SECONDARY_RELOAD_CASE (QI, qi);
4536	      __SECONDARY_RELOAD_CASE (HI, hi);
4537	      __SECONDARY_RELOAD_CASE (SI, si);
4538	      __SECONDARY_RELOAD_CASE (DI, di);
4539	      __SECONDARY_RELOAD_CASE (TI, ti);
4540	      __SECONDARY_RELOAD_CASE (SF, sf);
4541	      __SECONDARY_RELOAD_CASE (DF, df);
4542	      __SECONDARY_RELOAD_CASE (TF, tf);
4543	      __SECONDARY_RELOAD_CASE (SD, sd);
4544	      __SECONDARY_RELOAD_CASE (DD, dd);
4545	      __SECONDARY_RELOAD_CASE (TD, td);
4546	      __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4547	      __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4548	      __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4549	      __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4550	      __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4551	      __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4552	      __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4553	      __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4554	      __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4555	      __SECONDARY_RELOAD_CASE (V1SI, v1si);
4556	      __SECONDARY_RELOAD_CASE (V2SI, v2si);
4557	      __SECONDARY_RELOAD_CASE (V4SI, v4si);
4558	      __SECONDARY_RELOAD_CASE (V1DI, v1di);
4559	      __SECONDARY_RELOAD_CASE (V2DI, v2di);
4560	      __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4561	      __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4562	      __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4563	      __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4564	      __SECONDARY_RELOAD_CASE (V1DF, v1df);
4565	      __SECONDARY_RELOAD_CASE (V2DF, v2df);
4566	      __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4567	    default:
4568	      gcc_unreachable ();
4569	    }
4570#undef __SECONDARY_RELOAD_CASE
4571	}
4572    }
4573
4574  /* We need a scratch register when loading a PLUS expression which
4575     is not a legitimate operand of the LOAD ADDRESS instruction.  */
4576  /* LRA can deal with the transformation of a PLUS operand very well,
4577     so we don't need to prompt LRA in this case.  */
4578  if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4579    sri->icode = (TARGET_64BIT ?
4580		  CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4581
4582  /* Performing a multiword move from or to memory we have to make sure the
4583     second chunk in memory is addressable without causing a displacement
4584     overflow.  If that would be the case we calculate the address in
4585     a scratch register.  */
4586  if (MEM_P (x)
4587      && GET_CODE (XEXP (x, 0)) == PLUS
4588      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4589      && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4590			 + GET_MODE_SIZE (mode) - 1))
4591    {
4592      /* For GENERAL_REGS a displacement overflow is no problem if occurring
4593	 in an s_operand address since we may fall back to lm/stm.  So we only
4594	 have to care about overflows in the b+i+d case.  */
4595      if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4596	   && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4597	   && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4598	  /* For FP_REGS no lm/stm is available so this check is triggered
4599	     for displacement overflows in b+i+d and b+d like addresses.  */
4600	  || (reg_classes_intersect_p (FP_REGS, rclass)
4601	      && s390_class_max_nregs (FP_REGS, mode) > 1))
4602	{
4603	  if (in_p)
4604	    sri->icode = (TARGET_64BIT ?
4605			  CODE_FOR_reloaddi_la_in :
4606			  CODE_FOR_reloadsi_la_in);
4607	  else
4608	    sri->icode = (TARGET_64BIT ?
4609			  CODE_FOR_reloaddi_la_out :
4610			  CODE_FOR_reloadsi_la_out);
4611	}
4612    }
4613
4614  /* A scratch address register is needed when a symbolic constant is
4615     copied to r0 while compiling with -fPIC.  In other cases the target
4616     register might be used as a temporary (see legitimize_pic_address).  */
4617  if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4618    sri->icode = (TARGET_64BIT ?
4619		  CODE_FOR_reloaddi_PIC_addr :
4620		  CODE_FOR_reloadsi_PIC_addr);
4621
4622  /* Either scratch or no register needed.  */
4623  return NO_REGS;
4624}
4625
4626/* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4627
4628   We need secondary memory to move data between GPRs and FPRs.
4629
4630   - With DFP the ldgr/lgdr instructions are available.  Due to the
4631     different alignment we cannot use them for SFmode.  For 31 bit, a
4632     64-bit value in a GPR would be a register pair, so here we still
4633     need to go via memory.
4634
4635   - With z13 we can do the SF/SImode moves with vlgvf.  Due to the
4636     overlapping of FPRs and VRs we still disallow TF/TD modes to be
4637     in full VRs so as before also on z13 we do these moves via
4638     memory.
4639
4640     FIXME: Should we try splitting it into two vlgvg's/vlvg's instead?  */
4641
4642static bool
4643s390_secondary_memory_needed (machine_mode mode,
4644			      reg_class_t class1, reg_class_t class2)
4645{
4646  return (((reg_classes_intersect_p (class1, VEC_REGS)
4647	    && reg_classes_intersect_p (class2, GENERAL_REGS))
4648	   || (reg_classes_intersect_p (class1, GENERAL_REGS)
4649	       && reg_classes_intersect_p (class2, VEC_REGS)))
4650	  && (TARGET_TPF || !TARGET_DFP || !TARGET_64BIT
4651	      || GET_MODE_SIZE (mode) != 8)
4652	  && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode)
4653			     && GET_MODE_SIZE (mode) > 8)));
4654}
4655
4656/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4657
4658   get_secondary_mem widens its argument to BITS_PER_WORD, which loses on
4659   64-bit because the movsi and movsf patterns don't handle r/f moves.  */
4660
4661static machine_mode
4662s390_secondary_memory_needed_mode (machine_mode mode)
4663{
4664  if (GET_MODE_BITSIZE (mode) < 32)
4665    return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
4666  return mode;
4667}
4668
4669/* Generate code to load SRC, which is PLUS that is not a
4670   legitimate operand for the LA instruction, into TARGET.
4671   SCRATCH may be used as scratch register.  */
4672
4673void
4674s390_expand_plus_operand (rtx target, rtx src,
4675			  rtx scratch)
4676{
4677  rtx sum1, sum2;
4678  struct s390_address ad;
4679
4680  /* src must be a PLUS; get its two operands.  */
4681  gcc_assert (GET_CODE (src) == PLUS);
4682  gcc_assert (GET_MODE (src) == Pmode);
4683
4684  /* Check if any of the two operands is already scheduled
4685     for replacement by reload.  This can happen e.g. when
4686     float registers occur in an address.  */
4687  sum1 = find_replacement (&XEXP (src, 0));
4688  sum2 = find_replacement (&XEXP (src, 1));
4689  src = gen_rtx_PLUS (Pmode, sum1, sum2);
4690
4691  /* If the address is already strictly valid, there's nothing to do.  */
4692  if (!s390_decompose_address (src, &ad)
4693      || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4694      || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4695    {
4696      /* Otherwise, one of the operands cannot be an address register;
4697	 we reload its value into the scratch register.  */
4698      if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4699	{
4700	  emit_move_insn (scratch, sum1);
4701	  sum1 = scratch;
4702	}
4703      if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4704	{
4705	  emit_move_insn (scratch, sum2);
4706	  sum2 = scratch;
4707	}
4708
4709      /* According to the way these invalid addresses are generated
4710	 in reload.c, it should never happen (at least on s390) that
4711	 *neither* of the PLUS components, after find_replacements
4712	 was applied, is an address register.  */
4713      if (sum1 == scratch && sum2 == scratch)
4714	{
4715	  debug_rtx (src);
4716	  gcc_unreachable ();
4717	}
4718
4719      src = gen_rtx_PLUS (Pmode, sum1, sum2);
4720    }
4721
4722  /* Emit the LOAD ADDRESS pattern.  Note that reload of PLUS
4723     is only ever performed on addresses, so we can mark the
4724     sum as legitimate for LA in any case.  */
4725  s390_load_address (target, src);
4726}
4727
4728
4729/* Return true if ADDR is a valid memory address.
4730   STRICT specifies whether strict register checking applies.  */
4731
4732static bool
4733s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4734{
4735  struct s390_address ad;
4736
4737  if (TARGET_Z10
4738      && larl_operand (addr, VOIDmode)
4739      && (mode == VOIDmode
4740	  || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4741    return true;
4742
4743  if (!s390_decompose_address (addr, &ad))
4744    return false;
4745
4746  /* The vector memory instructions only support short displacements.
4747     Reject invalid displacements early to prevent plenty of lay
4748     instructions from being generated later, which then cannot be merged
4749     properly.  */
4750  if (TARGET_VX
4751      && VECTOR_MODE_P (mode)
4752      && ad.disp != NULL_RTX
4753      && CONST_INT_P (ad.disp)
4754      && !SHORT_DISP_IN_RANGE (INTVAL (ad.disp)))
4755    return false;
4756
4757  if (strict)
4758    {
4759      if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4760	return false;
4761
4762      if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4763	return false;
4764    }
4765  else
4766    {
4767      if (ad.base
4768	  && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4769	       || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4770	return false;
4771
4772      if (ad.indx
4773	  && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4774	       || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4775	  return false;
4776    }
4777  return true;
4778}
4779
4780/* Return true if OP is a valid operand for the LA instruction.
4781   In 31-bit, we need to prove that the result is used as an
4782   address, as LA performs only a 31-bit addition.  */
4783
4784bool
4785legitimate_la_operand_p (rtx op)
4786{
4787  struct s390_address addr;
4788  if (!s390_decompose_address (op, &addr))
4789    return false;
4790
4791  return (TARGET_64BIT || addr.pointer);
4792}
4793
4794/* Return true if it is valid *and* preferable to use LA to
4795   compute the sum of OP1 and OP2.  */
4796
4797bool
4798preferred_la_operand_p (rtx op1, rtx op2)
4799{
4800  struct s390_address addr;
4801
4802  if (op2 != const0_rtx)
4803    op1 = gen_rtx_PLUS (Pmode, op1, op2);
4804
4805  if (!s390_decompose_address (op1, &addr))
4806    return false;
4807  if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4808    return false;
4809  if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4810    return false;
4811
4812  /* Avoid LA instructions with index (and base) register on z196 or
4813     later; it is preferable to use regular add instructions when
4814     possible.  Starting with zEC12 the la with index register is
4815     "uncracked" again but still slower than a regular add.  */
4816  if (addr.indx && s390_tune >= PROCESSOR_2817_Z196)
4817    return false;
4818
4819  if (!TARGET_64BIT && !addr.pointer)
4820    return false;
4821
4822  if (addr.pointer)
4823    return true;
4824
4825  if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4826      || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4827    return true;
4828
4829  return false;
4830}
4831
4832/* Emit a forced load-address operation to load SRC into DST.
4833   This will use the LOAD ADDRESS instruction even in situations
4834   where legitimate_la_operand_p (SRC) returns false.  */
4835
4836void
4837s390_load_address (rtx dst, rtx src)
4838{
4839  if (TARGET_64BIT)
4840    emit_move_insn (dst, src);
4841  else
4842    emit_insn (gen_force_la_31 (dst, src));
4843}
4844
4845/* Return true if it is OK to use SYMBOL_REF in a relative address.  */
4846
4847bool
4848s390_rel_address_ok_p (rtx symbol_ref)
4849{
4850  tree decl;
4851
4852  if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
4853    return true;
4854
4855  decl = SYMBOL_REF_DECL (symbol_ref);
4856
4857  if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
4858    return (s390_pic_data_is_text_relative
4859	    || (decl
4860		&& TREE_CODE (decl) == FUNCTION_DECL));
4861
4862  return false;
4863}
4864
4865/* Return a legitimate reference for ORIG (an address) using the
4866   register REG.  If REG is 0, a new pseudo is generated.
4867
4868   There are two types of references that must be handled:
4869
4870   1. Global data references must load the address from the GOT, via
4871      the PIC reg.  An insn is emitted to do this load, and the reg is
4872      returned.
4873
4874   2. Static data references, constant pool addresses, and code labels
4875      compute the address as an offset from the GOT, whose base is in
4876      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
4877      differentiate them from global data objects.  The returned
4878      address is the PIC reg + an unspec constant.
4879
4880   TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4881   reg also appears in the address.  */
4882
4883rtx
4884legitimize_pic_address (rtx orig, rtx reg)
4885{
4886  rtx addr = orig;
4887  rtx addend = const0_rtx;
4888  rtx new_rtx = orig;
4889
4890  gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4891
4892  if (GET_CODE (addr) == CONST)
4893    addr = XEXP (addr, 0);
4894
4895  if (GET_CODE (addr) == PLUS)
4896    {
4897      addend = XEXP (addr, 1);
4898      addr = XEXP (addr, 0);
4899    }
4900
4901  if ((GET_CODE (addr) == LABEL_REF
4902       || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
4903       || (GET_CODE (addr) == UNSPEC &&
4904	   (XINT (addr, 1) == UNSPEC_GOTENT
4905	    || XINT (addr, 1) == UNSPEC_PLT)))
4906      && GET_CODE (addend) == CONST_INT)
4907    {
4908      /* This can be locally addressed.  */
4909
4910      /* larl_operand requires UNSPECs to be wrapped in a const rtx.  */
4911      rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4912			gen_rtx_CONST (Pmode, addr) : addr);
4913
4914      if (larl_operand (const_addr, VOIDmode)
4915	  && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4916	  && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4917	{
4918	  if (INTVAL (addend) & 1)
4919	    {
4920	      /* LARL can't handle odd offsets, so emit a pair of LARL
4921		 and LA.  */
4922	      rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4923
4924	      if (!DISP_IN_RANGE (INTVAL (addend)))
4925		{
4926		  HOST_WIDE_INT even = INTVAL (addend) - 1;
4927		  addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4928		  addr = gen_rtx_CONST (Pmode, addr);
4929		  addend = const1_rtx;
4930		}
4931
4932	      emit_move_insn (temp, addr);
4933	      new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4934
4935	      if (reg != 0)
4936		{
4937		  s390_load_address (reg, new_rtx);
4938		  new_rtx = reg;
4939		}
4940	    }
4941	  else
4942	    {
4943	      /* If the offset is even, we can just use LARL.  This
4944		 will happen automatically.  */
4945	    }
4946	}
4947      else
4948	{
4949	  /* No larl - Access local symbols relative to the GOT.  */
4950
4951	  rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4952
4953	  if (reload_in_progress || reload_completed)
4954	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4955
4956	  addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4957	  if (addend != const0_rtx)
4958	    addr = gen_rtx_PLUS (Pmode, addr, addend);
4959	  addr = gen_rtx_CONST (Pmode, addr);
4960	  addr = force_const_mem (Pmode, addr);
4961	  emit_move_insn (temp, addr);
4962
4963	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4964	  if (reg != 0)
4965	    {
4966	      s390_load_address (reg, new_rtx);
4967	      new_rtx = reg;
4968	    }
4969	}
4970    }
4971  else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4972    {
4973      /* A non-local symbol reference without addend.
4974
4975	 The symbol ref is wrapped into an UNSPEC to make sure the
4976	 proper operand modifier (@GOT or @GOTENT) will be emitted.
4977	 This will tell the linker to put the symbol into the GOT.
4978
4979	 Additionally the code dereferencing the GOT slot is emitted here.
4980
4981	 An addend to the symref needs to be added afterwards.
4982	 legitimize_pic_address calls itself recursively to handle
4983	 that case.  So no need to do it here.  */
4984
4985      if (reg == 0)
4986	reg = gen_reg_rtx (Pmode);
4987
4988      if (TARGET_Z10)
4989	{
4990	  /* Use load relative if possible.
4991	     lgrl <target>, sym@GOTENT  */
4992	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4993	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4994	  new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4995
4996	  emit_move_insn (reg, new_rtx);
4997	  new_rtx = reg;
4998	}
4999      else if (flag_pic == 1)
5000	{
5001	  /* Assume GOT offset is a valid displacement operand (< 4k
5002	     or < 512k with z990).  This is handled the same way in
5003	     both 31- and 64-bit code (@GOT).
5004	     lg <target>, sym@GOT(r12)  */
5005
5006	  if (reload_in_progress || reload_completed)
5007	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5008
5009	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5010	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5011	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5012	  new_rtx = gen_const_mem (Pmode, new_rtx);
5013	  emit_move_insn (reg, new_rtx);
5014	  new_rtx = reg;
5015	}
5016      else
5017	{
5018	  /* If the GOT offset might be >= 4k, we determine the position
5019	     of the GOT entry via a PC-relative LARL (@GOTENT).
5020	     larl temp, sym@GOTENT
5021	     lg   <target>, 0(temp) */
5022
5023	  rtx temp = reg ? reg : gen_reg_rtx (Pmode);
5024
5025	  gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
5026		      || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
5027
5028	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
5029	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5030	  emit_move_insn (temp, new_rtx);
5031	  new_rtx = gen_const_mem (Pmode, temp);
5032	  emit_move_insn (reg, new_rtx);
5033
5034	  new_rtx = reg;
5035	}
5036    }
5037  else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
5038    {
5039      gcc_assert (XVECLEN (addr, 0) == 1);
5040      switch (XINT (addr, 1))
5041	{
5042	  /* These UNSPECs address symbols (or PLT slots) relative to the GOT
5043	     (not GOT slots!).  In general this will exceed the
5044	     displacement range, so these values belong in the literal
5045	     pool.  */
5046	case UNSPEC_GOTOFF:
5047	case UNSPEC_PLTOFF:
5048	  new_rtx = force_const_mem (Pmode, orig);
5049	  break;
5050
5051	  /* For -fPIC the GOT size might exceed the displacement
5052	     range so make sure the value is in the literal pool.  */
5053	case UNSPEC_GOT:
5054	  if (flag_pic == 2)
5055	    new_rtx = force_const_mem (Pmode, orig);
5056	  break;
5057
5058	  /* For @GOTENT larl is used.  This is handled like local
5059	     symbol refs.  */
5060	case UNSPEC_GOTENT:
5061	  gcc_unreachable ();
5062	  break;
5063
5064	  /* For @PLT larl is used.  This is handled like local
5065	     symbol refs.  */
5066	case UNSPEC_PLT:
5067	  gcc_unreachable ();
5068	  break;
5069
5070	  /* Everything else cannot happen.  */
5071	default:
5072	  gcc_unreachable ();
5073	}
5074    }
5075  else if (addend != const0_rtx)
5076    {
5077      /* Otherwise, compute the sum.  */
5078
5079      rtx base = legitimize_pic_address (addr, reg);
5080      new_rtx  = legitimize_pic_address (addend,
5081					 base == reg ? NULL_RTX : reg);
5082      if (GET_CODE (new_rtx) == CONST_INT)
5083	new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
5084      else
5085	{
5086	  if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
5087	    {
5088	      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
5089	      new_rtx = XEXP (new_rtx, 1);
5090	    }
5091	  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
5092	}
5093
5094      if (GET_CODE (new_rtx) == CONST)
5095	new_rtx = XEXP (new_rtx, 0);
5096      new_rtx = force_operand (new_rtx, 0);
5097    }
5098
5099  return new_rtx;
5100}
5101
5102/* Load the thread pointer into a register.  */
5103
5104rtx
5105s390_get_thread_pointer (void)
5106{
5107  rtx tp = gen_reg_rtx (Pmode);
5108
5109  emit_insn (gen_get_thread_pointer (Pmode, tp));
5110
5111  mark_reg_pointer (tp, BITS_PER_WORD);
5112
5113  return tp;
5114}
5115
5116/* Emit a TLS call insn.  The call target is the SYMBOL_REF stored
5117   in s390_tls_symbol, which always refers to __tls_get_offset.
5118   The returned offset is written to RESULT_REG and a USE rtx is
5119   generated for TLS_CALL.  */
5120
5121static GTY(()) rtx s390_tls_symbol;
5122
5123static void
5124s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
5125{
5126  rtx insn;
5127
5128  if (!flag_pic)
5129    emit_insn (s390_load_got ());
5130
5131  if (!s390_tls_symbol)
5132    s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
5133
5134  insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
5135			 gen_rtx_REG (Pmode, RETURN_REGNUM));
5136
5137  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
5138  RTL_CONST_CALL_P (insn) = 1;
5139}
5140
5141/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
5142   this (thread-local) address.  REG may be used as temporary.  */
5143
5144static rtx
5145legitimize_tls_address (rtx addr, rtx reg)
5146{
5147  rtx new_rtx, tls_call, temp, base, r2;
5148  rtx_insn *insn;
5149
5150  if (GET_CODE (addr) == SYMBOL_REF)
5151    switch (tls_symbolic_operand (addr))
5152      {
5153      case TLS_MODEL_GLOBAL_DYNAMIC:
5154	start_sequence ();
5155	r2 = gen_rtx_REG (Pmode, 2);
5156	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
5157	new_rtx = gen_rtx_CONST (Pmode, tls_call);
5158	new_rtx = force_const_mem (Pmode, new_rtx);
5159	emit_move_insn (r2, new_rtx);
5160	s390_emit_tls_call_insn (r2, tls_call);
5161	insn = get_insns ();
5162	end_sequence ();
5163
5164	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5165	temp = gen_reg_rtx (Pmode);
5166	emit_libcall_block (insn, temp, r2, new_rtx);
5167
5168	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5169	if (reg != 0)
5170	  {
5171	    s390_load_address (reg, new_rtx);
5172	    new_rtx = reg;
5173	  }
5174	break;
5175
5176      case TLS_MODEL_LOCAL_DYNAMIC:
5177	start_sequence ();
5178	r2 = gen_rtx_REG (Pmode, 2);
5179	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
5180	new_rtx = gen_rtx_CONST (Pmode, tls_call);
5181	new_rtx = force_const_mem (Pmode, new_rtx);
5182	emit_move_insn (r2, new_rtx);
5183	s390_emit_tls_call_insn (r2, tls_call);
5184	insn = get_insns ();
5185	end_sequence ();
5186
5187	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
5188	temp = gen_reg_rtx (Pmode);
5189	emit_libcall_block (insn, temp, r2, new_rtx);
5190
5191	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5192	base = gen_reg_rtx (Pmode);
5193	s390_load_address (base, new_rtx);
5194
5195	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
5196	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5197	new_rtx = force_const_mem (Pmode, new_rtx);
5198	temp = gen_reg_rtx (Pmode);
5199	emit_move_insn (temp, new_rtx);
5200
5201	new_rtx = gen_rtx_PLUS (Pmode, base, temp);
5202	if (reg != 0)
5203	  {
5204	    s390_load_address (reg, new_rtx);
5205	    new_rtx = reg;
5206	  }
5207	break;
5208
5209      case TLS_MODEL_INITIAL_EXEC:
5210	if (flag_pic == 1)
5211	  {
5212	    /* Assume GOT offset < 4k.  This is handled the same way
5213	       in both 31- and 64-bit code.  */
5214
5215	    if (reload_in_progress || reload_completed)
5216	      df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5217
5218	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5219	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5220	    new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5221	    new_rtx = gen_const_mem (Pmode, new_rtx);
5222	    temp = gen_reg_rtx (Pmode);
5223	    emit_move_insn (temp, new_rtx);
5224	  }
5225	else
5226	  {
5227	    /* If the GOT offset might be >= 4k, we determine the position
5228	       of the GOT entry via a PC-relative LARL.  */
5229
5230	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5231	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5232	    temp = gen_reg_rtx (Pmode);
5233	    emit_move_insn (temp, new_rtx);
5234
5235	    new_rtx = gen_const_mem (Pmode, temp);
5236	    temp = gen_reg_rtx (Pmode);
5237	    emit_move_insn (temp, new_rtx);
5238	  }
5239
5240	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5241	if (reg != 0)
5242	  {
5243	    s390_load_address (reg, new_rtx);
5244	    new_rtx = reg;
5245	  }
5246	break;
5247
5248      case TLS_MODEL_LOCAL_EXEC:
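	/* Local-exec: the offset of ADDR from the thread pointer is known at
	   link time; load it from the literal pool and add the thread
	   pointer.  */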
5249	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5250	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5251	new_rtx = force_const_mem (Pmode, new_rtx);
5252	temp = gen_reg_rtx (Pmode);
5253	emit_move_insn (temp, new_rtx);
5254
5255	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5256	if (reg != 0)
5257	  {
5258	    s390_load_address (reg, new_rtx);
5259	    new_rtx = reg;
5260	  }
5261	break;
5262
5263      default:
5264	gcc_unreachable ();
5265      }
5266
5267  else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5268    {
5269      switch (XINT (XEXP (addr, 0), 1))
5270	{
5271	case UNSPEC_INDNTPOFF:
5272	  new_rtx = addr;
5273	  break;
5274
5275	default:
5276	  gcc_unreachable ();
5277	}
5278    }
5279
5280  else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5281	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5282    {
5283      new_rtx = XEXP (XEXP (addr, 0), 0);
5284      if (GET_CODE (new_rtx) != SYMBOL_REF)
5285	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5286
5287      new_rtx = legitimize_tls_address (new_rtx, reg);
5288      new_rtx = plus_constant (Pmode, new_rtx,
5289			       INTVAL (XEXP (XEXP (addr, 0), 1)));
5290      new_rtx = force_operand (new_rtx, 0);
5291    }
5292
5293  else
5294    gcc_unreachable ();  /* for now ... */
5295
5296  return new_rtx;
5297}
5298
5299/* Emit insns making the address in operands[1] valid for a standard
5300   move to operands[0].  operands[1] is replaced by an address which
5301   should be used instead of the former RTX to emit the move
5302   pattern.  */
5303
5304void
5305emit_symbolic_move (rtx *operands)
5306{
5307  rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5308
5309  if (GET_CODE (operands[0]) == MEM)
5310    operands[1] = force_reg (Pmode, operands[1]);
5311  else if (TLS_SYMBOLIC_CONST (operands[1]))
5312    operands[1] = legitimize_tls_address (operands[1], temp);
5313  else if (flag_pic)
5314    operands[1] = legitimize_pic_address (operands[1], temp);
5315}
5316
5317/* Try machine-dependent ways of modifying an illegitimate address X
5318   to be legitimate.  If we find one, return the new, valid address.
5319
5320   OLDX is the address as it was before break_out_memory_refs was called.
5321   In some cases it is useful to look at this to decide what needs to be done.
5322
5323   MODE is the mode of the operand pointed to by X.
5324
5325   When -fpic is used, special handling is needed for symbolic references.
5326   See comments by legitimize_pic_address for details.  */
5327
5328static rtx
5329s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5330			 machine_mode mode ATTRIBUTE_UNUSED)
5331{
5332  rtx constant_term = const0_rtx;
5333
5334  if (TLS_SYMBOLIC_CONST (x))
5335    {
5336      x = legitimize_tls_address (x, 0);
5337
5338      if (s390_legitimate_address_p (mode, x, FALSE))
5339	return x;
5340    }
5341  else if (GET_CODE (x) == PLUS
5342	   && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5343	       || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5344    {
5345      return x;
5346    }
5347  else if (flag_pic)
5348    {
5349      if (SYMBOLIC_CONST (x)
5350	  || (GET_CODE (x) == PLUS
5351	      && (SYMBOLIC_CONST (XEXP (x, 0))
5352		  || SYMBOLIC_CONST (XEXP (x, 1)))))
5353	  x = legitimize_pic_address (x, 0);
5354
5355      if (s390_legitimate_address_p (mode, x, FALSE))
5356	return x;
5357    }
5358
5359  x = eliminate_constant_term (x, &constant_term);
5360
5361  /* Optimize loading of large displacements by splitting them
5362     into the multiple of 4K and the rest; this allows the
5363     former to be CSE'd if possible.
5364
5365     Don't do this if the displacement is added to a register
5366     pointing into the stack frame, as the offsets will
5367     change later anyway.  */
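  /* For example, a displacement of 0x12345 is split into
     upper = 0x12000 and lower = 0x345: the register holding 0x12000
     can be reused by CSE while 0x345 still fits into the short 12-bit
     displacement field.  */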
5368
5369  if (GET_CODE (constant_term) == CONST_INT
5370      && !TARGET_LONG_DISPLACEMENT
5371      && !DISP_IN_RANGE (INTVAL (constant_term))
5372      && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5373    {
5374      HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5375      HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5376
5377      rtx temp = gen_reg_rtx (Pmode);
5378      rtx val  = force_operand (GEN_INT (upper), temp);
5379      if (val != temp)
5380	emit_move_insn (temp, val);
5381
5382      x = gen_rtx_PLUS (Pmode, x, temp);
5383      constant_term = GEN_INT (lower);
5384    }
5385
5386  if (GET_CODE (x) == PLUS)
5387    {
5388      if (GET_CODE (XEXP (x, 0)) == REG)
5389	{
5390	  rtx temp = gen_reg_rtx (Pmode);
5391	  rtx val  = force_operand (XEXP (x, 1), temp);
5392	  if (val != temp)
5393	    emit_move_insn (temp, val);
5394
5395	  x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5396	}
5397
5398      else if (GET_CODE (XEXP (x, 1)) == REG)
5399	{
5400	  rtx temp = gen_reg_rtx (Pmode);
5401	  rtx val  = force_operand (XEXP (x, 0), temp);
5402	  if (val != temp)
5403	    emit_move_insn (temp, val);
5404
5405	  x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5406	}
5407    }
5408
5409  if (constant_term != const0_rtx)
5410    x = gen_rtx_PLUS (Pmode, x, constant_term);
5411
5412  return x;
5413}
5414
5415/* Try a machine-dependent way of reloading an illegitimate address AD
5416   operand.  If we find one, push the reload and return the new address.
5417
5418   MODE is the mode of the enclosing MEM.  OPNUM is the operand number
5419   and TYPE is the reload type of the current reload.  */
5420
5421rtx
5422legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5423			   int opnum, int type)
5424{
5425  if (!optimize || TARGET_LONG_DISPLACEMENT)
5426    return NULL_RTX;
5427
5428  if (GET_CODE (ad) == PLUS)
5429    {
5430      rtx tem = simplify_binary_operation (PLUS, Pmode,
5431					   XEXP (ad, 0), XEXP (ad, 1));
5432      if (tem)
5433	ad = tem;
5434    }
5435
5436  if (GET_CODE (ad) == PLUS
5437      && GET_CODE (XEXP (ad, 0)) == REG
5438      && GET_CODE (XEXP (ad, 1)) == CONST_INT
5439      && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5440    {
5441      HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5442      HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5443      rtx cst, tem, new_rtx;
5444
5445      cst = GEN_INT (upper);
5446      if (!legitimate_reload_constant_p (cst))
5447	cst = force_const_mem (Pmode, cst);
5448
5449      tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5450      new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5451
5452      push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5453		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5454		   opnum, (enum reload_type) type);
5455      return new_rtx;
5456    }
5457
5458  return NULL_RTX;
5459}
5460
/* Emit code to copy LEN bytes from SRC to DST.  */
5462
5463bool
5464s390_expand_cpymem (rtx dst, rtx src, rtx len)
5465{
  /* When tuning for z10 or higher we rely on the Glibc functions to
     do the right thing.  Inline code is only generated for constant
     lengths below 64k.  */
5469  if (s390_tune >= PROCESSOR_2097_Z10
5470      && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5471    return false;
5472
5473  /* Expand memcpy for constant length operands without a loop if it
5474     is shorter that way.
5475
     With a constant length argument a memcpy loop (without pfd) is
     36 bytes -> 6 * mvc.  */
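  /* E.g. a constant length of 700 bytes is handled by this case with
     three mvc blocks of 256, 256 and 188 bytes at offsets 0, 256 and
     512 instead of a loop.  */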
5478  if (GET_CODE (len) == CONST_INT
5479      && INTVAL (len) >= 0
5480      && INTVAL (len) <= 256 * 6
5481      && (!TARGET_MVCLE || INTVAL (len) <= 256))
5482    {
5483      HOST_WIDE_INT o, l;
5484
5485      for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5486	{
5487	  rtx newdst = adjust_address (dst, BLKmode, o);
5488	  rtx newsrc = adjust_address (src, BLKmode, o);
5489	  emit_insn (gen_cpymem_short (newdst, newsrc,
5490				       GEN_INT (l > 256 ? 255 : l - 1)));
5491	}
5492    }
5493
5494  else if (TARGET_MVCLE)
5495    {
5496      emit_insn (gen_cpymem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5497    }
5498
5499  else
5500    {
5501      rtx dst_addr, src_addr, count, blocks, temp;
5502      rtx_code_label *loop_start_label = gen_label_rtx ();
5503      rtx_code_label *loop_end_label = gen_label_rtx ();
5504      rtx_code_label *end_label = gen_label_rtx ();
5505      machine_mode mode;
5506
5507      mode = GET_MODE (len);
5508      if (mode == VOIDmode)
5509	mode = Pmode;
5510
5511      dst_addr = gen_reg_rtx (Pmode);
5512      src_addr = gen_reg_rtx (Pmode);
5513      count = gen_reg_rtx (mode);
5514      blocks = gen_reg_rtx (mode);
5515
5516      convert_move (count, len, 1);
5517      emit_cmp_and_jump_insns (count, const0_rtx,
5518			       EQ, NULL_RTX, mode, 1, end_label);
5519
5520      emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5521      emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5522      dst = change_address (dst, VOIDmode, dst_addr);
5523      src = change_address (src, VOIDmode, src_addr);
5524
5525      temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5526			   OPTAB_DIRECT);
5527      if (temp != count)
5528	emit_move_insn (count, temp);
5529
5530      temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5531			   OPTAB_DIRECT);
5532      if (temp != blocks)
5533	emit_move_insn (blocks, temp);
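
      /* count is the byte length minus one and blocks the number of
	 full 256 byte chunks handled by the loop below; the remaining
	 (count % 256) + 1 bytes are copied by the trailing cpymem_short
	 after the loop.  E.g. len = 600 gives blocks = 2 (512 bytes in
	 the loop) plus a final 88 byte mvc.  */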
5534
5535      emit_cmp_and_jump_insns (blocks, const0_rtx,
5536			       EQ, NULL_RTX, mode, 1, loop_end_label);
5537
5538      emit_label (loop_start_label);
5539
5540      if (TARGET_Z10
5541	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5542	{
5543	  rtx prefetch;
5544
5545	  /* Issue a read prefetch for the +3 cache line.  */
5546	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5547				   const0_rtx, const0_rtx);
5548	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5549	  emit_insn (prefetch);
5550
5551	  /* Issue a write prefetch for the +3 cache line.  */
5552	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5553				   const1_rtx, const0_rtx);
5554	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5555	  emit_insn (prefetch);
5556	}
5557
5558      emit_insn (gen_cpymem_short (dst, src, GEN_INT (255)));
5559      s390_load_address (dst_addr,
5560			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5561      s390_load_address (src_addr,
5562			 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5563
5564      temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5565			   OPTAB_DIRECT);
5566      if (temp != blocks)
5567	emit_move_insn (blocks, temp);
5568
5569      emit_cmp_and_jump_insns (blocks, const0_rtx,
5570			       EQ, NULL_RTX, mode, 1, loop_end_label);
5571
5572      emit_jump (loop_start_label);
5573      emit_label (loop_end_label);
5574
5575      emit_insn (gen_cpymem_short (dst, src,
5576				   convert_to_mode (Pmode, count, 1)));
5577      emit_label (end_label);
5578    }
5579  return true;
5580}
5581
5582/* Emit code to set LEN bytes at DST to VAL.
5583   Make use of clrmem if VAL is zero.  */
5584
5585void
5586s390_expand_setmem (rtx dst, rtx len, rtx val)
5587{
5588  if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5589    return;
5590
5591  gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5592
5593  /* Expand setmem/clrmem for a constant length operand without a
5594     loop if it will be shorter that way.
5595     clrmem loop (with PFD)    is 30 bytes -> 5 * xc
5596     clrmem loop (without PFD) is 24 bytes -> 4 * xc
5597     setmem loop (with PFD)    is 38 bytes -> ~4 * (mvi/stc + mvc)
5598     setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */
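  /* E.g. clearing 600 bytes with a constant length is handled by this
     case with three xc blocks of 256, 256 and 88 bytes instead of a
     loop.  */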
5599  if (GET_CODE (len) == CONST_INT
5600      && ((val == const0_rtx
5601	   && (INTVAL (len) <= 256 * 4
	       || (INTVAL (len) <= 256 * 5 && TARGET_SETMEM_PFD (val, len))))
5603	  || (val != const0_rtx && INTVAL (len) <= 257 * 4))
5604      && (!TARGET_MVCLE || INTVAL (len) <= 256))
5605    {
5606      HOST_WIDE_INT o, l;
5607
5608      if (val == const0_rtx)
5609	/* clrmem: emit 256 byte blockwise XCs.  */
5610	for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5611	  {
5612	    rtx newdst = adjust_address (dst, BLKmode, o);
5613	    emit_insn (gen_clrmem_short (newdst,
5614					 GEN_INT (l > 256 ? 255 : l - 1)));
5615	  }
5616      else
	/* setmem: emit 1 (mvi) + 256 (mvc) byte blockwise memsets by
	   setting the first byte to val and using a 256 byte mvc with a
	   one byte overlap to propagate the byte.  */
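	/* E.g. memset (p, 0xaa, 10) stores 0xaa at p[0] with mvi and
	   then issues a 9 byte mvc from p[0] to p[1]; since mvc copies
	   left to right byte by byte, each copied byte reads the 0xaa
	   just written, replicating it through p[1] .. p[9].  */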
5620	for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5621	  {
5622	    rtx newdst = adjust_address (dst, BLKmode, o);
5623	    emit_move_insn (adjust_address (dst, QImode, o), val);
5624	    if (l > 1)
5625	      {
5626		rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5627		emit_insn (gen_cpymem_short (newdstp1, newdst,
5628					     GEN_INT (l > 257 ? 255 : l - 2)));
5629	      }
5630	  }
5631    }
5632
5633  else if (TARGET_MVCLE)
5634    {
5635      val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5636      if (TARGET_64BIT)
5637	emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5638				       val));
5639      else
5640	emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5641				       val));
5642    }
5643
5644  else
5645    {
5646      rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5647      rtx_code_label *loop_start_label = gen_label_rtx ();
5648      rtx_code_label *onebyte_end_label = gen_label_rtx ();
5649      rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5650      rtx_code_label *restbyte_end_label = gen_label_rtx ();
5651      machine_mode mode;
5652
5653      mode = GET_MODE (len);
5654      if (mode == VOIDmode)
5655	mode = Pmode;
5656
5657      dst_addr = gen_reg_rtx (Pmode);
5658      count = gen_reg_rtx (mode);
5659      blocks = gen_reg_rtx (mode);
5660
5661      convert_move (count, len, 1);
5662      emit_cmp_and_jump_insns (count, const0_rtx,
5663			       EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5664			       profile_probability::very_unlikely ());
5665
5666      /* We need to make a copy of the target address since memset is
5667	 supposed to return it unmodified.  We have to make it here
5668	 already since the new reg is used at onebyte_end_label.  */
5669      emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5670      dst = change_address (dst, VOIDmode, dst_addr);
5671
5672      if (val != const0_rtx)
5673	{
5674	  /* When using the overlapping mvc the original target
5675	     address is only accessed as single byte entity (even by
5676	     the mvc reading this value).  */
5677	  set_mem_size (dst, 1);
5678	  dstp1 = adjust_address (dst, VOIDmode, 1);
5679	  emit_cmp_and_jump_insns (count,
5680				   const1_rtx, EQ, NULL_RTX, mode, 1,
5681				   onebyte_end_label,
5682				   profile_probability::very_unlikely ());
5683	}
5684
5685      /* There is one unconditional (mvi+mvc)/xc after the loop
5686	 dealing with the rest of the bytes, subtracting two (mvi+mvc)
5687	 or one (xc) here leaves this number of bytes to be handled by
5688	 it.  */
5689      temp = expand_binop (mode, add_optab, count,
5690			   val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5691			   count, 1, OPTAB_DIRECT);
5692      if (temp != count)
5693	emit_move_insn (count, temp);
5694
5695      temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5696			   OPTAB_DIRECT);
5697      if (temp != blocks)
5698	emit_move_insn (blocks, temp);
5699
5700      emit_cmp_and_jump_insns (blocks, const0_rtx,
5701			       EQ, NULL_RTX, mode, 1, restbyte_end_label);
5702
5703      emit_jump (loop_start_label);
5704
5705      if (val != const0_rtx)
5706	{
5707	  /* The 1 byte != 0 special case.  Not handled efficiently
5708	     since we require two jumps for that.  However, this
5709	     should be very rare.  */
5710	  emit_label (onebyte_end_label);
5711	  emit_move_insn (adjust_address (dst, QImode, 0), val);
5712	  emit_jump (zerobyte_end_label);
5713	}
5714
5715      emit_label (loop_start_label);
5716
5717      if (TARGET_SETMEM_PFD (val, len))
5718	{
5719	  /* Issue a write prefetch.  */
5720	  rtx distance = GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE);
5721	  rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, distance),
5722				       const1_rtx, const0_rtx);
5723	  emit_insn (prefetch);
5724	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5725	}
5726
5727      if (val == const0_rtx)
5728	emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5729      else
5730	{
5731	  /* Set the first byte in the block to the value and use an
5732	     overlapping mvc for the block.  */
5733	  emit_move_insn (adjust_address (dst, QImode, 0), val);
5734	  emit_insn (gen_cpymem_short (dstp1, dst, GEN_INT (254)));
5735	}
5736      s390_load_address (dst_addr,
5737			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5738
5739      temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5740			   OPTAB_DIRECT);
5741      if (temp != blocks)
5742	emit_move_insn (blocks, temp);
5743
5744      emit_cmp_and_jump_insns (blocks, const0_rtx,
5745			       NE, NULL_RTX, mode, 1, loop_start_label);
5746
5747      emit_label (restbyte_end_label);
5748
5749      if (val == const0_rtx)
5750	emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5751      else
5752	{
5753	  /* Set the first byte in the block to the value and use an
5754	     overlapping mvc for the block.  */
5755	  emit_move_insn (adjust_address (dst, QImode, 0), val);
	  /* execute only uses the lowest 8 bits of count, which is
	     exactly what we need here.  */
5758	  emit_insn (gen_cpymem_short (dstp1, dst,
5759				       convert_to_mode (Pmode, count, 1)));
5760	}
5761
5762      emit_label (zerobyte_end_label);
5763    }
5764}
5765
5766/* Emit code to compare LEN bytes at OP0 with those at OP1,
5767   and return the result in TARGET.  */
5768
5769bool
5770s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5771{
5772  rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5773  rtx tmp;
5774
  /* When tuning for z10 or higher we rely on the Glibc functions to
     do the right thing.  Inline code is only generated for constant
     lengths below 64k.  */
5778  if (s390_tune >= PROCESSOR_2097_Z10
5779      && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5780    return false;
5781
5782  /* As the result of CMPINT is inverted compared to what we need,
5783     we have to swap the operands.  */
5784  tmp = op0; op0 = op1; op1 = tmp;
5785
5786  if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5787    {
5788      if (INTVAL (len) > 0)
5789	{
5790	  emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5791	  emit_insn (gen_cmpint (target, ccreg));
5792	}
5793      else
5794	emit_move_insn (target, const0_rtx);
5795    }
5796  else if (TARGET_MVCLE)
5797    {
5798      emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5799      emit_insn (gen_cmpint (target, ccreg));
5800    }
5801  else
5802    {
5803      rtx addr0, addr1, count, blocks, temp;
5804      rtx_code_label *loop_start_label = gen_label_rtx ();
5805      rtx_code_label *loop_end_label = gen_label_rtx ();
5806      rtx_code_label *end_label = gen_label_rtx ();
5807      machine_mode mode;
5808
5809      mode = GET_MODE (len);
5810      if (mode == VOIDmode)
5811	mode = Pmode;
5812
5813      addr0 = gen_reg_rtx (Pmode);
5814      addr1 = gen_reg_rtx (Pmode);
5815      count = gen_reg_rtx (mode);
5816      blocks = gen_reg_rtx (mode);
5817
5818      convert_move (count, len, 1);
5819      emit_cmp_and_jump_insns (count, const0_rtx,
5820			       EQ, NULL_RTX, mode, 1, end_label);
5821
5822      emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5823      emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5824      op0 = change_address (op0, VOIDmode, addr0);
5825      op1 = change_address (op1, VOIDmode, addr1);
5826
5827      temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5828			   OPTAB_DIRECT);
5829      if (temp != count)
5830	emit_move_insn (count, temp);
5831
5832      temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5833			   OPTAB_DIRECT);
5834      if (temp != blocks)
5835	emit_move_insn (blocks, temp);
5836
5837      emit_cmp_and_jump_insns (blocks, const0_rtx,
5838			       EQ, NULL_RTX, mode, 1, loop_end_label);
5839
5840      emit_label (loop_start_label);
5841
5842      if (TARGET_Z10
5843	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5844	{
5845	  rtx prefetch;
5846
5847	  /* Issue a read prefetch for the +2 cache line of operand 1.  */
5848	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5849				   const0_rtx, const0_rtx);
5850	  emit_insn (prefetch);
5851	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5852
5853	  /* Issue a read prefetch for the +2 cache line of operand 2.  */
5854	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5855				   const0_rtx, const0_rtx);
5856	  emit_insn (prefetch);
5857	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5858	}
5859
5860      emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5861      temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5862      temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5863			gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5864      temp = gen_rtx_SET (pc_rtx, temp);
5865      emit_jump_insn (temp);
5866
5867      s390_load_address (addr0,
5868			 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5869      s390_load_address (addr1,
5870			 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5871
5872      temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5873			   OPTAB_DIRECT);
5874      if (temp != blocks)
5875	emit_move_insn (blocks, temp);
5876
5877      emit_cmp_and_jump_insns (blocks, const0_rtx,
5878			       EQ, NULL_RTX, mode, 1, loop_end_label);
5879
5880      emit_jump (loop_start_label);
5881      emit_label (loop_end_label);
5882
5883      emit_insn (gen_cmpmem_short (op0, op1,
5884				   convert_to_mode (Pmode, count, 1)));
5885      emit_label (end_label);
5886
5887      emit_insn (gen_cmpint (target, ccreg));
5888    }
5889  return true;
5890}
5891
5892/* Emit a conditional jump to LABEL for condition code mask MASK using
   comparison operator COMPARISON.  Return the emitted jump insn.  */
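
/* Note: in CCRAWmode MASK uses the standard S/390 branch mask
   encoding (8 = CC0, 4 = CC1, 2 = CC2, 1 = CC3), hence the assert
   below rejecting 0 (branch never) and 15 (branch always).  */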
5894
5895static rtx_insn *
5896s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5897{
5898  rtx temp;
5899
5900  gcc_assert (comparison == EQ || comparison == NE);
5901  gcc_assert (mask > 0 && mask < 15);
5902
5903  temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5904			 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5905  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5906			       gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5907  temp = gen_rtx_SET (pc_rtx, temp);
5908  return emit_jump_insn (temp);
5909}
5910
5911/* Emit the instructions to implement strlen of STRING and store the
5912   result in TARGET.  The string has the known ALIGNMENT.  This
5913   version uses vector instructions and is therefore not appropriate
5914   for targets prior to z13.  */
5915
5916void
5917s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5918{
5919  rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5920  rtx str_reg = gen_reg_rtx (V16QImode);
5921  rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5922  rtx str_idx_reg = gen_reg_rtx (Pmode);
5923  rtx result_reg = gen_reg_rtx (V16QImode);
5924  rtx is_aligned_label = gen_label_rtx ();
5925  rtx into_loop_label = NULL_RTX;
5926  rtx loop_start_label = gen_label_rtx ();
5927  rtx temp;
5928  rtx len = gen_reg_rtx (QImode);
5929  rtx cond;
5930
5931  s390_load_address (str_addr_base_reg, XEXP (string, 0));
5932  emit_move_insn (str_idx_reg, const0_rtx);
5933
5934  if (INTVAL (alignment) < 16)
5935    {
5936      /* Check whether the address happens to be aligned properly so
5937	 jump directly to the aligned loop.  */
5938      emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5939					    str_addr_base_reg, GEN_INT (15)),
5940			       const0_rtx, EQ, NULL_RTX,
5941			       Pmode, 1, is_aligned_label);
5942
5943      temp = gen_reg_rtx (Pmode);
5944      temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5945			   GEN_INT (15), temp, 1, OPTAB_DIRECT);
5946      gcc_assert (REG_P (temp));
5947      highest_index_to_load_reg =
5948	expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5949		      highest_index_to_load_reg, 1, OPTAB_DIRECT);
5950      gcc_assert (REG_P (highest_index_to_load_reg));
5951      emit_insn (gen_vllv16qi (str_reg,
5952		   convert_to_mode (SImode, highest_index_to_load_reg, 1),
5953		   gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5954
5955      into_loop_label = gen_label_rtx ();
5956      s390_emit_jump (into_loop_label, NULL_RTX);
5957      emit_barrier ();
5958    }
5959
5960  emit_label (is_aligned_label);
5961  LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5962
  /* From this point on we only perform 16 byte aligned loads.  */
5965  emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5966
5967  emit_label (loop_start_label);
5968  LABEL_NUSES (loop_start_label) = 1;
5969
5970  /* Load 16 bytes of the string into VR.  */
5971  emit_move_insn (str_reg,
5972		  gen_rtx_MEM (V16QImode,
5973			       gen_rtx_PLUS (Pmode, str_idx_reg,
5974					     str_addr_base_reg)));
5975  if (into_loop_label != NULL_RTX)
5976    {
5977      emit_label (into_loop_label);
5978      LABEL_NUSES (into_loop_label) = 1;
5979    }
5980
5981  /* Increment string index by 16 bytes.  */
5982  expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5983		str_idx_reg, 1, OPTAB_DIRECT);
5984
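  /* vfene with the ZS flag searches the 16 loaded bytes for a zero
     byte; byte 7 of result_reg receives the index of the first zero
     byte or 16 if none was found, and the CS flag makes it set the
     condition code, so the branch below keeps looping while no zero
     byte has been seen.  */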
5985  emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5986				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5987
5988  add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5989		    REG_BR_PROB,
5990		    profile_probability::very_likely ().to_reg_br_prob_note ());
5991  emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
5992
  /* If the string pointer wasn't aligned we have loaded less than 16
     bytes and the remaining bytes got filled with zeros (by vll).
     Now we have to check whether the resulting index lies within the
     bytes actually part of the string.  */
5997
5998  cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5999			    highest_index_to_load_reg);
6000  s390_load_address (highest_index_to_load_reg,
6001		     gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
6002				   const1_rtx));
6003  if (TARGET_64BIT)
6004    emit_insn (gen_movdicc (str_idx_reg, cond,
6005			    highest_index_to_load_reg, str_idx_reg));
6006  else
6007    emit_insn (gen_movsicc (str_idx_reg, cond,
6008			    highest_index_to_load_reg, str_idx_reg));
6009
6010  add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
6011			profile_probability::very_unlikely ());
6012
6013  expand_binop (Pmode, add_optab, str_idx_reg,
6014		GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
6015  /* FIXME: len is already zero extended - so avoid the llgcr emitted
6016     here.  */
6017  temp = expand_binop (Pmode, add_optab, str_idx_reg,
6018		       convert_to_mode (Pmode, len, 1),
6019		       target, 1, OPTAB_DIRECT);
6020  if (temp != target)
6021    emit_move_insn (target, temp);
6022}
6023
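/* Expand the movstr pattern using the vector string instructions
   (z13 and later): copy the zero terminated string at SRC to DST and
   store the address of the copied terminating zero byte within DST
   into RESULT.  */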
6024void
6025s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
6026{
6027  rtx temp = gen_reg_rtx (Pmode);
6028  rtx src_addr = XEXP (src, 0);
6029  rtx dst_addr = XEXP (dst, 0);
6030  rtx src_addr_reg = gen_reg_rtx (Pmode);
6031  rtx dst_addr_reg = gen_reg_rtx (Pmode);
6032  rtx offset = gen_reg_rtx (Pmode);
6033  rtx vsrc = gen_reg_rtx (V16QImode);
6034  rtx vpos = gen_reg_rtx (V16QImode);
6035  rtx loadlen = gen_reg_rtx (SImode);
  rtx gpos_qi = gen_reg_rtx (QImode);
6037  rtx gpos = gen_reg_rtx (SImode);
6038  rtx done_label = gen_label_rtx ();
6039  rtx loop_label = gen_label_rtx ();
6040  rtx exit_label = gen_label_rtx ();
6041  rtx full_label = gen_label_rtx ();
6042
  /* Perform a quick check for the string ending within the first (up
     to) 16 bytes and exit early if successful.  */
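  /* vlbb loads as many of the first 16 bytes as can be accessed
     without crossing the block boundary selected by its last operand
     (code 6 presumably selects a 4K block, i.e. a page), and lcbb
     returns the number of bytes such a load covers.  This keeps the
     speculative load from faulting on an unmapped next page.  */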
6045
6046  emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
6047  emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
6048  emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
6049  emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6050  emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
  /* gpos is the byte index if a zero was found and 16 otherwise.
     So if it is lower than the number of loaded bytes we have a hit.  */
6053  emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
6054			   full_label);
6055  emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
6056
6057  force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
6058		      1, OPTAB_DIRECT);
6059  emit_jump (exit_label);
6060  emit_barrier ();
6061
6062  emit_label (full_label);
6063  LABEL_NUSES (full_label) = 1;
6064
6065  /* Calculate `offset' so that src + offset points to the last byte
6066     before 16 byte alignment.  */
6067
6068  /* temp = src_addr & 0xf */
6069  force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
6070		      1, OPTAB_DIRECT);
6071
6072  /* offset = 0xf - temp */
6073  emit_move_insn (offset, GEN_INT (15));
6074  force_expand_binop (Pmode, sub_optab, offset, temp, offset,
6075		      1, OPTAB_DIRECT);
6076
  /* Store `offset' bytes in the destination string.  The quick check
     has loaded at least `offset' bytes into vsrc.  */
6079
6080  emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
6081
6082  /* Advance to the next byte to be loaded.  */
6083  force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
6084		      1, OPTAB_DIRECT);
6085
6086  /* Make sure the addresses are single regs which can be used as a
6087     base.  */
6088  emit_move_insn (src_addr_reg, src_addr);
6089  emit_move_insn (dst_addr_reg, dst_addr);
6090
6091  /* MAIN LOOP */
6092
6093  emit_label (loop_label);
6094  LABEL_NUSES (loop_label) = 1;
6095
6096  emit_move_insn (vsrc,
6097		  gen_rtx_MEM (V16QImode,
6098			       gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
6099
6100  emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
6101				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
6102  add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
6103		    REG_BR_PROB, profile_probability::very_unlikely ()
6104				  .to_reg_br_prob_note ());
6105
6106  emit_move_insn (gen_rtx_MEM (V16QImode,
6107			       gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
6108		  vsrc);
6109  /* offset += 16 */
6110  force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
6111		      offset,  1, OPTAB_DIRECT);
6112
6113  emit_jump (loop_label);
6114  emit_barrier ();
6115
6116  /* REGULAR EXIT */
6117
6118  /* We are done.  Add the offset of the zero character to the dst_addr
6119     pointer to get the result.  */
6120
6121  emit_label (done_label);
6122  LABEL_NUSES (done_label) = 1;
6123
6124  force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
6125		      1, OPTAB_DIRECT);
6126
6127  emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6128  emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6129
6130  emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
6131
6132  force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
6133		      1, OPTAB_DIRECT);
6134
6135  /* EARLY EXIT */
6136
6137  emit_label (exit_label);
6138  LABEL_NUSES (exit_label) = 1;
6139}
6140
6141
6142/* Expand conditional increment or decrement using alc/slb instructions.
6143   Should generate code setting DST to either SRC or SRC + INCREMENT,
6144   depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6145   Returns true if successful, false otherwise.
6146
6147   That makes it possible to implement some if-constructs without jumps e.g.:
6148   (borrow = CC0 | CC1 and carry = CC2 | CC3)
6149   unsigned int a, b, c;
6150   if (a < b)  c++; -> CCU  b > a  -> CC2;    c += carry;
6151   if (a < b)  c--; -> CCL3 a - b  -> borrow; c -= borrow;
6152   if (a <= b) c++; -> CCL3 b - a  -> borrow; c += carry;
6153   if (a <= b) c--; -> CCU  a <= b -> borrow; c -= borrow;
6154
6155   Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6156   if (a == b) c++; -> CCL3 a ^= b; 0 - a  -> borrow;    c += carry;
6157   if (a == b) c--; -> CCU  a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6158   if (a != b) c++; -> CCU  a ^= b; a > 0  -> CC2;       c += carry;
6159   if (a != b) c--; -> CCL3 a ^= b; 0 - a  -> borrow;    c -= borrow; */
6160
6161bool
6162s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
6163		   rtx dst, rtx src, rtx increment)
6164{
6165  machine_mode cmp_mode;
6166  machine_mode cc_mode;
6167  rtx op_res;
6168  rtx insn;
6169  rtvec p;
6170  int ret;
6171
6172  if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
6173      && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
6174    cmp_mode = SImode;
6175  else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
6176	   && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
6177    cmp_mode = DImode;
6178  else
6179    return false;
6180
6181  /* Try ADD LOGICAL WITH CARRY.  */
6182  if (increment == const1_rtx)
6183    {
6184      /* Determine CC mode to use.  */
6185      if (cmp_code == EQ || cmp_code == NE)
6186	{
6187	  if (cmp_op1 != const0_rtx)
6188	    {
6189	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6190					     NULL_RTX, 0, OPTAB_WIDEN);
6191	      cmp_op1 = const0_rtx;
6192	    }
6193
6194	  cmp_code = cmp_code == EQ ? LEU : GTU;
6195	}
6196
6197      if (cmp_code == LTU || cmp_code == LEU)
6198	{
6199	  rtx tem = cmp_op0;
6200	  cmp_op0 = cmp_op1;
6201	  cmp_op1 = tem;
6202	  cmp_code = swap_condition (cmp_code);
6203	}
6204
6205      switch (cmp_code)
6206	{
6207	  case GTU:
6208	    cc_mode = CCUmode;
6209	    break;
6210
6211	  case GEU:
6212	    cc_mode = CCL3mode;
6213	    break;
6214
6215	  default:
6216	    return false;
6217	}
6218
6219      /* Emit comparison instruction pattern. */
6220      if (!register_operand (cmp_op0, cmp_mode))
6221	cmp_op0 = force_reg (cmp_mode, cmp_op0);
6222
6223      insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6224			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6225      /* We use insn_invalid_p here to add clobbers if required.  */
6226      ret = insn_invalid_p (emit_insn (insn), false);
6227      gcc_assert (!ret);
6228
6229      /* Emit ALC instruction pattern.  */
6230      op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6231			       gen_rtx_REG (cc_mode, CC_REGNUM),
6232			       const0_rtx);
6233
6234      if (src != const0_rtx)
6235	{
6236	  if (!register_operand (src, GET_MODE (dst)))
6237	    src = force_reg (GET_MODE (dst), src);
6238
6239	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6240	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6241	}
6242
6243      p = rtvec_alloc (2);
6244      RTVEC_ELT (p, 0) =
6245	gen_rtx_SET (dst, op_res);
6246      RTVEC_ELT (p, 1) =
6247	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6248      emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6249
6250      return true;
6251    }
6252
6253  /* Try SUBTRACT LOGICAL WITH BORROW.  */
6254  if (increment == constm1_rtx)
6255    {
6256      /* Determine CC mode to use.  */
6257      if (cmp_code == EQ || cmp_code == NE)
6258	{
6259	  if (cmp_op1 != const0_rtx)
6260	    {
6261	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6262					     NULL_RTX, 0, OPTAB_WIDEN);
6263	      cmp_op1 = const0_rtx;
6264	    }
6265
6266	  cmp_code = cmp_code == EQ ? LEU : GTU;
6267	}
6268
6269      if (cmp_code == GTU || cmp_code == GEU)
6270	{
6271	  rtx tem = cmp_op0;
6272	  cmp_op0 = cmp_op1;
6273	  cmp_op1 = tem;
6274	  cmp_code = swap_condition (cmp_code);
6275	}
6276
6277      switch (cmp_code)
6278	{
6279	  case LEU:
6280	    cc_mode = CCUmode;
6281	    break;
6282
6283	  case LTU:
6284	    cc_mode = CCL3mode;
6285	    break;
6286
6287	  default:
6288	    return false;
6289	}
6290
6291      /* Emit comparison instruction pattern. */
6292      if (!register_operand (cmp_op0, cmp_mode))
6293	cmp_op0 = force_reg (cmp_mode, cmp_op0);
6294
6295      insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6296			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6297      /* We use insn_invalid_p here to add clobbers if required.  */
6298      ret = insn_invalid_p (emit_insn (insn), false);
6299      gcc_assert (!ret);
6300
6301      /* Emit SLB instruction pattern.  */
6302      if (!register_operand (src, GET_MODE (dst)))
6303	src = force_reg (GET_MODE (dst), src);
6304
6305      op_res = gen_rtx_MINUS (GET_MODE (dst),
6306			      gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6307			      gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6308					      gen_rtx_REG (cc_mode, CC_REGNUM),
6309					      const0_rtx));
6310      p = rtvec_alloc (2);
6311      RTVEC_ELT (p, 0) =
6312	gen_rtx_SET (dst, op_res);
6313      RTVEC_ELT (p, 1) =
6314	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6315      emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6316
6317      return true;
6318    }
6319
6320  return false;
6321}
6322
6323/* Expand code for the insv template. Return true if successful.  */
6324
6325bool
6326s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6327{
6328  int bitsize = INTVAL (op1);
6329  int bitpos = INTVAL (op2);
6330  machine_mode mode = GET_MODE (dest);
6331  machine_mode smode;
6332  int smode_bsize, mode_bsize;
6333  rtx op, clobber;
6334
6335  if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6336    return false;
6337
6338  /* Generate INSERT IMMEDIATE (IILL et al).  */
6339  /* (set (ze (reg)) (const_int)).  */
6340  if (TARGET_ZARCH
6341      && register_operand (dest, word_mode)
6342      && (bitpos % 16) == 0
6343      && (bitsize % 16) == 0
6344      && const_int_operand (src, VOIDmode))
6345    {
6346      HOST_WIDE_INT val = INTVAL (src);
6347      int regpos = bitpos + bitsize;
6348
6349      while (regpos > bitpos)
6350	{
6351	  machine_mode putmode;
6352	  int putsize;
6353
6354	  if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6355	    putmode = SImode;
6356	  else
6357	    putmode = HImode;
6358
6359	  putsize = GET_MODE_BITSIZE (putmode);
6360	  regpos -= putsize;
6361	  emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6362						GEN_INT (putsize),
6363						GEN_INT (regpos)),
6364			  gen_int_mode (val, putmode));
6365	  val >>= putsize;
6366	}
6367      gcc_assert (regpos == bitpos);
6368      return true;
6369    }
6370
6371  smode = smallest_int_mode_for_size (bitsize);
6372  smode_bsize = GET_MODE_BITSIZE (smode);
6373  mode_bsize = GET_MODE_BITSIZE (mode);
6374
6375  /* Generate STORE CHARACTERS UNDER MASK (STCM et al).  */
6376  if (bitpos == 0
6377      && (bitsize % BITS_PER_UNIT) == 0
6378      && MEM_P (dest)
6379      && (register_operand (src, word_mode)
6380	  || const_int_operand (src, VOIDmode)))
6381    {
6382      /* Emit standard pattern if possible.  */
6383      if (smode_bsize == bitsize)
6384	{
6385	  emit_move_insn (adjust_address (dest, smode, 0),
6386			  gen_lowpart (smode, src));
6387	  return true;
6388	}
6389
6390      /* (set (ze (mem)) (const_int)).  */
6391      else if (const_int_operand (src, VOIDmode))
6392	{
6393	  int size = bitsize / BITS_PER_UNIT;
6394	  rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6395					BLKmode,
6396					UNITS_PER_WORD - size);
6397
6398	  dest = adjust_address (dest, BLKmode, 0);
6399	  set_mem_size (dest, size);
6400	  s390_expand_cpymem (dest, src_mem, GEN_INT (size));
6401	  return true;
6402	}
6403
6404      /* (set (ze (mem)) (reg)).  */
6405      else if (register_operand (src, word_mode))
6406	{
6407	  if (bitsize <= 32)
6408	    emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6409						  const0_rtx), src);
6410	  else
6411	    {
6412	      /* Emit st,stcmh sequence.  */
6413	      int stcmh_width = bitsize - 32;
6414	      int size = stcmh_width / BITS_PER_UNIT;
6415
6416	      emit_move_insn (adjust_address (dest, SImode, size),
6417			      gen_lowpart (SImode, src));
6418	      set_mem_size (dest, size);
6419	      emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6420						    GEN_INT (stcmh_width),
6421						    const0_rtx),
6422			      gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6423	    }
6424	  return true;
6425	}
6426    }
6427
6428  /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al).  */
6429  if ((bitpos % BITS_PER_UNIT) == 0
6430      && (bitsize % BITS_PER_UNIT) == 0
6431      && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6432      && MEM_P (src)
6433      && (mode == DImode || mode == SImode)
6434      && register_operand (dest, mode))
6435    {
6436      /* Emit a strict_low_part pattern if possible.  */
6437      if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6438	{
6439	  op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6440	  op = gen_rtx_SET (op, gen_lowpart (smode, src));
6441	  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6442	  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6443	  return true;
6444	}
6445
6446      /* ??? There are more powerful versions of ICM that are not
6447	 completely represented in the md file.  */
6448    }
6449
6450  /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al).  */
6451  if (TARGET_Z10 && (mode == DImode || mode == SImode))
6452    {
6453      machine_mode mode_s = GET_MODE (src);
6454
6455      if (CONSTANT_P (src))
6456	{
6457	  /* For constant zero values the representation with AND
6458	     appears to be folded in more situations than the (set
6459	     (zero_extract) ...).
6460	     We only do this when the start and end of the bitfield
6461	     remain in the same SImode chunk.  That way nihf or nilf
6462	     can be used.
6463	     The AND patterns might still generate a risbg for this.  */
6464	  if (src == const0_rtx && bitpos / 32  == (bitpos + bitsize - 1) / 32)
6465	    return false;
6466	  else
6467	    src = force_reg (mode, src);
6468	}
6469      else if (mode_s != mode)
6470	{
6471	  gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6472	  src = force_reg (mode_s, src);
6473	  src = gen_lowpart (mode, src);
6474	}
6475
6476      op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
6477      op = gen_rtx_SET (op, src);
6478
6479      if (!TARGET_ZEC12)
6480	{
6481	  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6482	  op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6483	}
6484      emit_insn (op);
6485
6486      return true;
6487    }
6488
6489  return false;
6490}
6491
6492/* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6493   register that holds VAL of mode MODE shifted by COUNT bits.  */
6494
6495static inline rtx
6496s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6497{
6498  val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6499			     NULL_RTX, 1, OPTAB_DIRECT);
6500  return expand_simple_binop (SImode, ASHIFT, val, count,
6501			      NULL_RTX, 1, OPTAB_DIRECT);
6502}
6503
6504/* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6505   the result in TARGET.  */
6506
6507void
6508s390_expand_vec_compare (rtx target, enum rtx_code cond,
6509			 rtx cmp_op1, rtx cmp_op2)
6510{
6511  machine_mode mode = GET_MODE (target);
6512  bool neg_p = false, swap_p = false;
6513  rtx tmp;
6514
6515  if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
6516    {
6517      switch (cond)
6518	{
	  /* NE: a != b -> !(a == b) */
6520	case NE:   cond = EQ; neg_p = true;                break;
6521	case UNGT:
6522	  emit_insn (gen_vec_cmpungt (target, cmp_op1, cmp_op2));
6523	  return;
6524	case UNGE:
6525	  emit_insn (gen_vec_cmpunge (target, cmp_op1, cmp_op2));
6526	  return;
6527	case LE:   cond = GE;               swap_p = true; break;
6528	  /* UNLE: (a u<= b) -> (b u>= a).  */
6529	case UNLE:
6530	  emit_insn (gen_vec_cmpunge (target, cmp_op2, cmp_op1));
6531	  return;
6532	  /* LT: a < b -> b > a */
6533	case LT:   cond = GT;               swap_p = true; break;
6534	  /* UNLT: (a u< b) -> (b u> a).  */
6535	case UNLT:
6536	  emit_insn (gen_vec_cmpungt (target, cmp_op2, cmp_op1));
6537	  return;
6538	case UNEQ:
6539	  emit_insn (gen_vec_cmpuneq (target, cmp_op1, cmp_op2));
6540	  return;
6541	case LTGT:
6542	  emit_insn (gen_vec_cmpltgt (target, cmp_op1, cmp_op2));
6543	  return;
6544	case ORDERED:
6545	  emit_insn (gen_vec_cmpordered (target, cmp_op1, cmp_op2));
6546	  return;
6547	case UNORDERED:
6548	  emit_insn (gen_vec_cmpunordered (target, cmp_op1, cmp_op2));
6549	  return;
6550	default: break;
6551	}
6552    }
6553  else
6554    {
6555      switch (cond)
6556	{
6557	  /* NE: a != b -> !(a == b) */
6558	case NE:  cond = EQ;  neg_p = true;                break;
6559	  /* GE: a >= b -> !(b > a) */
6560	case GE:  cond = GT;  neg_p = true; swap_p = true; break;
6561	  /* GEU: a >= b -> !(b > a) */
6562	case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6563	  /* LE: a <= b -> !(a > b) */
6564	case LE:  cond = GT;  neg_p = true;                break;
6565	  /* LEU: a <= b -> !(a > b) */
6566	case LEU: cond = GTU; neg_p = true;                break;
6567	  /* LT: a < b -> b > a */
6568	case LT:  cond = GT;                swap_p = true; break;
6569	  /* LTU: a < b -> b > a */
6570	case LTU: cond = GTU;               swap_p = true; break;
6571	default: break;
6572	}
6573    }
6574
6575  if (swap_p)
6576    {
6577      tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6578    }
6579
6580  emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6581						  mode,
6582						  cmp_op1, cmp_op2)));
6583  if (neg_p)
6584    emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6585}
6586
6587/* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6588   TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6589   elements in CMP1 and CMP2 fulfill the comparison.
6590   This function is only used to emit patterns for the vx builtins and
6591   therefore only handles comparison codes required by the
6592   builtins.  */
6593void
6594s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6595			    rtx cmp1, rtx cmp2, bool all_p)
6596{
6597  machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6598  rtx tmp_reg = gen_reg_rtx (SImode);
6599  bool swap_p = false;
6600
6601  if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6602    {
6603      switch (code)
6604	{
6605	case EQ:
6606	case NE:
6607	  cc_producer_mode = CCVEQmode;
6608	  break;
6609	case GE:
6610	case LT:
6611	  code = swap_condition (code);
6612	  swap_p = true;
6613	  /* fallthrough */
6614	case GT:
6615	case LE:
6616	  cc_producer_mode = CCVIHmode;
6617	  break;
6618	case GEU:
6619	case LTU:
6620	  code = swap_condition (code);
6621	  swap_p = true;
6622	  /* fallthrough */
6623	case GTU:
6624	case LEU:
6625	  cc_producer_mode = CCVIHUmode;
6626	  break;
6627	default:
6628	  gcc_unreachable ();
6629	}
6630
6631      scratch_mode = GET_MODE (cmp1);
6632      /* These codes represent inverted CC interpretations.  Inverting
6633	 an ALL CC mode results in an ANY CC mode and the other way
6634	 around.  Invert the all_p flag here to compensate for
6635	 that.  */
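      /* E.g. "all elements NE" is emitted as "not (any element EQ)":
	 the CC producer computes the EQ comparison while the NE
	 consumer code together with the ANY CC mode inverts it.  */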
6636      if (code == NE || code == LE || code == LEU)
6637	all_p = !all_p;
6638
6639      cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6640    }
6641  else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6642    {
6643      bool inv_p = false;
6644
6645      switch (code)
6646	{
6647	case EQ:   cc_producer_mode = CCVEQmode;  break;
6648	case NE:   cc_producer_mode = CCVEQmode;  inv_p = true; break;
6649	case GT:   cc_producer_mode = CCVFHmode;  break;
6650	case GE:   cc_producer_mode = CCVFHEmode; break;
6651	case UNLE: cc_producer_mode = CCVFHmode;  inv_p = true; break;
6652	case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6653	case LT:   cc_producer_mode = CCVFHmode;  code = GT; swap_p = true; break;
6654	case LE:   cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6655	default: gcc_unreachable ();
6656	}
6657      scratch_mode = related_int_vector_mode (GET_MODE (cmp1)).require ();
6658
6659      if (inv_p)
6660	all_p = !all_p;
6661
6662      cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6663    }
6664  else
6665    gcc_unreachable ();
6666
6667  if (swap_p)
6668    {
6669      rtx tmp = cmp2;
6670      cmp2 = cmp1;
6671      cmp1 = tmp;
6672    }
6673
6674  emit_insn (gen_rtx_PARALLEL (VOIDmode,
6675	       gen_rtvec (2, gen_rtx_SET (
6676			       gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6677			       gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6678			  gen_rtx_CLOBBER (VOIDmode,
6679					   gen_rtx_SCRATCH (scratch_mode)))));
6680  emit_move_insn (target, const0_rtx);
6681  emit_move_insn (tmp_reg, const1_rtx);
6682
6683  emit_move_insn (target,
6684		  gen_rtx_IF_THEN_ELSE (SImode,
6685		    gen_rtx_fmt_ee (code, VOIDmode,
6686				    gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6687				    const0_rtx),
6688					tmp_reg, target));
6689}
6690
6691/* Invert the comparison CODE applied to a CC mode.  This is only safe
   if we know whether the result was created by a floating point
6693   compare or not.  For the CCV modes this is encoded as part of the
6694   mode.  */
6695enum rtx_code
6696s390_reverse_condition (machine_mode mode, enum rtx_code code)
6697{
  /* Reversal of FP compares needs care: an ordered compare
     becomes an unordered compare and vice versa.  */
6700  if (mode == CCVFALLmode || mode == CCVFANYmode || mode == CCSFPSmode)
6701    return reverse_condition_maybe_unordered (code);
6702  else if (mode == CCVIALLmode || mode == CCVIANYmode)
6703    return reverse_condition (code);
6704  else
6705    gcc_unreachable ();
6706}
6707
6708/* Generate a vector comparison expression loading either elements of
6709   THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6710   and CMP_OP2.  */
6711
6712void
6713s390_expand_vcond (rtx target, rtx then, rtx els,
6714		   enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6715{
6716  rtx tmp;
6717  machine_mode result_mode;
6718  rtx result_target;
6719
6720  machine_mode target_mode = GET_MODE (target);
6721  machine_mode cmp_mode = GET_MODE (cmp_op1);
6722  rtx op = (cond == LT) ? els : then;
6723
6724  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6725     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  Likewise
6726     for short and byte (x >> 15 and x >> 7 respectively).  */
6727  if ((cond == LT || cond == GE)
6728      && target_mode == cmp_mode
6729      && cmp_op2 == CONST0_RTX (cmp_mode)
6730      && op == CONST0_RTX (target_mode)
6731      && s390_vector_mode_supported_p (target_mode)
6732      && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6733    {
6734      rtx negop = (cond == LT) ? then : els;
6735
6736      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6737
6738      /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6739      if (negop == CONST1_RTX (target_mode))
6740	{
6741	  rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6742					 GEN_INT (shift), target,
6743					 1, OPTAB_DIRECT);
6744	  if (res != target)
6745	    emit_move_insn (target, res);
6746	  return;
6747	}
6748
6749      /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6750      else if (all_ones_operand (negop, target_mode))
6751	{
6752	  rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6753					 GEN_INT (shift), target,
6754					 0, OPTAB_DIRECT);
6755	  if (res != target)
6756	    emit_move_insn (target, res);
6757	  return;
6758	}
6759    }
6760
6761  /* We always use an integral type vector to hold the comparison
6762     result.  */
6763  result_mode = related_int_vector_mode (cmp_mode).require ();
6764  result_target = gen_reg_rtx (result_mode);
6765
6766  /* We allow vector immediates as comparison operands that
6767     can be handled by the optimization above but not by the
6768     following code.  Hence, force them into registers here.  */
6769  if (!REG_P (cmp_op1))
6770    cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6771
6772  if (!REG_P (cmp_op2))
6773    cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6774
6775  s390_expand_vec_compare (result_target, cond,
6776			   cmp_op1, cmp_op2);
6777
6778  /* If the results are supposed to be either -1 or 0 we are done
6779     since this is what our compare instructions generate anyway.  */
6780  if (all_ones_operand (then, GET_MODE (then))
6781      && const0_operand (els, GET_MODE (els)))
6782    {
6783      emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6784					      result_target, 0));
6785      return;
6786    }
6787
6788  /* Otherwise we will do a vsel afterwards.  */
6789  /* This gets triggered e.g.
6790     with gcc.c-torture/compile/pr53410-1.c */
6791  if (!REG_P (then))
6792    then = force_reg (target_mode, then);
6793
6794  if (!REG_P (els))
6795    els = force_reg (target_mode, els);
6796
6797  tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6798			result_target,
6799			CONST0_RTX (result_mode));
6800
6801  /* We compared the result against zero above so we have to swap then
6802     and els here.  */
6803  tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6804
6805  gcc_assert (target_mode == GET_MODE (then));
6806  emit_insn (gen_rtx_SET (target, tmp));
6807}
6808
6809/* Emit the RTX necessary to initialize the vector TARGET with values
6810   in VALS.  */
6811void
6812s390_expand_vec_init (rtx target, rtx vals)
6813{
6814  machine_mode mode = GET_MODE (target);
6815  machine_mode inner_mode = GET_MODE_INNER (mode);
6816  int n_elts = GET_MODE_NUNITS (mode);
6817  bool all_same = true, all_regs = true, all_const_int = true;
6818  rtx x;
6819  int i;
6820
6821  for (i = 0; i < n_elts; ++i)
6822    {
6823      x = XVECEXP (vals, 0, i);
6824
6825      if (!CONST_INT_P (x))
6826	all_const_int = false;
6827
6828      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6829	all_same = false;
6830
6831      if (!REG_P (x))
6832	all_regs = false;
6833    }
6834
6835  /* Use vector gen mask or vector gen byte mask if possible.  */
6836  if (all_same && all_const_int
6837      && (XVECEXP (vals, 0, 0) == const0_rtx
6838	  || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6839					       NULL, NULL)
6840	  || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6841    {
6842      emit_insn (gen_rtx_SET (target,
6843			      gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6844      return;
6845    }
6846
6847  /* Use vector replicate instructions.  vlrep/vrepi/vrep  */
6848  if (all_same)
6849    {
6850      rtx elem = XVECEXP (vals, 0, 0);
6851
6852      /* vec_splats accepts general_operand as source.  */
6853      if (!general_operand (elem, GET_MODE (elem)))
6854	elem = force_reg (inner_mode, elem);
6855
6856      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem)));
6857      return;
6858    }
6859
6860  if (all_regs
6861      && REG_P (target)
6862      && n_elts == 2
6863      && GET_MODE_SIZE (inner_mode) == 8)
6864    {
6865      /* Use vector load pair.  */
6866      emit_insn (gen_rtx_SET (target,
6867			      gen_rtx_VEC_CONCAT (mode,
6868						  XVECEXP (vals, 0, 0),
6869						  XVECEXP (vals, 0, 1))));
6870      return;
6871    }
6872
6873  /* Use vector load logical element and zero.  */
6874  if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
6875    {
6876      bool found = true;
6877
6878      x = XVECEXP (vals, 0, 0);
6879      if (memory_operand (x, inner_mode))
6880	{
6881	  for (i = 1; i < n_elts; ++i)
6882	    found = found && XVECEXP (vals, 0, i) == const0_rtx;
6883
6884	  if (found)
6885	    {
6886	      machine_mode half_mode = (inner_mode == SFmode
6887					? V2SFmode : V2SImode);
6888	      emit_insn (gen_rtx_SET (target,
6889			      gen_rtx_VEC_CONCAT (mode,
6890						  gen_rtx_VEC_CONCAT (half_mode,
6891								      x,
6892								      const0_rtx),
6893						  gen_rtx_VEC_CONCAT (half_mode,
6894								      const0_rtx,
6895								      const0_rtx))));
6896	      return;
6897	    }
6898	}
6899    }
6900
6901  /* We are about to set the vector elements one by one.  Zero out the
     full register first in order to help the data flow framework
     detect it as a full VR set.  */
6904  emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6905
6906  /* Unfortunately the vec_init expander is not allowed to fail.  So
6907     we have to implement the fallback ourselves.  */
6908  for (i = 0; i < n_elts; i++)
6909    {
6910      rtx elem = XVECEXP (vals, 0, i);
6911      if (!general_operand (elem, GET_MODE (elem)))
6912	elem = force_reg (inner_mode, elem);
6913
6914      emit_insn (gen_rtx_SET (target,
6915			      gen_rtx_UNSPEC (mode,
6916					      gen_rtvec (3, elem,
6917							 GEN_INT (i), target),
6918					      UNSPEC_VEC_SET)));
6919    }
6920}
6921
6922/* Structure to hold the initial parameters for a compare_and_swap operation
6923   in HImode and QImode.  */
6924
6925struct alignment_context
6926{
6927  rtx memsi;	  /* SI aligned memory location.  */
6928  rtx shift;	  /* Bit offset with regard to lsb.  */
6929  rtx modemask;	  /* Mask of the HQImode shifted by SHIFT bits.  */
6930  rtx modemaski;  /* ~modemask */
6931  bool aligned;	  /* True if memory is aligned, false else.  */
6932};
6933
/* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
   structure AC for transparent simplification, if the memory alignment is
   known to be at least 32 bit.  MEM is the memory location for the actual
   operation and MODE its mode.  */
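
/* E.g. for an HImode access at byte offset 1 within its SImode word
   (unknown alignment case), SHIFT ends up as (2 - 1) * 8 = 8 bits and
   MODEMASK as 0xffff << 8 = 0x00ffff00, selecting bytes 1 and 2 of
   the big-endian word loaded from the aligned address.  */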
6938
6939static void
6940init_alignment_context (struct alignment_context *ac, rtx mem,
6941			machine_mode mode)
6942{
6943  ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6944  ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6945
6946  if (ac->aligned)
6947    ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned.  */
6948  else
6949    {
6950      /* Alignment is unknown.  */
6951      rtx byteoffset, addr, align;
6952
6953      /* Force the address into a register.  */
6954      addr = force_reg (Pmode, XEXP (mem, 0));
6955
6956      /* Align it to SImode.  */
6957      align = expand_simple_binop (Pmode, AND, addr,
6958				   GEN_INT (-GET_MODE_SIZE (SImode)),
6959				   NULL_RTX, 1, OPTAB_DIRECT);
6960      /* Generate MEM.  */
6961      ac->memsi = gen_rtx_MEM (SImode, align);
6962      MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6963      set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6964      set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6965
6966      /* Calculate shiftcount.  */
6967      byteoffset = expand_simple_binop (Pmode, AND, addr,
6968					GEN_INT (GET_MODE_SIZE (SImode) - 1),
6969					NULL_RTX, 1, OPTAB_DIRECT);
6970      /* As we already have some offset, evaluate the remaining distance.  */
6971      ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6972				      NULL_RTX, 1, OPTAB_DIRECT);
6973    }
6974
6975  /* Shift is the byte count, but we need the bitcount.  */
6976  ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6977				   NULL_RTX, 1, OPTAB_DIRECT);
6978
6979  /* Calculate masks.  */
6980  ac->modemask = expand_simple_binop (SImode, ASHIFT,
6981				      GEN_INT (GET_MODE_MASK (mode)),
6982				      ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6983  ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6984				      NULL_RTX, 1);
6985}
6986
/* A subroutine of s390_expand_cs_hqi.  Insert INS into VAL.  If possible,
   emit a single insv insn into SEQ2.  Otherwise, put the preparation insns
   into SEQ1 and perform the merge in SEQ2.  */
6990
6991static rtx
6992s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6993		    machine_mode mode, rtx val, rtx ins)
6994{
6995  rtx tmp;
6996
6997  if (ac->aligned)
6998    {
6999      start_sequence ();
7000      tmp = copy_to_mode_reg (SImode, val);
7001      if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
7002			    const0_rtx, ins))
7003	{
7004	  *seq1 = NULL;
7005	  *seq2 = get_insns ();
7006	  end_sequence ();
7007	  return tmp;
7008	}
7009      end_sequence ();
7010    }
7011
  /* Failed to use insv.  Generate a two-part shift and mask.  */
7013  start_sequence ();
7014  tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
7015  *seq1 = get_insns ();
7016  end_sequence ();
7017
7018  start_sequence ();
7019  tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
7020  *seq2 = get_insns ();
7021  end_sequence ();
7022
7023  return tmp;
7024}
7025
7026/* Expand an atomic compare and swap operation for HImode and QImode.  MEM is
7027   the memory location, CMP the old value to compare MEM with and NEW_RTX the
7028   value to set if CMP == MEM.  */
7029
7030static void
7031s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7032		    rtx cmp, rtx new_rtx, bool is_weak)
7033{
7034  struct alignment_context ac;
7035  rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
7036  rtx res = gen_reg_rtx (SImode);
7037  rtx_code_label *csloop = NULL, *csend = NULL;
7038
7039  gcc_assert (MEM_P (mem));
7040
7041  init_alignment_context (&ac, mem, mode);
7042
7043  /* Load full word.  Subsequent loads are performed by CS.  */
7044  val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
7045			     NULL_RTX, 1, OPTAB_DIRECT);
7046
7047  /* Prepare insertions of cmp and new_rtx into the loaded value.  When
7048     possible, we try to use insv to make this happen efficiently.  If
7049     that fails we'll generate code both inside and outside the loop.  */
7050  cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
7051  newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
7052
7053  if (seq0)
7054    emit_insn (seq0);
7055  if (seq1)
7056    emit_insn (seq1);
7057
7058  /* Start CS loop.  */
7059  if (!is_weak)
7060    {
7061      /* Begin assuming success.  */
7062      emit_move_insn (btarget, const1_rtx);
7063
7064      csloop = gen_label_rtx ();
7065      csend = gen_label_rtx ();
7066      emit_label (csloop);
7067    }
7068
7069  /* val = "<mem>00..0<mem>"
7070   * cmp = "00..0<cmp>00..0"
7071   * new = "00..0<new>00..0"
7072   */
7073
7074  emit_insn (seq2);
7075  emit_insn (seq3);
7076
7077  cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
7078  if (is_weak)
7079    emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
7080  else
7081    {
7082      rtx tmp;
7083
7084      /* Jump to end if we're done (likely?).  */
7085      s390_emit_jump (csend, cc);
7086
      /* Check for changes outside the mode, and loop internally if so.
7088	 Arrange the moves so that the compare is adjacent to the
7089	 branch so that we can generate CRJ.  */
7090      tmp = copy_to_reg (val);
7091      force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
7092			  1, OPTAB_DIRECT);
7093      cc = s390_emit_compare (NE, val, tmp);
7094      s390_emit_jump (csloop, cc);
7095
7096      /* Failed.  */
7097      emit_move_insn (btarget, const0_rtx);
7098      emit_label (csend);
7099    }
7100
7101  /* Return the correct part of the bitfield.  */
7102  convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
7103					      NULL_RTX, 1, OPTAB_DIRECT), 1);
7104}
7105
7106/* Variant of s390_expand_cs for SI, DI and TI modes.  */
7107static void
7108s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7109		     rtx cmp, rtx new_rtx, bool is_weak)
7110{
7111  rtx output = vtarget;
7112  rtx_code_label *skip_cs_label = NULL;
7113  bool do_const_opt = false;
7114
7115  if (!register_operand (output, mode))
7116    output = gen_reg_rtx (mode);
7117
7118  /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
     with the constant first and skip the compare_and_swap because it's very
     expensive and likely to fail anyway.
     Note 1: This is done only for IS_WEAK.  C11 allows optimizations that may
     cause spurious failures in that case.
7123     Note 2: It may be useful to do this also for non-constant INPUT.
7124     Note 3: Currently only targets with "load on condition" are supported
7125     (z196 and newer).  */
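  /* Illustrative sketch (not literal output) of the SImode sequence generated
     below for a weak compare-and-swap against constant zero; register numbers
     and the final store-flags idiom are examples only:

	lhi	%r2,0		# btarget = 0
	lt	%r3,0(%r5)	# load and test the memory operand
	jne	.Lskip		# memory != 0: the CS would fail anyway
	cs	%r3,%r4,0(%r5)	# actual compare-and-swap
     .Lskip:
	...			# btarget = 1 if CC is zero  */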
7126
7127  if (TARGET_Z196
7128      && (mode == SImode || mode == DImode))
7129    do_const_opt = (is_weak && CONST_INT_P (cmp));
7130
7131  if (do_const_opt)
7132    {
7133      rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7134
7135      skip_cs_label = gen_label_rtx ();
7136      emit_move_insn (btarget, const0_rtx);
7137      if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
7138	{
7139	  rtvec lt = rtvec_alloc (2);
7140
7141	  /* Load-and-test + conditional jump.  */
7142	  RTVEC_ELT (lt, 0)
7143	    = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
7144	  RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
7145	  emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
7146	}
7147      else
7148	{
7149	  emit_move_insn (output, mem);
7150	  emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
7151	}
7152      s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
7153      add_reg_br_prob_note (get_last_insn (),
7154			    profile_probability::very_unlikely ());
7155      /* If the jump is not taken, OUTPUT is the expected value.  */
7156      cmp = output;
7157      /* Reload newval to a register manually, *after* the compare and jump
7158	 above.  Otherwise Reload might place it before the jump.  */
7159    }
7160  else
7161    cmp = force_reg (mode, cmp);
7162  new_rtx = force_reg (mode, new_rtx);
7163  s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
7164			      (do_const_opt) ? CCZmode : CCZ1mode);
7165  if (skip_cs_label != NULL)
7166    emit_label (skip_cs_label);
7167
7168  /* We deliberately accept non-register operands in the predicate
7169     to ensure the write back to the output operand happens *before*
7170     the store-flags code below.  This makes it easier for combine
7171     to merge the store-flags code with a potential test-and-branch
7172     pattern following (immediately!) afterwards.  */
7173  if (output != vtarget)
7174    emit_move_insn (vtarget, output);
7175
7176  if (do_const_opt)
7177    {
7178      rtx cc, cond, ite;
7179
7180      /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7181	 btarget has already been initialized with 0 above.  */
7182      cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7183      cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
7184      ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
7185      emit_insn (gen_rtx_SET (btarget, ite));
7186    }
7187  else
7188    {
7189      rtx cc, cond;
7190
7191      cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
7192      cond = gen_rtx_EQ (SImode, cc, const0_rtx);
7193      emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
7194    }
7195}
7196
7197/* Expand an atomic compare and swap operation.  MEM is the memory location,
7198   CMP the old value to compare MEM with and NEW_RTX the value to set if
7199   CMP == MEM.  */
7200
7201void
7202s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7203		rtx cmp, rtx new_rtx, bool is_weak)
7204{
7205  switch (mode)
7206    {
7207    case E_TImode:
7208    case E_DImode:
7209    case E_SImode:
7210      s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7211      break;
7212    case E_HImode:
7213    case E_QImode:
7214      s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7215      break;
7216    default:
7217      gcc_unreachable ();
7218    }
7219}
7220
7221/* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7222   The memory location MEM is set to INPUT.  OUTPUT is set to the previous value
7223   of MEM.  */
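/* For example, exchanging with the constant zero on SImode/DImode and z196 or
   newer is handled below by a single atomic fetch-and-AND with zero (which
   stores zero and yields the previous value) instead of a CS loop.  */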
7224
7225void
7226s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
7227{
7228  machine_mode mode = GET_MODE (mem);
7229  rtx_code_label *csloop;
7230
7231  if (TARGET_Z196
7232      && (mode == DImode || mode == SImode)
7233      && CONST_INT_P (input) && INTVAL (input) == 0)
7234    {
7235      emit_move_insn (output, const0_rtx);
7236      if (mode == DImode)
7237	emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
7238      else
7239	emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
7240      return;
7241    }
7242
7243  input = force_reg (mode, input);
7244  emit_move_insn (output, mem);
7245  csloop = gen_label_rtx ();
7246  emit_label (csloop);
7247  s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
7248						      input, CCZ1mode));
7249}
7250
7251/* Expand an atomic operation CODE of mode MODE.  MEM is the memory location
7252   and VAL the value to play with.  If AFTER is true then store the value
7253   MEM holds after the operation, if AFTER is false then store the value MEM
7254   holds before the operation.  If TARGET is zero then discard that value, else
7255   store it to TARGET.  */
7256
7257void
7258s390_expand_atomic (machine_mode mode, enum rtx_code code,
7259		    rtx target, rtx mem, rtx val, bool after)
7260{
7261  struct alignment_context ac;
7262  rtx cmp;
7263  rtx new_rtx = gen_reg_rtx (SImode);
7264  rtx orig = gen_reg_rtx (SImode);
7265  rtx_code_label *csloop = gen_label_rtx ();
7266
7267  gcc_assert (!target || register_operand (target, VOIDmode));
7268  gcc_assert (MEM_P (mem));
7269
7270  init_alignment_context (&ac, mem, mode);
7271
7272  /* Shift val to the correct bit positions.
7273     Preserve "icm", but prevent "ex icm".  */
7274  if (!(ac.aligned && code == SET && MEM_P (val)))
7275    val = s390_expand_mask_and_shift (val, mode, ac.shift);
7276
7277  /* Further preparation insns.  */
7278  if (code == PLUS || code == MINUS)
7279    emit_move_insn (orig, val);
7280  else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
7281    val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7282			       NULL_RTX, 1, OPTAB_DIRECT);
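
  /* Worked example for the AND/NAND case: assuming a QImode operand located
     in bits 23..16 of the containing word (modemask 0x00ff0000) and a shifted
     VAL of 0x000f0000, the XOR above yields 0xff0fffff, so the AND performed
     in the loop below can only clear bits inside the byte of interest and
     leaves the neighbouring bytes of the word untouched.  */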
7283
7284  /* Load full word.  Subsequent loads are performed by CS.  */
7285  cmp = force_reg (SImode, ac.memsi);
7286
7287  /* Start CS loop.  */
7288  emit_label (csloop);
7289  emit_move_insn (new_rtx, cmp);
7290
7291  /* Patch new with val at correct position.  */
7292  switch (code)
7293    {
7294    case PLUS:
7295    case MINUS:
7296      val = expand_simple_binop (SImode, code, new_rtx, orig,
7297				 NULL_RTX, 1, OPTAB_DIRECT);
7298      val = expand_simple_binop (SImode, AND, val, ac.modemask,
7299				 NULL_RTX, 1, OPTAB_DIRECT);
7300      /* FALLTHRU */
7301    case SET:
7302      if (ac.aligned && MEM_P (val))
7303	store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7304			 0, 0, SImode, val, false);
7305      else
7306	{
7307	  new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7308				     NULL_RTX, 1, OPTAB_DIRECT);
7309	  new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7310				     NULL_RTX, 1, OPTAB_DIRECT);
7311	}
7312      break;
7313    case AND:
7314    case IOR:
7315    case XOR:
7316      new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7317				 NULL_RTX, 1, OPTAB_DIRECT);
7318      break;
7319    case MULT: /* NAND */
7320      new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7321				 NULL_RTX, 1, OPTAB_DIRECT);
7322      new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7323				 NULL_RTX, 1, OPTAB_DIRECT);
7324      break;
7325    default:
7326      gcc_unreachable ();
7327    }
7328
7329  s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7330						      ac.memsi, cmp, new_rtx,
7331						      CCZ1mode));
7332
7333  /* Return the correct part of the bitfield.  */
7334  if (target)
7335    convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7336					       after ? new_rtx : cmp, ac.shift,
7337					       NULL_RTX, 1, OPTAB_DIRECT), 1);
7338}
7339
7340/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7341   We need to emit DTP-relative relocations.  */
7342
7343static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7344
7345static void
7346s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
7347{
7348  switch (size)
7349    {
7350    case 4:
7351      fputs ("\t.long\t", file);
7352      break;
7353    case 8:
7354      fputs ("\t.quad\t", file);
7355      break;
7356    default:
7357      gcc_unreachable ();
7358    }
7359  output_addr_const (file, x);
7360  fputs ("@DTPOFF", file);
7361}
7362
7363/* Return the proper mode for REGNO being represented in the dwarf
7364   unwind table.  */
7365machine_mode
7366s390_dwarf_frame_reg_mode (int regno)
7367{
7368  machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7369
7370  /* Make sure not to return DImode for any GPR with -m31 -mzarch.  */
7371  if (GENERAL_REGNO_P (regno))
7372    save_mode = Pmode;
7373
7374  /* The rightmost 64 bits of vector registers are call-clobbered.  */
7375  if (GET_MODE_SIZE (save_mode) > 8)
7376    save_mode = DImode;
7377
7378  return save_mode;
7379}
7380
7381#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7382/* Implement TARGET_MANGLE_TYPE.  */
7383
7384static const char *
7385s390_mangle_type (const_tree type)
7386{
7387  type = TYPE_MAIN_VARIANT (type);
7388
7389  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
7390      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
7391    return NULL;
7392
7393  if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
7394  if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
7395  if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
7396  if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
7397
7398  if (TYPE_MAIN_VARIANT (type) == long_double_type_node
7399      && TARGET_LONG_DOUBLE_128)
7400    return "g";
7401
7402  /* For all other types, use normal C++ mangling.  */
7403  return NULL;
7404}
7405#endif
7406
7407/* In the name of slightly smaller debug output, and to cater to
7408   general assembler lossage, recognize various UNSPEC sequences
7409   and turn them back into a direct symbol reference.  */
7410
7411static rtx
7412s390_delegitimize_address (rtx orig_x)
7413{
7414  rtx x, y;
7415
7416  orig_x = delegitimize_mem_from_attrs (orig_x);
7417  x = orig_x;
7418
7419  /* Extract the symbol ref from:
7420     (plus:SI (reg:SI 12 %r12)
7421	      (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7422				    UNSPEC_GOTOFF/PLTOFF)))
7423     and
7424     (plus:SI (reg:SI 12 %r12)
7425	      (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7426					     UNSPEC_GOTOFF/PLTOFF)
7427				 (const_int 4 [0x4]))))  */
7428  if (GET_CODE (x) == PLUS
7429      && REG_P (XEXP (x, 0))
7430      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7431      && GET_CODE (XEXP (x, 1)) == CONST)
7432    {
7433      HOST_WIDE_INT offset = 0;
7434
7435      /* The const operand.  */
7436      y = XEXP (XEXP (x, 1), 0);
7437
7438      if (GET_CODE (y) == PLUS
7439	  && GET_CODE (XEXP (y, 1)) == CONST_INT)
7440	{
7441	  offset = INTVAL (XEXP (y, 1));
7442	  y = XEXP (y, 0);
7443	}
7444
7445      if (GET_CODE (y) == UNSPEC
7446	  && (XINT (y, 1) == UNSPEC_GOTOFF
7447	      || XINT (y, 1) == UNSPEC_PLTOFF))
7448	return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7449    }
7450
7451  if (GET_CODE (x) != MEM)
7452    return orig_x;
7453
7454  x = XEXP (x, 0);
7455  if (GET_CODE (x) == PLUS
7456      && GET_CODE (XEXP (x, 1)) == CONST
7457      && GET_CODE (XEXP (x, 0)) == REG
7458      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7459    {
7460      y = XEXP (XEXP (x, 1), 0);
7461      if (GET_CODE (y) == UNSPEC
7462	  && XINT (y, 1) == UNSPEC_GOT)
7463	y = XVECEXP (y, 0, 0);
7464      else
7465	return orig_x;
7466    }
7467  else if (GET_CODE (x) == CONST)
7468    {
7469      /* Extract the symbol ref from:
7470	 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7471				       UNSPEC_PLT/GOTENT)))  */
7472
7473      y = XEXP (x, 0);
7474      if (GET_CODE (y) == UNSPEC
7475	  && (XINT (y, 1) == UNSPEC_GOTENT
7476	      || XINT (y, 1) == UNSPEC_PLT))
7477	y = XVECEXP (y, 0, 0);
7478      else
7479	return orig_x;
7480    }
7481  else
7482    return orig_x;
7483
7484  if (GET_MODE (orig_x) != Pmode)
7485    {
7486      if (GET_MODE (orig_x) == BLKmode)
7487	return orig_x;
7488      y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7489      if (y == NULL_RTX)
7490	return orig_x;
7491    }
7492  return y;
7493}
7494
7495/* Output operand OP to stdio stream FILE.
7496   OP is an address (register + offset) which is not used to address data;
7497   instead the rightmost bits are interpreted as the value.  */
7498
7499static void
7500print_addrstyle_operand (FILE *file, rtx op)
7501{
7502  HOST_WIDE_INT offset;
7503  rtx base;
7504
7505  /* Extract base register and offset.  */
7506  if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7507    gcc_unreachable ();
7508
7509  /* Sanity check.  */
7510  if (base)
7511    {
7512      gcc_assert (GET_CODE (base) == REG);
7513      gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7514      gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7515    }
7516
  /* Offsets are restricted to twelve bits.  */
7518  fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7519  if (base)
7520    fprintf (file, "(%s)", reg_names[REGNO (base)]);
7521}
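
/* For instance (assuming the decomposition above succeeds), an operand
   (plus (reg %r1) (const_int 7)) is printed as "7(%r1)", and a plain
   (const_int 63) simply as "63"; only the low twelve bits of the offset
   are emitted.  */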
7522
7523/* Print the shift count operand OP to FILE.
7524   OP is an address-style operand in a form which
7525   s390_valid_shift_count permits.  Subregs and no-op
7526   and-masking of the operand are stripped.  */
7527
7528static void
7529print_shift_count_operand (FILE *file, rtx op)
7530{
  /* No checking of the AND mask is required here.  */
7532  if (!s390_valid_shift_count (op, 0))
7533    gcc_unreachable ();
7534
7535  while (op && GET_CODE (op) == SUBREG)
7536    op = SUBREG_REG (op);
7537
7538  if (GET_CODE (op) == AND)
7539    op = XEXP (op, 0);
7540
7541  print_addrstyle_operand (file, op);
7542}
7543
/* Assigns the number of NOP halfwords to be emitted before and after the
   function label to *HW_BEFORE and *HW_AFTER.  Neither pointer may be NULL.
   If hotpatching is disabled for the function, the values are set to zero.  */
7548
7549static void
7550s390_function_num_hotpatch_hw (tree decl,
7551			       int *hw_before,
7552			       int *hw_after)
7553{
7554  tree attr;
7555
7556  attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7557
7558  /* Handle the arguments of the hotpatch attribute.  The values
7559     specified via attribute might override the cmdline argument
7560     values.  */
7561  if (attr)
7562    {
7563      tree args = TREE_VALUE (attr);
7564
7565      *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7566      *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7567    }
7568  else
7569    {
7570      /* Use the values specified by the cmdline arguments.  */
7571      *hw_before = s390_hotpatch_hw_before_label;
7572      *hw_after = s390_hotpatch_hw_after_label;
7573    }
7574}
7575
7576/* Write the current .machine and .machinemode specification to the assembler
7577   file.  */
7578
7579#ifdef HAVE_AS_MACHINE_MACHINEMODE
7580static void
7581s390_asm_output_machine_for_arch (FILE *asm_out_file)
7582{
7583  fprintf (asm_out_file, "\t.machinemode %s\n",
7584	   (TARGET_ZARCH) ? "zarch" : "esa");
7585  fprintf (asm_out_file, "\t.machine \"%s",
7586	   processor_table[s390_arch].binutils_name);
7587  if (S390_USE_ARCHITECTURE_MODIFIERS)
7588    {
7589      int cpu_flags;
7590
7591      cpu_flags = processor_flags_table[(int) s390_arch];
7592      if (TARGET_HTM && !(cpu_flags & PF_TX))
7593	fprintf (asm_out_file, "+htm");
7594      else if (!TARGET_HTM && (cpu_flags & PF_TX))
7595	fprintf (asm_out_file, "+nohtm");
7596      if (TARGET_VX && !(cpu_flags & PF_VX))
7597	fprintf (asm_out_file, "+vx");
7598      else if (!TARGET_VX && (cpu_flags & PF_VX))
7599	fprintf (asm_out_file, "+novx");
7600    }
7601  fprintf (asm_out_file, "\"\n");
7602}
7603
7604/* Write an extra function header before the very start of the function.  */
7605
7606void
7607s390_asm_output_function_prefix (FILE *asm_out_file,
7608				 const char *fnname ATTRIBUTE_UNUSED)
7609{
7610  if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7611    return;
  /* Since only the function-specific options are saved, and not an indication
     of which options were explicitly set, it's too much work here to figure
     out which options have actually changed.  Thus, generate .machine and
     .machinemode whenever a function has the target attribute or pragma.  */
7616  fprintf (asm_out_file, "\t.machinemode push\n");
7617  fprintf (asm_out_file, "\t.machine push\n");
7618  s390_asm_output_machine_for_arch (asm_out_file);
7619}
7620
7621/* Write an extra function footer after the very end of the function.  */
7622
7623void
7624s390_asm_declare_function_size (FILE *asm_out_file,
7625				const char *fnname, tree decl)
7626{
7627  if (!flag_inhibit_size_directive)
7628    ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7629  if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7630    return;
7631  fprintf (asm_out_file, "\t.machine pop\n");
7632  fprintf (asm_out_file, "\t.machinemode pop\n");
7633}
7634#endif
7635
7636/* Write the extra assembler code needed to declare a function properly.  */
7637
7638void
7639s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7640				tree decl)
7641{
7642  int hw_before, hw_after;
7643
7644  s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7645  if (hw_before > 0)
7646    {
7647      unsigned int function_alignment;
7648      int i;
7649
7650      /* Add a trampoline code area before the function label and initialize it
7651	 with two-byte nop instructions.  This area can be overwritten with code
7652	 that jumps to a patched version of the function.  */
7653      asm_fprintf (asm_out_file, "\tnopr\t%%r0"
7654		   "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7655		   hw_before);
7656      for (i = 1; i < hw_before; i++)
7657	fputs ("\tnopr\t%r0\n", asm_out_file);
7658
7659      /* Note:  The function label must be aligned so that (a) the bytes of the
7660	 following nop do not cross a cacheline boundary, and (b) a jump address
	 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
7662	 stored directly before the label without crossing a cacheline
7663	 boundary.  All this is necessary to make sure the trampoline code can
7664	 be changed atomically.
	 This alignment is done automatically using the FUNCTION_BOUNDARY, but
7666	 if there are NOPs before the function label, the alignment is placed
7667	 before them.  So it is necessary to duplicate the alignment after the
7668	 NOPs.  */
7669      function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7670      if (! DECL_USER_ALIGN (decl))
7671	function_alignment
7672	  = MAX (function_alignment,
7673		 (unsigned int) align_functions.levels[0].get_value ());
7674      fputs ("\t# alignment for hotpatch\n", asm_out_file);
7675      ASM_OUTPUT_ALIGN (asm_out_file, align_functions.levels[0].log);
7676    }
7677
7678  if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7679    {
7680      asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7681      asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7682      asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7683      asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7684      asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7685      asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7686		   s390_warn_framesize);
7687      asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7688      asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7689      asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7690      asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7691      asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7692      asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7693		   TARGET_PACKED_STACK);
7694      asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7695      asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7696      asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7697      asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7698		   s390_warn_dynamicstack_p);
7699    }
7700  ASM_OUTPUT_LABEL (asm_out_file, fname);
7701  if (hw_after > 0)
7702    asm_fprintf (asm_out_file,
7703		 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7704		 hw_after);
7705}
7706
7707/* Output machine-dependent UNSPECs occurring in address constant X
7708   in assembler syntax to stdio stream FILE.  Returns true if the
7709   constant X could be recognized, false otherwise.  */
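/* For example, (unspec [(symbol_ref "foo")] UNSPEC_GOTENT) is printed
   as "foo@GOTENT".  */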
7710
7711static bool
7712s390_output_addr_const_extra (FILE *file, rtx x)
7713{
7714  if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7715    switch (XINT (x, 1))
7716      {
7717      case UNSPEC_GOTENT:
7718	output_addr_const (file, XVECEXP (x, 0, 0));
7719	fprintf (file, "@GOTENT");
7720	return true;
7721      case UNSPEC_GOT:
7722	output_addr_const (file, XVECEXP (x, 0, 0));
7723	fprintf (file, "@GOT");
7724	return true;
7725      case UNSPEC_GOTOFF:
7726	output_addr_const (file, XVECEXP (x, 0, 0));
7727	fprintf (file, "@GOTOFF");
7728	return true;
7729      case UNSPEC_PLT:
7730	output_addr_const (file, XVECEXP (x, 0, 0));
7731	fprintf (file, "@PLT");
7732	return true;
7733      case UNSPEC_PLTOFF:
7734	output_addr_const (file, XVECEXP (x, 0, 0));
7735	fprintf (file, "@PLTOFF");
7736	return true;
7737      case UNSPEC_TLSGD:
7738	output_addr_const (file, XVECEXP (x, 0, 0));
7739	fprintf (file, "@TLSGD");
7740	return true;
7741      case UNSPEC_TLSLDM:
7742	assemble_name (file, get_some_local_dynamic_name ());
7743	fprintf (file, "@TLSLDM");
7744	return true;
7745      case UNSPEC_DTPOFF:
7746	output_addr_const (file, XVECEXP (x, 0, 0));
7747	fprintf (file, "@DTPOFF");
7748	return true;
7749      case UNSPEC_NTPOFF:
7750	output_addr_const (file, XVECEXP (x, 0, 0));
7751	fprintf (file, "@NTPOFF");
7752	return true;
7753      case UNSPEC_GOTNTPOFF:
7754	output_addr_const (file, XVECEXP (x, 0, 0));
7755	fprintf (file, "@GOTNTPOFF");
7756	return true;
7757      case UNSPEC_INDNTPOFF:
7758	output_addr_const (file, XVECEXP (x, 0, 0));
7759	fprintf (file, "@INDNTPOFF");
7760	return true;
7761      }
7762
7763  if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7764    switch (XINT (x, 1))
7765      {
7766      case UNSPEC_POOL_OFFSET:
7767	x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7768	output_addr_const (file, x);
7769	return true;
7770      }
7771  return false;
7772}
7773
7774/* Output address operand ADDR in assembler syntax to
7775   stdio stream FILE.  */
7776
7777void
7778print_operand_address (FILE *file, rtx addr)
7779{
7780  struct s390_address ad;
7781  memset (&ad, 0, sizeof (s390_address));
7782
7783  if (s390_loadrelative_operand_p (addr, NULL, NULL))
7784    {
7785      if (!TARGET_Z10)
7786	{
7787	  output_operand_lossage ("symbolic memory references are "
7788				  "only supported on z10 or later");
7789	  return;
7790	}
7791      output_addr_const (file, addr);
7792      return;
7793    }
7794
7795  if (!s390_decompose_address (addr, &ad)
7796      || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7797      || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7798    output_operand_lossage ("cannot decompose address");
7799
7800  if (ad.disp)
7801    output_addr_const (file, ad.disp);
7802  else
7803    fprintf (file, "0");
7804
7805  if (ad.base && ad.indx)
7806    fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7807			      reg_names[REGNO (ad.base)]);
7808  else if (ad.base)
7809    fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7810}
7811
7812/* Output operand X in assembler syntax to stdio stream FILE.
7813   CODE specified the format flag.  The following format flags
7814   are recognized:
7815
    'A': On z14 or higher: if the operand is a MEM, print the alignment
	 hint usable with vl/vst, prefixed by a comma.
7818    'C': print opcode suffix for branch condition.
7819    'D': print opcode suffix for inverse branch condition.
7820    'E': print opcode suffix for branch on index instruction.
7821    'G': print the size of the operand in bytes.
7822    'J': print tls_load/tls_gdcall/tls_ldcall suffix
7823    'M': print the second word of a TImode operand.
7824    'N': print the second word of a DImode operand.
7825    'O': print only the displacement of a memory reference or address.
7826    'R': print only the base register of a memory reference or address.
7827    'S': print S-type memory reference (base+displacement).
7828    'Y': print address style operand without index (e.g. shift count or setmem
7829	 operand).
7830
7831    'b': print integer X as if it's an unsigned byte.
    'c': print integer X as if it's a signed byte.
7833    'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7834    'f': "end" contiguous bitmask X in SImode.
7835    'h': print integer X as if it's a signed halfword.
7836    'i': print the first nonzero HImode part of X.
7837    'j': print the first HImode part unequal to -1 of X.
7838    'k': print the first nonzero SImode part of X.
7839    'm': print the first SImode part unequal to -1 of X.
    'o': print integer X as if it's an unsigned 32-bit word.
7841    's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7842    't': CONST_INT: "start" of contiguous bitmask X in SImode.
7843	 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7844    'x': print integer X as if it's an unsigned halfword.
7845    'v': print register number as vector register (v1 instead of f1).
7846*/
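/* A few worked examples of the integer modifiers, following the CONST_INT
   handling below: with X == (const_int -1), %b prints 255, %c prints -1,
   %x prints 65535, %h prints -1, and %o prints 4294967295.  */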
7847
7848void
7849print_operand (FILE *file, rtx x, int code)
7850{
7851  HOST_WIDE_INT ival;
7852
7853  switch (code)
7854    {
7855    case 'A':
7856      if (TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS && MEM_P (x))
7857	{
7858	  if (MEM_ALIGN (x) >= 128)
7859	    fprintf (file, ",4");
7860	  else if (MEM_ALIGN (x) == 64)
7861	    fprintf (file, ",3");
7862	}
7863      return;
7864    case 'C':
7865      fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7866      return;
7867
7868    case 'D':
7869      fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7870      return;
7871
7872    case 'E':
7873      if (GET_CODE (x) == LE)
7874	fprintf (file, "l");
7875      else if (GET_CODE (x) == GT)
7876	fprintf (file, "h");
7877      else
7878	output_operand_lossage ("invalid comparison operator "
7879				"for 'E' output modifier");
7880      return;
7881
7882    case 'J':
7883      if (GET_CODE (x) == SYMBOL_REF)
7884	{
7885	  fprintf (file, "%s", ":tls_load:");
7886	  output_addr_const (file, x);
7887	}
7888      else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7889	{
7890	  fprintf (file, "%s", ":tls_gdcall:");
7891	  output_addr_const (file, XVECEXP (x, 0, 0));
7892	}
7893      else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7894	{
7895	  fprintf (file, "%s", ":tls_ldcall:");
7896	  const char *name = get_some_local_dynamic_name ();
7897	  gcc_assert (name);
7898	  assemble_name (file, name);
7899	}
7900      else
7901	output_operand_lossage ("invalid reference for 'J' output modifier");
7902      return;
7903
7904    case 'G':
7905      fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7906      return;
7907
7908    case 'O':
7909      {
7910	struct s390_address ad;
7911	int ret;
7912
7913	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7914
7915	if (!ret
7916	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7917	    || ad.indx)
7918	  {
7919	    output_operand_lossage ("invalid address for 'O' output modifier");
7920	    return;
7921	  }
7922
7923	if (ad.disp)
7924	  output_addr_const (file, ad.disp);
7925	else
7926	  fprintf (file, "0");
7927      }
7928      return;
7929
7930    case 'R':
7931      {
7932	struct s390_address ad;
7933	int ret;
7934
7935	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7936
7937	if (!ret
7938	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7939	    || ad.indx)
7940	  {
7941	    output_operand_lossage ("invalid address for 'R' output modifier");
7942	    return;
7943	  }
7944
7945	if (ad.base)
7946	  fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7947	else
7948	  fprintf (file, "0");
7949      }
7950      return;
7951
7952    case 'S':
7953      {
7954	struct s390_address ad;
7955	int ret;
7956
7957	if (!MEM_P (x))
7958	  {
7959	    output_operand_lossage ("memory reference expected for "
7960				    "'S' output modifier");
7961	    return;
7962	  }
7963	ret = s390_decompose_address (XEXP (x, 0), &ad);
7964
7965	if (!ret
7966	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7967	    || ad.indx)
7968	  {
7969	    output_operand_lossage ("invalid address for 'S' output modifier");
7970	    return;
7971	  }
7972
7973	if (ad.disp)
7974	  output_addr_const (file, ad.disp);
7975	else
7976	  fprintf (file, "0");
7977
7978	if (ad.base)
7979	  fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7980      }
7981      return;
7982
7983    case 'N':
7984      if (GET_CODE (x) == REG)
7985	x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7986      else if (GET_CODE (x) == MEM)
7987	x = change_address (x, VOIDmode,
7988			    plus_constant (Pmode, XEXP (x, 0), 4));
7989      else
7990	output_operand_lossage ("register or memory expression expected "
7991				"for 'N' output modifier");
7992      break;
7993
7994    case 'M':
7995      if (GET_CODE (x) == REG)
7996	x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7997      else if (GET_CODE (x) == MEM)
7998	x = change_address (x, VOIDmode,
7999			    plus_constant (Pmode, XEXP (x, 0), 8));
8000      else
8001	output_operand_lossage ("register or memory expression expected "
8002				"for 'M' output modifier");
8003      break;
8004
8005    case 'Y':
8006      print_shift_count_operand (file, x);
8007      return;
8008    }
8009
8010  switch (GET_CODE (x))
8011    {
8012    case REG:
8013      /* Print FP regs as fx instead of vx when they are accessed
8014	 through non-vector mode.  */
8015      if (code == 'v'
8016	  || VECTOR_NOFP_REG_P (x)
8017	  || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
8018	  || (VECTOR_REG_P (x)
8019	      && (GET_MODE_SIZE (GET_MODE (x)) /
8020		  s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
8021	fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
8022      else
8023	fprintf (file, "%s", reg_names[REGNO (x)]);
8024      break;
8025
8026    case MEM:
8027      output_address (GET_MODE (x), XEXP (x, 0));
8028      break;
8029
8030    case CONST:
8031    case CODE_LABEL:
8032    case LABEL_REF:
8033    case SYMBOL_REF:
8034      output_addr_const (file, x);
8035      break;
8036
8037    case CONST_INT:
8038      ival = INTVAL (x);
8039      switch (code)
8040	{
8041	case 0:
8042	  break;
8043	case 'b':
8044	  ival &= 0xff;
8045	  break;
8046	case 'c':
8047	  ival = ((ival & 0xff) ^ 0x80) - 0x80;
8048	  break;
8049	case 'x':
8050	  ival &= 0xffff;
8051	  break;
8052	case 'h':
8053	  ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
8054	  break;
8055	case 'i':
8056	  ival = s390_extract_part (x, HImode, 0);
8057	  break;
8058	case 'j':
8059	  ival = s390_extract_part (x, HImode, -1);
8060	  break;
8061	case 'k':
8062	  ival = s390_extract_part (x, SImode, 0);
8063	  break;
8064	case 'm':
8065	  ival = s390_extract_part (x, SImode, -1);
8066	  break;
8067	case 'o':
8068	  ival &= 0xffffffff;
8069	  break;
8070	case 'e': case 'f':
8071	case 's': case 't':
8072	  {
8073	    int start, end;
8074	    int len;
8075	    bool ok;
8076
8077	    len = (code == 's' || code == 'e' ? 64 : 32);
8078	    ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
8079	    gcc_assert (ok);
8080	    if (code == 's' || code == 't')
8081	      ival = start;
8082	    else
8083	      ival = end;
8084	  }
8085	  break;
8086	default:
8087	  output_operand_lossage ("invalid constant for output modifier '%c'", code);
8088	}
8089      fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8090      break;
8091
8092    case CONST_WIDE_INT:
8093      if (code == 'b')
8094	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8095		 CONST_WIDE_INT_ELT (x, 0) & 0xff);
8096      else if (code == 'x')
8097	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8098		 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
8099      else if (code == 'h')
8100	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8101		 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
8102      else
8103	{
8104	  if (code == 0)
8105	    output_operand_lossage ("invalid constant - try using "
8106				    "an output modifier");
8107	  else
8108	    output_operand_lossage ("invalid constant for output modifier '%c'",
8109				    code);
8110	}
8111      break;
8112    case CONST_VECTOR:
8113      switch (code)
8114	{
8115	case 'h':
8116	  gcc_assert (const_vec_duplicate_p (x));
8117	  fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8118		   ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
8119	  break;
8120	case 'e':
8121	case 's':
8122	  {
8123	    int start, end;
8124	    bool ok;
8125
8126	    ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
8127	    gcc_assert (ok);
8128	    ival = (code == 's') ? start : end;
8129	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8130	  }
8131	  break;
8132	case 't':
8133	  {
8134	    unsigned mask;
8135	    bool ok = s390_bytemask_vector_p (x, &mask);
8136	    gcc_assert (ok);
8137	    fprintf (file, "%u", mask);
8138	  }
8139	  break;
8140
8141	default:
8142	  output_operand_lossage ("invalid constant vector for output "
8143				  "modifier '%c'", code);
8144	}
8145      break;
8146
8147    default:
8148      if (code == 0)
8149	output_operand_lossage ("invalid expression - try using "
8150				"an output modifier");
8151      else
8152	output_operand_lossage ("invalid expression for output "
8153				"modifier '%c'", code);
8154      break;
8155    }
8156}
8157
8158/* Target hook for assembling integer objects.  We need to define it
   here to work around a bug in some versions of GAS, which couldn't
8160   handle values smaller than INT_MIN when printed in decimal.  */
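/* For example, an aligned 8-byte (const_int -4294967296) is emitted below as
   "\t.quad\t0xffffffff00000000" rather than in decimal.  */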
8161
8162static bool
8163s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
8164{
8165  if (size == 8 && aligned_p
8166      && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
8167    {
8168      fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
8169	       INTVAL (x));
8170      return true;
8171    }
8172  return default_assemble_integer (x, size, aligned_p);
8173}
8174
/* Returns true if register REGNO is used for forming
8176   a memory address in expression X.  */
8177
8178static bool
8179reg_used_in_mem_p (int regno, rtx x)
8180{
8181  enum rtx_code code = GET_CODE (x);
8182  int i, j;
8183  const char *fmt;
8184
8185  if (code == MEM)
8186    {
8187      if (refers_to_regno_p (regno, XEXP (x, 0)))
8188	return true;
8189    }
8190  else if (code == SET
8191	   && GET_CODE (SET_DEST (x)) == PC)
8192    {
8193      if (refers_to_regno_p (regno, SET_SRC (x)))
8194	return true;
8195    }
8196
8197  fmt = GET_RTX_FORMAT (code);
8198  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8199    {
8200      if (fmt[i] == 'e'
8201	  && reg_used_in_mem_p (regno, XEXP (x, i)))
8202	return true;
8203
8204      else if (fmt[i] == 'E')
8205	for (j = 0; j < XVECLEN (x, i); j++)
8206	  if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
8207	    return true;
8208    }
8209  return false;
8210}
8211
8212/* Returns true if expression DEP_RTX sets an address register
8213   used by instruction INSN to address memory.  */
8214
8215static bool
8216addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
8217{
8218  rtx target, pat;
8219
8220  if (NONJUMP_INSN_P (dep_rtx))
8221    dep_rtx = PATTERN (dep_rtx);
8222
8223  if (GET_CODE (dep_rtx) == SET)
8224    {
8225      target = SET_DEST (dep_rtx);
8226      if (GET_CODE (target) == STRICT_LOW_PART)
8227	target = XEXP (target, 0);
8228      while (GET_CODE (target) == SUBREG)
8229	target = SUBREG_REG (target);
8230
8231      if (GET_CODE (target) == REG)
8232	{
8233	  int regno = REGNO (target);
8234
8235	  if (s390_safe_attr_type (insn) == TYPE_LA)
8236	    {
8237	      pat = PATTERN (insn);
8238	      if (GET_CODE (pat) == PARALLEL)
8239		{
8240		  gcc_assert (XVECLEN (pat, 0) == 2);
8241		  pat = XVECEXP (pat, 0, 0);
8242		}
8243	      gcc_assert (GET_CODE (pat) == SET);
8244	      return refers_to_regno_p (regno, SET_SRC (pat));
8245	    }
8246	  else if (get_attr_atype (insn) == ATYPE_AGEN)
8247	    return reg_used_in_mem_p (regno, PATTERN (insn));
8248	}
8249    }
8250  return false;
8251}
8252
/* Return 1 if DEP_INSN sets a register that INSN uses in the agen unit.  */
8254
8255int
8256s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
8257{
8258  rtx dep_rtx = PATTERN (dep_insn);
8259  int i;
8260
8261  if (GET_CODE (dep_rtx) == SET
8262      && addr_generation_dependency_p (dep_rtx, insn))
8263    return 1;
8264  else if (GET_CODE (dep_rtx) == PARALLEL)
8265    {
8266      for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
8267	{
8268	  if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
8269	    return 1;
8270	}
8271    }
8272  return 0;
8273}
8274
8275
/* Update the integer scheduling priority INSN_PRIORITY (INSN).  Increasing
   the priority causes INSN to be scheduled earlier, reducing it causes INSN
   to be scheduled later.
8280
8281   A STD instruction should be scheduled earlier,
8282   in order to use the bypass.  */
8283static int
8284s390_adjust_priority (rtx_insn *insn, int priority)
8285{
8286  if (! INSN_P (insn))
8287    return priority;
8288
8289  if (s390_tune <= PROCESSOR_2064_Z900)
8290    return priority;
8291
8292  switch (s390_safe_attr_type (insn))
8293    {
8294      case TYPE_FSTOREDF:
8295      case TYPE_FSTORESF:
8296	priority = priority << 3;
8297	break;
8298      case TYPE_STORE:
8299      case TYPE_STM:
8300	priority = priority << 1;
8301	break;
8302      default:
8303	break;
8304    }
8305  return priority;
8306}
8307
8308
8309/* The number of instructions that can be issued per cycle.  */
8310
8311static int
8312s390_issue_rate (void)
8313{
8314  switch (s390_tune)
8315    {
8316    case PROCESSOR_2084_Z990:
8317    case PROCESSOR_2094_Z9_109:
8318    case PROCESSOR_2094_Z9_EC:
8319    case PROCESSOR_2817_Z196:
8320      return 3;
8321    case PROCESSOR_2097_Z10:
8322      return 2;
8323    case PROCESSOR_2064_Z900:
8324      /* Starting with EC12 we use the sched_reorder hook to take care
8325	 of instruction dispatch constraints.  The algorithm only
8326	 picks the best instruction and assumes only a single
8327	 instruction gets issued per cycle.  */
8328    case PROCESSOR_2827_ZEC12:
8329    case PROCESSOR_2964_Z13:
8330    case PROCESSOR_3906_Z14:
8331    default:
8332      return 1;
8333    }
8334}
8335
8336static int
8337s390_first_cycle_multipass_dfa_lookahead (void)
8338{
8339  return 4;
8340}
8341
8342static void
8343annotate_constant_pool_refs_1 (rtx *x)
8344{
8345  int i, j;
8346  const char *fmt;
8347
8348  gcc_assert (GET_CODE (*x) != SYMBOL_REF
8349	      || !CONSTANT_POOL_ADDRESS_P (*x));
8350
8351  /* Literal pool references can only occur inside a MEM ...  */
8352  if (GET_CODE (*x) == MEM)
8353    {
8354      rtx memref = XEXP (*x, 0);
8355
8356      if (GET_CODE (memref) == SYMBOL_REF
8357	  && CONSTANT_POOL_ADDRESS_P (memref))
8358	{
8359	  rtx base = cfun->machine->base_reg;
8360	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
8361				     UNSPEC_LTREF);
8362
8363	  *x = replace_equiv_address (*x, addr);
8364	  return;
8365	}
8366
8367      if (GET_CODE (memref) == CONST
8368	  && GET_CODE (XEXP (memref, 0)) == PLUS
8369	  && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
8370	  && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
8371	  && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
8372	{
8373	  HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
8374	  rtx sym = XEXP (XEXP (memref, 0), 0);
8375	  rtx base = cfun->machine->base_reg;
8376	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8377				     UNSPEC_LTREF);
8378
8379	  *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
8380	  return;
8381	}
8382    }
8383
8384  /* ... or a load-address type pattern.  */
8385  if (GET_CODE (*x) == SET)
8386    {
8387      rtx addrref = SET_SRC (*x);
8388
8389      if (GET_CODE (addrref) == SYMBOL_REF
8390	  && CONSTANT_POOL_ADDRESS_P (addrref))
8391	{
8392	  rtx base = cfun->machine->base_reg;
8393	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
8394				     UNSPEC_LTREF);
8395
8396	  SET_SRC (*x) = addr;
8397	  return;
8398	}
8399
8400      if (GET_CODE (addrref) == CONST
8401	  && GET_CODE (XEXP (addrref, 0)) == PLUS
8402	  && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
8403	  && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
8404	  && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
8405	{
8406	  HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
8407	  rtx sym = XEXP (XEXP (addrref, 0), 0);
8408	  rtx base = cfun->machine->base_reg;
8409	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8410				     UNSPEC_LTREF);
8411
8412	  SET_SRC (*x) = plus_constant (Pmode, addr, off);
8413	  return;
8414	}
8415    }
8416
8417  fmt = GET_RTX_FORMAT (GET_CODE (*x));
8418  for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8419    {
8420      if (fmt[i] == 'e')
8421	{
8422	  annotate_constant_pool_refs_1 (&XEXP (*x, i));
8423	}
8424      else if (fmt[i] == 'E')
8425	{
8426	  for (j = 0; j < XVECLEN (*x, i); j++)
8427	    annotate_constant_pool_refs_1 (&XVECEXP (*x, i, j));
8428	}
8429    }
8430}
8431
8432/* Annotate every literal pool reference in INSN by an UNSPEC_LTREF expression.
8433   Fix up MEMs as required.
8434   Skip insns which support relative addressing, because they do not use a base
8435   register.  */
8436
8437static void
8438annotate_constant_pool_refs (rtx_insn *insn)
8439{
8440  if (s390_safe_relative_long_p (insn))
8441    return;
8442  annotate_constant_pool_refs_1 (&PATTERN (insn));
8443}
8444
8445static void
8446find_constant_pool_ref_1 (rtx x, rtx *ref)
8447{
8448  int i, j;
8449  const char *fmt;
8450
  /* Skip POOL_ENTRY insns; the pool constants themselves are not
     scanned for references.  */
8452  if (GET_CODE (x) == UNSPEC_VOLATILE
8453      && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8454    return;
8455
8456  gcc_assert (GET_CODE (x) != SYMBOL_REF
8457	      || !CONSTANT_POOL_ADDRESS_P (x));
8458
8459  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8460    {
8461      rtx sym = XVECEXP (x, 0, 0);
8462      gcc_assert (GET_CODE (sym) == SYMBOL_REF
8463		  && CONSTANT_POOL_ADDRESS_P (sym));
8464
8465      if (*ref == NULL_RTX)
8466	*ref = sym;
8467      else
8468	gcc_assert (*ref == sym);
8469
8470      return;
8471    }
8472
8473  fmt = GET_RTX_FORMAT (GET_CODE (x));
8474  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8475    {
8476      if (fmt[i] == 'e')
8477	{
8478	  find_constant_pool_ref_1 (XEXP (x, i), ref);
8479	}
8480      else if (fmt[i] == 'E')
8481	{
8482	  for (j = 0; j < XVECLEN (x, i); j++)
8483	    find_constant_pool_ref_1 (XVECEXP (x, i, j), ref);
8484	}
8485    }
8486}
8487
8488/* Find an annotated literal pool symbol referenced in INSN,
8489   and store it at REF.  Will abort if INSN contains references to
8490   more than one such pool symbol; multiple references to the same
8491   symbol are allowed, however.
8492
8493   The rtx pointed to by REF must be initialized to NULL_RTX
8494   by the caller before calling this routine.
8495
8496   Skip insns which support relative addressing, because they do not use a base
8497   register.  */
8498
8499static void
8500find_constant_pool_ref (rtx_insn *insn, rtx *ref)
8501{
8502  if (s390_safe_relative_long_p (insn))
8503    return;
8504  find_constant_pool_ref_1 (PATTERN (insn), ref);
8505}
8506
8507static void
8508replace_constant_pool_ref_1 (rtx *x, rtx ref, rtx offset)
8509{
8510  int i, j;
8511  const char *fmt;
8512
8513  gcc_assert (*x != ref);
8514
8515  if (GET_CODE (*x) == UNSPEC
8516      && XINT (*x, 1) == UNSPEC_LTREF
8517      && XVECEXP (*x, 0, 0) == ref)
8518    {
8519      *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8520      return;
8521    }
8522
8523  if (GET_CODE (*x) == PLUS
8524      && GET_CODE (XEXP (*x, 1)) == CONST_INT
8525      && GET_CODE (XEXP (*x, 0)) == UNSPEC
8526      && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8527      && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8528    {
8529      rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8530      *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8531      return;
8532    }
8533
8534  fmt = GET_RTX_FORMAT (GET_CODE (*x));
8535  for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8536    {
8537      if (fmt[i] == 'e')
8538	{
8539	  replace_constant_pool_ref_1 (&XEXP (*x, i), ref, offset);
8540	}
8541      else if (fmt[i] == 'E')
8542	{
8543	  for (j = 0; j < XVECLEN (*x, i); j++)
8544	    replace_constant_pool_ref_1 (&XVECEXP (*x, i, j), ref, offset);
8545	}
8546    }
8547}
8548
8549/* Replace every reference to the annotated literal pool
8550   symbol REF in INSN by its base plus OFFSET.
8551   Skip insns which support relative addressing, because they do not use a base
8552   register.  */
8553
8554static void
8555replace_constant_pool_ref (rtx_insn *insn, rtx ref, rtx offset)
8556{
8557  if (s390_safe_relative_long_p (insn))
8558    return;
8559  replace_constant_pool_ref_1 (&PATTERN (insn), ref, offset);
8560}
8561
8562/* We keep a list of constants which we have to add to internal
8563   constant tables in the middle of large functions.  */
8564
8565#define NR_C_MODES 32
8566machine_mode constant_modes[NR_C_MODES] =
8567{
8568  TFmode, TImode, TDmode,
8569  V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8570  V4SFmode, V2DFmode, V1TFmode,
8571  DFmode, DImode, DDmode,
8572  V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8573  SFmode, SImode, SDmode,
8574  V4QImode, V2HImode, V1SImode,  V1SFmode,
8575  HImode,
8576  V2QImode, V1HImode,
8577  QImode,
8578  V1QImode
8579};
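
/* Note that the table above is ordered by decreasing size; s390_dump_pool
   relies on this ordering when it emits the constants in descending alignment
   requirement order.  */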
8580
8581struct constant
8582{
8583  struct constant *next;
8584  rtx value;
8585  rtx_code_label *label;
8586};
8587
8588struct constant_pool
8589{
8590  struct constant_pool *next;
8591  rtx_insn *first_insn;
8592  rtx_insn *pool_insn;
8593  bitmap insns;
8594  rtx_insn *emit_pool_after;
8595
8596  struct constant *constants[NR_C_MODES];
8597  struct constant *execute;
8598  rtx_code_label *label;
8599  int size;
8600};
8601
8602/* Allocate new constant_pool structure.  */
8603
8604static struct constant_pool *
8605s390_alloc_pool (void)
8606{
8607  struct constant_pool *pool;
8608  int i;
8609
8610  pool = (struct constant_pool *) xmalloc (sizeof *pool);
8611  pool->next = NULL;
8612  for (i = 0; i < NR_C_MODES; i++)
8613    pool->constants[i] = NULL;
8614
8615  pool->execute = NULL;
8616  pool->label = gen_label_rtx ();
8617  pool->first_insn = NULL;
8618  pool->pool_insn = NULL;
8619  pool->insns = BITMAP_ALLOC (NULL);
8620  pool->size = 0;
8621  pool->emit_pool_after = NULL;
8622
8623  return pool;
8624}
8625
8626/* Create new constant pool covering instructions starting at INSN
8627   and chain it to the end of POOL_LIST.  */
8628
8629static struct constant_pool *
8630s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8631{
8632  struct constant_pool *pool, **prev;
8633
8634  pool = s390_alloc_pool ();
8635  pool->first_insn = insn;
8636
8637  for (prev = pool_list; *prev; prev = &(*prev)->next)
8638    ;
8639  *prev = pool;
8640
8641  return pool;
8642}
8643
8644/* End range of instructions covered by POOL at INSN and emit
8645   placeholder insn representing the pool.  */
8646
8647static void
8648s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8649{
8650  rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8651
8652  if (!insn)
8653    insn = get_last_insn ();
8654
8655  pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8656  INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8657}
8658
8659/* Add INSN to the list of insns covered by POOL.  */
8660
8661static void
8662s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8663{
8664  bitmap_set_bit (pool->insns, INSN_UID (insn));
8665}
8666
8667/* Return pool out of POOL_LIST that covers INSN.  */
8668
8669static struct constant_pool *
8670s390_find_pool (struct constant_pool *pool_list, rtx insn)
8671{
8672  struct constant_pool *pool;
8673
8674  for (pool = pool_list; pool; pool = pool->next)
8675    if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8676      break;
8677
8678  return pool;
8679}
8680
8681/* Add constant VAL of mode MODE to the constant pool POOL.  */
8682
8683static void
8684s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8685{
8686  struct constant *c;
8687  int i;
8688
8689  for (i = 0; i < NR_C_MODES; i++)
8690    if (constant_modes[i] == mode)
8691      break;
8692  gcc_assert (i != NR_C_MODES);
8693
8694  for (c = pool->constants[i]; c != NULL; c = c->next)
8695    if (rtx_equal_p (val, c->value))
8696      break;
8697
8698  if (c == NULL)
8699    {
8700      c = (struct constant *) xmalloc (sizeof *c);
8701      c->value = val;
8702      c->label = gen_label_rtx ();
8703      c->next = pool->constants[i];
8704      pool->constants[i] = c;
8705      pool->size += GET_MODE_SIZE (mode);
8706    }
8707}
8708
8709/* Return an rtx that represents the offset of X from the start of
8710   pool POOL.  */
8711
8712static rtx
8713s390_pool_offset (struct constant_pool *pool, rtx x)
8714{
8715  rtx label;
8716
8717  label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8718  x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8719		      UNSPEC_POOL_OFFSET);
8720  return gen_rtx_CONST (GET_MODE (x), x);
8721}
8722
8723/* Find constant VAL of mode MODE in the constant pool POOL.
8724   Return an RTX describing the distance from the start of
8725   the pool to the location of the new constant.  */
8726
8727static rtx
8728s390_find_constant (struct constant_pool *pool, rtx val,
8729		    machine_mode mode)
8730{
8731  struct constant *c;
8732  int i;
8733
8734  for (i = 0; i < NR_C_MODES; i++)
8735    if (constant_modes[i] == mode)
8736      break;
8737  gcc_assert (i != NR_C_MODES);
8738
8739  for (c = pool->constants[i]; c != NULL; c = c->next)
8740    if (rtx_equal_p (val, c->value))
8741      break;
8742
8743  gcc_assert (c);
8744
8745  return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8746}
8747
8748/* Check whether INSN is an execute.  Return the label_ref to its
8749   execute target template if so, NULL_RTX otherwise.  */
8750
8751static rtx
8752s390_execute_label (rtx insn)
8753{
8754  if (INSN_P (insn)
8755      && GET_CODE (PATTERN (insn)) == PARALLEL
8756      && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8757      && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE
8758	  || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP))
8759    {
8760      if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8761	return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8762      else
8763	{
8764	  gcc_assert (JUMP_P (insn));
8765	  /* For jump insns as execute target:
8766	     - There is one operand less in the parallel (the
8767	       modification register of the execute is always 0).
8768	     - The execute target label is wrapped into an
8769	       if_then_else in order to hide it from jump analysis.  */
8770	  return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0);
8771	}
8772    }
8773
8774  return NULL_RTX;
8775}
8776
8777/* Find execute target for INSN in the constant pool POOL.
8778   Return an RTX describing the distance from the start of
8779   the pool to the location of the execute target.  */
8780
8781static rtx
8782s390_find_execute (struct constant_pool *pool, rtx insn)
8783{
8784  struct constant *c;
8785
8786  for (c = pool->execute; c != NULL; c = c->next)
8787    if (INSN_UID (insn) == INSN_UID (c->value))
8788      break;
8789
8790  gcc_assert (c);
8791
8792  return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8793}
8794
8795/* For an execute INSN, extract the execute target template.  */
8796
8797static rtx
8798s390_execute_target (rtx insn)
8799{
8800  rtx pattern = PATTERN (insn);
8801  gcc_assert (s390_execute_label (insn));
8802
8803  if (XVECLEN (pattern, 0) == 2)
8804    {
8805      pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8806    }
8807  else
8808    {
8809      rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8810      int i;
8811
8812      for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8813	RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8814
8815      pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8816    }
8817
8818  return pattern;
8819}
8820
8821/* Indicate that INSN cannot be duplicated.  This is the case for
8822   execute insns that carry a unique label.  */
8823
8824static bool
8825s390_cannot_copy_insn_p (rtx_insn *insn)
8826{
8827  rtx label = s390_execute_label (insn);
8828  return label && label != const0_rtx;
8829}
8830
8831/* Dump out the constants in POOL.  If REMOTE_LABEL is true,
8832   do not emit the pool base label.  */
8833
8834static void
8835s390_dump_pool (struct constant_pool *pool, bool remote_label)
8836{
8837  struct constant *c;
8838  rtx_insn *insn = pool->pool_insn;
8839  int i;
8840
8841  /* Switch to rodata section.  */
8842  insn = emit_insn_after (gen_pool_section_start (), insn);
8843  INSN_ADDRESSES_NEW (insn, -1);
8844
8845  /* Ensure minimum pool alignment.  */
8846  insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8847  INSN_ADDRESSES_NEW (insn, -1);
8848
8849  /* Emit pool base label.  */
8850  if (!remote_label)
8851    {
8852      insn = emit_label_after (pool->label, insn);
8853      INSN_ADDRESSES_NEW (insn, -1);
8854    }
8855
8856  /* Dump constants in descending alignment requirement order,
8857     ensuring proper alignment for every constant.  */
8858  for (i = 0; i < NR_C_MODES; i++)
8859    for (c = pool->constants[i]; c; c = c->next)
8860      {
8861	/* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references.  */
8862	rtx value = copy_rtx (c->value);
8863	if (GET_CODE (value) == CONST
8864	    && GET_CODE (XEXP (value, 0)) == UNSPEC
8865	    && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8866	    && XVECLEN (XEXP (value, 0), 0) == 1)
8867	  value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8868
8869	insn = emit_label_after (c->label, insn);
8870	INSN_ADDRESSES_NEW (insn, -1);
8871
8872	value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8873					 gen_rtvec (1, value),
8874					 UNSPECV_POOL_ENTRY);
8875	insn = emit_insn_after (value, insn);
8876	INSN_ADDRESSES_NEW (insn, -1);
8877      }
8878
8879  /* Ensure minimum alignment for instructions.  */
8880  insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8881  INSN_ADDRESSES_NEW (insn, -1);
8882
8883  /* Output in-pool execute template insns.  */
8884  for (c = pool->execute; c; c = c->next)
8885    {
8886      insn = emit_label_after (c->label, insn);
8887      INSN_ADDRESSES_NEW (insn, -1);
8888
8889      insn = emit_insn_after (s390_execute_target (c->value), insn);
8890      INSN_ADDRESSES_NEW (insn, -1);
8891    }
8892
8893  /* Switch back to previous section.  */
8894  insn = emit_insn_after (gen_pool_section_end (), insn);
8895  INSN_ADDRESSES_NEW (insn, -1);
8896
8897  insn = emit_barrier_after (insn);
8898  INSN_ADDRESSES_NEW (insn, -1);
8899
8900  /* Remove placeholder insn.  */
8901  remove_insn (pool->pool_insn);
8902}
8903
8904/* Free all memory used by POOL.  */
8905
8906static void
8907s390_free_pool (struct constant_pool *pool)
8908{
8909  struct constant *c, *next;
8910  int i;
8911
8912  for (i = 0; i < NR_C_MODES; i++)
8913    for (c = pool->constants[i]; c; c = next)
8914      {
8915	next = c->next;
8916	free (c);
8917      }
8918
8919  for (c = pool->execute; c; c = next)
8920    {
8921      next = c->next;
8922      free (c);
8923    }
8924
8925  BITMAP_FREE (pool->insns);
8926  free (pool);
8927}
8928
8929
8930/* Collect main literal pool.  Return NULL on overflow.  */
8931
8932static struct constant_pool *
8933s390_mainpool_start (void)
8934{
8935  struct constant_pool *pool;
8936  rtx_insn *insn;
8937
8938  pool = s390_alloc_pool ();
8939
8940  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8941    {
8942      if (NONJUMP_INSN_P (insn)
8943	  && GET_CODE (PATTERN (insn)) == SET
8944	  && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8945	  && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8946	{
8947	  /* There might be two main_pool instructions if base_reg
8948	     is call-clobbered; one for shrink-wrapped code and one
8949	     for the rest.  We want to keep the first.  */
8950	  if (pool->pool_insn)
8951	    {
8952	      insn = PREV_INSN (insn);
8953	      delete_insn (NEXT_INSN (insn));
8954	      continue;
8955	    }
8956	  pool->pool_insn = insn;
8957	}
8958
8959      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8960	{
8961	  rtx pool_ref = NULL_RTX;
8962	  find_constant_pool_ref (insn, &pool_ref);
8963	  if (pool_ref)
8964	    {
8965	      rtx constant = get_pool_constant (pool_ref);
8966	      machine_mode mode = get_pool_mode (pool_ref);
8967	      s390_add_constant (pool, constant, mode);
8968	    }
8969	}
8970
      /* If hot/cold partitioning is enabled, we have to make sure that
	 the literal pool is emitted in the same section where the
	 initialization of the literal pool base pointer takes place.
	 emit_pool_after is only used in the non-overflow case on
	 non-Z CPUs where we can emit the literal pool at the end of
	 the function body within the text section.  */
8977      if (NOTE_P (insn)
8978	  && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8979	  && !pool->emit_pool_after)
8980	pool->emit_pool_after = PREV_INSN (insn);
8981    }
8982
8983  gcc_assert (pool->pool_insn || pool->size == 0);
8984
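  /* A pool base register with 12-bit displacements can address at most
     4096 bytes, so bigger pools have to be split into chunks.  */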
8985  if (pool->size >= 4096)
8986    {
8987      /* We're going to chunkify the pool, so remove the main
8988	 pool placeholder insn.  */
8989      remove_insn (pool->pool_insn);
8990
8991      s390_free_pool (pool);
8992      pool = NULL;
8993    }
8994
  /* If the function ends with the section where the literal pool
     should be emitted, set the marker to its end.  */
8997  if (pool && !pool->emit_pool_after)
8998    pool->emit_pool_after = get_last_insn ();
8999
9000  return pool;
9001}
9002
9003/* POOL holds the main literal pool as collected by s390_mainpool_start.
9004   Modify the current function to output the pool constants as well as
9005   the pool register setup instruction.  */
9006
9007static void
9008s390_mainpool_finish (struct constant_pool *pool)
9009{
9010  rtx base_reg = cfun->machine->base_reg;
9011  rtx set;
9012  rtx_insn *insn;
9013
9014  /* If the pool is empty, we're done.  */
9015  if (pool->size == 0)
9016    {
9017      /* We don't actually need a base register after all.  */
9018      cfun->machine->base_reg = NULL_RTX;
9019
9020      if (pool->pool_insn)
9021	remove_insn (pool->pool_insn);
9022      s390_free_pool (pool);
9023      return;
9024    }
9025
9026  /* We need correct insn addresses.  */
9027  shorten_branches (get_insns ());
9028
9029  /* Use a LARL to load the pool register.  The pool is
9030     located in the .rodata section, so we emit it after the function.  */
9031  set = gen_main_base_64 (base_reg, pool->label);
9032  insn = emit_insn_after (set, pool->pool_insn);
9033  INSN_ADDRESSES_NEW (insn, -1);
9034  remove_insn (pool->pool_insn);
9035
9036  insn = get_last_insn ();
9037  pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9038  INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9039
9040  s390_dump_pool (pool, 0);
9041
9042  /* Replace all literal pool references.  */
9043
9044  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
9045    {
9046      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9047	{
9048	  rtx addr, pool_ref = NULL_RTX;
9049	  find_constant_pool_ref (insn, &pool_ref);
9050	  if (pool_ref)
9051	    {
9052	      if (s390_execute_label (insn))
9053		addr = s390_find_execute (pool, insn);
9054	      else
9055		addr = s390_find_constant (pool, get_pool_constant (pool_ref),
9056						 get_pool_mode (pool_ref));
9057
9058	      replace_constant_pool_ref (insn, pool_ref, addr);
9059	      INSN_CODE (insn) = -1;
9060	    }
9061	}
9062    }
9063
9064
9065  /* Free the pool.  */
9066  s390_free_pool (pool);
9067}
9068
9069/* Chunkify the literal pool.  */
9070
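/* Lower and upper bounds (in bytes) for the size of an individual
   literal pool chunk; both are kept below the 4 KB range addressable
   from a single pool base register.  */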
9071#define S390_POOL_CHUNK_MIN	0xc00
9072#define S390_POOL_CHUNK_MAX	0xe00
9073
9074static struct constant_pool *
9075s390_chunkify_start (void)
9076{
9077  struct constant_pool *curr_pool = NULL, *pool_list = NULL;
9078  bitmap far_labels;
9079  rtx_insn *insn;
9080
9081  /* We need correct insn addresses.  */
9082
9083  shorten_branches (get_insns ());
9084
9085  /* Scan all insns and move literals to pool chunks.  */
9086
9087  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9088    {
9089      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9090	{
9091	  rtx pool_ref = NULL_RTX;
9092	  find_constant_pool_ref (insn, &pool_ref);
9093	  if (pool_ref)
9094	    {
9095	      rtx constant = get_pool_constant (pool_ref);
9096	      machine_mode mode = get_pool_mode (pool_ref);
9097
9098	      if (!curr_pool)
9099		curr_pool = s390_start_pool (&pool_list, insn);
9100
9101	      s390_add_constant (curr_pool, constant, mode);
9102	      s390_add_pool_insn (curr_pool, insn);
9103	    }
9104	}
9105
9106      if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
9107	{
9108	  if (curr_pool)
9109	    s390_add_pool_insn (curr_pool, insn);
9110	}
9111
9112      if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
9113	continue;
9114
9115      if (!curr_pool
9116	  || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
9117	  || INSN_ADDRESSES (INSN_UID (insn)) == -1)
9118	continue;
9119
9120      if (curr_pool->size < S390_POOL_CHUNK_MAX)
9121	continue;
9122
9123      s390_end_pool (curr_pool, NULL);
9124      curr_pool = NULL;
9125    }
9126
9127  if (curr_pool)
9128    s390_end_pool (curr_pool, NULL);
9129
9130  /* Find all labels that are branched into
9131     from an insn belonging to a different chunk.  */
9132
9133  far_labels = BITMAP_ALLOC (NULL);
9134
9135  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9136    {
9137      rtx_jump_table_data *table;
9138
      /* Labels marked with LABEL_PRESERVE_P can be targets
	 of non-local jumps, so we have to mark them.
	 The same holds for named labels.

	 Don't do that, however, if it is the label before
	 a jump table.  */
9145
9146      if (LABEL_P (insn)
9147	  && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9148	{
9149	  rtx_insn *vec_insn = NEXT_INSN (insn);
9150	  if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9151	    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9152	}
9153      /* Check potential targets in a table jump (casesi_jump).  */
9154      else if (tablejump_p (insn, NULL, &table))
9155	{
9156	  rtx vec_pat = PATTERN (table);
9157	  int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9158
9159	  for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9160	    {
9161	      rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9162
9163	      if (s390_find_pool (pool_list, label)
9164		  != s390_find_pool (pool_list, insn))
9165		bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9166	    }
9167	}
9168      /* If we have a direct jump (conditional or unconditional),
9169	 check all potential targets.  */
9170      else if (JUMP_P (insn))
9171	{
9172	  rtx pat = PATTERN (insn);
9173
9174	  if (GET_CODE (pat) == PARALLEL)
9175	    pat = XVECEXP (pat, 0, 0);
9176
9177	  if (GET_CODE (pat) == SET)
9178	    {
9179	      rtx label = JUMP_LABEL (insn);
9180	      if (label && !ANY_RETURN_P (label))
9181		{
9182		  if (s390_find_pool (pool_list, label)
9183		      != s390_find_pool (pool_list, insn))
9184		    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9185		}
9186	    }
9187	}
9188    }
9189
9190  /* Insert base register reload insns before every pool.  */
9191
9192  for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9193    {
9194      rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9195					 curr_pool->label);
9196      rtx_insn *insn = curr_pool->first_insn;
9197      INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9198    }
9199
9200  /* Insert base register reload insns at every far label.  */
9201
9202  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9203    if (LABEL_P (insn)
9204	&& bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9205      {
9206	struct constant_pool *pool = s390_find_pool (pool_list, insn);
9207	if (pool)
9208	  {
9209	    rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9210					       pool->label);
9211	    INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9212	  }
9213      }
9214
9215
9216  BITMAP_FREE (far_labels);
9217
9218
9219  /* Recompute insn addresses.  */
9220
9221  init_insn_lengths ();
9222  shorten_branches (get_insns ());
9223
9224  return pool_list;
9225}
9226
9227/* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9228   After we have decided to use this list, finish implementing
9229   all changes to the current function as required.  */
9230
9231static void
9232s390_chunkify_finish (struct constant_pool *pool_list)
9233{
9234  struct constant_pool *curr_pool = NULL;
9235  rtx_insn *insn;
9236
9237
9238  /* Replace all literal pool references.  */
9239
9240  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9241    {
9242      curr_pool = s390_find_pool (pool_list, insn);
9243      if (!curr_pool)
9244	continue;
9245
9246      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9247	{
9248	  rtx addr, pool_ref = NULL_RTX;
9249	  find_constant_pool_ref (insn, &pool_ref);
9250	  if (pool_ref)
9251	    {
9252	      if (s390_execute_label (insn))
9253		addr = s390_find_execute (curr_pool, insn);
9254	      else
9255		addr = s390_find_constant (curr_pool,
9256					   get_pool_constant (pool_ref),
9257					   get_pool_mode (pool_ref));
9258
9259	      replace_constant_pool_ref (insn, pool_ref, addr);
9260	      INSN_CODE (insn) = -1;
9261	    }
9262	}
9263    }
9264
9265  /* Dump out all literal pools.  */
9266
9267  for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9268    s390_dump_pool (curr_pool, 0);
9269
9270  /* Free pool list.  */
9271
9272  while (pool_list)
9273    {
9274      struct constant_pool *next = pool_list->next;
9275      s390_free_pool (pool_list);
9276      pool_list = next;
9277    }
9278}
9279
9280/* Output the constant pool entry EXP in mode MODE with alignment ALIGN.  */
9281
9282void
9283s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9284{
9285  switch (GET_MODE_CLASS (mode))
9286    {
9287    case MODE_FLOAT:
9288    case MODE_DECIMAL_FLOAT:
9289      gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9290
9291      assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
9292		     as_a <scalar_float_mode> (mode), align);
9293      break;
9294
9295    case MODE_INT:
9296      assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9297      mark_symbol_refs_as_used (exp);
9298      break;
9299
9300    case MODE_VECTOR_INT:
9301    case MODE_VECTOR_FLOAT:
9302      {
9303	int i;
9304	machine_mode inner_mode;
9305	gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9306
9307	inner_mode = GET_MODE_INNER (GET_MODE (exp));
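	/* The first element is emitted with the alignment requested for
	   the whole entry; the remaining elements only need the natural
	   alignment of the element mode.  */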
9308	for (i = 0; i < XVECLEN (exp, 0); i++)
9309	  s390_output_pool_entry (XVECEXP (exp, 0, i),
9310				  inner_mode,
9311				  i == 0
9312				  ? align
9313				  : GET_MODE_BITSIZE (inner_mode));
9314      }
9315      break;
9316
9317    default:
9318      gcc_unreachable ();
9319    }
9320}
9321
9322
9323/* Return an RTL expression representing the value of the return address
9324   for the frame COUNT steps up from the current frame.  FRAME is the
9325   frame pointer of that frame.  */
9326
9327rtx
9328s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9329{
9330  int offset;
9331  rtx addr;
9332
9333  /* Without backchain, we fail for all but the current frame.  */
9334
9335  if (!TARGET_BACKCHAIN && count > 0)
9336    return NULL_RTX;
9337
9338  /* For the current frame, we need to make sure the initial
9339     value of RETURN_REGNUM is actually saved.  */
9340
9341  if (count == 0)
9342    return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9343
9344  if (TARGET_PACKED_STACK)
9345    offset = -2 * UNITS_PER_LONG;
9346  else
9347    offset = RETURN_REGNUM * UNITS_PER_LONG;
9348
9349  addr = plus_constant (Pmode, frame, offset);
9350  addr = memory_address (Pmode, addr);
9351  return gen_rtx_MEM (Pmode, addr);
9352}
9353
9354/* Return an RTL expression representing the back chain stored in
9355   the current stack frame.  */
9356
9357rtx
9358s390_back_chain_rtx (void)
9359{
9360  rtx chain;
9361
9362  gcc_assert (TARGET_BACKCHAIN);
9363
9364  if (TARGET_PACKED_STACK)
9365    chain = plus_constant (Pmode, stack_pointer_rtx,
9366			   STACK_POINTER_OFFSET - UNITS_PER_LONG);
9367  else
9368    chain = stack_pointer_rtx;
9369
9370  chain = gen_rtx_MEM (Pmode, chain);
9371  return chain;
9372}
9373
/* Find the first call-clobbered register unused in a function.
   This could be used as a base register in a leaf function
   or for holding the return address before the epilogue.  */
9377
9378static int
9379find_unused_clobbered_reg (void)
9380{
9381  int i;
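  /* GPRs 0 through 5 are the call-clobbered registers not otherwise
     reserved by the ABI.  */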
9382  for (i = 0; i < 6; i++)
9383    if (!df_regs_ever_live_p (i))
9384      return i;
9385  return 0;
9386}
9387
9388
9389/* Helper function for s390_regs_ever_clobbered.  Sets the fields in DATA for all
9390   clobbered hard regs in SETREG.  */
9391
9392static void
9393s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9394{
9395  char *regs_ever_clobbered = (char *)data;
9396  unsigned int i, regno;
9397  machine_mode mode = GET_MODE (setreg);
9398
9399  if (GET_CODE (setreg) == SUBREG)
9400    {
9401      rtx inner = SUBREG_REG (setreg);
9402      if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9403	return;
9404      regno = subreg_regno (setreg);
9405    }
9406  else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9407    regno = REGNO (setreg);
9408  else
9409    return;
9410
9411  for (i = regno;
9412       i < end_hard_regno (mode, regno);
9413       i++)
9414    regs_ever_clobbered[i] = 1;
9415}
9416
/* Walks through all basic blocks of the current function looking
   for clobbered hard regs using s390_reg_clobbered_rtx.  The fields
   of the passed char array REGS_EVER_CLOBBERED are set to one for
   each of those regs.  */
9421
9422static void
9423s390_regs_ever_clobbered (char regs_ever_clobbered[])
9424{
9425  basic_block cur_bb;
9426  rtx_insn *cur_insn;
9427  unsigned int i;
9428
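  /* Only the first 32 hard registers (the GPRs and FPRs) are tracked
     here.  */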
9429  memset (regs_ever_clobbered, 0, 32);
9430
9431  /* For non-leaf functions we have to consider all call clobbered regs to be
9432     clobbered.  */
9433  if (!crtl->is_leaf)
9434    {
9435      for (i = 0; i < 32; i++)
9436	regs_ever_clobbered[i] = call_used_regs[i];
9437    }
9438
9439  /* Make the "magic" eh_return registers live if necessary.  For regs_ever_live
9440     this work is done by liveness analysis (mark_regs_live_at_end).
9441     Special care is needed for functions containing landing pads.  Landing pads
9442     may use the eh registers, but the code which sets these registers is not
9443     contained in that function.  Hence s390_regs_ever_clobbered is not able to
9444     deal with this automatically.  */
9445  if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9446    for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9447      if (crtl->calls_eh_return
9448	  || (cfun->machine->has_landing_pad_p
9449	      && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9450	regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9451
9452  /* For nonlocal gotos all call-saved registers have to be saved.
9453     This flag is also set for the unwinding code in libgcc.
9454     See expand_builtin_unwind_init.  For regs_ever_live this is done by
9455     reload.  */
9456  if (crtl->saves_all_registers)
9457    for (i = 0; i < 32; i++)
9458      if (!call_used_regs[i])
9459	regs_ever_clobbered[i] = 1;
9460
9461  FOR_EACH_BB_FN (cur_bb, cfun)
9462    {
9463      FOR_BB_INSNS (cur_bb, cur_insn)
9464	{
9465	  rtx pat;
9466
9467	  if (!INSN_P (cur_insn))
9468	    continue;
9469
9470	  pat = PATTERN (cur_insn);
9471
9472	  /* Ignore GPR restore insns.  */
9473	  if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9474	    {
9475	      if (GET_CODE (pat) == SET
9476		  && GENERAL_REG_P (SET_DEST (pat)))
9477		{
9478		  /* lgdr  */
9479		  if (GET_MODE (SET_SRC (pat)) == DImode
9480		      && FP_REG_P (SET_SRC (pat)))
9481		    continue;
9482
9483		  /* l / lg  */
9484		  if (GET_CODE (SET_SRC (pat)) == MEM)
9485		    continue;
9486		}
9487
9488	      /* lm / lmg */
9489	      if (GET_CODE (pat) == PARALLEL
9490		  && load_multiple_operation (pat, VOIDmode))
9491		continue;
9492	    }
9493
9494	  note_stores (cur_insn,
9495		       s390_reg_clobbered_rtx,
9496		       regs_ever_clobbered);
9497	}
9498    }
9499}
9500
9501/* Determine the frame area which actually has to be accessed
9502   in the function epilogue. The values are stored at the
9503   given pointers AREA_BOTTOM (address of the lowest used stack
9504   address) and AREA_TOP (address of the first item which does
9505   not belong to the stack frame).  */
9506
9507static void
9508s390_frame_area (int *area_bottom, int *area_top)
9509{
9510  int b, t;
9511
9512  b = INT_MAX;
9513  t = INT_MIN;
9514
9515  if (cfun_frame_layout.first_restore_gpr != -1)
9516    {
9517      b = (cfun_frame_layout.gprs_offset
9518	   + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9519      t = b + (cfun_frame_layout.last_restore_gpr
9520	       - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9521    }
9522
9523  if (TARGET_64BIT && cfun_save_high_fprs_p)
9524    {
9525      b = MIN (b, cfun_frame_layout.f8_offset);
9526      t = MAX (t, (cfun_frame_layout.f8_offset
9527		   + cfun_frame_layout.high_fprs * 8));
9528    }
9529
9530  if (!TARGET_64BIT)
9531    {
9532      if (cfun_fpr_save_p (FPR4_REGNUM))
9533	{
9534	  b = MIN (b, cfun_frame_layout.f4_offset);
9535	  t = MAX (t, cfun_frame_layout.f4_offset + 8);
9536	}
9537      if (cfun_fpr_save_p (FPR6_REGNUM))
9538	{
9539	  b = MIN (b, cfun_frame_layout.f4_offset + 8);
9540	  t = MAX (t, cfun_frame_layout.f4_offset + 16);
9541	}
9542    }
9543  *area_bottom = b;
9544  *area_top = t;
}

/* Update gpr_save_slots in the frame layout trying to make use of
9547   FPRs as GPR save slots.
9548   This is a helper routine of s390_register_info.  */
9549
9550static void
9551s390_register_info_gprtofpr ()
9552{
9553  int save_reg_slot = FPR0_REGNUM;
9554  int i, j;
9555
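  /* Saving GPRs into FPRs relies on the ldgr/lgdr instructions which
     are only available with z10 and newer.  It is also restricted to
     leaf functions since the call-clobbered FPRs used as save slots
     would not survive a call.  */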
9556  if (TARGET_TPF || !TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9557    return;
9558
9559  /* builtin_eh_return needs to be able to modify the return address
9560     on the stack.  It could also adjust the FPR save slot instead but
9561     is it worth the trouble?!  */
9562  if (crtl->calls_eh_return)
9563    return;
9564
9565  for (i = 15; i >= 6; i--)
9566    {
9567      if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9568	continue;
9569
9570      /* Advance to the next FP register which can be used as a
9571	 GPR save slot.  */
9572      while ((!call_used_regs[save_reg_slot]
9573	      || df_regs_ever_live_p (save_reg_slot)
9574	      || cfun_fpr_save_p (save_reg_slot))
9575	     && FP_REGNO_P (save_reg_slot))
9576	save_reg_slot++;
9577      if (!FP_REGNO_P (save_reg_slot))
9578	{
9579	  /* We only want to use ldgr/lgdr if we can get rid of
9580	     stm/lm entirely.  So undo the gpr slot allocation in
9581	     case we ran out of FPR save slots.  */
9582	  for (j = 6; j <= 15; j++)
9583	    if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9584	      cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9585	  break;
9586	}
9587      cfun_gpr_save_slot (i) = save_reg_slot++;
9588    }
9589}
9590
9591/* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9592   stdarg.
9593   This is a helper routine for s390_register_info.  */
9594
9595static void
9596s390_register_info_stdarg_fpr ()
9597{
9598  int i;
9599  int min_fpr;
9600  int max_fpr;
9601
  /* Save the FP argument regs for stdarg: f0 and f2 for 31 bit,
     f0-f4 for 64 bit.  */
9604  if (!cfun->stdarg
9605      || !TARGET_HARD_FLOAT
9606      || !cfun->va_list_fpr_size
9607      || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9608    return;
9609
9610  min_fpr = crtl->args.info.fprs;
9611  max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9612  if (max_fpr >= FP_ARG_NUM_REG)
9613    max_fpr = FP_ARG_NUM_REG - 1;
9614
9615  /* FPR argument regs start at f0.  */
9616  min_fpr += FPR0_REGNUM;
9617  max_fpr += FPR0_REGNUM;
9618
9619  for (i = min_fpr; i <= max_fpr; i++)
9620    cfun_set_fpr_save (i);
9621}
9622
9623/* Reserve the GPR save slots for GPRs which need to be saved due to
9624   stdarg.
9625   This is a helper routine for s390_register_info.  */
9626
9627static void
9628s390_register_info_stdarg_gpr ()
9629{
9630  int i;
9631  int min_gpr;
9632  int max_gpr;
9633
9634  if (!cfun->stdarg
9635      || !cfun->va_list_gpr_size
9636      || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9637    return;
9638
9639  min_gpr = crtl->args.info.gprs;
9640  max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9641  if (max_gpr >= GP_ARG_NUM_REG)
9642    max_gpr = GP_ARG_NUM_REG - 1;
9643
9644  /* GPR argument regs start at r2.  */
9645  min_gpr += GPR2_REGNUM;
9646  max_gpr += GPR2_REGNUM;
9647
  /* If r6 was supposed to be saved into an FPR and now needs to go to
     the stack for varargs, we have to adjust the restore range to make
     sure that the restore is done from the stack as well.  */
9651  if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9652      && min_gpr <= GPR6_REGNUM
9653      && max_gpr >= GPR6_REGNUM)
9654    {
9655      if (cfun_frame_layout.first_restore_gpr == -1
9656	  || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9657	cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9658      if (cfun_frame_layout.last_restore_gpr == -1
9659	  || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9660	cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9661    }
9662
9663  if (cfun_frame_layout.first_save_gpr == -1
9664      || cfun_frame_layout.first_save_gpr > min_gpr)
9665    cfun_frame_layout.first_save_gpr = min_gpr;
9666
9667  if (cfun_frame_layout.last_save_gpr == -1
9668      || cfun_frame_layout.last_save_gpr < max_gpr)
9669    cfun_frame_layout.last_save_gpr = max_gpr;
9670
9671  for (i = min_gpr; i <= max_gpr; i++)
9672    cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9673}
9674
9675/* Calculate the save and restore ranges for stm(g) and lm(g) in the
9676   prologue and epilogue.  */
9677
9678static void
9679s390_register_info_set_ranges ()
9680{
9681  int i, j;
9682
  /* Find the first and the last save slot supposed to use the stack
     in order to set the restore range.
     Vararg regs might be marked for saving to the stack, but only the
     call-saved regs really need restoring (i.e. r6).  This code
     assumes that the vararg regs have not yet been recorded in
     cfun_gpr_save_slot.  */
9689  for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9690  for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9691  cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9692  cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9693  cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9694  cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9695}
9696
9697/* The GPR and FPR save slots in cfun->machine->frame_layout are set
9698   for registers which need to be saved in function prologue.
9699   This function can be used until the insns emitted for save/restore
9700   of the regs are visible in the RTL stream.  */
9701
9702static void
9703s390_register_info ()
9704{
9705  int i;
9706  char clobbered_regs[32];
9707
9708  gcc_assert (!epilogue_completed);
9709
9710  if (reload_completed)
9711    /* After reload we rely on our own routine to determine which
9712       registers need saving.  */
9713    s390_regs_ever_clobbered (clobbered_regs);
9714  else
9715    /* During reload we use regs_ever_live as a base since reload
9716       does changes in there which we otherwise would not be aware
9717       of.  */
9718    for (i = 0; i < 32; i++)
9719      clobbered_regs[i] = df_regs_ever_live_p (i);
9720
9721  for (i = 0; i < 32; i++)
9722    clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9723
9724  /* Mark the call-saved FPRs which need to be saved.
9725     This needs to be done before checking the special GPRs since the
9726     stack pointer usage depends on whether high FPRs have to be saved
9727     or not.  */
9728  cfun_frame_layout.fpr_bitmap = 0;
9729  cfun_frame_layout.high_fprs = 0;
9730  for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9731    if (clobbered_regs[i] && !call_used_regs[i])
9732      {
9733	cfun_set_fpr_save (i);
9734	if (i >= FPR8_REGNUM)
9735	  cfun_frame_layout.high_fprs++;
9736      }
9737
9738  /* Register 12 is used for GOT address, but also as temp in prologue
9739     for split-stack stdarg functions (unless r14 is available).  */
9740  clobbered_regs[12]
9741    |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9742	|| (flag_split_stack && cfun->stdarg
9743	    && (crtl->is_leaf || TARGET_TPF_PROFILING
9744		|| has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9745
9746  clobbered_regs[BASE_REGNUM]
9747    |= (cfun->machine->base_reg
9748	&& REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9749
9750  clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9751    |= !!frame_pointer_needed;
9752
  /* On pre-z900 machines this decision might not be taken until
     machine-dependent reorg.
     save_return_addr_p will only be set on non-zarch machines, so
     there is no risk that r14 goes into an FPR instead of a stack
     slot.  */
9758  clobbered_regs[RETURN_REGNUM]
9759    |= (!crtl->is_leaf
9760	|| TARGET_TPF_PROFILING
9761	|| cfun_frame_layout.save_return_addr_p
9762	|| crtl->calls_eh_return);
9763
9764  clobbered_regs[STACK_POINTER_REGNUM]
9765    |= (!crtl->is_leaf
9766	|| TARGET_TPF_PROFILING
9767	|| cfun_save_high_fprs_p
9768	|| get_frame_size () > 0
9769	|| (reload_completed && cfun_frame_layout.frame_size > 0)
9770	|| cfun->calls_alloca);
9771
9772  memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
9773
9774  for (i = 6; i < 16; i++)
9775    if (clobbered_regs[i])
9776      cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9777
9778  s390_register_info_stdarg_fpr ();
9779  s390_register_info_gprtofpr ();
9780  s390_register_info_set_ranges ();
9781  /* stdarg functions might need to save GPRs 2 to 6.  This might
9782     override the GPR->FPR save decision made by
9783     s390_register_info_gprtofpr for r6 since vararg regs must go to
9784     the stack.  */
9785  s390_register_info_stdarg_gpr ();
9786}
9787
/* Return true if REGNO is a global register, but not one
   of the special ones that need to be saved/restored anyway.  */
9790
9791static inline bool
9792global_not_special_regno_p (int regno)
9793{
9794  return (global_regs[regno]
9795	  /* These registers are special and need to be
9796	     restored in any case.  */
9797	  && !(regno == STACK_POINTER_REGNUM
9798	       || regno == RETURN_REGNUM
9799	       || regno == BASE_REGNUM
9800	       || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
9801}
9802
9803/* This function is called by s390_optimize_prologue in order to get
9804   rid of unnecessary GPR save/restore instructions.  The register info
9805   for the GPRs is re-computed and the ranges are re-calculated.  */
9806
9807static void
9808s390_optimize_register_info ()
9809{
9810  char clobbered_regs[32];
9811  int i;
9812
9813  gcc_assert (epilogue_completed);
9814
9815  s390_regs_ever_clobbered (clobbered_regs);
9816
9817  /* Global registers do not need to be saved and restored unless it
9818     is one of our special regs.  (r12, r13, r14, or r15).  */
9819  for (i = 0; i < 32; i++)
9820    clobbered_regs[i] = clobbered_regs[i] && !global_not_special_regno_p (i);
9821
9822  /* There is still special treatment needed for cases invisible to
9823     s390_regs_ever_clobbered.  */
9824  clobbered_regs[RETURN_REGNUM]
9825    |= (TARGET_TPF_PROFILING
9826	/* When expanding builtin_return_addr in ESA mode we do not
9827	   know whether r14 will later be needed as scratch reg when
9828	   doing branch splitting.  So the builtin always accesses the
9829	   r14 save slot and we need to stick to the save/restore
9830	   decision for r14 even if it turns out that it didn't get
9831	   clobbered.  */
9832	|| cfun_frame_layout.save_return_addr_p
9833	|| crtl->calls_eh_return);
9834
9835  memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
9836
9837  for (i = 6; i < 16; i++)
9838    if (!clobbered_regs[i])
9839      cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
9840
9841  s390_register_info_set_ranges ();
9842  s390_register_info_stdarg_gpr ();
9843}
9844
9845/* Fill cfun->machine with info about frame of current function.  */
9846
9847static void
9848s390_frame_info (void)
9849{
9850  HOST_WIDE_INT lowest_offset;
9851
9852  cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9853  cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9854
9855  /* The va_arg builtin uses a constant distance of 16 *
9856     UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9857     pointer.  So even if we are going to save the stack pointer in an
9858     FPR we need the stack space in order to keep the offsets
9859     correct.  */
9860  if (cfun->stdarg && cfun_save_arg_fprs_p)
9861    {
9862      cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9863
9864      if (cfun_frame_layout.first_save_gpr_slot == -1)
9865	cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9866    }
9867
9868  cfun_frame_layout.frame_size = get_frame_size ();
9869  if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9870    fatal_error (input_location,
9871		 "total size of local variables exceeds architecture limit");
9872
9873  if (!TARGET_PACKED_STACK)
9874    {
9875      /* Fixed stack layout.  */
9876      cfun_frame_layout.backchain_offset = 0;
9877      cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9878      cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9879      cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9880      cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9881				       * UNITS_PER_LONG);
9882    }
9883  else if (TARGET_BACKCHAIN)
9884    {
9885      /* Kernel stack layout - packed stack, backchain, no float  */
9886      gcc_assert (TARGET_SOFT_FLOAT);
9887      cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9888					    - UNITS_PER_LONG);
9889
9890      /* The distance between the backchain and the return address
9891	 save slot must not change.  So we always need a slot for the
9892	 stack pointer which resides in between.  */
9893      cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9894
9895      cfun_frame_layout.gprs_offset
9896	= cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9897
9898      /* FPRs will not be saved.  Nevertheless pick sane values to
9899	 keep area calculations valid.  */
9900      cfun_frame_layout.f0_offset =
9901	cfun_frame_layout.f4_offset =
9902	cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9903    }
9904  else
9905    {
9906      int num_fprs;
9907
9908      /* Packed stack layout without backchain.  */
9909
9910      /* With stdarg FPRs need their dedicated slots.  */
9911      num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9912		  : (cfun_fpr_save_p (FPR4_REGNUM) +
9913		     cfun_fpr_save_p (FPR6_REGNUM)));
9914      cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9915
9916      num_fprs = (cfun->stdarg ? 2
9917		  : (cfun_fpr_save_p (FPR0_REGNUM)
9918		     + cfun_fpr_save_p (FPR2_REGNUM)));
9919      cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9920
9921      cfun_frame_layout.gprs_offset
9922	= cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9923
9924      cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9925				     - cfun_frame_layout.high_fprs * 8);
9926    }
9927
9928  if (cfun_save_high_fprs_p)
9929    cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9930
9931  if (!crtl->is_leaf)
9932    cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9933
9934  /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9935     sized area at the bottom of the stack.  This is required also for
9936     leaf functions.  When GCC generates a local stack reference it
9937     will always add STACK_POINTER_OFFSET to all these references.  */
9938  if (crtl->is_leaf
9939      && !TARGET_TPF_PROFILING
9940      && cfun_frame_layout.frame_size == 0
9941      && !cfun->calls_alloca)
9942    return;
9943
9944  /* Calculate the number of bytes we have used in our own register
9945     save area.  With the packed stack layout we can re-use the
9946     remaining bytes for normal stack elements.  */
9947
9948  if (TARGET_PACKED_STACK)
9949    lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
9950			      cfun_frame_layout.f4_offset),
9951			 cfun_frame_layout.gprs_offset);
9952  else
9953    lowest_offset = 0;
9954
9955  if (TARGET_BACKCHAIN)
9956    lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
9957
9958  cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
9959
  /* On 31 bit, if an odd number of GPRs has to be saved, we have to
     adjust the frame size to maintain the 8-byte alignment of stack
     frames.  */
9963  cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
9964				   STACK_BOUNDARY / BITS_PER_UNIT - 1)
9965				  & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
9966}
9967
9968/* Generate frame layout.  Fills in register and frame data for the current
9969   function in cfun->machine.  This routine can be called multiple times;
9970   it will re-do the complete frame layout every time.  */
9971
9972static void
9973s390_init_frame_layout (void)
9974{
9975  HOST_WIDE_INT frame_size;
9976  int base_used;
9977
9978  /* After LRA the frame layout is supposed to be read-only and should
9979     not be re-computed.  */
9980  if (reload_completed)
9981    return;
9982
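  /* Whether a literal pool base register is needed depends on the
     frame size, and the frame size in turn depends on which registers
     have to be saved, so iterate until the layout stabilizes.  */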
9983  do
9984    {
9985      frame_size = cfun_frame_layout.frame_size;
9986
9987      /* Try to predict whether we'll need the base register.  */
9988      base_used = crtl->uses_const_pool
9989		  || (!DISP_IN_RANGE (frame_size)
9990		      && !CONST_OK_FOR_K (frame_size));
9991
9992      /* Decide which register to use as literal pool base.  In small
9993	 leaf functions, try to use an unused call-clobbered register
9994	 as base register to avoid save/restore overhead.  */
9995      if (!base_used)
9996	cfun->machine->base_reg = NULL_RTX;
9997      else
9998	{
9999	  int br = 0;
10000
10001	  if (crtl->is_leaf)
10002	    /* Prefer r5 (most likely to be free).  */
10003	    for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
10004	      ;
10005	  cfun->machine->base_reg =
10006	    gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
10007	}
10008
10009      s390_register_info ();
10010      s390_frame_info ();
10011    }
10012  while (frame_size != cfun_frame_layout.frame_size);
10013}
10014
/* Remove the FPR clobbers from a tbegin insn if it can be proven that
   the TX is nonescaping.  A transaction is considered escaping if
   there is at least one path from tbegin returning CC0 to the
   function exit block without a tend.

   The check so far has some limitations:
   - only single tbegin/tend BBs are supported
   - the first cond jump after tbegin must separate the CC0 path from ~CC0
   - the case where CC is copied to a GPR and the CC0 check is then
     done on the GPR is not supported
*/
10026
10027static void
10028s390_optimize_nonescaping_tx (void)
10029{
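  /* In the 4-bit condition code mask used by the branch-on-condition
     instructions, bit 3 (value 8) stands for CC0.  */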
10030  const unsigned int CC0 = 1 << 3;
10031  basic_block tbegin_bb = NULL;
10032  basic_block tend_bb = NULL;
10033  basic_block bb;
10034  rtx_insn *insn;
10035  bool result = true;
10036  int bb_index;
10037  rtx_insn *tbegin_insn = NULL;
10038
10039  if (!cfun->machine->tbegin_p)
10040    return;
10041
10042  for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
10043    {
10044      bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
10045
10046      if (!bb)
10047	continue;
10048
10049      FOR_BB_INSNS (bb, insn)
10050	{
10051	  rtx ite, cc, pat, target;
10052	  unsigned HOST_WIDE_INT mask;
10053
10054	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10055	    continue;
10056
10057	  pat = PATTERN (insn);
10058
10059	  if (GET_CODE (pat) == PARALLEL)
10060	    pat = XVECEXP (pat, 0, 0);
10061
10062	  if (GET_CODE (pat) != SET
10063	      || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
10064	    continue;
10065
10066	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
10067	    {
10068	      rtx_insn *tmp;
10069
10070	      tbegin_insn = insn;
10071
10072	      /* Just return if the tbegin doesn't have clobbers.  */
10073	      if (GET_CODE (PATTERN (insn)) != PARALLEL)
10074		return;
10075
10076	      if (tbegin_bb != NULL)
10077		return;
10078
10079	      /* Find the next conditional jump.  */
10080	      for (tmp = NEXT_INSN (insn);
10081		   tmp != NULL_RTX;
10082		   tmp = NEXT_INSN (tmp))
10083		{
10084		  if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
10085		    return;
10086		  if (!JUMP_P (tmp))
10087		    continue;
10088
10089		  ite = SET_SRC (PATTERN (tmp));
10090		  if (GET_CODE (ite) != IF_THEN_ELSE)
10091		    continue;
10092
10093		  cc = XEXP (XEXP (ite, 0), 0);
10094		  if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10095		      || GET_MODE (cc) != CCRAWmode
10096		      || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10097		    return;
10098
10099		  if (bb->succs->length () != 2)
10100		    return;
10101
10102		  mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10103		  if (GET_CODE (XEXP (ite, 0)) == NE)
10104		    mask ^= 0xf;
10105
10106		  if (mask == CC0)
10107		    target = XEXP (ite, 1);
10108		  else if (mask == (CC0 ^ 0xf))
10109		    target = XEXP (ite, 2);
10110		  else
10111		    return;
10112
10113		  {
10114		    edge_iterator ei;
10115		    edge e1, e2;
10116
10117		    ei = ei_start (bb->succs);
10118		    e1 = ei_safe_edge (ei);
10119		    ei_next (&ei);
10120		    e2 = ei_safe_edge (ei);
10121
10122		    if (e2->flags & EDGE_FALLTHRU)
10123		      {
10124			e2 = e1;
10125			e1 = ei_safe_edge (ei);
10126		      }
10127
10128		    if (!(e1->flags & EDGE_FALLTHRU))
10129		      return;
10130
10131		    tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10132		  }
10133		  if (tmp == BB_END (bb))
10134		    break;
10135		}
10136	    }
10137
10138	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10139	    {
10140	      if (tend_bb != NULL)
10141		return;
10142	      tend_bb = bb;
10143	    }
10144	}
10145    }
10146
  /* Either we successfully remove the FPR clobbers here or we are not
     able to do anything for this TX.  Neither case qualifies for
     another look.  */
10150  cfun->machine->tbegin_p = false;
10151
10152  if (tbegin_bb == NULL || tend_bb == NULL)
10153    return;
10154
10155  calculate_dominance_info (CDI_POST_DOMINATORS);
10156  result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10157  free_dominance_info (CDI_POST_DOMINATORS);
10158
10159  if (!result)
10160    return;
10161
10162  PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10163			    gen_rtvec (2,
10164				       XVECEXP (PATTERN (tbegin_insn), 0, 0),
10165				       XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10166  INSN_CODE (tbegin_insn) = -1;
10167  df_insn_rescan (tbegin_insn);
10168
10169  return;
10170}
10171
10172/* Implement TARGET_HARD_REGNO_NREGS.  Because all registers in a class
10173   have the same size, this is equivalent to CLASS_MAX_NREGS.  */
10174
10175static unsigned int
10176s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
10177{
10178  return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
10179}
10180
10181/* Implement TARGET_HARD_REGNO_MODE_OK.
10182
10183   Integer modes <= word size fit into any GPR.
10184   Integer modes > word size fit into successive GPRs, starting with
10185   an even-numbered register.
10186   SImode and DImode fit into FPRs as well.
10187
10188   Floating point modes <= word size fit into any FPR or GPR.
10189   Floating point modes > word size (i.e. DFmode on 32-bit) fit
10190   into any FPR, or an even-odd GPR pair.
10191   TFmode fits only into an even-odd FPR pair.
10192
10193   Complex floating point modes fit either into two FPRs, or into
10194   successive GPRs (again starting with an even number).
10195   TCmode fits only into two successive even-odd FPR pairs.
10196
10197   Condition code modes fit only into the CC register.  */
10198
10199static bool
10200s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10201{
10202  if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10203    return false;
10204
10205  switch (REGNO_REG_CLASS (regno))
10206    {
10207    case VEC_REGS:
10208      return ((GET_MODE_CLASS (mode) == MODE_INT
10209	       && s390_class_max_nregs (VEC_REGS, mode) == 1)
10210	      || mode == DFmode
10211	      || (TARGET_VXE && mode == SFmode)
10212	      || s390_vector_mode_supported_p (mode));
10213      break;
10214    case FP_REGS:
10215      if (TARGET_VX
10216	  && ((GET_MODE_CLASS (mode) == MODE_INT
10217	       && s390_class_max_nregs (FP_REGS, mode) == 1)
10218	      || mode == DFmode
10219	      || s390_vector_mode_supported_p (mode)))
10220	return true;
10221
10222      if (REGNO_PAIR_OK (regno, mode))
10223	{
10224	  if (mode == SImode || mode == DImode)
10225	    return true;
10226
10227	  if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10228	    return true;
10229	}
10230      break;
10231    case ADDR_REGS:
10232      if (FRAME_REGNO_P (regno) && mode == Pmode)
10233	return true;
10234
10235      /* fallthrough */
10236    case GENERAL_REGS:
10237      if (REGNO_PAIR_OK (regno, mode))
10238	{
10239	  if (TARGET_ZARCH
10240	      || (mode != TFmode && mode != TCmode && mode != TDmode))
10241	    return true;
10242	}
10243      break;
10244    case CC_REGS:
10245      if (GET_MODE_CLASS (mode) == MODE_CC)
10246	return true;
10247      break;
10248    case ACCESS_REGS:
10249      if (REGNO_PAIR_OK (regno, mode))
10250	{
10251	  if (mode == SImode || mode == Pmode)
10252	    return true;
10253	}
10254      break;
10255    default:
10256      return false;
10257    }
10258
10259  return false;
10260}
10261
10262/* Implement TARGET_MODES_TIEABLE_P.  */
10263
10264static bool
10265s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10266{
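  /* SFmode and DFmode values are kept apart from same-sized integer
     modes here, presumably because their layout within FPRs/VRs
     differs from that of integer values of the same size.  */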
10267  return ((mode1 == SFmode || mode1 == DFmode)
10268	  == (mode2 == SFmode || mode2 == DFmode));
10269}
10270
10271/* Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */
10272
10273bool
10274s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10275{
10276   /* Once we've decided upon a register to use as base register, it must
10277      no longer be used for any other purpose.  */
10278  if (cfun->machine->base_reg)
10279    if (REGNO (cfun->machine->base_reg) == old_reg
10280	|| REGNO (cfun->machine->base_reg) == new_reg)
10281      return false;
10282
10283  /* Prevent regrename from using call-saved regs which haven't
10284     actually been saved.  This is necessary since regrename assumes
10285     the backend save/restore decisions are based on
10286     df_regs_ever_live.  Since we have our own routine we have to tell
10287     regrename manually about it.  */
10288  if (GENERAL_REGNO_P (new_reg)
10289      && !call_used_regs[new_reg]
10290      && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10291    return false;
10292
10293  return true;
10294}
10295
10296/* Return nonzero if register REGNO can be used as a scratch register
10297   in peephole2.  */
10298
10299static bool
10300s390_hard_regno_scratch_ok (unsigned int regno)
10301{
10302  /* See s390_hard_regno_rename_ok.  */
10303  if (GENERAL_REGNO_P (regno)
10304      && !call_used_regs[regno]
10305      && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10306    return false;
10307
10308  return true;
10309}
10310
10311/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  When generating
10312   code that runs in z/Architecture mode, but conforms to the 31-bit
10313   ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10314   bytes are saved across calls, however.  */
10315
10316static bool
10317s390_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
10318				     machine_mode mode)
10319{
10320  if (!TARGET_64BIT
10321      && TARGET_ZARCH
10322      && GET_MODE_SIZE (mode) > 4
10323      && ((regno >= 6 && regno <= 15) || regno == 32))
10324    return true;
10325
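  /* The vector registers overlapping the call-saved FPRs have only
     their low 8 bytes (the FPR part) preserved across calls; wider
     values kept in them are therefore partially clobbered.  */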
10326  if (TARGET_VX
10327      && GET_MODE_SIZE (mode) > 8
10328      && (((TARGET_64BIT && regno >= 24 && regno <= 31))
10329	  || (!TARGET_64BIT && (regno == 18 || regno == 19))))
10330    return true;
10331
10332  return false;
10333}
10334
10335/* Maximum number of registers to represent a value of mode MODE
10336   in a register of class RCLASS.  */
10337
10338int
10339s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10340{
10341  int reg_size;
10342  bool reg_pair_required_p = false;
10343
10344  switch (rclass)
10345    {
10346    case FP_REGS:
10347    case VEC_REGS:
10348      reg_size = TARGET_VX ? 16 : 8;
10349
10350      /* TF and TD modes would fit into a VR but we put them into a
10351	 register pair since we do not have 128bit FP instructions on
10352	 full VRs.  */
10353      if (TARGET_VX
10354	  && SCALAR_FLOAT_MODE_P (mode)
10355	  && GET_MODE_SIZE (mode) >= 16)
10356	reg_pair_required_p = true;
10357
10358      /* Even if complex types would fit into a single FPR/VR we force
10359	 them into a register pair to deal with the parts more easily.
10360	 (FIXME: What about complex ints?)  */
10361      if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10362	reg_pair_required_p = true;
10363      break;
10364    case ACCESS_REGS:
10365      reg_size = 4;
10366      break;
10367    default:
10368      reg_size = UNITS_PER_WORD;
10369      break;
10370    }
10371
10372  if (reg_pair_required_p)
10373    return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10374
10375  return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
10376}
10377
10378/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
10379
10380static bool
10381s390_can_change_mode_class (machine_mode from_mode,
10382			    machine_mode to_mode,
10383			    reg_class_t rclass)
10384{
10385  machine_mode small_mode;
10386  machine_mode big_mode;
10387
10388  /* V1TF and TF have different representations in vector
10389     registers.  */
10390  if (reg_classes_intersect_p (VEC_REGS, rclass)
10391      && ((from_mode == V1TFmode && to_mode == TFmode)
10392	  || (from_mode == TFmode && to_mode == V1TFmode)))
10393    return false;
10394
10395  if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10396    return true;
10397
10398  if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10399    {
10400      small_mode = from_mode;
10401      big_mode = to_mode;
10402    }
10403  else
10404    {
10405      small_mode = to_mode;
10406      big_mode = from_mode;
10407    }
10408
  /* Values residing in VRs are little-endian style.  All modes are
     placed left-aligned in a VR.  This means that we cannot allow
     switching between modes with differing sizes.  Also, even if the
     vector facility is available, we still place TFmode values in VR
     register pairs, since the only instructions we have operating on
     TFmode deal with register pairs.  Therefore we have to allow
     DFmode subregs of TFmode to enable the TFmode splitters.  */
10416  if (reg_classes_intersect_p (VEC_REGS, rclass)
10417      && (GET_MODE_SIZE (small_mode) < 8
10418	  || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10419    return false;
10420
10421  /* Likewise for access registers, since they have only half the
10422     word size on 64-bit.  */
10423  if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10424    return false;
10425
10426  return true;
10427}
10428
10429/* Return true if we use LRA instead of reload pass.  */
10430static bool
10431s390_lra_p (void)
10432{
10433  return s390_lra_flag;
10434}
10435
10436/* Return true if register FROM can be eliminated via register TO.  */
10437
10438static bool
10439s390_can_eliminate (const int from, const int to)
10440{
10441  /* We have not marked the base register as fixed.
10442     Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10443     If a function requires the base register, we say here that this
10444     elimination cannot be performed.  This will cause reload to free
10445     up the base register (as if it were fixed).  On the other hand,
10446     if the current function does *not* require the base register, we
10447     say here the elimination succeeds, which in turn allows reload
10448     to allocate the base register for any other purpose.  */
10449  if (from == BASE_REGNUM && to == BASE_REGNUM)
10450    {
10451      s390_init_frame_layout ();
10452      return cfun->machine->base_reg == NULL_RTX;
10453    }
10454
10455  /* Everything else must point into the stack frame.  */
10456  gcc_assert (to == STACK_POINTER_REGNUM
10457	      || to == HARD_FRAME_POINTER_REGNUM);
10458
10459  gcc_assert (from == FRAME_POINTER_REGNUM
10460	      || from == ARG_POINTER_REGNUM
10461	      || from == RETURN_ADDRESS_POINTER_REGNUM);
10462
10463  /* Make sure we actually saved the return address.  */
10464  if (from == RETURN_ADDRESS_POINTER_REGNUM)
10465    if (!crtl->calls_eh_return
10466	&& !cfun->stdarg
10467	&& !cfun_frame_layout.save_return_addr_p)
10468      return false;
10469
10470  return true;
10471}
10472
10473/* Return offset between register FROM and TO initially after prolog.  */
10474
10475HOST_WIDE_INT
10476s390_initial_elimination_offset (int from, int to)
10477{
10478  HOST_WIDE_INT offset;
10479
10480  /* ??? Why are we called for non-eliminable pairs?  */
10481  if (!s390_can_eliminate (from, to))
10482    return 0;
10483
10484  switch (from)
10485    {
10486    case FRAME_POINTER_REGNUM:
10487      offset = (get_frame_size()
10488		+ STACK_POINTER_OFFSET
10489		+ crtl->outgoing_args_size);
10490      break;
10491
10492    case ARG_POINTER_REGNUM:
10493      s390_init_frame_layout ();
10494      offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10495      break;
10496
10497    case RETURN_ADDRESS_POINTER_REGNUM:
10498      s390_init_frame_layout ();
10499
10500      if (cfun_frame_layout.first_save_gpr_slot == -1)
10501	{
10502	  /* If it turns out that for stdarg nothing went into the reg
10503	     save area we also do not need the return address
10504	     pointer.  */
10505	  if (cfun->stdarg && !cfun_save_arg_fprs_p)
10506	    return 0;
10507
10508	  gcc_unreachable ();
10509	}
10510
10511      /* In order to make the following work it is not necessary for
10512	 r14 to have a save slot.  It is sufficient if one other GPR
10513	 got one.  Since the GPRs are always stored without gaps we
10514	 are able to calculate where the r14 save slot would
10515	 reside.  */
10516      offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10517		(RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10518		UNITS_PER_LONG);
10519      break;
10520
10521    case BASE_REGNUM:
10522      offset = 0;
10523      break;
10524
10525    default:
10526      gcc_unreachable ();
10527    }
10528
10529  return offset;
10530}
10531
10532/* Emit insn to save fpr REGNUM at offset OFFSET relative
10533   to register BASE.  Return generated insn.  */
10534
10535static rtx
10536save_fpr (rtx base, int offset, int regnum)
10537{
10538  rtx addr;
10539  addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10540
10541  if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10542    set_mem_alias_set (addr, get_varargs_alias_set ());
10543  else
10544    set_mem_alias_set (addr, get_frame_alias_set ());
10545
10546  return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10547}
10548
10549/* Emit insn to restore fpr REGNUM from offset OFFSET relative
10550   to register BASE.  Return generated insn.  */
10551
10552static rtx
10553restore_fpr (rtx base, int offset, int regnum)
10554{
10555  rtx addr;
10556  addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10557  set_mem_alias_set (addr, get_frame_alias_set ());
10558
10559  return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10560}
10561
10562/* Generate insn to save registers FIRST to LAST into
10563   the register save area located at offset OFFSET
10564   relative to register BASE.  */
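/* For illustration: with FIRST == 6 and LAST == 15 on a 64-bit target
   the pattern built below roughly corresponds to a single
   "stmg %r6,%r15,OFFSET(BASE)" instruction; the FIRST == LAST case
   degenerates to a plain store.  */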
10565
10566static rtx
10567save_gprs (rtx base, int offset, int first, int last)
10568{
10569  rtx addr, insn, note;
10570  int i;
10571
10572  addr = plus_constant (Pmode, base, offset);
10573  addr = gen_rtx_MEM (Pmode, addr);
10574
10575  set_mem_alias_set (addr, get_frame_alias_set ());
10576
10577  /* Special-case single register.  */
10578  if (first == last)
10579    {
10580      if (TARGET_64BIT)
10581	insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10582      else
10583	insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10584
10585      if (!global_not_special_regno_p (first))
10586	RTX_FRAME_RELATED_P (insn) = 1;
10587      return insn;
10588    }
10589
10590
10591  insn = gen_store_multiple (addr,
10592			     gen_rtx_REG (Pmode, first),
10593			     GEN_INT (last - first + 1));
10594
10595  if (first <= 6 && cfun->stdarg)
10596    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10597      {
10598	rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10599
10600	if (first + i <= 6)
10601	  set_mem_alias_set (mem, get_varargs_alias_set ());
10602      }
10603
10604  /* We need to set the FRAME_RELATED flag on all SETs
10605     inside the store-multiple pattern.
10606
10607     However, we must not emit DWARF records for registers 2..5
10608     if they are stored for use by variable arguments ...
10609
10610     ??? Unfortunately, it is not enough to simply not set the
10611     FRAME_RELATED flag for those SETs, because the first SET
10612     of the PARALLEL is always treated as if it had the flag
10613     set, even if it does not.  Therefore we emit a new pattern
10614     without those registers as a REG_FRAME_RELATED_EXPR note.  */
10615
10616  if (first >= 6 && !global_not_special_regno_p (first))
10617    {
10618      rtx pat = PATTERN (insn);
10619
10620      for (i = 0; i < XVECLEN (pat, 0); i++)
10621	if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10622	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10623								     0, i)))))
10624	  RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10625
10626      RTX_FRAME_RELATED_P (insn) = 1;
10627    }
10628  else if (last >= 6)
10629    {
10630      int start;
10631
10632      for (start = first >= 6 ? first : 6; start <= last; start++)
10633	if (!global_not_special_regno_p (start))
10634	  break;
10635
10636      if (start > last)
10637	return insn;
10638
10639      addr = plus_constant (Pmode, base,
10640			    offset + (start - first) * UNITS_PER_LONG);
10641
10642      if (start == last)
10643	{
10644	  if (TARGET_64BIT)
10645	    note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10646			      gen_rtx_REG (Pmode, start));
10647	  else
10648	    note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10649			      gen_rtx_REG (Pmode, start));
10650	  note = PATTERN (note);
10651
10652	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10653	  RTX_FRAME_RELATED_P (insn) = 1;
10654
10655	  return insn;
10656	}
10657
10658      note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10659				 gen_rtx_REG (Pmode, start),
10660				 GEN_INT (last - start + 1));
10661      note = PATTERN (note);
10662
10663      add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10664
10665      for (i = 0; i < XVECLEN (note, 0); i++)
10666	if (GET_CODE (XVECEXP (note, 0, i)) == SET
10667	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10668								     0, i)))))
10669	  RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10670
10671      RTX_FRAME_RELATED_P (insn) = 1;
10672    }
10673
10674  return insn;
10675}
10676
10677/* Generate insn to restore registers FIRST to LAST from
10678   the register save area located at offset OFFSET
10679   relative to register BASE.  */
10680
10681static rtx
10682restore_gprs (rtx base, int offset, int first, int last)
10683{
10684  rtx addr, insn;
10685
10686  addr = plus_constant (Pmode, base, offset);
10687  addr = gen_rtx_MEM (Pmode, addr);
10688  set_mem_alias_set (addr, get_frame_alias_set ());
10689
10690  /* Special-case single register.  */
10691  if (first == last)
10692    {
10693      if (TARGET_64BIT)
10694	insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10695      else
10696	insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10697
10698      RTX_FRAME_RELATED_P (insn) = 1;
10699      return insn;
10700    }
10701
10702  insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10703			    addr,
10704			    GEN_INT (last - first + 1));
10705  RTX_FRAME_RELATED_P (insn) = 1;
10706  return insn;
10707}
10708
10709/* Return insn sequence to load the GOT register.  */
10710
10711rtx_insn *
10712s390_load_got (void)
10713{
10714  rtx_insn *insns;
10715
10716  /* We cannot use pic_offset_table_rtx here since we use this
10717     function also for non-pic if __tls_get_offset is called and in
10718     that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10719     aren't usable.  */
10720  rtx got_rtx = gen_rtx_REG (Pmode, 12);
10721
10722  start_sequence ();
10723
10724  emit_move_insn (got_rtx, s390_got_symbol ());
10725
10726  insns = get_insns ();
10727  end_sequence ();
10728  return insns;
10729}
10730
10731/* This ties together stack memory (MEM with an alias set of frame_alias_set)
10732   and the change to the stack pointer.  */
10733
10734static void
10735s390_emit_stack_tie (void)
10736{
10737  rtx mem = gen_frame_mem (BLKmode,
10738			   gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10739
10740  emit_insn (gen_stack_tie (mem));
10741}
10742
10743/* Copy GPRS into FPR save slots.  */
10744
10745static void
10746s390_save_gprs_to_fprs (void)
10747{
10748  int i;
10749
10750  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10751    return;
10752
10753  for (i = 6; i < 16; i++)
10754    {
10755      if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10756	{
10757	  rtx_insn *insn =
10758	    emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10759			    gen_rtx_REG (DImode, i));
10760	  RTX_FRAME_RELATED_P (insn) = 1;
10761	  /* This prevents dwarf2cfi from interpreting the set.
10762	     Otherwise it might emit def_cfa_register notes that make
10763	     an FPR the new CFA.  */
10764	  add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
10765	}
10766    }
10767}
10768
10769/* Restore GPRs from FPR save slots.  */
10770
10771static void
10772s390_restore_gprs_from_fprs (void)
10773{
10774  int i;
10775
10776  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10777    return;
10778
10779  /* Restore the GPRs starting with the stack pointer.  That way the
10780     stack pointer already has its original value when it comes to
10781     restoring the hard frame pointer.  So we can set the cfa reg back
10782     to the stack pointer.  */
10783  for (i = STACK_POINTER_REGNUM; i >= 6; i--)
10784    {
10785      rtx_insn *insn;
10786
10787      if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
10788	continue;
10789
10790      rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
10791
10792      if (i == STACK_POINTER_REGNUM)
10793	insn = emit_insn (gen_stack_restore_from_fpr (fpr));
10794      else
10795	insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
10796
10797      df_set_regs_ever_live (i, true);
10798      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10799
10800      /* If either the stack pointer or the frame pointer gets
10801	 restored, set the CFA back to its value at function entry.
10802	 Doing this for the frame pointer results in
10803	 .cfi_def_cfa_register 15, which is OK since, if the stack
10804	 pointer was modified, it has already been restored.  */
10805      if (i == STACK_POINTER_REGNUM || i == HARD_FRAME_POINTER_REGNUM)
10806	add_reg_note (insn, REG_CFA_DEF_CFA,
10807		      plus_constant (Pmode, stack_pointer_rtx,
10808				     STACK_POINTER_OFFSET));
10809      RTX_FRAME_RELATED_P (insn) = 1;
10810    }
10811}
10812
10813
10814/* A pass run immediately before shrink-wrapping and prologue and epilogue
10815   generation.  */
10816
10817namespace {
10818
10819const pass_data pass_data_s390_early_mach =
10820{
10821  RTL_PASS, /* type */
10822  "early_mach", /* name */
10823  OPTGROUP_NONE, /* optinfo_flags */
10824  TV_MACH_DEP, /* tv_id */
10825  0, /* properties_required */
10826  0, /* properties_provided */
10827  0, /* properties_destroyed */
10828  0, /* todo_flags_start */
10829  ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10830};
10831
10832class pass_s390_early_mach : public rtl_opt_pass
10833{
10834public:
10835  pass_s390_early_mach (gcc::context *ctxt)
10836    : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10837  {}
10838
10839  /* opt_pass methods: */
10840  virtual unsigned int execute (function *);
10841
10842}; // class pass_s390_early_mach
10843
10844unsigned int
10845pass_s390_early_mach::execute (function *fun)
10846{
10847  rtx_insn *insn;
10848
10849  /* Try to get rid of the FPR clobbers.  */
10850  s390_optimize_nonescaping_tx ();
10851
10852  /* Re-compute register info.  */
10853  s390_register_info ();
10854
10855  /* If we're using a base register, ensure that it is always valid for
10856     the first non-prologue instruction.  */
10857  if (fun->machine->base_reg)
10858    emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10859
10860  /* Annotate all constant pool references to let the scheduler know
10861     they implicitly use the base register.  */
10862  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10863    if (INSN_P (insn))
10864      {
10865	annotate_constant_pool_refs (insn);
10866	df_insn_rescan (insn);
10867      }
10868  return 0;
10869}
10870
10871} // anon namespace
10872
10873rtl_opt_pass *
10874make_pass_s390_early_mach (gcc::context *ctxt)
10875{
10876  return new pass_s390_early_mach (ctxt);
10877}
10878
10879/* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it:
10880   - push too-big immediates to the literal pool and annotate the refs
10881   - emit frame-related notes for stack pointer changes.  */
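/* Sketch of the instruction selection done below (assuming the usual
   constraint meanings): offset 0 uses LR/LGR, a displacement in range
   uses LA, a 16-bit signed immediate uses AHI/AGHI, a 32-bit immediate
   with extended immediates available uses ALFI/ALGFI or SLFI/SLGFI,
   and anything else is loaded from the literal pool first.  */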
10882
10883static rtx
10884s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
10885{
10886  rtx_insn *insn;
10887  rtx orig_offset = offset;
10888
10889  gcc_assert (REG_P (target));
10890  gcc_assert (REG_P (reg));
10891  gcc_assert (CONST_INT_P (offset));
10892
10893  if (offset == const0_rtx)                               /* lr/lgr */
10894    {
10895      insn = emit_move_insn (target, reg);
10896    }
10897  else if (DISP_IN_RANGE (INTVAL (offset)))               /* la */
10898    {
10899      insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
10900						   offset));
10901    }
10902  else
10903    {
10904      if (!satisfies_constraint_K (offset)                /* ahi/aghi */
10905	  && (!TARGET_EXTIMM
10906	      || (!satisfies_constraint_Op (offset)       /* alfi/algfi */
10907		  && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
10908	offset = force_const_mem (Pmode, offset);
10909
10910      if (target != reg)
10911	{
10912	  insn = emit_move_insn (target, reg);
10913	  RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
10914	}
10915
10916      insn = emit_insn (gen_add2_insn (target, offset));
10917
10918      if (!CONST_INT_P (offset))
10919	{
10920	  annotate_constant_pool_refs (insn);
10921
10922	  if (frame_related_p)
10923	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10924			  gen_rtx_SET (target,
10925				       gen_rtx_PLUS (Pmode, target,
10926						     orig_offset)));
10927	}
10928    }
10929
10930  RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
10931
10932  /* If this is a stack adjustment and we are generating a stack clash
10933     prologue, then add a REG_STACK_CHECK note to signal that this insn
10934     should be left alone.  */
10935  if (flag_stack_clash_protection && target == stack_pointer_rtx)
10936    add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
10937
10938  return insn;
10939}
10940
10941/* Emit a compare instruction with a volatile memory access as stack
10942   probe.  It does not waste store tags and does not clobber any
10943   registers apart from the condition code.  */
10944static void
10945s390_emit_stack_probe (rtx addr)
10946{
10947  rtx tmp = gen_rtx_MEM (Pmode, addr);
10948  MEM_VOLATILE_P (tmp) = 1;
10949  s390_emit_compare (EQ, gen_rtx_REG (Pmode, 0), tmp);
10950  emit_insn (gen_blockage ());
10951}
10952
10953/* Use a runtime loop if we have to emit more probes than this.  */
10954#define MIN_UNROLL_PROBES 3
10955
10956/* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
10957   if necessary.  LAST_PROBE_OFFSET contains the offset of the closest
10958   probe relative to the stack pointer.
10959
10960   Note that SIZE is negative.
10961
10962   The return value is true if TEMP_REG has been clobbered.  */
10963static bool
10964allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
10965		      rtx temp_reg)
10966{
10967  bool temp_reg_clobbered_p = false;
10968  HOST_WIDE_INT probe_interval
10969    = 1 << param_stack_clash_protection_probe_interval;
10970  HOST_WIDE_INT guard_size
10971    = 1 << param_stack_clash_protection_guard_size;
10972
10973  if (flag_stack_clash_protection)
10974    {
10975      if (last_probe_offset + -INTVAL (size) < guard_size)
10976	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
10977      else
10978	{
10979	  rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
10980	  HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
10981	  HOST_WIDE_INT num_probes = rounded_size / probe_interval;
10982	  HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
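	  /* Worked example with illustrative numbers: if -INTVAL (size)
	     is 10000 and probe_interval is 4096, then rounded_size is
	     8192, num_probes is 2 and residual is 1808, so the probes
	     are emitted unrolled below.  */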
10983
10984	  if (num_probes < MIN_UNROLL_PROBES)
10985	    {
10986	      /* Emit unrolled probe statements.  */
10987
10988	      for (unsigned int i = 0; i < num_probes; i++)
10989		{
10990		  s390_prologue_plus_offset (stack_pointer_rtx,
10991					     stack_pointer_rtx,
10992					     GEN_INT (-probe_interval), true);
10993		  s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
10994						       stack_pointer_rtx,
10995						       offset));
10996		}
10997	      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
10998	    }
10999	  else
11000	    {
11001	      /* Emit a loop probing the pages.  */
11002
11003	      rtx_code_label *loop_start_label = gen_label_rtx ();
11004
11005	      /* From now on temp_reg will be the CFA register.  */
11006	      s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11007					 GEN_INT (-rounded_size), true);
11008	      emit_label (loop_start_label);
11009
11010	      s390_prologue_plus_offset (stack_pointer_rtx,
11011					 stack_pointer_rtx,
11012					 GEN_INT (-probe_interval), false);
11013	      s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11014						   stack_pointer_rtx,
11015						   offset));
11016	      emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
11017				       GT, NULL_RTX,
11018				       Pmode, 1, loop_start_label);
11019
11020	      /* Without this, make_edges ICEs.  */
11021	      JUMP_LABEL (get_last_insn ()) = loop_start_label;
11022	      LABEL_NUSES (loop_start_label) = 1;
11023
11024	      /* That's going to be a NOP since stack pointer and
11025		 temp_reg are supposed to be the same here.  We just
11026		 emit it to set the CFA reg back to r15.  */
11027	      s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
11028					 const0_rtx, true);
11029	      temp_reg_clobbered_p = true;
11030	      dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
11031	    }
11032
11033	  /* Handle any residual allocation request.  */
11034	  s390_prologue_plus_offset (stack_pointer_rtx,
11035				     stack_pointer_rtx,
11036				     GEN_INT (-residual), true);
11037	  last_probe_offset += residual;
11038	  if (last_probe_offset >= probe_interval)
11039	    s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11040						 stack_pointer_rtx,
11041						 GEN_INT (residual
11042							  - UNITS_PER_LONG)));
11043
11044	  return temp_reg_clobbered_p;
11045	}
11046    }
11047
11048  /* Subtract frame size from stack pointer.  */
11049  s390_prologue_plus_offset (stack_pointer_rtx,
11050			     stack_pointer_rtx,
11051			     size, true);
11052
11053  return temp_reg_clobbered_p;
11054}
11055
11056/* Expand the prologue into a bunch of separate insns.  */
11057
11058void
11059s390_emit_prologue (void)
11060{
11061  rtx insn, addr;
11062  rtx temp_reg;
11063  int i;
11064  int offset;
11065  int next_fpr = 0;
11066
11067  /* Choose best register to use for temp use within prologue.
11068     TPF with profiling must avoid the register 14 - the tracing function
11069     needs the original contents of r14 to be preserved.  */
11070
11071  if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
11072      && !crtl->is_leaf
11073      && !TARGET_TPF_PROFILING)
11074    temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11075  else if (flag_split_stack && cfun->stdarg)
11076    temp_reg = gen_rtx_REG (Pmode, 12);
11077  else
11078    temp_reg = gen_rtx_REG (Pmode, 1);
11079
11080  /* When probing for stack-clash mitigation, we have to track the distance
11081     between the stack pointer and closest known reference.
11082
11083     Most of the time we have to make a worst case assumption.  The
11084     only exception is when TARGET_BACKCHAIN is active, in which case
11085     we know *sp (offset 0) was written.  */
11086  HOST_WIDE_INT probe_interval
11087    = 1 << param_stack_clash_protection_probe_interval;
11088  HOST_WIDE_INT last_probe_offset
11089    = (TARGET_BACKCHAIN
11090       ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
11091       : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));
11092
11093  s390_save_gprs_to_fprs ();
11094
11095  /* Save call saved gprs.  */
11096  if (cfun_frame_layout.first_save_gpr != -1)
11097    {
11098      insn = save_gprs (stack_pointer_rtx,
11099			cfun_frame_layout.gprs_offset +
11100			UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11101					  - cfun_frame_layout.first_save_gpr_slot),
11102			cfun_frame_layout.first_save_gpr,
11103			cfun_frame_layout.last_save_gpr);
11104
11105      /* This is not 100% correct.  If we have more than one register saved,
11106	 then LAST_PROBE_OFFSET can move even closer to sp.  */
11107      last_probe_offset
11108	= (cfun_frame_layout.gprs_offset +
11109	   UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11110			     - cfun_frame_layout.first_save_gpr_slot));
11111
11112      emit_insn (insn);
11113    }
11114
11115  /* Dummy insn to mark literal pool slot.  */
11116
11117  if (cfun->machine->base_reg)
11118    emit_insn (gen_main_pool (cfun->machine->base_reg));
11119
11120  offset = cfun_frame_layout.f0_offset;
11121
11122  /* Save f0 and f2.  */
11123  for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
11124    {
11125      if (cfun_fpr_save_p (i))
11126	{
11127	  save_fpr (stack_pointer_rtx, offset, i);
11128	  if (offset < last_probe_offset)
11129	    last_probe_offset = offset;
11130	  offset += 8;
11131	}
11132      else if (!TARGET_PACKED_STACK || cfun->stdarg)
11133	offset += 8;
11134    }
11135
11136  /* Save f4 and f6.  */
11137  offset = cfun_frame_layout.f4_offset;
11138  for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11139    {
11140      if (cfun_fpr_save_p (i))
11141	{
11142	  insn = save_fpr (stack_pointer_rtx, offset, i);
11143	  if (offset < last_probe_offset)
11144	    last_probe_offset = offset;
11145	  offset += 8;
11146
11147	  /* If f4 and f6 are call clobbered they are saved due to
11148	     stdargs and therefore are not frame related.  */
11149	  if (!call_used_regs[i])
11150	    RTX_FRAME_RELATED_P (insn) = 1;
11151	}
11152      else if (!TARGET_PACKED_STACK || call_used_regs[i])
11153	offset += 8;
11154    }
11155
11156  if (TARGET_PACKED_STACK
11157      && cfun_save_high_fprs_p
11158      && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
11159    {
11160      offset = (cfun_frame_layout.f8_offset
11161		+ (cfun_frame_layout.high_fprs - 1) * 8);
11162
11163      for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
11164	if (cfun_fpr_save_p (i))
11165	  {
11166	    insn = save_fpr (stack_pointer_rtx, offset, i);
11167	    if (offset < last_probe_offset)
11168	      last_probe_offset = offset;
11169
11170	    RTX_FRAME_RELATED_P (insn) = 1;
11171	    offset -= 8;
11172	  }
11173      if (offset >= cfun_frame_layout.f8_offset)
11174	next_fpr = i;
11175    }
11176
11177  if (!TARGET_PACKED_STACK)
11178    next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
11179
11180  if (flag_stack_usage_info)
11181    current_function_static_stack_size = cfun_frame_layout.frame_size;
11182
11183  /* Decrement stack pointer.  */
11184
11185  if (cfun_frame_layout.frame_size > 0)
11186    {
11187      rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11188      rtx_insn *stack_pointer_backup_loc;
11189      bool temp_reg_clobbered_p;
11190
11191      if (s390_stack_size)
11192	{
11193	  HOST_WIDE_INT stack_guard;
11194
11195	  if (s390_stack_guard)
11196	    stack_guard = s390_stack_guard;
11197	  else
11198	    {
11199	      /* If no value for the stack guard is provided, the smallest
11200		 power of 2 that is at least the frame size is chosen.  */
11201	      stack_guard = 1;
11202	      while (stack_guard < cfun_frame_layout.frame_size)
11203		stack_guard <<= 1;
11204	    }
11205
11206	  if (cfun_frame_layout.frame_size >= s390_stack_size)
11207	    {
11208	      warning (0, "frame size of function %qs is %wd"
11209		       " bytes exceeding user provided stack limit of "
11210		       "%d bytes.  "
11211		       "An unconditional trap is added.",
11212		       current_function_name(), cfun_frame_layout.frame_size,
11213		       s390_stack_size);
11214	      emit_insn (gen_trap ());
11215	      emit_barrier ();
11216	    }
11217	  else
11218	    {
11219	      /* stack_guard has to be smaller than s390_stack_size.
11220		 Otherwise we would emit an AND with zero which would
11221		 not match the test under mask pattern.  */
11222	      if (stack_guard >= s390_stack_size)
11223		{
11224		  warning (0, "frame size of function %qs is %wd"
11225			   " bytes which is more than half the stack size. "
11226			   "The dynamic check would not be reliable. "
11227			   "No check emitted for this function.",
11228			   current_function_name(),
11229			   cfun_frame_layout.frame_size);
11230		}
11231	      else
11232		{
11233		  HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
11234						    & ~(stack_guard - 1));
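		  /* Illustration with example option values: for
		     -mstack-size=65536 and -mstack-guard=4096 the mask
		     is 0xf000, and the conditional trap below fires
		     once all of those stack pointer bits are zero,
		     i.e. (roughly) once the stack pointer has dropped
		     into the guard area at the bottom of the permitted
		     stack range.  */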
11235
11236		  rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
11237				       GEN_INT (stack_check_mask));
11238		  if (TARGET_64BIT)
11239		    emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
11240							 t, const0_rtx),
11241					     t, const0_rtx, const0_rtx));
11242		  else
11243		    emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
11244							 t, const0_rtx),
11245					     t, const0_rtx, const0_rtx));
11246		}
11247	    }
11248	}
11249
11250      if (s390_warn_framesize > 0
11251	  && cfun_frame_layout.frame_size >= s390_warn_framesize)
11252	warning (0, "frame size of %qs is %wd bytes",
11253		 current_function_name (), cfun_frame_layout.frame_size);
11254
11255      if (s390_warn_dynamicstack_p && cfun->calls_alloca)
11256	warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11257
11258      /* Save the location where we could backup the incoming stack
11259	 pointer.  */
11260      stack_pointer_backup_loc = get_last_insn ();
11261
11262      temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
11263						   temp_reg);
11264
11265      if (TARGET_BACKCHAIN || next_fpr)
11266	{
11267	  if (temp_reg_clobbered_p)
11268	    {
11269	      /* allocate_stack_space had to make use of temp_reg and
11270		 we need it to hold a backup of the incoming stack
11271		 pointer.  Calculate back that value from the current
11272		 stack pointer.  */
11273	      s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11274					 GEN_INT (cfun_frame_layout.frame_size),
11275					 false);
11276	    }
11277	  else
11278	    {
11279	      /* allocate_stack_space didn't actually require
11280		 temp_reg.  Insert the stack pointer backup insn
11281		 before the stack pointer decrement code, knowing now
11282		 that the value will survive.  */
11283	      emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
11284			       stack_pointer_backup_loc);
11285	    }
11286	}
11287
11288      /* Set backchain.  */
11289
11290      if (TARGET_BACKCHAIN)
11291	{
11292	  if (cfun_frame_layout.backchain_offset)
11293	    addr = gen_rtx_MEM (Pmode,
11294				plus_constant (Pmode, stack_pointer_rtx,
11295				  cfun_frame_layout.backchain_offset));
11296	  else
11297	    addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11298	  set_mem_alias_set (addr, get_frame_alias_set ());
11299	  insn = emit_insn (gen_move_insn (addr, temp_reg));
11300	}
11301
11302      /* If we support non-call exceptions (e.g. for Java),
11303	 we need to make sure the backchain pointer is set up
11304	 before any possibly trapping memory access.  */
11305      if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
11306	{
11307	  addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
11308	  emit_clobber (addr);
11309	}
11310    }
11311  else if (flag_stack_clash_protection)
11312    dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
11313
11314  /* Save fprs 8 - 15 (64 bit ABI).  */
11315
11316  if (cfun_save_high_fprs_p && next_fpr)
11317    {
11318      /* If the stack might be accessed through a different register
11319	 we have to make sure that the stack pointer decrement is not
11320	 moved below the use of the stack slots.  */
11321      s390_emit_stack_tie ();
11322
11323      insn = emit_insn (gen_add2_insn (temp_reg,
11324				       GEN_INT (cfun_frame_layout.f8_offset)));
11325
11326      offset = 0;
11327
11328      for (i = FPR8_REGNUM; i <= next_fpr; i++)
11329	if (cfun_fpr_save_p (i))
11330	  {
11331	    rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11332				      cfun_frame_layout.frame_size
11333				      + cfun_frame_layout.f8_offset
11334				      + offset);
11335
11336	    insn = save_fpr (temp_reg, offset, i);
11337	    offset += 8;
11338	    RTX_FRAME_RELATED_P (insn) = 1;
11339	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11340			  gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11341				       gen_rtx_REG (DFmode, i)));
11342	  }
11343    }
11344
11345  /* Set frame pointer, if needed.  */
11346
11347  if (frame_pointer_needed)
11348    {
11349      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11350      RTX_FRAME_RELATED_P (insn) = 1;
11351    }
11352
11353  /* Set up got pointer, if needed.  */
11354
11355  if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11356    {
11357      rtx_insn *insns = s390_load_got ();
11358
11359      for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11360	annotate_constant_pool_refs (insn);
11361
11362      emit_insn (insns);
11363    }
11364
11365#if TARGET_TPF != 0
11366  if (TARGET_TPF_PROFILING)
11367    {
11368      /* Generate a BAS instruction to serve as a function entry
11369	 intercept to facilitate the use of tracing algorithms located
11370	 at the branch target.  */
11371      emit_insn (gen_prologue_tpf (
11372		   GEN_INT (s390_tpf_trace_hook_prologue_check),
11373		   GEN_INT (s390_tpf_trace_hook_prologue_target)));
11374
11375      /* Emit a blockage here so that all code lies between the
11376	 profiling mechanisms.  */
11377      emit_insn (gen_blockage ());
11378    }
11379#endif
11380}
11381
11382/* Expand the epilogue into a bunch of separate insns.  */
11383
11384void
11385s390_emit_epilogue (bool sibcall)
11386{
11387  rtx frame_pointer, return_reg = NULL_RTX, cfa_restores = NULL_RTX;
11388  int area_bottom, area_top, offset = 0;
11389  int next_offset;
11390  int i;
11391
11392#if TARGET_TPF != 0
11393  if (TARGET_TPF_PROFILING)
11394    {
11395      /* Generate a BAS instruction to serve as a function entry
11396	 intercept to facilitate the use of tracing algorithms located
11397	 at the branch target.  */
11398
11399      /* Emit a blockage here so that all code lies between the
11400	 profiling mechanisms.  */
11401      emit_insn (gen_blockage ());
11402
11403      emit_insn (gen_epilogue_tpf (
11404		   GEN_INT (s390_tpf_trace_hook_epilogue_check),
11405		   GEN_INT (s390_tpf_trace_hook_epilogue_target)));
11406    }
11407#endif
11408
11409  /* Check whether to use frame or stack pointer for restore.  */
11410
11411  frame_pointer = (frame_pointer_needed
11412		   ? hard_frame_pointer_rtx : stack_pointer_rtx);
11413
11414  s390_frame_area (&area_bottom, &area_top);
11415
11416  /* Check whether we can access the register save area.
11417     If not, increment the frame pointer as required.  */
11418
11419  if (area_top <= area_bottom)
11420    {
11421      /* Nothing to restore.  */
11422    }
11423  else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11424	   && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11425    {
11426      /* Area is in range.  */
11427      offset = cfun_frame_layout.frame_size;
11428    }
11429  else
11430    {
11431      rtx_insn *insn;
11432      rtx frame_off, cfa;
11433
11434      offset = area_bottom < 0 ? -area_bottom : 0;
11435      frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11436
11437      cfa = gen_rtx_SET (frame_pointer,
11438			 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11439      if (DISP_IN_RANGE (INTVAL (frame_off)))
11440	{
11441	  rtx set;
11442
11443	  set = gen_rtx_SET (frame_pointer,
11444			     gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11445	  insn = emit_insn (set);
11446	}
11447      else
11448	{
11449	  if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11450	    frame_off = force_const_mem (Pmode, frame_off);
11451
11452	  insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11453	  annotate_constant_pool_refs (insn);
11454	}
11455      add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11456      RTX_FRAME_RELATED_P (insn) = 1;
11457    }
11458
11459  /* Restore call saved fprs.  */
11460
11461  if (TARGET_64BIT)
11462    {
11463      if (cfun_save_high_fprs_p)
11464	{
11465	  next_offset = cfun_frame_layout.f8_offset;
11466	  for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11467	    {
11468	      if (cfun_fpr_save_p (i))
11469		{
11470		  restore_fpr (frame_pointer,
11471			       offset + next_offset, i);
11472		  cfa_restores
11473		    = alloc_reg_note (REG_CFA_RESTORE,
11474				      gen_rtx_REG (DFmode, i), cfa_restores);
11475		  next_offset += 8;
11476		}
11477	    }
11478	}
11479
11480    }
11481  else
11482    {
11483      next_offset = cfun_frame_layout.f4_offset;
11484      /* f4, f6 */
11485      for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11486	{
11487	  if (cfun_fpr_save_p (i))
11488	    {
11489	      restore_fpr (frame_pointer,
11490			   offset + next_offset, i);
11491	      cfa_restores
11492		= alloc_reg_note (REG_CFA_RESTORE,
11493				  gen_rtx_REG (DFmode, i), cfa_restores);
11494	      next_offset += 8;
11495	    }
11496	  else if (!TARGET_PACKED_STACK)
11497	    next_offset += 8;
11498	}
11499
11500    }
11501
11502  /* Restore call saved gprs.  */
11503
11504  if (cfun_frame_layout.first_restore_gpr != -1)
11505    {
11506      rtx insn, addr;
11507      int i;
11508
11509      /* Check for global registers and save them
11510	 to the stack slot from which they will be restored.  */
11511
11512      for (i = cfun_frame_layout.first_restore_gpr;
11513	   i <= cfun_frame_layout.last_restore_gpr;
11514	   i++)
11515	{
11516	  if (global_not_special_regno_p (i))
11517	    {
11518	      addr = plus_constant (Pmode, frame_pointer,
11519				    offset + cfun_frame_layout.gprs_offset
11520				    + (i - cfun_frame_layout.first_save_gpr_slot)
11521				    * UNITS_PER_LONG);
11522	      addr = gen_rtx_MEM (Pmode, addr);
11523	      set_mem_alias_set (addr, get_frame_alias_set ());
11524	      emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11525	    }
11526	  else
11527	    cfa_restores
11528	      = alloc_reg_note (REG_CFA_RESTORE,
11529				gen_rtx_REG (Pmode, i), cfa_restores);
11530	}
11531
11532      /* Fetch the return address from the stack before the load
11533	 multiple; this helps scheduling.
11534
11535	 Only do this if we already decided that r14 needs to be
11536	 saved to a stack slot. (And not just because r14 happens to
11537	 be in between two GPRs which need saving.)  Otherwise it
11538	 would be difficult to take that decision back in
11539	 s390_optimize_prologue.
11540
11541	 This optimization is only helpful on in-order machines.  */
11542      if (! sibcall
11543	  && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
11544	  && s390_tune <= PROCESSOR_2097_Z10)
11545	{
11546	  int return_regnum = find_unused_clobbered_reg();
11547	  if (!return_regnum
11548	      || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
11549		  && !TARGET_CPU_Z10
11550		  && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM))
11551	    {
11552	      gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4);
11553	      return_regnum = 4;
11554	    }
11555	  return_reg = gen_rtx_REG (Pmode, return_regnum);
11556
11557	  addr = plus_constant (Pmode, frame_pointer,
11558				offset + cfun_frame_layout.gprs_offset
11559				+ (RETURN_REGNUM
11560				   - cfun_frame_layout.first_save_gpr_slot)
11561				* UNITS_PER_LONG);
11562	  addr = gen_rtx_MEM (Pmode, addr);
11563	  set_mem_alias_set (addr, get_frame_alias_set ());
11564	  emit_move_insn (return_reg, addr);
11565
11566	  /* Once we did that optimization we have to make sure
11567	     s390_optimize_prologue does not try to remove the store
11568	     of r14 since we will not be able to find the load issued
11569	     here.  */
11570	  cfun_frame_layout.save_return_addr_p = true;
11571	}
11572
11573      insn = restore_gprs (frame_pointer,
11574			   offset + cfun_frame_layout.gprs_offset
11575			   + (cfun_frame_layout.first_restore_gpr
11576			      - cfun_frame_layout.first_save_gpr_slot)
11577			   * UNITS_PER_LONG,
11578			   cfun_frame_layout.first_restore_gpr,
11579			   cfun_frame_layout.last_restore_gpr);
11580      insn = emit_insn (insn);
11581      REG_NOTES (insn) = cfa_restores;
11582      add_reg_note (insn, REG_CFA_DEF_CFA,
11583		    plus_constant (Pmode, stack_pointer_rtx,
11584				   STACK_POINTER_OFFSET));
11585      RTX_FRAME_RELATED_P (insn) = 1;
11586    }
11587
11588  s390_restore_gprs_from_fprs ();
11589
11590  if (! sibcall)
11591    {
11592      if (!return_reg && !s390_can_use_return_insn ())
11593        /* We planned to emit (return), but we are not allowed to.  */
11594        return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11595
11596      if (return_reg)
11597        /* Emit (return) and (use).  */
11598        emit_jump_insn (gen_return_use (return_reg));
11599      else
11600        /* The fact that RETURN_REGNUM is used is already reflected by
11601           EPILOGUE_USES.  Emit plain (return).  */
11602        emit_jump_insn (gen_return ());
11603    }
11604}
11605
11606/* Implement TARGET_SET_UP_BY_PROLOGUE.  */
11607
11608static void
11609s300_set_up_by_prologue (hard_reg_set_container *regs)
11610{
11611  if (cfun->machine->base_reg
11612      && !call_used_regs[REGNO (cfun->machine->base_reg)])
11613    SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11614}
11615
11616/* -fsplit-stack support.  */
11617
11618/* A SYMBOL_REF for __morestack.  */
11619static GTY(()) rtx morestack_ref;
11620
11621/* When using -fsplit-stack, the allocation routines set a field in
11622   the TCB to the bottom of the stack plus this much space, measured
11623   in bytes.  */
11624
11625#define SPLIT_STACK_AVAILABLE 1024
11626
11627/* Emit the parmblock for __morestack into .rodata section.  It
11628   consists of 3 pointer size entries:
11629   - frame size
11630   - size of stack arguments
11631   - offset between parm block and __morestack return label  */
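/* For a 64-bit target the emitted data therefore looks roughly like:

     .align 8
   .L<parm_block>:
     .quad <frame size>
     .quad <stack argument size>
     .quad .L<call_done>-.L<parm_block>

   with .long and .align 4 used instead on 31-bit targets.  */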
11632
11633void
11634s390_output_split_stack_data (rtx parm_block, rtx call_done,
11635			      rtx frame_size, rtx args_size)
11636{
11637  rtx ops[] = { parm_block, call_done };
11638
11639  switch_to_section (targetm.asm_out.function_rodata_section
11640		     (current_function_decl));
11641
11642  if (TARGET_64BIT)
11643    output_asm_insn (".align\t8", NULL);
11644  else
11645    output_asm_insn (".align\t4", NULL);
11646
11647  (*targetm.asm_out.internal_label) (asm_out_file, "L",
11648				     CODE_LABEL_NUMBER (parm_block));
11649  if (TARGET_64BIT)
11650    {
11651      output_asm_insn (".quad\t%0", &frame_size);
11652      output_asm_insn (".quad\t%0", &args_size);
11653      output_asm_insn (".quad\t%1-%0", ops);
11654    }
11655  else
11656    {
11657      output_asm_insn (".long\t%0", &frame_size);
11658      output_asm_insn (".long\t%0", &args_size);
11659      output_asm_insn (".long\t%1-%0", ops);
11660    }
11661
11662  switch_to_section (current_function_section ());
11663}
11664
11665/* Emit -fsplit-stack prologue, which goes before the regular function
11666   prologue.  */
11667
11668void
11669s390_expand_split_stack_prologue (void)
11670{
11671  rtx r1, guard, cc = NULL;
11672  rtx_insn *insn;
11673  /* Offset from thread pointer to __private_ss.  */
11674  int psso = TARGET_64BIT ? 0x38 : 0x20;
11676  /* Frame size and argument size - the two parameters to __morestack.  */
11677  HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11678  /* Align argument size to 8 bytes - simplifies __morestack code.  */
11679  HOST_WIDE_INT args_size = crtl->args.size >= 0
11680			    ? ((crtl->args.size + 7) & ~7)
11681			    : 0;
11682  /* Label to be called by __morestack.  */
11683  rtx_code_label *call_done = NULL;
11684  rtx_code_label *parm_base = NULL;
11685  rtx tmp;
11686
11687  gcc_assert (flag_split_stack && reload_completed);
11688
11689  r1 = gen_rtx_REG (Pmode, 1);
11690
11691  /* If no stack frame will be allocated, don't do anything.  */
11692  if (!frame_size)
11693    {
11694      if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11695	{
11696	  /* If va_start is used, just use r15.  */
11697	  emit_move_insn (r1,
11698			 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11699				       GEN_INT (STACK_POINTER_OFFSET)));
11700
11701	}
11702      return;
11703    }
11704
11705  if (morestack_ref == NULL_RTX)
11706    {
11707      morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11708      SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11709					   | SYMBOL_FLAG_FUNCTION);
11710    }
11711
11712  if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11713    {
11714      /* If frame_size will fit in an add instruction, do a stack space
11715	 check, and only call __morestack if there's not enough space.  */
11716
11717      /* Get thread pointer.  r1 is the only register we can always destroy - r0
11718	 could contain a static chain (and cannot be used to address memory
11719	 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved.  */
11720      emit_insn (gen_get_thread_pointer (Pmode, r1));
11721      /* Aim at __private_ss.  */
11722      guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11723
11724      /* If less than 1 KiB is used, skip the addition and compare
11725	 directly with __private_ss.  */
11726      if (frame_size > SPLIT_STACK_AVAILABLE)
11727	{
11728	  emit_move_insn (r1, guard);
11729	  if (TARGET_64BIT)
11730	    emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11731	  else
11732	    emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11733	  guard = r1;
11734	}
11735
11736      /* Compare the (maybe adjusted) guard with the stack pointer.  */
11737      cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11738    }
11739
11740  call_done = gen_label_rtx ();
11741  parm_base = gen_label_rtx ();
11742  LABEL_NUSES (parm_base)++;
11743  LABEL_NUSES (call_done)++;
11744
11745  /* %r1 = litbase.  */
11746  insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11747  add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11748  LABEL_NUSES (parm_base)++;
11749
11750  /* Now, we need to call __morestack.  It has very special calling
11751     conventions: it preserves param/return/static chain registers for
11752     calling main function body, and looks for its own parameters at %r1. */
11753  if (cc != NULL)
11754    tmp = gen_split_stack_cond_call (Pmode,
11755				     morestack_ref,
11756				     parm_base,
11757				     call_done,
11758				     GEN_INT (frame_size),
11759				     GEN_INT (args_size),
11760				     cc);
11761  else
11762    tmp = gen_split_stack_call (Pmode,
11763				morestack_ref,
11764				parm_base,
11765				call_done,
11766				GEN_INT (frame_size),
11767				GEN_INT (args_size));
11768
11769  insn = emit_jump_insn (tmp);
11770  JUMP_LABEL (insn) = call_done;
11771  add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11772  add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11773
11774  if (cc != NULL)
11775    {
11776      /* Mark the jump as very unlikely to be taken.  */
11777      add_reg_br_prob_note (insn,
11778			    profile_probability::very_unlikely ());
11779
11780      if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11781	{
11782	  /* If va_start is used, and __morestack was not called, just use
11783	     r15.  */
11784	  emit_move_insn (r1,
11785			 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11786				       GEN_INT (STACK_POINTER_OFFSET)));
11787	}
11788    }
11789  else
11790    {
11791      emit_barrier ();
11792    }
11793
11794  /* __morestack will call us here.  */
11795
11796  emit_label (call_done);
11797}
11798
11799/* We may have to tell the dataflow pass that the split stack prologue
11800   is initializing a register.  */
11801
11802static void
11803s390_live_on_entry (bitmap regs)
11804{
11805  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11806    {
11807      gcc_assert (flag_split_stack);
11808      bitmap_set_bit (regs, 1);
11809    }
11810}
11811
11812/* Return true if the function can use simple_return to return outside
11813   of a shrink-wrapped region.  At present shrink-wrapping is supported
11814   in all cases.  */
11815
11816bool
11817s390_can_use_simple_return_insn (void)
11818{
11819  return true;
11820}
11821
11822/* Return true if the epilogue is guaranteed to contain only a return
11823   instruction and if a direct return can therefore be used instead.
11824   One of the main advantages of using direct return instructions
11825   is that we can then use conditional returns.  */
11826
11827bool
11828s390_can_use_return_insn (void)
11829{
11830  int i;
11831
11832  if (!reload_completed)
11833    return false;
11834
11835  if (crtl->profile)
11836    return false;
11837
11838  if (TARGET_TPF_PROFILING)
11839    return false;
11840
11841  for (i = 0; i < 16; i++)
11842    if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
11843      return false;
11844
11845  /* For 31 bit this is not covered by the frame_size check below
11846     since f4, f6 are saved in the register save area without needing
11847     additional stack space.  */
11848  if (!TARGET_64BIT
11849      && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
11850    return false;
11851
11852  if (cfun->machine->base_reg
11853      && !call_used_regs[REGNO (cfun->machine->base_reg)])
11854    return false;
11855
11856  return cfun_frame_layout.frame_size == 0;
11857}
11858
11859/* The VX ABI differs for vararg functions.  Therefore we need the
11860   prototype of the callee to be available when passing vector type
11861   values.  */
11862static const char *
11863s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
11864{
11865  return ((TARGET_VX_ABI
11866	   && typelist == 0
11867	   && VECTOR_TYPE_P (TREE_TYPE (val))
11868	   && (funcdecl == NULL_TREE
11869	       || (TREE_CODE (funcdecl) == FUNCTION_DECL
11870		   && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
11871	  ? N_("vector argument passed to unprototyped function")
11872	  : NULL);
11873}
11874
11875
11876/* Return the size in bytes of a function argument of
11877   type TYPE and/or mode MODE.  At least one of TYPE or
11878   MODE must be specified.  */
11879
11880static int
11881s390_function_arg_size (machine_mode mode, const_tree type)
11882{
11883  if (type)
11884    return int_size_in_bytes (type);
11885
11886  /* No type info available for some library calls ...  */
11887  if (mode != BLKmode)
11888    return GET_MODE_SIZE (mode);
11889
11890  /* If we have neither type nor mode, abort.  */
11891  gcc_unreachable ();
11892}
11893
11894/* Return true if a function argument of type TYPE and mode MODE
11895   is to be passed in a vector register, if available.  */
11896
11897bool
11898s390_function_arg_vector (machine_mode mode, const_tree type)
11899{
11900  if (!TARGET_VX_ABI)
11901    return false;
11902
11903  if (s390_function_arg_size (mode, type) > 16)
11904    return false;
11905
11906  /* No type info available for some library calls ...  */
11907  if (!type)
11908    return VECTOR_MODE_P (mode);
11909
11910  /* The ABI says that record types with a single member are treated
11911     just like that member would be.  */
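  /* For instance, a struct whose only field is a 16-byte vector is
     passed just like that vector itself; the loop below strips such
     single-member wrappers, possibly nested.  */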
11912  int empty_base_seen = 0;
11913  const_tree orig_type = type;
11914  while (TREE_CODE (type) == RECORD_TYPE)
11915    {
11916      tree field, single = NULL_TREE;
11917
11918      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11919	{
11920	  if (TREE_CODE (field) != FIELD_DECL)
11921	    continue;
11922
11923	  if (DECL_FIELD_ABI_IGNORED (field))
11924	    {
11925	      if (lookup_attribute ("no_unique_address",
11926				    DECL_ATTRIBUTES (field)))
11927		empty_base_seen |= 2;
11928	      else
11929		empty_base_seen |= 1;
11930	      continue;
11931	    }
11932
11933	  if (single == NULL_TREE)
11934	    single = TREE_TYPE (field);
11935	  else
11936	    return false;
11937	}
11938
11939      if (single == NULL_TREE)
11940	return false;
11941      else
11942	{
11943	  /* If the field declaration adds extra bytes due to
11944	     e.g. padding, this is not accepted as a vector type.  */
11945	  if (int_size_in_bytes (single) <= 0
11946	      || int_size_in_bytes (single) != int_size_in_bytes (type))
11947	    return false;
11948	  type = single;
11949	}
11950    }
11951
11952  if (!VECTOR_TYPE_P (type))
11953    return false;
11954
11955  if (warn_psabi && empty_base_seen)
11956    {
11957      static unsigned last_reported_type_uid;
11958      unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
11959      if (uid != last_reported_type_uid)
11960	{
11961	  const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
11962	  last_reported_type_uid = uid;
11963	  if (empty_base_seen & 1)
11964	    inform (input_location,
11965		    "parameter passing for argument of type %qT when C++17 "
11966		    "is enabled changed to match C++14 %{in GCC 10.1%}",
11967		    orig_type, url);
11968	  else
11969	    inform (input_location,
11970		    "parameter passing for argument of type %qT with "
11971		    "%<[[no_unique_address]]%> members changed "
11972		    "%{in GCC 10.1%}", orig_type, url);
11973	}
11974    }
11975  return true;
11976}
11977
11978/* Return true if a function argument of type TYPE and mode MODE
11979   is to be passed in a floating-point register, if available.  */
11980
11981static bool
11982s390_function_arg_float (machine_mode mode, const_tree type)
11983{
11984  if (s390_function_arg_size (mode, type) > 8)
11985    return false;
11986
11987  /* Soft-float changes the ABI: no floating-point registers are used.  */
11988  if (TARGET_SOFT_FLOAT)
11989    return false;
11990
11991  /* No type info available for some library calls ...  */
11992  if (!type)
11993    return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
11994
11995  /* The ABI says that record types with a single member are treated
11996     just like that member would be.  */
11997  int empty_base_seen = 0;
11998  const_tree orig_type = type;
11999  while (TREE_CODE (type) == RECORD_TYPE)
12000    {
12001      tree field, single = NULL_TREE;
12002
12003      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
12004	{
12005	  if (TREE_CODE (field) != FIELD_DECL)
12006	    continue;
12007	  if (DECL_FIELD_ABI_IGNORED (field))
12008	    {
12009	      if (lookup_attribute ("no_unique_address",
12010				    DECL_ATTRIBUTES (field)))
12011		empty_base_seen |= 2;
12012	      else
12013		empty_base_seen |= 1;
12014	      continue;
12015	    }
12016
12017	  if (single == NULL_TREE)
12018	    single = TREE_TYPE (field);
12019	  else
12020	    return false;
12021	}
12022
12023      if (single == NULL_TREE)
12024	return false;
12025      else
12026	type = single;
12027    }
12028
12029  if (TREE_CODE (type) != REAL_TYPE)
12030    return false;
12031
12032  if (warn_psabi && empty_base_seen)
12033    {
12034      static unsigned last_reported_type_uid;
12035      unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
12036      if (uid != last_reported_type_uid)
12037	{
12038	  const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
12039	  last_reported_type_uid = uid;
12040	  if (empty_base_seen & 1)
12041	    inform (input_location,
12042		    "parameter passing for argument of type %qT when C++17 "
12043		    "is enabled changed to match C++14 %{in GCC 10.1%}",
12044		    orig_type, url);
12045	  else
12046	    inform (input_location,
12047		    "parameter passing for argument of type %qT with "
12048		    "%<[[no_unique_address]]%> members changed "
12049		    "%{in GCC 10.1%}", orig_type, url);
12050	}
12051    }
12052
12053  return true;
12054}
12055
12056/* Return true if a function argument of type TYPE and mode MODE
12057   is to be passed in an integer register, or a pair of integer
12058   registers, if available.  */
12059
12060static bool
12061s390_function_arg_integer (machine_mode mode, const_tree type)
12062{
12063  int size = s390_function_arg_size (mode, type);
12064  if (size > 8)
12065    return false;
12066
12067  /* No type info available for some library calls ...  */
12068  if (!type)
12069    return GET_MODE_CLASS (mode) == MODE_INT
12070	   || (TARGET_SOFT_FLOAT &&  SCALAR_FLOAT_MODE_P (mode));
12071
12072  /* We accept small integral (and similar) types.  */
12073  if (INTEGRAL_TYPE_P (type)
12074      || POINTER_TYPE_P (type)
12075      || TREE_CODE (type) == NULLPTR_TYPE
12076      || TREE_CODE (type) == OFFSET_TYPE
12077      || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
12078    return true;
12079
12080  /* We also accept structs of size 1, 2, 4, 8 that are not
12081     passed in floating-point registers.  */
12082  if (AGGREGATE_TYPE_P (type)
12083      && exact_log2 (size) >= 0
12084      && !s390_function_arg_float (mode, type))
12085    return true;
12086
12087  return false;
12088}
12089
12090/* Return 1 if a function argument ARG is to be passed by reference.
12091   The ABI specifies that only structures of size 1, 2, 4, or 8 bytes
12092   are passed by value, all other structures (and complex numbers) are
12093   passed by reference.  */
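/* For example, a struct of 3 or 6 bytes is passed by reference (its
   size is not a power of 2), while a struct of 4 bytes is passed by
   value; complex numbers and vector types not covered by the vector
   ABI are always passed by reference.  */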
12094
12095static bool
12096s390_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
12097{
12098  int size = s390_function_arg_size (arg.mode, arg.type);
12099
12100  if (s390_function_arg_vector (arg.mode, arg.type))
12101    return false;
12102
12103  if (size > 8)
12104    return true;
12105
12106  if (tree type = arg.type)
12107    {
12108      if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
12109	return true;
12110
12111      if (TREE_CODE (type) == COMPLEX_TYPE
12112	  || TREE_CODE (type) == VECTOR_TYPE)
12113	return true;
12114    }
12115
12116  return false;
12117}
12118
12119/* Update the data in CUM to advance over argument ARG.  */
12120
12121static void
12122s390_function_arg_advance (cumulative_args_t cum_v,
12123			   const function_arg_info &arg)
12124{
12125  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12126
12127  if (s390_function_arg_vector (arg.mode, arg.type))
12128    {
12129      /* We are called for unnamed vector stdarg arguments which are
12130	 passed on the stack.  In this case this hook does not have to
12131	 do anything since stack arguments are tracked by common
12132	 code.  */
12133      if (!arg.named)
12134	return;
12135      cum->vrs += 1;
12136    }
12137  else if (s390_function_arg_float (arg.mode, arg.type))
12138    {
12139      cum->fprs += 1;
12140    }
12141  else if (s390_function_arg_integer (arg.mode, arg.type))
12142    {
12143      int size = s390_function_arg_size (arg.mode, arg.type);
12144      cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
12145    }
12146  else
12147    gcc_unreachable ();
12148}
12149
12150/* Define where to put the arguments to a function.
12151   Value is zero to push the argument on the stack,
12152   or a hard register in which to store the argument.
12153
12154   CUM is a variable of type CUMULATIVE_ARGS which gives info about
12155    the preceding args and about the function being called.
12156   ARG is a description of the argument.
12157
12158   On S/390, we use general purpose registers 2 through 6 to
12159   pass integer, pointer, and certain structure arguments, and
12160   floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
12161   to pass floating point arguments.  All remaining arguments
12162   are pushed to the stack.  */
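/* As an example, for a call f (long a, long b, double d) on a 64-bit
   target the first two arguments end up in %r2 and %r3 and the double
   in %f0, assuming no earlier arguments have consumed those
   registers.  */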
12163
12164static rtx
12165s390_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
12166{
12167  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12168
12169  if (!arg.named)
12170    s390_check_type_for_vector_abi (arg.type, true, false);
12171
12172  if (s390_function_arg_vector (arg.mode, arg.type))
12173    {
12174      /* Vector arguments being part of the ellipsis are passed on the
12175	 stack.  */
12176      if (!arg.named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
12177	return NULL_RTX;
12178
12179      return gen_rtx_REG (arg.mode, cum->vrs + FIRST_VEC_ARG_REGNO);
12180    }
12181  else if (s390_function_arg_float (arg.mode, arg.type))
12182    {
12183      if (cum->fprs + 1 > FP_ARG_NUM_REG)
12184	return NULL_RTX;
12185      else
12186	return gen_rtx_REG (arg.mode, cum->fprs + 16);
12187    }
12188  else if (s390_function_arg_integer (arg.mode, arg.type))
12189    {
12190      int size = s390_function_arg_size (arg.mode, arg.type);
12191      int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12192
12193      if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
12194	return NULL_RTX;
12195      else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
12196	return gen_rtx_REG (arg.mode, cum->gprs + 2);
12197      else if (n_gprs == 2)
12198	{
12199	  rtvec p = rtvec_alloc (2);
12200
12201	  RTVEC_ELT (p, 0)
12202	    = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
12203					 const0_rtx);
12204	  RTVEC_ELT (p, 1)
12205	    = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
12206					 GEN_INT (4));
12207
12208	  return gen_rtx_PARALLEL (arg.mode, p);
12209	}
12210    }
12211
12212  /* After the real arguments, expand_call calls us once again with an
12213     end marker.  Whatever we return here is passed as operand 2 to the
12214     call expanders.
12215
12216     We don't need this feature ...  */
12217  else if (arg.end_marker_p ())
12218    return const0_rtx;
12219
12220  gcc_unreachable ();
12221}
12222
/* Implement TARGET_FUNCTION_ARG_PADDING.  Vector arguments are
   left-justified when placed on the stack during parameter passing.  */
12225
12226static pad_direction
12227s390_function_arg_padding (machine_mode mode, const_tree type)
12228{
12229  if (s390_function_arg_vector (mode, type))
12230    return PAD_UPWARD;
12231
12232  return default_function_arg_padding (mode, type);
12233}
12234
12235/* Return true if return values of type TYPE should be returned
12236   in a memory buffer whose address is passed by the caller as
12237   hidden first argument.  */
12238
12239static bool
12240s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
12241{
12242  /* We accept small integral (and similar) types.  */
12243  if (INTEGRAL_TYPE_P (type)
12244      || POINTER_TYPE_P (type)
12245      || TREE_CODE (type) == OFFSET_TYPE
12246      || TREE_CODE (type) == REAL_TYPE)
12247    return int_size_in_bytes (type) > 8;
12248
12249  /* vector types which fit into a VR.  */
12250  if (TARGET_VX_ABI
12251      && VECTOR_TYPE_P (type)
12252      && int_size_in_bytes (type) <= 16)
12253    return false;
12254
12255  /* Aggregates and similar constructs are always returned
12256     in memory.  */
12257  if (AGGREGATE_TYPE_P (type)
12258      || TREE_CODE (type) == COMPLEX_TYPE
12259      || VECTOR_TYPE_P (type))
12260    return true;
12261
12262  /* ??? We get called on all sorts of random stuff from
12263     aggregate_value_p.  We can't abort, but it's not clear
12264     what's safe to return.  Pretend it's a struct I guess.  */
12265  return true;
12266}
12267
12268/* Function arguments and return values are promoted to word size.  */
12269
12270static machine_mode
12271s390_promote_function_mode (const_tree type, machine_mode mode,
12272			    int *punsignedp,
12273			    const_tree fntype ATTRIBUTE_UNUSED,
12274			    int for_return ATTRIBUTE_UNUSED)
12275{
12276  if (INTEGRAL_MODE_P (mode)
12277      && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
12278    {
12279      if (type != NULL_TREE && POINTER_TYPE_P (type))
12280	*punsignedp = POINTERS_EXTEND_UNSIGNED;
12281      return Pmode;
12282    }
12283
12284  return mode;
12285}
12286
12287/* Define where to return a (scalar) value of type RET_TYPE.
12288   If RET_TYPE is null, define where to return a (scalar)
12289   value of mode MODE from a libcall.  */
12290
12291static rtx
12292s390_function_and_libcall_value (machine_mode mode,
12293				 const_tree ret_type,
12294				 const_tree fntype_or_decl,
12295				 bool outgoing ATTRIBUTE_UNUSED)
12296{
12297  /* For vector return types it is important to use the RET_TYPE
12298     argument whenever available since the middle-end might have
12299     changed the mode to a scalar mode.  */
12300  bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
12301			    || (!ret_type && VECTOR_MODE_P (mode)));
12302
12303  /* For normal functions perform the promotion as
12304     promote_function_mode would do.  */
12305  if (ret_type)
12306    {
12307      int unsignedp = TYPE_UNSIGNED (ret_type);
12308      mode = promote_function_mode (ret_type, mode, &unsignedp,
12309				    fntype_or_decl, 1);
12310    }
12311
12312  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
12313	      || SCALAR_FLOAT_MODE_P (mode)
12314	      || (TARGET_VX_ABI && vector_ret_type_p));
12315  gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
12316
12317  if (TARGET_VX_ABI && vector_ret_type_p)
12318    return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
12319  else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
12320    return gen_rtx_REG (mode, 16);
12321  else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
12322	   || UNITS_PER_LONG == UNITS_PER_WORD)
12323    return gen_rtx_REG (mode, 2);
12324  else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
12325    {
12326      /* This case is triggered when returning a 64 bit value with
12327	 -m31 -mzarch.  Although the value would fit into a single
12328	 register it has to be forced into a 32 bit register pair in
12329	 order to match the ABI.  */
12330      rtvec p = rtvec_alloc (2);
12331
12332      RTVEC_ELT (p, 0)
12333	= gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
12334      RTVEC_ELT (p, 1)
12335	= gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
12336
12337      return gen_rtx_PARALLEL (mode, p);
12338    }
12339
12340  gcc_unreachable ();
12341}
12342
12343/* Define where to return a scalar return value of type RET_TYPE.  */
12344
12345static rtx
12346s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
12347		     bool outgoing)
12348{
12349  return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
12350					  fn_decl_or_type, outgoing);
12351}
12352
12353/* Define where to return a scalar libcall return value of mode
12354   MODE.  */
12355
12356static rtx
12357s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
12358{
12359  return s390_function_and_libcall_value (mode, NULL_TREE,
12360					  NULL_TREE, true);
12361}
12362
12363
12364/* Create and return the va_list datatype.
12365
12366   On S/390, va_list is an array type equivalent to
12367
12368      typedef struct __va_list_tag
12369	{
12370	    long __gpr;
12371	    long __fpr;
12372	    void *__overflow_arg_area;
12373	    void *__reg_save_area;
12374	} va_list[1];
12375
12376   where __gpr and __fpr hold the number of general purpose
12377   or floating point arguments used up to now, respectively,
12378   __overflow_arg_area points to the stack location of the
12379   next argument passed on the stack, and __reg_save_area
12380   always points to the start of the register area in the
12381   call frame of the current function.  The function prologue
12382   saves all registers used for argument passing into this
12383   area if the function uses variable arguments.  */
12384
12385static tree
12386s390_build_builtin_va_list (void)
12387{
12388  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12389
12390  record = lang_hooks.types.make_type (RECORD_TYPE);
12391
12392  type_decl =
12393    build_decl (BUILTINS_LOCATION,
12394		TYPE_DECL, get_identifier ("__va_list_tag"), record);
12395
12396  f_gpr = build_decl (BUILTINS_LOCATION,
12397		      FIELD_DECL, get_identifier ("__gpr"),
12398		      long_integer_type_node);
12399  f_fpr = build_decl (BUILTINS_LOCATION,
12400		      FIELD_DECL, get_identifier ("__fpr"),
12401		      long_integer_type_node);
12402  f_ovf = build_decl (BUILTINS_LOCATION,
12403		      FIELD_DECL, get_identifier ("__overflow_arg_area"),
12404		      ptr_type_node);
12405  f_sav = build_decl (BUILTINS_LOCATION,
12406		      FIELD_DECL, get_identifier ("__reg_save_area"),
12407		      ptr_type_node);
12408
12409  va_list_gpr_counter_field = f_gpr;
12410  va_list_fpr_counter_field = f_fpr;
12411
12412  DECL_FIELD_CONTEXT (f_gpr) = record;
12413  DECL_FIELD_CONTEXT (f_fpr) = record;
12414  DECL_FIELD_CONTEXT (f_ovf) = record;
12415  DECL_FIELD_CONTEXT (f_sav) = record;
12416
12417  TYPE_STUB_DECL (record) = type_decl;
12418  TYPE_NAME (record) = type_decl;
12419  TYPE_FIELDS (record) = f_gpr;
12420  DECL_CHAIN (f_gpr) = f_fpr;
12421  DECL_CHAIN (f_fpr) = f_ovf;
12422  DECL_CHAIN (f_ovf) = f_sav;
12423
12424  layout_type (record);
12425
12426  /* The correct type is an array type of one element.  */
12427  return build_array_type (record, build_index_type (size_zero_node));
12428}
12429
/* Implement va_start by filling the va_list structure VALIST.
   NEXTARG points to the first anonymous stack argument.
12433
12434   The following global variables are used to initialize
12435   the va_list structure:
12436
12437     crtl->args.info:
12438       holds number of gprs and fprs used for named arguments.
12439     crtl->args.arg_offset_rtx:
12440       holds the offset of the first anonymous stack argument
12441       (relative to the virtual arg pointer).  */
12442
12443static void
12444s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12445{
12446  HOST_WIDE_INT n_gpr, n_fpr;
12447  int off;
12448  tree f_gpr, f_fpr, f_ovf, f_sav;
12449  tree gpr, fpr, ovf, sav, t;
12450
12451  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12452  f_fpr = DECL_CHAIN (f_gpr);
12453  f_ovf = DECL_CHAIN (f_fpr);
12454  f_sav = DECL_CHAIN (f_ovf);
12455
12456  valist = build_simple_mem_ref (valist);
12457  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12458  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12459  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12460  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12461
12462  /* Count number of gp and fp argument registers used.  */
12463
12464  n_gpr = crtl->args.info.gprs;
12465  n_fpr = crtl->args.info.fprs;
12466
12467  if (cfun->va_list_gpr_size)
12468    {
12469      t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12470		  build_int_cst (NULL_TREE, n_gpr));
12471      TREE_SIDE_EFFECTS (t) = 1;
12472      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12473    }
12474
12475  if (cfun->va_list_fpr_size)
12476    {
12477      t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12478		  build_int_cst (NULL_TREE, n_fpr));
12479      TREE_SIDE_EFFECTS (t) = 1;
12480      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12481    }
12482
12483  if (flag_split_stack
12484     && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12485	 == NULL)
12486     && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12487    {
12488      rtx reg;
12489      rtx_insn *seq;
12490
12491      reg = gen_reg_rtx (Pmode);
12492      cfun->machine->split_stack_varargs_pointer = reg;
12493
12494      start_sequence ();
12495      emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12496      seq = get_insns ();
12497      end_sequence ();
12498
12499      push_topmost_sequence ();
12500      emit_insn_after (seq, entry_of_function ());
12501      pop_topmost_sequence ();
12502    }
12503
12504  /* Find the overflow area.
12505     FIXME: This currently is too pessimistic when the vector ABI is
12506     enabled.  In that case we *always* set up the overflow area
12507     pointer.  */
12508  if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12509      || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12510      || TARGET_VX_ABI)
12511    {
12512      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12513	t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12514      else
12515	t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12516
12517      off = INTVAL (crtl->args.arg_offset_rtx);
12518      off = off < 0 ? 0 : off;
12519      if (TARGET_DEBUG_ARG)
12520	fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12521		 (int)n_gpr, (int)n_fpr, off);
12522
12523      t = fold_build_pointer_plus_hwi (t, off);
12524
12525      t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12526      TREE_SIDE_EFFECTS (t) = 1;
12527      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12528    }
12529
12530  /* Find the register save area.  */
12531  if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12532      || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12533    {
12534      t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12535      t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12536
12537      t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12538      TREE_SIDE_EFFECTS (t) = 1;
12539      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12540    }
12541}
12542
12543/* Implement va_arg by updating the va_list structure
12544   VALIST as required to retrieve an argument of type
12545   TYPE, and returning that argument.
12546
12547   Generates code equivalent to:
12548
12549   if (integral value) {
     if (size <= 4 && args.gpr < 5 ||
	 size > 4 && args.gpr < 4)
12552       ret = args.reg_save_area[args.gpr+8]
12553     else
12554       ret = *args.overflow_arg_area++;
12555   } else if (vector value) {
12556       ret = *args.overflow_arg_area;
12557       args.overflow_arg_area += size / 8;
12558   } else if (float value) {
     if (args.fpr < 2)
12560       ret = args.reg_save_area[args.fpr+64]
12561     else
12562       ret = *args.overflow_arg_area++;
12563   } else if (aggregate value) {
12564     if (args.gpr < 5)
12565       ret = *args.reg_save_area[args.gpr]
12566     else
12567       ret = **args.overflow_arg_area++;
12568   } */
12569
12570static tree
12571s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12572		      gimple_seq *post_p ATTRIBUTE_UNUSED)
12573{
12574  tree f_gpr, f_fpr, f_ovf, f_sav;
12575  tree gpr, fpr, ovf, sav, reg, t, u;
12576  int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12577  tree lab_false, lab_over = NULL_TREE;
12578  tree addr = create_tmp_var (ptr_type_node, "addr");
12579  bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12580			a stack slot.  */
12581
12582  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12583  f_fpr = DECL_CHAIN (f_gpr);
12584  f_ovf = DECL_CHAIN (f_fpr);
12585  f_sav = DECL_CHAIN (f_ovf);
12586
12587  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12588  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12589  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12590
12591  /* The tree for args* cannot be shared between gpr/fpr and ovf since
12592     both appear on a lhs.  */
12593  valist = unshare_expr (valist);
12594  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12595
12596  size = int_size_in_bytes (type);
12597
12598  s390_check_type_for_vector_abi (type, true, false);
12599
12600  if (pass_va_arg_by_reference (type))
12601    {
12602      if (TARGET_DEBUG_ARG)
12603	{
12604	  fprintf (stderr, "va_arg: aggregate type");
12605	  debug_tree (type);
12606	}
12607
12608      /* Aggregates are passed by reference.  */
12609      indirect_p = 1;
12610      reg = gpr;
12611      n_reg = 1;
12612
      /* kernel stack layout on 31 bit: It is assumed here that no padding
	 will be added by s390_frame_info because for va_args an even
	 number of GPRs always has to be saved (r15-r2 = 14 regs).  */
12616      sav_ofs = 2 * UNITS_PER_LONG;
12617      sav_scale = UNITS_PER_LONG;
12618      size = UNITS_PER_LONG;
12619      max_reg = GP_ARG_NUM_REG - n_reg;
12620      left_align_p = false;
12621    }
12622  else if (s390_function_arg_vector (TYPE_MODE (type), type))
12623    {
12624      if (TARGET_DEBUG_ARG)
12625	{
12626	  fprintf (stderr, "va_arg: vector type");
12627	  debug_tree (type);
12628	}
12629
12630      indirect_p = 0;
12631      reg = NULL_TREE;
12632      n_reg = 0;
12633      sav_ofs = 0;
12634      sav_scale = 8;
12635      max_reg = 0;
12636      left_align_p = true;
12637    }
12638  else if (s390_function_arg_float (TYPE_MODE (type), type))
12639    {
12640      if (TARGET_DEBUG_ARG)
12641	{
12642	  fprintf (stderr, "va_arg: float type");
12643	  debug_tree (type);
12644	}
12645
12646      /* FP args go in FP registers, if present.  */
12647      indirect_p = 0;
12648      reg = fpr;
12649      n_reg = 1;
12650      sav_ofs = 16 * UNITS_PER_LONG;
12651      sav_scale = 8;
12652      max_reg = FP_ARG_NUM_REG - n_reg;
12653      left_align_p = false;
12654    }
12655  else
12656    {
12657      if (TARGET_DEBUG_ARG)
12658	{
12659	  fprintf (stderr, "va_arg: other type");
12660	  debug_tree (type);
12661	}
12662
12663      /* Otherwise into GP registers.  */
12664      indirect_p = 0;
12665      reg = gpr;
12666      n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12667
      /* kernel stack layout on 31 bit: It is assumed here that no padding
	 will be added by s390_frame_info because for va_args an even
	 number of GPRs always has to be saved (r15-r2 = 14 regs).  */
12671      sav_ofs = 2 * UNITS_PER_LONG;
12672
12673      if (size < UNITS_PER_LONG)
12674	sav_ofs += UNITS_PER_LONG - size;
12675
12676      sav_scale = UNITS_PER_LONG;
12677      max_reg = GP_ARG_NUM_REG - n_reg;
12678      left_align_p = false;
12679    }
12680
12681  /* Pull the value out of the saved registers ...  */
12682
12683  if (reg != NULL_TREE)
12684    {
12685      /*
12686	if (reg > ((typeof (reg))max_reg))
12687	  goto lab_false;
12688
	addr = sav + sav_ofs + reg * sav_scale;
12690
12691	goto lab_over;
12692
12693	lab_false:
12694      */
12695
12696      lab_false = create_artificial_label (UNKNOWN_LOCATION);
12697      lab_over = create_artificial_label (UNKNOWN_LOCATION);
12698
12699      t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12700      t = build2 (GT_EXPR, boolean_type_node, reg, t);
12701      u = build1 (GOTO_EXPR, void_type_node, lab_false);
12702      t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12703      gimplify_and_add (t, pre_p);
12704
12705      t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12706      u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12707		  fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12708      t = fold_build_pointer_plus (t, u);
12709
12710      gimplify_assign (addr, t, pre_p);
12711
12712      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12713
12714      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12715    }
12716
12717  /* ... Otherwise out of the overflow area.  */
12718
12719  t = ovf;
12720  if (size < UNITS_PER_LONG && !left_align_p)
12721    t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12722
12723  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12724
12725  gimplify_assign (addr, t, pre_p);
12726
12727  if (size < UNITS_PER_LONG && left_align_p)
12728    t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12729  else
12730    t = fold_build_pointer_plus_hwi (t, size);
12731
12732  gimplify_assign (ovf, t, pre_p);
12733
12734  if (reg != NULL_TREE)
12735    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12736
12737
12738  /* Increment register save count.  */
12739
12740  if (n_reg > 0)
12741    {
12742      u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12743		  fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12744      gimplify_and_add (u, pre_p);
12745    }
12746
12747  if (indirect_p)
12748    {
12749      t = build_pointer_type_for_mode (build_pointer_type (type),
12750				       ptr_mode, true);
12751      addr = fold_convert (t, addr);
12752      addr = build_va_arg_indirect_ref (addr);
12753    }
12754  else
12755    {
12756      t = build_pointer_type_for_mode (type, ptr_mode, true);
12757      addr = fold_convert (t, addr);
12758    }
12759
12760  return build_va_arg_indirect_ref (addr);
12761}
12762
12763/* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12764   expanders.
12765   DEST  - Register location where CC will be stored.
   TDB   - Pointer to a 256 byte area where to store the transaction
	   diagnostic block.  NULL if TDB is not needed.
   RETRY - Retry count value.  If non-NULL a retry loop for CC2
	   is emitted.
12770   CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12771		    of the tbegin instruction pattern.  */
12772
12773void
12774s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12775{
12776  rtx retry_plus_two = gen_reg_rtx (SImode);
12777  rtx retry_reg = gen_reg_rtx (SImode);
12778  rtx_code_label *retry_label = NULL;
12779
12780  if (retry != NULL_RTX)
12781    {
12782      emit_move_insn (retry_reg, retry);
12783      emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12784      emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12785      retry_label = gen_label_rtx ();
12786      emit_label (retry_label);
12787    }
12788
12789  if (clobber_fprs_p)
12790    {
12791      if (TARGET_VX)
12792	emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12793				     tdb));
12794      else
12795	emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12796				 tdb));
12797    }
12798  else
12799    emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12800				     tdb));
12801
12802  emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12803					gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12804								   CC_REGNUM)),
12805					UNSPEC_CC_TO_INT));
12806  if (retry != NULL_RTX)
12807    {
12808      const int CC0 = 1 << 3;
12809      const int CC1 = 1 << 2;
12810      const int CC3 = 1 << 0;
12811      rtx jump;
12812      rtx count = gen_reg_rtx (SImode);
12813      rtx_code_label *leave_label = gen_label_rtx ();
12814
12815      /* Exit for success and permanent failures.  */
12816      jump = s390_emit_jump (leave_label,
12817			     gen_rtx_EQ (VOIDmode,
12818			       gen_rtx_REG (CCRAWmode, CC_REGNUM),
12819			       gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12820      LABEL_NUSES (leave_label) = 1;
12821
12822      /* CC2 - transient failure. Perform retry with ppa.  */
12823      emit_move_insn (count, retry_plus_two);
12824      emit_insn (gen_subsi3 (count, count, retry_reg));
12825      emit_insn (gen_tx_assist (count));
12826      jump = emit_jump_insn (gen_doloop_si64 (retry_label,
12827					      retry_reg,
12828					      retry_reg));
12829      JUMP_LABEL (jump) = retry_label;
12830      LABEL_NUSES (retry_label) = 1;
12831      emit_label (leave_label);
12832    }
12833}
12834
12835
12836/* Return the decl for the target specific builtin with the function
12837   code FCODE.  */
12838
12839static tree
12840s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
12841{
12842  if (fcode >= S390_BUILTIN_MAX)
12843    return error_mark_node;
12844
12845  return s390_builtin_decls[fcode];
12846}
12847
12848/* We call mcount before the function prologue.  So a profiled leaf
12849   function should stay a leaf function.  */
12850
12851static bool
12852s390_keep_leaf_when_profiled ()
12853{
12854  return true;
12855}
12856
12857/* Output assembly code for the trampoline template to
12858   stdio stream FILE.
12859
12860   On S/390, we use gpr 1 internally in the trampoline code;
12861   gpr 0 is used to hold the static chain.  */
12862
12863static void
12864s390_asm_trampoline_template (FILE *file)
12865{
12866  rtx op[2];
12867  op[0] = gen_rtx_REG (Pmode, 0);
12868  op[1] = gen_rtx_REG (Pmode, 1);
12869
12870  if (TARGET_64BIT)
12871    {
12872      output_asm_insn ("basr\t%1,0", op);         /* 2 byte */
12873      output_asm_insn ("lmg\t%0,%1,14(%1)", op);  /* 6 byte */
12874      output_asm_insn ("br\t%1", op);             /* 2 byte */
12875      ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
12876    }
12877  else
12878    {
12879      output_asm_insn ("basr\t%1,0", op);         /* 2 byte */
12880      output_asm_insn ("lm\t%0,%1,6(%1)", op);    /* 4 byte */
12881      output_asm_insn ("br\t%1", op);             /* 2 byte */
12882      ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
12883    }
12884}
12885
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the declaration of the target function; FNADDR, derived
   from its DECL_RTL, is the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
12889
12890static void
12891s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
12892{
12893  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
12894  rtx mem;
12895
12896  emit_block_move (m_tramp, assemble_trampoline_template (),
12897		   GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
12898
12899  mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
12900  emit_move_insn (mem, cxt);
12901  mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
12902  emit_move_insn (mem, fnaddr);
12903}
12904
12905static void
12906output_asm_nops (const char *user, int hw)
12907{
12908  asm_fprintf (asm_out_file, "\t# NOPs for %s (%d halfwords)\n", user, hw);
12909  while (hw > 0)
12910    {
12911      if (hw >= 3)
12912	{
12913	  output_asm_insn ("brcl\t0,0", NULL);
12914	  hw -= 3;
12915	}
12916      else if (hw >= 2)
12917	{
12918	  output_asm_insn ("bc\t0,0", NULL);
12919	  hw -= 2;
12920	}
12921      else
12922	{
12923	  output_asm_insn ("bcr\t0,0", NULL);
12924	  hw -= 1;
12925	}
12926    }
12927}
12928
12929/* Output assembler code to FILE to increment profiler label # LABELNO
12930   for profiling a function entry.  */
12931
12932void
12933s390_function_profiler (FILE *file, int labelno)
12934{
12935  rtx op[8];
12936
12937  char label[128];
12938  ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
12939
12940  fprintf (file, "# function profiler \n");
12941
12942  op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
12943  op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
12944  op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
12945  op[7] = GEN_INT (UNITS_PER_LONG);
12946
12947  op[2] = gen_rtx_REG (Pmode, 1);
12948  op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
12949  SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
12950
12951  op[4] = gen_rtx_SYMBOL_REF (Pmode, flag_fentry ? "__fentry__" : "_mcount");
12952  if (flag_pic)
12953    {
12954      op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
12955      op[4] = gen_rtx_CONST (Pmode, op[4]);
12956    }
12957
12958  if (flag_record_mcount)
12959    fprintf (file, "1:\n");
12960
12961  if (flag_fentry)
12962    {
12963      if (flag_nop_mcount)
12964	output_asm_nops ("-mnop-mcount", /* brasl */ 3);
12965      else if (cfun->static_chain_decl)
12966	warning (OPT_Wcannot_profile, "nested functions cannot be profiled "
12967		 "with %<-mfentry%> on s390");
12968      else
12969	output_asm_insn ("brasl\t0,%4", op);
12970    }
12971  else if (TARGET_64BIT)
12972    {
12973      if (flag_nop_mcount)
12974	output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* larl */ 3 +
12975			 /* brasl */ 3 + /* lg */ 3);
12976      else
12977	{
12978	  output_asm_insn ("stg\t%0,%1", op);
12979	  if (flag_dwarf2_cfi_asm)
12980	    output_asm_insn (".cfi_rel_offset\t%0,%7", op);
12981	  output_asm_insn ("larl\t%2,%3", op);
12982	  output_asm_insn ("brasl\t%0,%4", op);
12983	  output_asm_insn ("lg\t%0,%1", op);
12984	  if (flag_dwarf2_cfi_asm)
12985	    output_asm_insn (".cfi_restore\t%0", op);
12986	}
12987    }
12988  else
12989    {
12990      if (flag_nop_mcount)
12991	output_asm_nops ("-mnop-mcount", /* st */ 2 + /* larl */ 3 +
12992			 /* brasl */ 3 + /* l */ 2);
12993      else
12994	{
12995	  output_asm_insn ("st\t%0,%1", op);
12996	  if (flag_dwarf2_cfi_asm)
12997	    output_asm_insn (".cfi_rel_offset\t%0,%7", op);
12998	  output_asm_insn ("larl\t%2,%3", op);
12999	  output_asm_insn ("brasl\t%0,%4", op);
13000	  output_asm_insn ("l\t%0,%1", op);
13001	  if (flag_dwarf2_cfi_asm)
13002	    output_asm_insn (".cfi_restore\t%0", op);
13003	}
13004    }
13005
13006  if (flag_record_mcount)
13007    {
13008      fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
13009      fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
13010      fprintf (file, "\t.previous\n");
13011    }
13012}
13013
13014/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
13015   into its SYMBOL_REF_FLAGS.  */
13016
13017static void
13018s390_encode_section_info (tree decl, rtx rtl, int first)
13019{
13020  default_encode_section_info (decl, rtl, first);
13021
13022  if (TREE_CODE (decl) == VAR_DECL)
13023    {
13024      /* Store the alignment to be able to check if we can use
13025	 a larl/load-relative instruction.  We only handle the cases
13026	 that can go wrong (i.e. no FUNC_DECLs).  */
13027      if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
13028	SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13029      else if (DECL_ALIGN (decl) % 32)
13030	SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13031      else if (DECL_ALIGN (decl) % 64)
13032	SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13033    }
13034
13035  /* Literal pool references don't have a decl so they are handled
13036     differently here.  We rely on the information in the MEM_ALIGN
13037     entry to decide upon the alignment.  */
13038  if (MEM_P (rtl)
13039      && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
13040      && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
13041    {
13042      if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
13043	SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13044      else if (MEM_ALIGN (rtl) % 32)
13045	SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13046      else if (MEM_ALIGN (rtl) % 64)
13047	SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13048    }
13049}
13050
13051/* Output thunk to FILE that implements a C++ virtual function call (with
13052   multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
13053   by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
13054   stored at VCALL_OFFSET in the vtable whose address is located at offset 0
13055   relative to the resulting this pointer.  */
13056
13057static void
13058s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
13059		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
13060		      tree function)
13061{
13062  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
13063  rtx op[10];
13064  int nonlocal = 0;
13065
13066  assemble_start_function (thunk, fnname);
13067  /* Make sure unwind info is emitted for the thunk if needed.  */
13068  final_start_function (emit_barrier (), file, 1);
13069
13070  /* Operand 0 is the target function.  */
13071  op[0] = XEXP (DECL_RTL (function), 0);
13072  if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
13073    {
13074      nonlocal = 1;
13075      op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
13076			      TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
13077      op[0] = gen_rtx_CONST (Pmode, op[0]);
13078    }
13079
13080  /* Operand 1 is the 'this' pointer.  */
13081  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
13082    op[1] = gen_rtx_REG (Pmode, 3);
13083  else
13084    op[1] = gen_rtx_REG (Pmode, 2);
13085
13086  /* Operand 2 is the delta.  */
13087  op[2] = GEN_INT (delta);
13088
13089  /* Operand 3 is the vcall_offset.  */
13090  op[3] = GEN_INT (vcall_offset);
13091
13092  /* Operand 4 is the temporary register.  */
13093  op[4] = gen_rtx_REG (Pmode, 1);
13094
13095  /* Operands 5 to 8 can be used as labels.  */
13096  op[5] = NULL_RTX;
13097  op[6] = NULL_RTX;
13098  op[7] = NULL_RTX;
13099  op[8] = NULL_RTX;
13100
  /* Operand 9 can be used as a temporary register.  */
13102  op[9] = NULL_RTX;
13103
13104  /* Generate code.  */
13105  if (TARGET_64BIT)
13106    {
13107      /* Setup literal pool pointer if required.  */
13108      if ((!DISP_IN_RANGE (delta)
13109	   && !CONST_OK_FOR_K (delta)
13110	   && !CONST_OK_FOR_Os (delta))
13111	  || (!DISP_IN_RANGE (vcall_offset)
13112	      && !CONST_OK_FOR_K (vcall_offset)
13113	      && !CONST_OK_FOR_Os (vcall_offset)))
13114	{
13115	  op[5] = gen_label_rtx ();
13116	  output_asm_insn ("larl\t%4,%5", op);
13117	}
13118
13119      /* Add DELTA to this pointer.  */
13120      if (delta)
13121	{
13122	  if (CONST_OK_FOR_J (delta))
13123	    output_asm_insn ("la\t%1,%2(%1)", op);
13124	  else if (DISP_IN_RANGE (delta))
13125	    output_asm_insn ("lay\t%1,%2(%1)", op);
13126	  else if (CONST_OK_FOR_K (delta))
13127	    output_asm_insn ("aghi\t%1,%2", op);
13128	  else if (CONST_OK_FOR_Os (delta))
13129	    output_asm_insn ("agfi\t%1,%2", op);
13130	  else
13131	    {
13132	      op[6] = gen_label_rtx ();
13133	      output_asm_insn ("agf\t%1,%6-%5(%4)", op);
13134	    }
13135	}
13136
13137      /* Perform vcall adjustment.  */
13138      if (vcall_offset)
13139	{
13140	  if (DISP_IN_RANGE (vcall_offset))
13141	    {
13142	      output_asm_insn ("lg\t%4,0(%1)", op);
13143	      output_asm_insn ("ag\t%1,%3(%4)", op);
13144	    }
13145	  else if (CONST_OK_FOR_K (vcall_offset))
13146	    {
13147	      output_asm_insn ("lghi\t%4,%3", op);
13148	      output_asm_insn ("ag\t%4,0(%1)", op);
13149	      output_asm_insn ("ag\t%1,0(%4)", op);
13150	    }
13151	  else if (CONST_OK_FOR_Os (vcall_offset))
13152	    {
13153	      output_asm_insn ("lgfi\t%4,%3", op);
13154	      output_asm_insn ("ag\t%4,0(%1)", op);
13155	      output_asm_insn ("ag\t%1,0(%4)", op);
13156	    }
13157	  else
13158	    {
13159	      op[7] = gen_label_rtx ();
13160	      output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
13161	      output_asm_insn ("ag\t%4,0(%1)", op);
13162	      output_asm_insn ("ag\t%1,0(%4)", op);
13163	    }
13164	}
13165
13166      /* Jump to target.  */
13167      output_asm_insn ("jg\t%0", op);
13168
13169      /* Output literal pool if required.  */
13170      if (op[5])
13171	{
13172	  output_asm_insn (".align\t4", op);
13173	  targetm.asm_out.internal_label (file, "L",
13174					  CODE_LABEL_NUMBER (op[5]));
13175	}
13176      if (op[6])
13177	{
13178	  targetm.asm_out.internal_label (file, "L",
13179					  CODE_LABEL_NUMBER (op[6]));
13180	  output_asm_insn (".long\t%2", op);
13181	}
13182      if (op[7])
13183	{
13184	  targetm.asm_out.internal_label (file, "L",
13185					  CODE_LABEL_NUMBER (op[7]));
13186	  output_asm_insn (".long\t%3", op);
13187	}
13188    }
13189  else
13190    {
13191      /* Setup base pointer if required.  */
13192      if (!vcall_offset
13193	  || (!DISP_IN_RANGE (delta)
13194	      && !CONST_OK_FOR_K (delta)
13195	      && !CONST_OK_FOR_Os (delta))
13196	  || (!DISP_IN_RANGE (delta)
13197	      && !CONST_OK_FOR_K (vcall_offset)
13198	      && !CONST_OK_FOR_Os (vcall_offset)))
13199	{
13200	  op[5] = gen_label_rtx ();
13201	  output_asm_insn ("basr\t%4,0", op);
13202	  targetm.asm_out.internal_label (file, "L",
13203					  CODE_LABEL_NUMBER (op[5]));
13204	}
13205
13206      /* Add DELTA to this pointer.  */
13207      if (delta)
13208	{
13209	  if (CONST_OK_FOR_J (delta))
13210	    output_asm_insn ("la\t%1,%2(%1)", op);
13211	  else if (DISP_IN_RANGE (delta))
13212	    output_asm_insn ("lay\t%1,%2(%1)", op);
13213	  else if (CONST_OK_FOR_K (delta))
13214	    output_asm_insn ("ahi\t%1,%2", op);
13215	  else if (CONST_OK_FOR_Os (delta))
13216	    output_asm_insn ("afi\t%1,%2", op);
13217	  else
13218	    {
13219	      op[6] = gen_label_rtx ();
13220	      output_asm_insn ("a\t%1,%6-%5(%4)", op);
13221	    }
13222	}
13223
13224      /* Perform vcall adjustment.  */
13225      if (vcall_offset)
13226	{
13227	  if (CONST_OK_FOR_J (vcall_offset))
13228	    {
13229	      output_asm_insn ("l\t%4,0(%1)", op);
13230	      output_asm_insn ("a\t%1,%3(%4)", op);
13231	    }
13232	  else if (DISP_IN_RANGE (vcall_offset))
13233	    {
13234	      output_asm_insn ("l\t%4,0(%1)", op);
13235	      output_asm_insn ("ay\t%1,%3(%4)", op);
13236	    }
13237	  else if (CONST_OK_FOR_K (vcall_offset))
13238	    {
13239	      output_asm_insn ("lhi\t%4,%3", op);
13240	      output_asm_insn ("a\t%4,0(%1)", op);
13241	      output_asm_insn ("a\t%1,0(%4)", op);
13242	    }
13243	  else if (CONST_OK_FOR_Os (vcall_offset))
13244	    {
13245	      output_asm_insn ("iilf\t%4,%3", op);
13246	      output_asm_insn ("a\t%4,0(%1)", op);
13247	      output_asm_insn ("a\t%1,0(%4)", op);
13248	    }
13249	  else
13250	    {
13251	      op[7] = gen_label_rtx ();
13252	      output_asm_insn ("l\t%4,%7-%5(%4)", op);
13253	      output_asm_insn ("a\t%4,0(%1)", op);
13254	      output_asm_insn ("a\t%1,0(%4)", op);
13255	    }
13256
13257	  /* We had to clobber the base pointer register.
13258	     Re-setup the base pointer (with a different base).  */
13259	  op[5] = gen_label_rtx ();
13260	  output_asm_insn ("basr\t%4,0", op);
13261	  targetm.asm_out.internal_label (file, "L",
13262					  CODE_LABEL_NUMBER (op[5]));
13263	}
13264
13265      /* Jump to target.  */
13266      op[8] = gen_label_rtx ();
13267
13268      if (!flag_pic)
13269	output_asm_insn ("l\t%4,%8-%5(%4)", op);
13270      else if (!nonlocal)
13271	output_asm_insn ("a\t%4,%8-%5(%4)", op);
13272      /* We cannot call through .plt, since .plt requires %r12 loaded.  */
13273      else if (flag_pic == 1)
13274	{
13275	  output_asm_insn ("a\t%4,%8-%5(%4)", op);
13276	  output_asm_insn ("l\t%4,%0(%4)", op);
13277	}
13278      else if (flag_pic == 2)
13279	{
13280	  op[9] = gen_rtx_REG (Pmode, 0);
13281	  output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
13282	  output_asm_insn ("a\t%4,%8-%5(%4)", op);
13283	  output_asm_insn ("ar\t%4,%9", op);
13284	  output_asm_insn ("l\t%4,0(%4)", op);
13285	}
13286
13287      output_asm_insn ("br\t%4", op);
13288
13289      /* Output literal pool.  */
13290      output_asm_insn (".align\t4", op);
13291
13292      if (nonlocal && flag_pic == 2)
13293	output_asm_insn (".long\t%0", op);
13294      if (nonlocal)
13295	{
13296	  op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13297	  SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
13298	}
13299
13300      targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
13301      if (!flag_pic)
13302	output_asm_insn (".long\t%0", op);
13303      else
13304	output_asm_insn (".long\t%0-%5", op);
13305
13306      if (op[6])
13307	{
13308	  targetm.asm_out.internal_label (file, "L",
13309					  CODE_LABEL_NUMBER (op[6]));
13310	  output_asm_insn (".long\t%2", op);
13311	}
13312      if (op[7])
13313	{
13314	  targetm.asm_out.internal_label (file, "L",
13315					  CODE_LABEL_NUMBER (op[7]));
13316	  output_asm_insn (".long\t%3", op);
13317	}
13318    }
13319  final_end_function ();
13320  assemble_end_function (thunk, fnname);
13321}
13322
13323/* Output either an indirect jump or an indirect call
13324   (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
13325   using a branch trampoline disabling branch target prediction.  */
13326
13327void
13328s390_indirect_branch_via_thunk (unsigned int regno,
13329				unsigned int return_addr_regno,
13330				rtx comparison_operator,
13331				enum s390_indirect_branch_type type)
13332{
13333  enum s390_indirect_branch_option option;
13334
13335  if (type == s390_indirect_branch_type_return)
13336    {
13337      if (s390_return_addr_from_memory ())
13338	option = s390_opt_function_return_mem;
13339      else
13340	option = s390_opt_function_return_reg;
13341    }
13342  else if (type == s390_indirect_branch_type_jump)
13343    option = s390_opt_indirect_branch_jump;
13344  else if (type == s390_indirect_branch_type_call)
13345    option = s390_opt_indirect_branch_call;
13346  else
13347    gcc_unreachable ();
13348
13349  if (TARGET_INDIRECT_BRANCH_TABLE)
13350    {
13351      char label[32];
13352
13353      ASM_GENERATE_INTERNAL_LABEL (label,
13354				   indirect_branch_table_label[option],
13355				   indirect_branch_table_label_no[option]++);
13356      ASM_OUTPUT_LABEL (asm_out_file, label);
13357    }
13358
13359  if (return_addr_regno != INVALID_REGNUM)
13360    {
13361      gcc_assert (comparison_operator == NULL_RTX);
13362      fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
13363    }
13364  else
13365    {
13366      fputs (" \tjg", asm_out_file);
13367      if (comparison_operator != NULL_RTX)
13368	print_operand (asm_out_file, comparison_operator, 'C');
13369
13370      fputs ("\t", asm_out_file);
13371    }
13372
13373  if (TARGET_CPU_Z10)
13374    fprintf (asm_out_file,
13375	     TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
13376	     regno);
13377  else
13378    fprintf (asm_out_file,
13379	     TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
13380	     INDIRECT_BRANCH_THUNK_REGNUM, regno);
13381
13382  if ((option == s390_opt_indirect_branch_jump
13383       && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
13384      || (option == s390_opt_indirect_branch_call
13385	  && cfun->machine->indirect_branch_call == indirect_branch_thunk)
13386      || (option == s390_opt_function_return_reg
13387	  && cfun->machine->function_return_reg == indirect_branch_thunk)
13388      || (option == s390_opt_function_return_mem
13389	  && cfun->machine->function_return_mem == indirect_branch_thunk))
13390    {
13391      if (TARGET_CPU_Z10)
13392	indirect_branch_z10thunk_mask |= (1 << regno);
13393      else
13394	indirect_branch_prez10thunk_mask |= (1 << regno);
13395    }
13396}
13397
13398/* Output an inline thunk for indirect jumps.  EXECUTE_TARGET can
13399   either be an address register or a label pointing to the location
13400   of the jump instruction.  */
13401
13402void
13403s390_indirect_branch_via_inline_thunk (rtx execute_target)
13404{
13405  if (TARGET_INDIRECT_BRANCH_TABLE)
13406    {
13407      char label[32];
13408
13409      ASM_GENERATE_INTERNAL_LABEL (label,
13410				   indirect_branch_table_label[s390_opt_indirect_branch_jump],
13411				   indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
13412      ASM_OUTPUT_LABEL (asm_out_file, label);
13413    }
13414
13415  if (!TARGET_ZARCH)
13416    fputs ("\t.machinemode zarch\n", asm_out_file);
13417
13418  if (REG_P (execute_target))
13419    fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
13420  else
13421    output_asm_insn ("\texrl\t%%r0,%0", &execute_target);
13422
13423  if (!TARGET_ZARCH)
13424    fputs ("\t.machinemode esa\n", asm_out_file);
13425
13426  fputs ("0:\tj\t0b\n", asm_out_file);
13427}
13428
13429static bool
13430s390_valid_pointer_mode (scalar_int_mode mode)
13431{
13432  return (mode == SImode || (TARGET_64BIT && mode == DImode));
13433}
13434
/* Checks whether the given CALL_EXPR would use a call-saved register.
   This is used to decide whether sibling call optimization could be
   performed on the respective function call.  */
13439
13440static bool
13441s390_call_saved_register_used (tree call_expr)
13442{
13443  CUMULATIVE_ARGS cum_v;
13444  cumulative_args_t cum;
13445  tree parameter;
13446  rtx parm_rtx;
13447  int reg, i;
13448
13449  INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
13450  cum = pack_cumulative_args (&cum_v);
13451
13452  for (i = 0; i < call_expr_nargs (call_expr); i++)
13453    {
13454      parameter = CALL_EXPR_ARG (call_expr, i);
13455      gcc_assert (parameter);
13456
13457      /* For an undeclared variable passed as parameter we will get
13458	 an ERROR_MARK node here.  */
13459      if (TREE_CODE (parameter) == ERROR_MARK)
13460	return true;
13461
13462      /* We assume that in the target function all parameters are
13463	 named.  This only has an impact on vector argument register
13464	 usage none of which is call-saved.  */
13465      function_arg_info arg (TREE_TYPE (parameter), /*named=*/true);
13466      apply_pass_by_reference_rules (&cum_v, arg);
13467
13468      parm_rtx = s390_function_arg (cum, arg);
13469
13470      s390_function_arg_advance (cum, arg);
13471
13472      if (!parm_rtx)
13473	continue;
13474
13475      if (REG_P (parm_rtx))
13476	{
13477	  int size = s390_function_arg_size (arg.mode, arg.type);
13478	  int nregs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
13479
13480	  for (reg = 0; reg < nregs; reg++)
13481	    if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx)))
13482	      return true;
13483	}
13484      else if (GET_CODE (parm_rtx) == PARALLEL)
13485	{
13486	  int i;
13487
13488	  for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
13489	    {
13490	      rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
13491
13492	      gcc_assert (REG_P (r));
13493	      gcc_assert (REG_NREGS (r) == 1);
13494
13495	      if (!call_used_or_fixed_reg_p (REGNO (r)))
13496		return true;
13497	    }
13498	}
13499    }
13500  return false;
13501}
13502
13503/* Return true if the given call expression can be
13504   turned into a sibling call.
13505   DECL holds the declaration of the function to be called whereas
13506   EXP is the call expression itself.  */
13507
13508static bool
13509s390_function_ok_for_sibcall (tree decl, tree exp)
13510{
13511  /* The TPF epilogue uses register 1.  */
13512  if (TARGET_TPF_PROFILING)
13513    return false;
13514
13515  /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
13516     which would have to be restored before the sibcall.  */
13517  if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
13518    return false;
13519
13520  /* The thunks for indirect branches require r1 if no exrl is
13521     available.  r1 might not be available when doing a sibling
13522     call.  */
13523  if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13524      && !TARGET_CPU_Z10
13525      && !decl)
13526    return false;
13527
  /* Register 6 on s390 is available as an argument register but is
     call-saved.  This makes functions needing this register for
     arguments not suitable for sibcalls.  */
13531  return !s390_call_saved_register_used (exp);
13532}
13533
13534/* Return the fixed registers used for condition codes.  */
13535
13536static bool
13537s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13538{
13539  *p1 = CC_REGNUM;
13540  *p2 = INVALID_REGNUM;
13541
13542  return true;
13543}
13544
13545/* This function is used by the call expanders of the machine description.
13546   It emits the call insn itself together with the necessary operations
13547   to adjust the target address and returns the emitted insn.
13548   ADDR_LOCATION is the target address rtx
13549   TLS_CALL the location of the thread-local symbol
13550   RESULT_REG the register where the result of the call should be stored
13551   RETADDR_REG the register where the return address should be stored
13552	       If this parameter is NULL_RTX the call is considered
13553	       to be a sibling call.  */
13554
13555rtx_insn *
13556s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
13557		rtx retaddr_reg)
13558{
13559  bool plt_call = false;
13560  rtx_insn *insn;
13561  rtx vec[4] = { NULL_RTX };
13562  int elts = 0;
13563  rtx *call = &vec[0];
13564  rtx *clobber_ret_reg = &vec[1];
13565  rtx *use = &vec[2];
13566  rtx *clobber_thunk_reg = &vec[3];
13567  int i;
13568
13569  /* Direct function calls need special treatment.  */
13570  if (GET_CODE (addr_location) == SYMBOL_REF)
13571    {
13572      /* When calling a global routine in PIC mode, we must
13573	 replace the symbol itself with the PLT stub.  */
13574      if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
13575	{
13576	  if (TARGET_64BIT || retaddr_reg != NULL_RTX)
13577	    {
13578	      addr_location = gen_rtx_UNSPEC (Pmode,
13579					      gen_rtvec (1, addr_location),
13580					      UNSPEC_PLT);
13581	      addr_location = gen_rtx_CONST (Pmode, addr_location);
13582	      plt_call = true;
13583	    }
13584	  else
13585	    /* For -fpic code the PLT entries might use r12 which is
13586	       call-saved.  Therefore we cannot do a sibcall when
13587	       calling directly using a symbol ref.  When reaching
13588	       this point we decided (in s390_function_ok_for_sibcall)
13589	       to do a sibcall for a function pointer but one of the
13590	       optimizers was able to get rid of the function pointer
13591	       by propagating the symbol ref into the call.  This
	       optimization is illegal for S/390 so we turn the direct
	       call into an indirect call again.  */
13594	    addr_location = force_reg (Pmode, addr_location);
13595	}
13596    }
13597
  /* If it is already an indirect call or the code above moved the
     SYMBOL_REF to somewhere else, make sure the address can be found
     in register 1.  */
13601  if (retaddr_reg == NULL_RTX
13602      && GET_CODE (addr_location) != SYMBOL_REF
13603      && !plt_call)
13604    {
13605      emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13606      addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13607    }
13608
13609  if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13610      && GET_CODE (addr_location) != SYMBOL_REF
13611      && !plt_call)
13612    {
13613      /* Indirect branch thunks require the target to be a single GPR.  */
13614      addr_location = force_reg (Pmode, addr_location);
13615
      /* Without exrl the indirect branch thunks need an additional
	 register for larl;ex.  */
13618      if (!TARGET_CPU_Z10)
13619	{
13620	  *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM);
13621	  *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
13622	}
13623    }
13624
13625  addr_location = gen_rtx_MEM (QImode, addr_location);
13626  *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13627
13628  if (result_reg != NULL_RTX)
13629    *call = gen_rtx_SET (result_reg, *call);
13630
13631  if (retaddr_reg != NULL_RTX)
13632    {
13633      *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13634
13635      if (tls_call != NULL_RTX)
13636	*use = gen_rtx_USE (VOIDmode, tls_call);
13637    }
13638
13639
13640  for (i = 0; i < 4; i++)
13641    if (vec[i] != NULL_RTX)
13642      elts++;
13643
13644  if (elts > 1)
13645    {
13646      rtvec v;
13647      int e = 0;
13648
13649      v = rtvec_alloc (elts);
13650      for (i = 0; i < 4; i++)
13651	if (vec[i] != NULL_RTX)
13652	  {
13653	    RTVEC_ELT (v, e) = vec[i];
13654	    e++;
13655	  }
13656
13657      *call = gen_rtx_PARALLEL (VOIDmode, v);
13658    }
13659
13660  insn = emit_call_insn (*call);
13661
13662  /* 31-bit PLT stubs and tls calls use the GOT register implicitly.  */
13663  if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
13664    {
13665      /* s390_function_ok_for_sibcall should
13666	 have denied sibcalls in this case.  */
13667      gcc_assert (retaddr_reg != NULL_RTX);
13668      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13669    }
13670  return insn;
13671}
13672
13673/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */
13674
13675static void
13676s390_conditional_register_usage (void)
13677{
13678  int i;
13679
13680  if (flag_pic)
13681    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13682  fixed_regs[BASE_REGNUM] = 0;
13683  fixed_regs[RETURN_REGNUM] = 0;
13684  if (TARGET_64BIT)
13685    {
13686      for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13687	call_used_regs[i] = 0;
13688    }
13689  else
13690    {
13691      call_used_regs[FPR4_REGNUM] = 0;
13692      call_used_regs[FPR6_REGNUM] = 0;
13693    }
13694
13695  if (TARGET_SOFT_FLOAT)
13696    {
13697      for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13698	fixed_regs[i] = 1;
13699    }
13700
13701  /* Disable v16 - v31 for non-vector target.  */
13702  if (!TARGET_VX)
13703    {
13704      for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13705	fixed_regs[i] = call_used_regs[i] = 1;
13706    }
13707}
13708
13709/* Corresponding function to eh_return expander.  */
13710
13711static GTY(()) rtx s390_tpf_eh_return_symbol;
13712void
13713s390_emit_tpf_eh_return (rtx target)
13714{
13715  rtx_insn *insn;
13716  rtx reg, orig_ra;
13717
13718  if (!s390_tpf_eh_return_symbol)
13719    s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13720
13721  reg = gen_rtx_REG (Pmode, 2);
13722  orig_ra = gen_rtx_REG (Pmode, 3);
13723
13724  emit_move_insn (reg, target);
13725  emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13726  insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13727				     gen_rtx_REG (Pmode, RETURN_REGNUM));
13728  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13729  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13730
13731  emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13732}
13733
13734/* Rework the prologue/epilogue to avoid saving/restoring
13735   registers unnecessarily.  */
13736
13737static void
13738s390_optimize_prologue (void)
13739{
13740  rtx_insn *insn, *new_insn, *next_insn;
13741
13742  /* Do a final recompute of the frame-related data.  */
13743  s390_optimize_register_info ();
13744
13745  /* If all special registers are in fact used, there's nothing we
13746     can do, so no point in walking the insn list.  */
13747
13748  if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13749      && cfun_frame_layout.last_save_gpr >= BASE_REGNUM)
13750    return;
13751
13752  /* Search for prologue/epilogue insns and replace them.  */
13753  for (insn = get_insns (); insn; insn = next_insn)
13754    {
13755      int first, last, off;
13756      rtx set, base, offset;
13757      rtx pat;
13758
13759      next_insn = NEXT_INSN (insn);
13760
13761      if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13762	continue;
13763
13764      pat = PATTERN (insn);
13765
      /* Remove ldgr/lgdr instructions used for saving and restoring
	 GPRs if possible.  */
13768      if (TARGET_Z10)
13769	{
13770	  rtx tmp_pat = pat;
13771
13772	  if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13773	    tmp_pat = XVECEXP (pat, 0, 0);
13774
13775	  if (GET_CODE (tmp_pat) == SET
13776	      && GET_MODE (SET_SRC (tmp_pat)) == DImode
13777	      && REG_P (SET_SRC (tmp_pat))
13778	      && REG_P (SET_DEST (tmp_pat)))
13779	    {
13780	      int src_regno = REGNO (SET_SRC (tmp_pat));
13781	      int dest_regno = REGNO (SET_DEST (tmp_pat));
13782	      int gpr_regno;
13783	      int fpr_regno;
13784
13785	      if (!((GENERAL_REGNO_P (src_regno)
13786		     && FP_REGNO_P (dest_regno))
13787		    || (FP_REGNO_P (src_regno)
13788			&& GENERAL_REGNO_P (dest_regno))))
13789		continue;
13790
13791	      gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13792	      fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
13793
13794	      /* GPR must be call-saved, FPR must be call-clobbered.  */
13795	      if (!call_used_regs[fpr_regno]
13796		  || call_used_regs[gpr_regno])
13797		continue;
13798
13799	      /* It must not happen that what we once saved in an FPR now
13800		 needs a stack slot.  */
13801	      gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
13802
13803	      if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
13804		{
13805		  remove_insn (insn);
13806		  continue;
13807		}
13808	    }
13809	}
13810
13811      if (GET_CODE (pat) == PARALLEL
13812	  && store_multiple_operation (pat, VOIDmode))
13813	{
13814	  set = XVECEXP (pat, 0, 0);
13815	  first = REGNO (SET_SRC (set));
13816	  last = first + XVECLEN (pat, 0) - 1;
13817	  offset = const0_rtx;
13818	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13819	  off = INTVAL (offset);
13820
13821	  if (GET_CODE (base) != REG || off < 0)
13822	    continue;
13823	  if (cfun_frame_layout.first_save_gpr != -1
13824	      && (cfun_frame_layout.first_save_gpr < first
13825		  || cfun_frame_layout.last_save_gpr > last))
13826	    continue;
13827	  if (REGNO (base) != STACK_POINTER_REGNUM
13828	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13829	    continue;
13830	  if (first > BASE_REGNUM || last < BASE_REGNUM)
13831	    continue;
13832
13833	  if (cfun_frame_layout.first_save_gpr != -1)
13834	    {
13835	      rtx s_pat = save_gprs (base,
13836				     off + (cfun_frame_layout.first_save_gpr
13837					    - first) * UNITS_PER_LONG,
13838				     cfun_frame_layout.first_save_gpr,
13839				     cfun_frame_layout.last_save_gpr);
13840	      new_insn = emit_insn_before (s_pat, insn);
13841	      INSN_ADDRESSES_NEW (new_insn, -1);
13842	    }
13843
13844	  remove_insn (insn);
13845	  continue;
13846	}
13847
13848      if (cfun_frame_layout.first_save_gpr == -1
13849	  && GET_CODE (pat) == SET
13850	  && GENERAL_REG_P (SET_SRC (pat))
13851	  && GET_CODE (SET_DEST (pat)) == MEM)
13852	{
13853	  set = pat;
13854	  first = REGNO (SET_SRC (set));
13855	  offset = const0_rtx;
13856	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13857	  off = INTVAL (offset);
13858
13859	  if (GET_CODE (base) != REG || off < 0)
13860	    continue;
13861	  if (REGNO (base) != STACK_POINTER_REGNUM
13862	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13863	    continue;
13864
13865	  remove_insn (insn);
13866	  continue;
13867	}
13868
13869      if (GET_CODE (pat) == PARALLEL
13870	  && load_multiple_operation (pat, VOIDmode))
13871	{
13872	  set = XVECEXP (pat, 0, 0);
13873	  first = REGNO (SET_DEST (set));
13874	  last = first + XVECLEN (pat, 0) - 1;
13875	  offset = const0_rtx;
13876	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13877	  off = INTVAL (offset);
13878
13879	  if (GET_CODE (base) != REG || off < 0)
13880	    continue;
13881
13882	  if (cfun_frame_layout.first_restore_gpr != -1
13883	      && (cfun_frame_layout.first_restore_gpr < first
13884		  || cfun_frame_layout.last_restore_gpr > last))
13885	    continue;
13886	  if (REGNO (base) != STACK_POINTER_REGNUM
13887	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13888	    continue;
13889	  if (first > BASE_REGNUM || last < BASE_REGNUM)
13890	    continue;
13891
13892	  if (cfun_frame_layout.first_restore_gpr != -1)
13893	    {
13894	      rtx rpat = restore_gprs (base,
13895				       off + (cfun_frame_layout.first_restore_gpr
13896					      - first) * UNITS_PER_LONG,
13897				       cfun_frame_layout.first_restore_gpr,
13898				       cfun_frame_layout.last_restore_gpr);
13899
13900	      /* Remove REG_CFA_RESTOREs for registers that we no
13901		 longer need to save.  */
13902	      REG_NOTES (rpat) = REG_NOTES (insn);
13903	      for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13904		if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13905		    && ((int) REGNO (XEXP (*ptr, 0))
13906			< cfun_frame_layout.first_restore_gpr))
13907		  *ptr = XEXP (*ptr, 1);
13908		else
13909		  ptr = &XEXP (*ptr, 1);
13910	      new_insn = emit_insn_before (rpat, insn);
13911	      RTX_FRAME_RELATED_P (new_insn) = 1;
13912	      INSN_ADDRESSES_NEW (new_insn, -1);
13913	    }
13914
13915	  remove_insn (insn);
13916	  continue;
13917	}
13918
13919      if (cfun_frame_layout.first_restore_gpr == -1
13920	  && GET_CODE (pat) == SET
13921	  && GENERAL_REG_P (SET_DEST (pat))
13922	  && GET_CODE (SET_SRC (pat)) == MEM)
13923	{
13924	  set = pat;
13925	  first = REGNO (SET_DEST (set));
13926	  offset = const0_rtx;
13927	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13928	  off = INTVAL (offset);
13929
13930	  if (GET_CODE (base) != REG || off < 0)
13931	    continue;
13932
13933	  if (REGNO (base) != STACK_POINTER_REGNUM
13934	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13935	    continue;
13936
13937	  remove_insn (insn);
13938	  continue;
13939	}
13940    }
13941}
13942
13943/* On z10 and later the dynamic branch prediction must see the
   backward jump within a certain window.  If not, it falls back to
13945   the static prediction.  This function rearranges the loop backward
13946   branch in a way which makes the static prediction always correct.
13947   The function returns true if it added an instruction.  */
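/* A minimal sketch of the rewrite performed below; mnemonics and label
   names are illustrative only:

     before:                     after:
       ...                         ...
       jne   .Lloop                je    .Lskip
                                   j     .Lloop
                                 .Lskip:

   The backward branch to the loop head becomes unconditional and is
   therefore always predicted correctly, independent of the prediction
   window.  */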
13948static bool
13949s390_fix_long_loop_prediction (rtx_insn *insn)
13950{
13951  rtx set = single_set (insn);
13952  rtx code_label, label_ref;
13953  rtx_insn *uncond_jump;
13954  rtx_insn *cur_insn;
13955  rtx tmp;
13956  int distance;
13957
13958  /* This will exclude branch on count and branch on index patterns
13959     since these are correctly statically predicted.  */
13960  if (!set
13961      || SET_DEST (set) != pc_rtx
13962      || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
13963    return false;
13964
13965  /* Skip conditional returns.  */
13966  if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
13967      && XEXP (SET_SRC (set), 2) == pc_rtx)
13968    return false;
13969
13970  label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
13971	       XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
13972
13973  gcc_assert (GET_CODE (label_ref) == LABEL_REF);
13974
13975  code_label = XEXP (label_ref, 0);
13976
13977  if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
13978      || INSN_ADDRESSES (INSN_UID (insn)) == -1
13979      || (INSN_ADDRESSES (INSN_UID (insn))
13980	  - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
13981    return false;
13982
13983  for (distance = 0, cur_insn = PREV_INSN (insn);
13984       distance < PREDICT_DISTANCE - 6;
13985       distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
13986    if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
13987      return false;
13988
13989  rtx_code_label *new_label = gen_label_rtx ();
13990  uncond_jump = emit_jump_insn_after (
13991		  gen_rtx_SET (pc_rtx,
13992			       gen_rtx_LABEL_REF (VOIDmode, code_label)),
13993		  insn);
13994  emit_label_after (new_label, uncond_jump);
13995
13996  tmp = XEXP (SET_SRC (set), 1);
13997  XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
13998  XEXP (SET_SRC (set), 2) = tmp;
13999  INSN_CODE (insn) = -1;
14000
14001  XEXP (label_ref, 0) = new_label;
14002  JUMP_LABEL (insn) = new_label;
14003  JUMP_LABEL (uncond_jump) = code_label;
14004
14005  return true;
14006}
14007
14008/* Returns 1 if INSN reads the value of REG for purposes not related
14009   to addressing of memory, and 0 otherwise.  */
14010static int
14011s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
14012{
14013  return reg_referenced_p (reg, PATTERN (insn))
14014    && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
14015}
14016
14017/* Starting from INSN find_cond_jump looks downwards in the insn
14018   stream for a single jump insn which is the last user of the
14019   condition code set in INSN.  */
14020static rtx_insn *
14021find_cond_jump (rtx_insn *insn)
14022{
14023  for (; insn; insn = NEXT_INSN (insn))
14024    {
14025      rtx ite, cc;
14026
14027      if (LABEL_P (insn))
14028	break;
14029
14030      if (!JUMP_P (insn))
14031	{
14032	  if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
14033	    break;
14034	  continue;
14035	}
14036
14037      /* This will be triggered by a return.  */
14038      if (GET_CODE (PATTERN (insn)) != SET)
14039	break;
14040
14041      gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
14042      ite = SET_SRC (PATTERN (insn));
14043
14044      if (GET_CODE (ite) != IF_THEN_ELSE)
14045	break;
14046
14047      cc = XEXP (XEXP (ite, 0), 0);
14048      if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
14049	break;
14050
14051      if (find_reg_note (insn, REG_DEAD, cc))
14052	return insn;
14053      break;
14054    }
14055
14056  return NULL;
14057}
14058
14059/* Swap the condition in COND and the operands in OP0 and OP1 so that
14060   the semantics does not change.  If NULL_RTX is passed as COND the
14061   function tries to find the conditional jump starting with INSN.  */
14062static void
14063s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
14064{
14065  rtx tmp = *op0;
14066
14067  if (cond == NULL_RTX)
14068    {
14069      rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
14070      rtx set = jump ? single_set (jump) : NULL_RTX;
14071
14072      if (set == NULL_RTX)
14073	return;
14074
14075      cond = XEXP (SET_SRC (set), 0);
14076    }
14077
14078  *op0 = *op1;
14079  *op1 = tmp;
14080  PUT_CODE (cond, swap_condition (GET_CODE (cond)));
14081}
14082
14083/* On z10, instructions of the compare-and-branch family have the
   property of accessing the register occurring as the second operand with
14085   its bits complemented.  If such a compare is grouped with a second
14086   instruction that accesses the same register non-complemented, and
14087   if that register's value is delivered via a bypass, then the
14088   pipeline recycles, thereby causing significant performance decline.
14089   This function locates such situations and exchanges the two
   operands of the compare.  The function returns true whenever it
14091   added an insn.  */
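/* A purely illustrative example (register numbers are made up): if the
   previous insn accesses %r3 and the compare and branch

     crj  %r2,%r3,...

   is grouped with it, %r3 is accessed both non-complemented (by the
   previous insn) and complemented (as second operand of the compare).
   Swapping the operands, with the condition adjusted accordingly,

     crj  %r3,%r2,...

   avoids the conflict.  */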
14092static bool
14093s390_z10_optimize_cmp (rtx_insn *insn)
14094{
14095  rtx_insn *prev_insn, *next_insn;
14096  bool insn_added_p = false;
14097  rtx cond, *op0, *op1;
14098
14099  if (GET_CODE (PATTERN (insn)) == PARALLEL)
14100    {
14101      /* Handle compare and branch and branch on count
14102	 instructions.  */
14103      rtx pattern = single_set (insn);
14104
14105      if (!pattern
14106	  || SET_DEST (pattern) != pc_rtx
14107	  || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
14108	return false;
14109
14110      cond = XEXP (SET_SRC (pattern), 0);
14111      op0 = &XEXP (cond, 0);
14112      op1 = &XEXP (cond, 1);
14113    }
14114  else if (GET_CODE (PATTERN (insn)) == SET)
14115    {
14116      rtx src, dest;
14117
14118      /* Handle normal compare instructions.  */
14119      src = SET_SRC (PATTERN (insn));
14120      dest = SET_DEST (PATTERN (insn));
14121
14122      if (!REG_P (dest)
14123	  || !CC_REGNO_P (REGNO (dest))
14124	  || GET_CODE (src) != COMPARE)
14125	return false;
14126
14127      /* s390_swap_cmp will try to find the conditional
14128	 jump when passing NULL_RTX as condition.  */
14129      cond = NULL_RTX;
14130      op0 = &XEXP (src, 0);
14131      op1 = &XEXP (src, 1);
14132    }
14133  else
14134    return false;
14135
14136  if (!REG_P (*op0) || !REG_P (*op1))
14137    return false;
14138
14139  if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
14140    return false;
14141
14142  /* Swap the COMPARE arguments and its mask if there is a
14143     conflicting access in the previous insn.  */
14144  prev_insn = prev_active_insn (insn);
14145  if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14146      && reg_referenced_p (*op1, PATTERN (prev_insn)))
14147    s390_swap_cmp (cond, op0, op1, insn);
14148
14149  /* Check if there is a conflict with the next insn. If there
14150     was no conflict with the previous insn, then swap the
14151     COMPARE arguments and its mask.  If we already swapped
14152     the operands, or if swapping them would cause a conflict
14153     with the previous insn, issue a NOP after the COMPARE in
     order to separate the two instructions.  */
14155  next_insn = next_active_insn (insn);
14156  if (next_insn != NULL_RTX && INSN_P (next_insn)
14157      && s390_non_addr_reg_read_p (*op1, next_insn))
14158    {
14159      if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14160	  && s390_non_addr_reg_read_p (*op0, prev_insn))
14161	{
14162	  if (REGNO (*op1) == 0)
14163	    emit_insn_after (gen_nop_lr1 (), insn);
14164	  else
14165	    emit_insn_after (gen_nop_lr0 (), insn);
14166	  insn_added_p = true;
14167	}
14168      else
14169	s390_swap_cmp (cond, op0, op1, insn);
14170    }
14171  return insn_added_p;
14172}
14173
14174/* Number of INSNs to be scanned backward in the last BB of the loop
14175   and forward in the first BB of the loop.  This usually should be a
14176   bit more than the number of INSNs which could go into one
14177   group.  */
14178#define S390_OSC_SCAN_INSN_NUM 5
14179
/* Scan LOOP for static OSC (operand store compare) collisions and
   return true if an osc_break should be issued for this loop.  */
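/* A rough sketch of the situation being detected; mnemonics and
   register numbers are illustrative only:

     .Lheader:
       la    %r1,8(%r1)      ; address register modified ...
       lg    %r3,0(%r1)      ; ... before a load in the loop header
       ...
       stg   %r4,0(%r1)      ; store near the end of the latch
       brctg %r5,.Lheader

   The store of one iteration and the load of the next use the same
   address expression, but an address register changes in between, so
   the hardware cannot resolve the dependency early and may stall.  */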
14182static bool
s390_adjust_loop_scan_osc (struct loop *loop)
{
14186  HARD_REG_SET modregs, newregs;
14187  rtx_insn *insn, *store_insn = NULL;
14188  rtx set;
14189  struct s390_address addr_store, addr_load;
14190  subrtx_iterator::array_type array;
14191  int insn_count;
14192
14193  CLEAR_HARD_REG_SET (modregs);
14194
14195  insn_count = 0;
14196  FOR_BB_INSNS_REVERSE (loop->latch, insn)
14197    {
14198      if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14199	continue;
14200
14201      insn_count++;
14202      if (insn_count > S390_OSC_SCAN_INSN_NUM)
14203	return false;
14204
14205      find_all_hard_reg_sets (insn, &newregs, true);
14206      modregs |= newregs;
14207
14208      set = single_set (insn);
14209      if (!set)
14210	continue;
14211
14212      if (MEM_P (SET_DEST (set))
14213	  && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
14214	{
14215	  store_insn = insn;
14216	  break;
14217	}
14218    }
14219
14220  if (store_insn == NULL_RTX)
14221    return false;
14222
14223  insn_count = 0;
14224  FOR_BB_INSNS (loop->header, insn)
14225    {
14226      if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14227	continue;
14228
14229      if (insn == store_insn)
14230	return false;
14231
14232      insn_count++;
14233      if (insn_count > S390_OSC_SCAN_INSN_NUM)
14234	return false;
14235
14236      find_all_hard_reg_sets (insn, &newregs, true);
14237      modregs |= newregs;
14238
14239      set = single_set (insn);
14240      if (!set)
14241	continue;
14242
14243      /* An intermediate store disrupts static OSC checking
14244	 anyway.  */
14245      if (MEM_P (SET_DEST (set))
14246	  && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
14247	return false;
14248
14249      FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
14250	if (MEM_P (*iter)
14251	    && s390_decompose_address (XEXP (*iter, 0), &addr_load)
14252	    && rtx_equal_p (addr_load.base, addr_store.base)
14253	    && rtx_equal_p (addr_load.indx, addr_store.indx)
14254	    && rtx_equal_p (addr_load.disp, addr_store.disp))
14255	  {
14256	    if ((addr_load.base != NULL_RTX
14257		 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
14258		|| (addr_load.indx != NULL_RTX
14259		    && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
14260	      return true;
14261	  }
14262    }
14263  return false;
14264}
14265
14266/* Look for adjustments which can be done on simple innermost
14267   loops.  */
14268static void
14269s390_adjust_loops ()
14270{
14271  struct loop *loop = NULL;
14272
14273  df_analyze ();
14274  compute_bb_for_insn ();
14275
14276  /* Find the loops.  */
14277  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
14278
14279  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
14280    {
14281      if (dump_file)
14282	{
14283	  flow_loop_dump (loop, dump_file, NULL, 0);
14284	  fprintf (dump_file, ";;  OSC loop scan Loop: ");
14285	}
14286      if (loop->latch == NULL
14287	  || pc_set (BB_END (loop->latch)) == NULL_RTX
14288	  || !s390_adjust_loop_scan_osc (loop))
14289	{
14290	  if (dump_file)
14291	    {
14292	      if (loop->latch == NULL)
		fprintf (dump_file, " multiple backward jumps\n");
14294	      else
14295		{
14296		  fprintf (dump_file, " header insn: %d latch insn: %d ",
14297			   INSN_UID (BB_HEAD (loop->header)),
14298			   INSN_UID (BB_END (loop->latch)));
14299		  if (pc_set (BB_END (loop->latch)) == NULL_RTX)
14300		    fprintf (dump_file, " loop does not end with jump\n");
14301		  else
14302		    fprintf (dump_file, " not instrumented\n");
14303		}
14304	    }
14305	}
14306      else
14307	{
14308	  rtx_insn *new_insn;
14309
14310	  if (dump_file)
14311	    fprintf (dump_file, " adding OSC break insn: ");
14312	  new_insn = emit_insn_before (gen_osc_break (),
14313				       BB_END (loop->latch));
14314	  INSN_ADDRESSES_NEW (new_insn, -1);
14315	}
14316    }
14317
14318  loop_optimizer_finalize ();
14319
14320  df_finish_pass (false);
14321}
14322
14323/* Perform machine-dependent processing.  */
14324
14325static void
14326s390_reorg (void)
14327{
14328  struct constant_pool *pool;
14329  rtx_insn *insn;
14330  int hw_before, hw_after;
14331
14332  if (s390_tune == PROCESSOR_2964_Z13)
14333    s390_adjust_loops ();
14334
14335  /* Make sure all splits have been performed; splits after
14336     machine_dependent_reorg might confuse insn length counts.  */
14337  split_all_insns_noflow ();
14338
14339  /* Install the main literal pool and the associated base
14340     register load insns.  The literal pool might be > 4096 bytes in
14341     size, so that some of its elements cannot be directly accessed.
14342
14343     To fix this, we split the single literal pool into multiple
14344     pool chunks, reloading the pool base register at various
14345     points throughout the function to ensure it always points to
14346     the pool chunk the following code expects.  */
14347
14348  /* Collect the literal pool.  */
14349  pool = s390_mainpool_start ();
14350  if (pool)
14351    {
14352      /* Finish up literal pool related changes.  */
14353      s390_mainpool_finish (pool);
14354    }
14355  else
14356    {
14357      /* If literal pool overflowed, chunkify it.  */
14358      pool = s390_chunkify_start ();
14359      s390_chunkify_finish (pool);
14360    }
14361
14362  /* Generate out-of-pool execute target insns.  */
14363  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14364    {
14365      rtx label;
14366      rtx_insn *target;
14367
14368      label = s390_execute_label (insn);
14369      if (!label)
14370	continue;
14371
14372      gcc_assert (label != const0_rtx);
14373
14374      target = emit_label (XEXP (label, 0));
14375      INSN_ADDRESSES_NEW (target, -1);
14376
14377      if (JUMP_P (insn))
14378	{
14379	  target = emit_jump_insn (s390_execute_target (insn));
	  /* This is important in order to keep a table jump
	     pointing at the jump table label.  Only then is it
	     recognized as a table jump.  */
14383	  JUMP_LABEL (target) = JUMP_LABEL (insn);
14384	}
14385      else
14386	target = emit_insn (s390_execute_target (insn));
14387      INSN_ADDRESSES_NEW (target, -1);
14388    }
14389
14390  /* Try to optimize prologue and epilogue further.  */
14391  s390_optimize_prologue ();
14392
14393  /* Walk over the insns and do some >=z10 specific changes.  */
14394  if (s390_tune >= PROCESSOR_2097_Z10)
14395    {
14396      rtx_insn *insn;
14397      bool insn_added_p = false;
14398
14399      /* The insn lengths and addresses have to be up to date for the
14400	 following manipulations.  */
14401      shorten_branches (get_insns ());
14402
14403      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14404	{
14405	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14406	    continue;
14407
14408	  if (JUMP_P (insn))
14409	    insn_added_p |= s390_fix_long_loop_prediction (insn);
14410
14411	  if ((GET_CODE (PATTERN (insn)) == PARALLEL
14412	       || GET_CODE (PATTERN (insn)) == SET)
14413	      && s390_tune == PROCESSOR_2097_Z10)
14414	    insn_added_p |= s390_z10_optimize_cmp (insn);
14415	}
14416
14417      /* Adjust branches if we added new instructions.  */
14418      if (insn_added_p)
14419	shorten_branches (get_insns ());
14420    }
14421
14422  s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
14423  if (hw_after > 0)
14424    {
14425      rtx_insn *insn;
14426
14427      /* Insert NOPs for hotpatching. */
14428      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14429	/* Emit NOPs
14430	    1. inside the area covered by debug information to allow setting
14431	       breakpoints at the NOPs,
14432	    2. before any insn which results in an asm instruction,
14433	    3. before in-function labels to avoid jumping to the NOPs, for
14434	       example as part of a loop,
14435	    4. before any barrier in case the function is completely empty
14436	       (__builtin_unreachable ()) and has neither internal labels nor
14437	       active insns.
14438	*/
14439	if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
14440	  break;
14441      /* Output a series of NOPs before the first active insn.  */
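      /* For example (purely illustrative): with hw_after == 5 halfwords
	 this emits a 6-byte NOP (3 halfwords) followed by a 4-byte NOP
	 (2 halfwords) in front of the first active insn.  */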
14442      while (insn && hw_after > 0)
14443	{
14444	  if (hw_after >= 3)
14445	    {
14446	      emit_insn_before (gen_nop_6_byte (), insn);
14447	      hw_after -= 3;
14448	    }
14449	  else if (hw_after >= 2)
14450	    {
14451	      emit_insn_before (gen_nop_4_byte (), insn);
14452	      hw_after -= 2;
14453	    }
14454	  else
14455	    {
14456	      emit_insn_before (gen_nop_2_byte (), insn);
14457	      hw_after -= 1;
14458	    }
14459	}
14460    }
14461}
14462
14463/* Return true if INSN is a fp load insn writing register REGNO.  */
14464static inline bool
14465s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
14466{
14467  rtx set;
14468  enum attr_type flag = s390_safe_attr_type (insn);
14469
14470  if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
14471    return false;
14472
14473  set = single_set (insn);
14474
14475  if (set == NULL_RTX)
14476    return false;
14477
14478  if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
14479    return false;
14480
14481  if (REGNO (SET_DEST (set)) != regno)
14482    return false;
14483
14484  return true;
14485}
14486
14487/* This value describes the distance to be avoided between an
14488   arithmetic fp instruction and an fp load writing the same register.
   Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
   fine, but the exact value has to be avoided.  Otherwise the FP
   pipeline will throw an exception causing a major penalty.  */
14492#define Z10_EARLYLOAD_DISTANCE 7
14493
14494/* Rearrange the ready list in order to avoid the situation described
14495   for Z10_EARLYLOAD_DISTANCE.  A problematic load instruction is
14496   moved to the very end of the ready list.  */
14497static void
14498s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
14499{
14500  unsigned int regno;
14501  int nready = *nready_p;
14502  rtx_insn *tmp;
14503  int i;
14504  rtx_insn *insn;
14505  rtx set;
14506  enum attr_type flag;
14507  int distance;
14508
14509  /* Skip DISTANCE - 1 active insns.  */
14510  for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
14511       distance > 0 && insn != NULL_RTX;
14512       distance--, insn = prev_active_insn (insn))
14513    if (CALL_P (insn) || JUMP_P (insn))
14514      return;
14515
14516  if (insn == NULL_RTX)
14517    return;
14518
14519  set = single_set (insn);
14520
14521  if (set == NULL_RTX || !REG_P (SET_DEST (set))
14522      || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
14523    return;
14524
14525  flag = s390_safe_attr_type (insn);
14526
14527  if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
14528    return;
14529
14530  regno = REGNO (SET_DEST (set));
14531  i = nready - 1;
14532
14533  while (!s390_fpload_toreg (ready[i], regno) && i > 0)
14534    i--;
14535
14536  if (!i)
14537    return;
14538
14539  tmp = ready[i];
14540  memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
14541  ready[0] = tmp;
14542}
14543
14544/* Returns TRUE if BB is entered via a fallthru edge and all other
14545   incoming edges are less than likely.  */
14546static bool
14547s390_bb_fallthru_entry_likely (basic_block bb)
14548{
14549  edge e, fallthru_edge;
14550  edge_iterator ei;
14551
14552  if (!bb)
14553    return false;
14554
14555  fallthru_edge = find_fallthru_edge (bb->preds);
14556  if (!fallthru_edge)
14557    return false;
14558
14559  FOR_EACH_EDGE (e, ei, bb->preds)
14560    if (e != fallthru_edge
14561	&& e->probability >= profile_probability::likely ())
14562      return false;
14563
14564  return true;
14565}
14566
14567struct s390_sched_state
14568{
14569  /* Number of insns in the group.  */
14570  int group_state;
14571  /* Execution side of the group.  */
14572  int side;
14573  /* Group can only hold two insns.  */
14574  bool group_of_two;
14575} s390_sched_state;
14576
14577static struct s390_sched_state sched_state = {0, 1, false};
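/* A short sketch of how these fields evolve, derived from
   s390_sched_variable_issue below: group_state counts the insns already
   placed into the current dispatch group (0, 1 or 2).  Whenever a group
   is closed (because it is full, because an insn ends it, or because a
   group-of-two insn shortens it), group_state is reset to 0,
   group_of_two is cleared, and side flips between 0 and 1 to model the
   two execution sides.  */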
14578
14579#define S390_SCHED_ATTR_MASK_CRACKED    0x1
14580#define S390_SCHED_ATTR_MASK_EXPANDED   0x2
14581#define S390_SCHED_ATTR_MASK_ENDGROUP   0x4
14582#define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
14583#define S390_SCHED_ATTR_MASK_GROUPOFTWO 0x10
14584
14585static unsigned int
14586s390_get_sched_attrmask (rtx_insn *insn)
14587{
14588  unsigned int mask = 0;
14589
14590  switch (s390_tune)
14591    {
14592    case PROCESSOR_2827_ZEC12:
14593      if (get_attr_zEC12_cracked (insn))
14594	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14595      if (get_attr_zEC12_expanded (insn))
14596	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14597      if (get_attr_zEC12_endgroup (insn))
14598	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14599      if (get_attr_zEC12_groupalone (insn))
14600	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14601      break;
14602    case PROCESSOR_2964_Z13:
14603      if (get_attr_z13_cracked (insn))
14604	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14605      if (get_attr_z13_expanded (insn))
14606	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14607      if (get_attr_z13_endgroup (insn))
14608	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14609      if (get_attr_z13_groupalone (insn))
14610	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14611      if (get_attr_z13_groupoftwo (insn))
14612	mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14613      break;
14614    case PROCESSOR_3906_Z14:
14615      if (get_attr_z14_cracked (insn))
14616	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14617      if (get_attr_z14_expanded (insn))
14618	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14619      if (get_attr_z14_endgroup (insn))
14620	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14621      if (get_attr_z14_groupalone (insn))
14622	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14623      if (get_attr_z14_groupoftwo (insn))
14624	mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14625      break;
14626    case PROCESSOR_8561_Z15:
14627      if (get_attr_z15_cracked (insn))
14628	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14629      if (get_attr_z15_expanded (insn))
14630	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14631      if (get_attr_z15_endgroup (insn))
14632	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14633      if (get_attr_z15_groupalone (insn))
14634	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14635      if (get_attr_z15_groupoftwo (insn))
14636	mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14637      break;
14638    default:
14639      gcc_unreachable ();
14640    }
14641  return mask;
14642}
14643
14644static unsigned int
14645s390_get_unit_mask (rtx_insn *insn, int *units)
14646{
14647  unsigned int mask = 0;
14648
14649  switch (s390_tune)
14650    {
14651    case PROCESSOR_2964_Z13:
14652      *units = 4;
14653      if (get_attr_z13_unit_lsu (insn))
14654	mask |= 1 << 0;
14655      if (get_attr_z13_unit_fxa (insn))
14656	mask |= 1 << 1;
14657      if (get_attr_z13_unit_fxb (insn))
14658	mask |= 1 << 2;
14659      if (get_attr_z13_unit_vfu (insn))
14660	mask |= 1 << 3;
14661      break;
14662    case PROCESSOR_3906_Z14:
14663      *units = 4;
14664      if (get_attr_z14_unit_lsu (insn))
14665	mask |= 1 << 0;
14666      if (get_attr_z14_unit_fxa (insn))
14667	mask |= 1 << 1;
14668      if (get_attr_z14_unit_fxb (insn))
14669	mask |= 1 << 2;
14670      if (get_attr_z14_unit_vfu (insn))
14671	mask |= 1 << 3;
14672      break;
14673    case PROCESSOR_8561_Z15:
14674      *units = 4;
14675      if (get_attr_z15_unit_lsu (insn))
14676	mask |= 1 << 0;
14677      if (get_attr_z15_unit_fxa (insn))
14678	mask |= 1 << 1;
14679      if (get_attr_z15_unit_fxb (insn))
14680	mask |= 1 << 2;
14681      if (get_attr_z15_unit_vfu (insn))
14682	mask |= 1 << 3;
14683      break;
14684    default:
14685      gcc_unreachable ();
14686    }
14687  return mask;
14688}
14689
14690static bool
14691s390_is_fpd (rtx_insn *insn)
14692{
14693  if (insn == NULL_RTX)
14694    return false;
14695
14696  return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn)
14697    || get_attr_z15_unit_fpd (insn);
14698}
14699
14700static bool
14701s390_is_fxd (rtx_insn *insn)
14702{
14703  if (insn == NULL_RTX)
14704    return false;
14705
14706  return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn)
14707    || get_attr_z15_unit_fxd (insn);
14708}
14709
14710/* Returns TRUE if INSN is a long-running instruction.  */
14711static bool
14712s390_is_longrunning (rtx_insn *insn)
14713{
14714  if (insn == NULL_RTX)
14715    return false;
14716
14717  return s390_is_fxd (insn) || s390_is_fpd (insn);
14718}
14719
14720
14721/* Return the scheduling score for INSN.  The higher the score the
14722   better.  The score is calculated from the OOO scheduling attributes
14723   of INSN and the scheduling state sched_state.  */
14724static int
14725s390_sched_score (rtx_insn *insn)
14726{
14727  unsigned int mask = s390_get_sched_attrmask (insn);
14728  int score = 0;
14729
14730  switch (sched_state.group_state)
14731    {
14732    case 0:
14733      /* Try to put insns into the first slot which would otherwise
14734	 break a group.  */
14735      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14736	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14737	score += 5;
14738      if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14739	score += 10;
14740      break;
14741    case 1:
14742      /* Prefer not cracked insns while trying to put together a
14743	 group.  */
14744      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14745	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14746	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14747	score += 10;
14748      if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
14749	score += 5;
14750      /* If we are in a group of two already, try to schedule another
14751	 group-of-two insn to avoid shortening another group.  */
14752      if (sched_state.group_of_two
14753	  && (mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14754	score += 15;
14755      break;
14756    case 2:
14757      /* Prefer not cracked insns while trying to put together a
14758	 group.  */
14759      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14760	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14761	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14762	score += 10;
14763      /* Prefer endgroup insns in the last slot.  */
14764      if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14765	score += 10;
14766      /* Try to avoid group-of-two insns in the last slot as they will
14767	 shorten this group as well as the next one.  */
14768      if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14769	score = MAX (0, score - 15);
14770      break;
14771    }
14772
14773  if (s390_tune >= PROCESSOR_2964_Z13)
14774    {
14775      int units, i;
14776      unsigned unit_mask, m = 1;
14777
14778      unit_mask = s390_get_unit_mask (insn, &units);
14779      gcc_assert (units <= MAX_SCHED_UNITS);
14780
14781      /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14782	 ago the last insn of this unit type got scheduled.  This is
14783	 supposed to help providing a proper instruction mix to the
14784	 CPU.  */
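      /* E.g. (illustrative): an insn using unit i gets the full
	 MAX_SCHED_MIX_SCORE for that unit if no insn using unit i has
	 been issued on this side for at least MAX_SCHED_MIX_DISTANCE
	 insns.  */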
14785      for (i = 0; i < units; i++, m <<= 1)
14786	if (m & unit_mask)
14787	  score += (last_scheduled_unit_distance[i][sched_state.side]
14788	      * MAX_SCHED_MIX_SCORE / MAX_SCHED_MIX_DISTANCE);
14789
14790      int other_side = 1 - sched_state.side;
14791
14792      /* Try to delay long-running insns when side is busy.  */
14793      if (s390_is_longrunning (insn))
14794	{
14795	  if (s390_is_fxd (insn))
14796	    {
14797	      if (fxd_longrunning[sched_state.side]
14798		  && fxd_longrunning[other_side]
14799		  <= fxd_longrunning[sched_state.side])
14800		score = MAX (0, score - 10);
14801
14802	      else if (fxd_longrunning[other_side]
14803		  >= fxd_longrunning[sched_state.side])
14804		score += 10;
14805	    }
14806
14807	  if (s390_is_fpd (insn))
14808	    {
14809	      if (fpd_longrunning[sched_state.side]
14810		  && fpd_longrunning[other_side]
14811		  <= fpd_longrunning[sched_state.side])
14812		score = MAX (0, score - 10);
14813
14814	      else if (fpd_longrunning[other_side]
14815		  >= fpd_longrunning[sched_state.side])
14816		score += 10;
14817	    }
14818	}
14819    }
14820
14821  return score;
14822}
14823
14824/* This function is called via hook TARGET_SCHED_REORDER before
14825   issuing one insn from list READY which contains *NREADYP entries.
14826   For target z10 it reorders load instructions to avoid early load
   conflicts in the floating point pipeline.  */
14828static int
14829s390_sched_reorder (FILE *file, int verbose,
14830		    rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
14831{
14832  if (s390_tune == PROCESSOR_2097_Z10
14833      && reload_completed
14834      && *nreadyp > 1)
14835    s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
14836
14837  if (s390_tune >= PROCESSOR_2827_ZEC12
14838      && reload_completed
14839      && *nreadyp > 1)
14840    {
14841      int i;
14842      int last_index = *nreadyp - 1;
14843      int max_index = -1;
14844      int max_score = -1;
14845      rtx_insn *tmp;
14846
14847      /* Just move the insn with the highest score to the top (the
14848	 end) of the list.  A full sort is not needed since a conflict
14849	 in the hazard recognition cannot happen.  So the top insn in
14850	 the ready list will always be taken.  */
14851      for (i = last_index; i >= 0; i--)
14852	{
14853	  int score;
14854
14855	  if (recog_memoized (ready[i]) < 0)
14856	    continue;
14857
14858	  score = s390_sched_score (ready[i]);
14859	  if (score > max_score)
14860	    {
14861	      max_score = score;
14862	      max_index = i;
14863	    }
14864	}
14865
14866      if (max_index != -1)
14867	{
14868	  if (max_index != last_index)
14869	    {
14870	      tmp = ready[max_index];
14871	      ready[max_index] = ready[last_index];
14872	      ready[last_index] = tmp;
14873
14874	      if (verbose > 5)
14875		fprintf (file,
14876			 ";;\t\tBACKEND: move insn %d to the top of list\n",
14877			 INSN_UID (ready[last_index]));
14878	    }
14879	  else if (verbose > 5)
14880	    fprintf (file,
14881		     ";;\t\tBACKEND: best insn %d already on top\n",
14882		     INSN_UID (ready[last_index]));
14883	}
14884
14885      if (verbose > 5)
14886	{
14887	  fprintf (file, "ready list ooo attributes - sched state: %d\n",
14888		   sched_state.group_state);
14889
14890	  for (i = last_index; i >= 0; i--)
14891	    {
14892	      unsigned int sched_mask;
14893	      rtx_insn *insn = ready[i];
14894
14895	      if (recog_memoized (insn) < 0)
14896		continue;
14897
14898	      sched_mask = s390_get_sched_attrmask (insn);
14899	      fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
14900		       INSN_UID (insn),
14901		       s390_sched_score (insn));
14902#define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
14903					   ((M) & sched_mask) ? #ATTR : "");
14904	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14905	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14906	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14907	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14908#undef PRINT_SCHED_ATTR
14909	      if (s390_tune >= PROCESSOR_2964_Z13)
14910		{
14911		  unsigned int unit_mask, m = 1;
14912		  int units, j;
14913
14914		  unit_mask  = s390_get_unit_mask (insn, &units);
14915		  fprintf (file, "(units:");
14916		  for (j = 0; j < units; j++, m <<= 1)
14917		    if (m & unit_mask)
14918		      fprintf (file, " u%d", j);
14919		  fprintf (file, ")");
14920		}
14921	      fprintf (file, "\n");
14922	    }
14923	}
14924    }
14925
14926  return s390_issue_rate ();
14927}
14928
14929
14930/* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14931   the scheduler has issued INSN.  It stores the last issued insn into
14932   last_scheduled_insn in order to make it available for
14933   s390_sched_reorder.  */
14934static int
14935s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
14936{
14937  last_scheduled_insn = insn;
14938
14939  bool ends_group = false;
14940
14941  if (s390_tune >= PROCESSOR_2827_ZEC12
14942      && reload_completed
14943      && recog_memoized (insn) >= 0)
14944    {
14945      unsigned int mask = s390_get_sched_attrmask (insn);
14946
14947      if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14948	sched_state.group_of_two = true;
14949
14950      /* If this is a group-of-two insn, we actually ended the last group
14951	 and this insn is the first one of the new group.  */
14952      if (sched_state.group_state == 2 && sched_state.group_of_two)
14953	{
14954	  sched_state.side = sched_state.side ? 0 : 1;
14955	  sched_state.group_state = 0;
14956	}
14957
14958      /* Longrunning and side bookkeeping.  */
14959      for (int i = 0; i < 2; i++)
14960	{
14961	  fxd_longrunning[i] = MAX (0, fxd_longrunning[i] - 1);
14962	  fpd_longrunning[i] = MAX (0, fpd_longrunning[i] - 1);
14963	}
14964
14965      unsigned latency = insn_default_latency (insn);
14966      if (s390_is_longrunning (insn))
14967	{
14968	  if (s390_is_fxd (insn))
14969	    fxd_longrunning[sched_state.side] = latency;
14970	  else
14971	    fpd_longrunning[sched_state.side] = latency;
14972	}
14973
14974      if (s390_tune >= PROCESSOR_2964_Z13)
14975	{
14976	  int units, i;
14977	  unsigned unit_mask, m = 1;
14978
14979	  unit_mask = s390_get_unit_mask (insn, &units);
14980	  gcc_assert (units <= MAX_SCHED_UNITS);
14981
14982	  for (i = 0; i < units; i++, m <<= 1)
14983	    if (m & unit_mask)
14984	      last_scheduled_unit_distance[i][sched_state.side] = 0;
14985	    else if (last_scheduled_unit_distance[i][sched_state.side]
14986		< MAX_SCHED_MIX_DISTANCE)
14987	      last_scheduled_unit_distance[i][sched_state.side]++;
14988	}
14989
14990      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14991	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
14992	  || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0
14993	  || (mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14994	{
14995	  sched_state.group_state = 0;
14996	  ends_group = true;
14997	}
14998      else
14999	{
15000	  switch (sched_state.group_state)
15001	    {
15002	    case 0:
15003	      sched_state.group_state++;
15004	      break;
15005	    case 1:
15006	      sched_state.group_state++;
15007	      if (sched_state.group_of_two)
15008		{
15009		  sched_state.group_state = 0;
15010		  ends_group = true;
15011		}
15012	      break;
15013	    case 2:
15014	      sched_state.group_state++;
15015	      ends_group = true;
15016	      break;
15017	    }
15018	}
15019
15020      if (verbose > 5)
15021	{
15022	  unsigned int sched_mask;
15023
15024	  sched_mask = s390_get_sched_attrmask (insn);
15025
15026	  fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
15027#define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
15028	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
15029	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
15030	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
15031	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
15032#undef PRINT_SCHED_ATTR
15033
15034	  if (s390_tune >= PROCESSOR_2964_Z13)
15035	    {
15036	      unsigned int unit_mask, m = 1;
15037	      int units, j;
15038
15039	      unit_mask  = s390_get_unit_mask (insn, &units);
15040	      fprintf (file, "(units:");
15041	      for (j = 0; j < units; j++, m <<= 1)
15042		if (m & unit_mask)
15043		  fprintf (file, " %d", j);
15044	      fprintf (file, ")");
15045	    }
15046	  fprintf (file, " sched state: %d\n", sched_state.group_state);
15047
15048	  if (s390_tune >= PROCESSOR_2964_Z13)
15049	    {
15050	      int units, j;
15051
15052	      s390_get_unit_mask (insn, &units);
15053
15054	      fprintf (file, ";;\t\tBACKEND: units on this side unused for: ");
15055	      for (j = 0; j < units; j++)
15056		fprintf (file, "%d:%d ", j,
15057		    last_scheduled_unit_distance[j][sched_state.side]);
15058	      fprintf (file, "\n");
15059	    }
15060	}
15061
15062      /* If this insn ended a group, the next will be on the other side.  */
15063      if (ends_group)
15064	{
15065	  sched_state.group_state = 0;
15066	  sched_state.side = sched_state.side ? 0 : 1;
15067	  sched_state.group_of_two = false;
15068	}
15069    }
15070
15071  if (GET_CODE (PATTERN (insn)) != USE
15072      && GET_CODE (PATTERN (insn)) != CLOBBER)
15073    return more - 1;
15074  else
15075    return more;
15076}
15077
15078static void
15079s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
15080		 int verbose ATTRIBUTE_UNUSED,
15081		 int max_ready ATTRIBUTE_UNUSED)
15082{
15083  /* If the next basic block is most likely entered via a fallthru edge
15084     we keep the last sched state.  Otherwise we start a new group.
15085     The scheduler traverses basic blocks in "instruction stream" ordering
     so if we see a fallthru edge here, sched_state will be that of its
15087     source block.
15088
15089     current_sched_info->prev_head is the insn before the first insn of the
15090     block of insns to be scheduled.
15091     */
15092  rtx_insn *insn = current_sched_info->prev_head
15093    ? NEXT_INSN (current_sched_info->prev_head) : NULL;
15094  basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
15095  if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
15096    {
15097      last_scheduled_insn = NULL;
15098      memset (last_scheduled_unit_distance, 0,
15099	  MAX_SCHED_UNITS * NUM_SIDES * sizeof (int));
15100      sched_state.group_state = 0;
15101      sched_state.group_of_two = false;
15102    }
15103}
15104
/* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
   a new number of times struct loop *LOOP should be unrolled when tuning
   for CPUs with a built-in stride prefetcher.
   The loop is analyzed for memory accesses by examining each rtx of the
   loop.  Depending on the loop depth and the number of memory accesses a
   new count <= NUNROLL is returned to improve the behavior of the
   hardware prefetch unit.  */
15112static unsigned
15113s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
15114{
15115  basic_block *bbs;
15116  rtx_insn *insn;
15117  unsigned i;
15118  unsigned mem_count = 0;
15119
15120  if (s390_tune < PROCESSOR_2097_Z10)
15121    return nunroll;
15122
15123  /* Count the number of memory references within the loop body.  */
15124  bbs = get_loop_body (loop);
15125  subrtx_iterator::array_type array;
15126  for (i = 0; i < loop->num_nodes; i++)
15127    FOR_BB_INSNS (bbs[i], insn)
15128      if (INSN_P (insn) && INSN_CODE (insn) != -1)
15129	{
15130	  rtx set;
15131
15132	  /* The runtime of small loops with memory block operations
15133	     will be determined by the memory operation.  Doing
15134	     unrolling doesn't help here.  Measurements to confirm
	     this were only done on recent CPU levels.  So better do
15136	     not change anything for older CPUs.  */
15137	  if (s390_tune >= PROCESSOR_2964_Z13
15138	      && loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS
15139	      && ((set = single_set (insn)) != NULL_RTX)
15140	      && ((GET_MODE (SET_DEST (set)) == BLKmode
15141		   && (GET_MODE (SET_SRC (set)) == BLKmode
15142		       || SET_SRC (set) == const0_rtx))
15143		  || (GET_CODE (SET_SRC (set)) == COMPARE
15144		      && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
15145		      && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
15146	    return 1;
15147
15148	  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
15149	    if (MEM_P (*iter))
15150	      mem_count += 1;
15151	}
15152  free (bbs);
15153
  /* Prevent division by zero; nunroll does not need to be adjusted in
     this case.  */
15155  if (mem_count == 0)
15156    return nunroll;
15157
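  /* A worked example (illustrative): a loop of depth 1 containing four
     memory references is unrolled at most MIN (nunroll, 28 / 4), i.e. at
     most 7 times.  */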
15158  switch (loop_depth(loop))
15159    {
15160    case 1:
15161      return MIN (nunroll, 28 / mem_count);
15162    case 2:
15163      return MIN (nunroll, 22 / mem_count);
15164    default:
15165      return MIN (nunroll, 16 / mem_count);
15166    }
15167}
15168
15169/* Restore the current options.  This is a hook function and also called
15170   internally.  */
15171
15172static void
15173s390_function_specific_restore (struct gcc_options *opts,
15174				struct cl_target_option *ptr ATTRIBUTE_UNUSED)
15175{
15176  opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
15177}
15178
15179static void
15180s390_default_align (struct gcc_options *opts)
15181{
15182  /* Set the default function alignment to 16 in order to get rid of
15183     some unwanted performance effects. */
15184  if (opts->x_flag_align_functions && !opts->x_str_align_functions
15185      && opts->x_s390_tune >= PROCESSOR_2964_Z13)
15186    opts->x_str_align_functions = "16";
15187}
15188
15189static void
15190s390_override_options_after_change (void)
15191{
15192  s390_default_align (&global_options);
15193}
15194
15195static void
15196s390_option_override_internal (struct gcc_options *opts,
15197			       const struct gcc_options *opts_set)
15198{
15199  /* Architecture mode defaults according to ABI.  */
15200  if (!(opts_set->x_target_flags & MASK_ZARCH))
15201    {
15202      if (TARGET_64BIT)
15203	opts->x_target_flags |= MASK_ZARCH;
15204      else
15205	opts->x_target_flags &= ~MASK_ZARCH;
15206    }
15207
15208  /* Set the march default in case it hasn't been specified on cmdline.  */
15209  if (!opts_set->x_s390_arch)
15210    opts->x_s390_arch = PROCESSOR_2064_Z900;
15211
15212  opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
15213
15214  /* Determine processor to tune for.  */
15215  if (!opts_set->x_s390_tune)
15216    opts->x_s390_tune = opts->x_s390_arch;
15217
15218  opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
15219
15220  /* Sanity checks.  */
15221  if (opts->x_s390_arch == PROCESSOR_NATIVE
15222      || opts->x_s390_tune == PROCESSOR_NATIVE)
15223    gcc_unreachable ();
15224  if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
15225    error ("64-bit ABI not supported in ESA/390 mode");
15226
15227  if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline
15228      || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline
15229      || opts->x_s390_function_return == indirect_branch_thunk_inline
15230      || opts->x_s390_function_return_reg == indirect_branch_thunk_inline
15231      || opts->x_s390_function_return_mem == indirect_branch_thunk_inline)
15232    error ("thunk-inline is only supported with %<-mindirect-branch-jump%>");
15233
15234  if (opts->x_s390_indirect_branch != indirect_branch_keep)
15235    {
15236      if (!opts_set->x_s390_indirect_branch_call)
15237	opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch;
15238
15239      if (!opts_set->x_s390_indirect_branch_jump)
15240	opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch;
15241    }
15242
15243  if (opts->x_s390_function_return != indirect_branch_keep)
15244    {
15245      if (!opts_set->x_s390_function_return_reg)
15246	opts->x_s390_function_return_reg = opts->x_s390_function_return;
15247
15248      if (!opts_set->x_s390_function_return_mem)
15249	opts->x_s390_function_return_mem = opts->x_s390_function_return;
15250    }
15251
15252  /* Enable hardware transactions if available and not explicitly
15253     disabled by user.  E.g. with -m31 -march=zEC12 -mzarch */
15254  if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
15255    {
15256      if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
15257	opts->x_target_flags |= MASK_OPT_HTM;
15258      else
15259	opts->x_target_flags &= ~MASK_OPT_HTM;
15260    }
15261
15262  if (TARGET_OPT_VX_P (opts_set->x_target_flags))
15263    {
15264      if (TARGET_OPT_VX_P (opts->x_target_flags))
15265	{
15266	  if (!TARGET_CPU_VX_P (opts))
15267	    error ("hardware vector support not available on %s",
15268		   processor_table[(int)opts->x_s390_arch].name);
15269	  if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15270	    error ("hardware vector support not available with "
15271		   "%<-msoft-float%>");
15272	}
15273    }
15274  else
15275    {
15276      if (TARGET_CPU_VX_P (opts))
15277	/* Enable vector support if available and not explicitly disabled
15278	   by user.  E.g. with -m31 -march=z13 -mzarch */
15279	opts->x_target_flags |= MASK_OPT_VX;
15280      else
15281	opts->x_target_flags &= ~MASK_OPT_VX;
15282    }
15283
15284  /* Use hardware DFP if available and not explicitly disabled by
15285     user. E.g. with -m31 -march=z10 -mzarch   */
15286  if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
15287    {
15288      if (TARGET_DFP_P (opts))
15289	opts->x_target_flags |= MASK_HARD_DFP;
15290      else
15291	opts->x_target_flags &= ~MASK_HARD_DFP;
15292    }
15293
15294  if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
15295    {
15296      if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
15297	{
15298	  if (!TARGET_CPU_DFP_P (opts))
15299	    error ("hardware decimal floating point instructions"
15300		   " not available on %s",
15301		   processor_table[(int)opts->x_s390_arch].name);
15302	  if (!TARGET_ZARCH_P (opts->x_target_flags))
15303	    error ("hardware decimal floating point instructions"
15304		   " not available in ESA/390 mode");
15305	}
15306      else
15307	opts->x_target_flags &= ~MASK_HARD_DFP;
15308    }
15309
15310  if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
15311      && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15312    {
15313      if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
15314	  && TARGET_HARD_DFP_P (opts->x_target_flags))
15315	error ("%<-mhard-dfp%> can%'t be used in conjunction with "
15316	       "%<-msoft-float%>");
15317
15318      opts->x_target_flags &= ~MASK_HARD_DFP;
15319    }
15320
15321  if (TARGET_BACKCHAIN_P (opts->x_target_flags)
15322      && TARGET_PACKED_STACK_P (opts->x_target_flags)
15323      && TARGET_HARD_FLOAT_P (opts->x_target_flags))
15324    error ("%<-mbackchain%> %<-mpacked-stack%> %<-mhard-float%> are not "
15325	   "supported in combination");
15326
15327  if (opts->x_s390_stack_size)
15328    {
15329      if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
15330	error ("stack size must be greater than the stack guard value");
15331      else if (opts->x_s390_stack_size > 1 << 16)
15332	error ("stack size must not be greater than 64k");
15333    }
15334  else if (opts->x_s390_stack_guard)
15335    error ("%<-mstack-guard%> implies use of %<-mstack-size%>");
15336
15337  /* Our implementation of the stack probe requires the probe interval
15338     to be used as displacement in an address operand.  The maximum
15339     probe interval currently is 64k.  This would exceed short
     displacements.  Trim that value down to 4k if that happens.  This
     might result in too many probes being generated, but only on the
     oldest supported machine level, z900.  */
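  /* E.g. (illustrative): an interval of 2^16 bytes (64k), the current
     maximum, does not fit a short displacement and is reduced to 2^12
     bytes (4k).  */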
15343  if (!DISP_IN_RANGE ((1 << param_stack_clash_protection_probe_interval)))
15344    param_stack_clash_protection_probe_interval = 12;
15345
15346#if TARGET_TPF != 0
15347  if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_check))
15348    error ("-mtpf-trace-hook-prologue-check requires integer in range 0..4095");
15349
15350  if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_target))
15351    error ("-mtpf-trace-hook-prologue-target requires integer in range 0..4095");
15352
15353  if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_check))
15354    error ("-mtpf-trace-hook-epilogue-check requires integer in range 0..4095");
15355
15356  if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_target))
15357    error ("-mtpf-trace-hook-epilogue-target requires integer in range 0..4095");
15358
15359  if (s390_tpf_trace_skip)
15360    {
15361      opts->x_s390_tpf_trace_hook_prologue_target = TPF_TRACE_PROLOGUE_SKIP_TARGET;
15362      opts->x_s390_tpf_trace_hook_epilogue_target = TPF_TRACE_EPILOGUE_SKIP_TARGET;
15363    }
15364#endif
15365
15366#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
15367  if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
15368    opts->x_target_flags |= MASK_LONG_DOUBLE_128;
15369#endif
15370
15371  if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
15372    {
15373      SET_OPTION_IF_UNSET (opts, opts_set, param_max_unrolled_insns,
15374			   100);
15375      SET_OPTION_IF_UNSET (opts, opts_set, param_max_unroll_times, 32);
15376      SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peeled_insns,
15377			   2000);
15378      SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peel_times,
15379			   64);
15380    }
15381
15382  SET_OPTION_IF_UNSET (opts, opts_set, param_max_pending_list_length,
15383		       256);
15384  /* values for loop prefetching */
15385  SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_line_size, 256);
15386  SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_size, 128);
15387  /* s390 has more than 2 levels and the size is much larger.  Since
     we are always running virtualized, assume that we only get a small
15389     part of the caches above l1.  */
15390  SET_OPTION_IF_UNSET (opts, opts_set, param_l2_cache_size, 1500);
15391  SET_OPTION_IF_UNSET (opts, opts_set,
15392		       param_prefetch_min_insn_to_mem_ratio, 2);
15393  SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches, 6);
15394
15395  /* Use the alternative scheduling-pressure algorithm by default.  */
15396  SET_OPTION_IF_UNSET (opts, opts_set, param_sched_pressure_algorithm, 2);
15397  SET_OPTION_IF_UNSET (opts, opts_set, param_min_vect_loop_bound, 2);
15398
15399  /* Use aggressive inlining parameters.  */
15400  if (opts->x_s390_tune >= PROCESSOR_2964_Z13)
15401    {
15402      SET_OPTION_IF_UNSET (opts, opts_set, param_inline_min_speedup, 2);
15403      SET_OPTION_IF_UNSET (opts, opts_set, param_max_inline_insns_auto, 80);
15404    }
15405
15406  /* Set the default alignment.  */
15407  s390_default_align (opts);
15408
15409  /* Call target specific restore function to do post-init work.  At the moment,
15410     this just sets opts->x_s390_cost_pointer.  */
15411  s390_function_specific_restore (opts, NULL);
15412
  /* Check whether -mfentry is supported.  It cannot be used in 31-bit mode,
     because 31-bit PLT stubs assume that %r12 contains the GOT address, which
     is not the case when the code runs before the prologue.  */
15416  if (opts->x_flag_fentry && !TARGET_64BIT)
15417    error ("%<-mfentry%> is supported only for 64-bit CPUs");
15418}
15419
15420static void
15421s390_option_override (void)
15422{
15423  unsigned int i;
15424  cl_deferred_option *opt;
15425  vec<cl_deferred_option> *v =
15426    (vec<cl_deferred_option> *) s390_deferred_options;
15427
15428  if (v)
15429    FOR_EACH_VEC_ELT (*v, i, opt)
15430      {
15431	switch (opt->opt_index)
15432	  {
15433	  case OPT_mhotpatch_:
15434	    {
15435	      int val1;
15436	      int val2;
15437	      char *s = strtok (ASTRDUP (opt->arg), ",");
15438	      char *t = strtok (NULL, "\0");
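	      /* Usage sketch (the values are illustrative): -mhotpatch=1,2
		 requests one halfword of hotpatch NOPs before the function
		 label and two halfwords after it.  */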
15439
15440	      if (t != NULL)
15441		{
15442		  val1 = integral_argument (s);
15443		  val2 = integral_argument (t);
15444		}
15445	      else
15446		{
15447		  val1 = -1;
15448		  val2 = -1;
15449		}
15450	      if (val1 == -1 || val2 == -1)
15451		{
15452		  /* argument is not a plain number */
15453		  error ("arguments to %qs should be non-negative integers",
15454			 "-mhotpatch=n,m");
15455		  break;
15456		}
15457	      else if (val1 > s390_hotpatch_hw_max
15458		       || val2 > s390_hotpatch_hw_max)
15459		{
15460		  error ("argument to %qs is too large (max. %d)",
15461			 "-mhotpatch=n,m", s390_hotpatch_hw_max);
15462		  break;
15463		}
15464	      s390_hotpatch_hw_before_label = val1;
15465	      s390_hotpatch_hw_after_label = val2;
15466	      break;
15467	    }
15468	  default:
15469	    gcc_unreachable ();
15470	  }
15471      }
15472
15473  /* Set up function hooks.  */
15474  init_machine_status = s390_init_machine_status;
15475
15476  s390_option_override_internal (&global_options, &global_options_set);
15477
15478  /* Save the initial options in case the user does function specific
15479     options.  */
15480  target_option_default_node = build_target_option_node (&global_options);
15481  target_option_current_node = target_option_default_node;
15482
15483  /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15484     requires the arch flags to be evaluated already.  Since prefetching
15485     is beneficial on s390, we enable it if available.  */
15486  if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
15487    flag_prefetch_loop_arrays = 1;
15488
15489  if (!s390_pic_data_is_text_relative && !flag_pic)
15490    error ("%<-mno-pic-data-is-text-relative%> cannot be used without "
15491	   "%<-fpic%>/%<-fPIC%>");
15492
15493  if (TARGET_TPF)
15494    {
15495      /* Don't emit DWARF3/4 unless specifically selected.  The TPF
15496	 debuggers do not yet support DWARF 3/4.  */
15497      if (!global_options_set.x_dwarf_strict)
15498	dwarf_strict = 1;
15499      if (!global_options_set.x_dwarf_version)
15500	dwarf_version = 2;
15501    }
15502}
15503
15504#if S390_USE_TARGET_ATTRIBUTE
15505/* Inner function to process the attribute((target(...))), take an argument and
15506   set the current options from the argument. If we have a list, recursively go
15507   over the list.  */
15508
15509static bool
15510s390_valid_target_attribute_inner_p (tree args,
15511				     struct gcc_options *opts,
15512				     struct gcc_options *new_opts_set,
15513				     bool force_pragma)
15514{
15515  char *next_optstr;
15516  bool ret = true;
15517
15518#define S390_ATTRIB(S,O,A)  { S, sizeof (S)-1, O, A, 0 }
15519#define S390_PRAGMA(S,O,A)  { S, sizeof (S)-1, O, A, 1 }
15520  static const struct
15521  {
15522    const char *string;
15523    size_t len;
15524    int opt;
15525    int has_arg;
15526    int only_as_pragma;
15527  } attrs[] = {
15528    /* enum options */
15529    S390_ATTRIB ("arch=", OPT_march_, 1),
15530    S390_ATTRIB ("tune=", OPT_mtune_, 1),
15531    /* uinteger options */
15532    S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
15533    S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
15534    S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
15535    S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
15536    /* flag options */
15537    S390_ATTRIB ("backchain", OPT_mbackchain, 0),
15538    S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
15539    S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
15540    S390_ATTRIB ("htm", OPT_mhtm, 0),
15541    S390_ATTRIB ("vx", OPT_mvx, 0),
15542    S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
15543    S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
15544    S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
15545    S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
15546    S390_PRAGMA ("zvector", OPT_mzvector, 0),
15547    /* boolean options */
15548    S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
15549  };
15550#undef S390_ATTRIB
15551#undef S390_PRAGMA
15552
15553  /* If this is a list, recurse to get the options.  */
15554  if (TREE_CODE (args) == TREE_LIST)
15555    {
15556      bool ret = true;
15557      int num_pragma_values;
15558      int i;
15559
15560      /* Note: attribs.c:decl_attributes prepends the values from
15561	 current_target_pragma to the list of target attributes.  To determine
15562	 whether we're looking at a value of the attribute or the pragma we
15563	 assume that the first [list_length (current_target_pragma)] values in
15564	 the list are the values from the pragma.  */
15565      num_pragma_values = (!force_pragma && current_target_pragma != NULL)
15566	? list_length (current_target_pragma) : 0;
15567      for (i = 0; args; args = TREE_CHAIN (args), i++)
15568	{
15569	  bool is_pragma;
15570
15571	  is_pragma = (force_pragma || i < num_pragma_values);
15572	  if (TREE_VALUE (args)
15573	      && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
15574						       opts, new_opts_set,
15575						       is_pragma))
15576	    {
15577	      ret = false;
15578	    }
15579	}
15580      return ret;
15581    }
15582
15583  else if (TREE_CODE (args) != STRING_CST)
15584    {
15585      error ("attribute %<target%> argument not a string");
15586      return false;
15587    }
15588
15589  /* Handle multiple arguments separated by commas.  */
15590  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
15591
15592  while (next_optstr && *next_optstr != '\0')
15593    {
15594      char *p = next_optstr;
15595      char *orig_p = p;
15596      char *comma = strchr (next_optstr, ',');
15597      size_t len, opt_len;
15598      int opt;
15599      bool opt_set_p;
15600      char ch;
15601      unsigned i;
15602      int mask = 0;
15603      enum cl_var_type var_type;
15604      bool found;
15605
15606      if (comma)
15607	{
15608	  *comma = '\0';
15609	  len = comma - next_optstr;
15610	  next_optstr = comma + 1;
15611	}
15612      else
15613	{
15614	  len = strlen (p);
15615	  next_optstr = NULL;
15616	}
15617
15618      /* Recognize no-xxx.  */
15619      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
15620	{
15621	  opt_set_p = false;
15622	  p += 3;
15623	  len -= 3;
15624	}
15625      else
15626	opt_set_p = true;
15627
15628      /* Find the option.  */
15629      ch = *p;
15630      found = false;
15631      for (i = 0; i < ARRAY_SIZE (attrs); i++)
15632	{
15633	  opt_len = attrs[i].len;
15634	  if (ch == attrs[i].string[0]
15635	      && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
15636	      && memcmp (p, attrs[i].string, opt_len) == 0)
15637	    {
15638	      opt = attrs[i].opt;
15639	      if (!opt_set_p && cl_options[opt].cl_reject_negative)
15640		continue;
15641	      mask = cl_options[opt].var_value;
15642	      var_type = cl_options[opt].var_type;
15643	      found = true;
15644	      break;
15645	    }
15646	}
15647
15648      /* Process the option.  */
15649      if (!found)
15650	{
15651	  error ("attribute(target(\"%s\")) is unknown", orig_p);
15652	  return false;
15653	}
15654      else if (attrs[i].only_as_pragma && !force_pragma)
15655	{
15656	  /* Value is not allowed for the target attribute.  */
15657	  error ("value %qs is not supported by attribute %<target%>",
15658		 attrs[i].string);
15659	  return false;
15660	}
15661
15662      else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
15663	{
15664	  if (var_type == CLVC_BIT_CLEAR)
15665	    opt_set_p = !opt_set_p;
15666
15667	  if (opt_set_p)
15668	    opts->x_target_flags |= mask;
15669	  else
15670	    opts->x_target_flags &= ~mask;
15671	  new_opts_set->x_target_flags |= mask;
15672	}
15673
15674      else if (cl_options[opt].var_type == CLVC_BOOLEAN)
15675	{
15676	  int value;
15677
15678	  if (cl_options[opt].cl_uinteger)
15679	    {
15680	      /* Unsigned integer argument.  Code based on the function
15681		 decode_cmdline_option () in opts-common.c.  */
15682	      value = integral_argument (p + opt_len);
15683	    }
15684	  else
15685	    value = (opt_set_p) ? 1 : 0;
15686
15687	  if (value != -1)
15688	    {
15689	      struct cl_decoded_option decoded;
15690
15691	      /* Value range check; only implemented for numeric and boolean
15692		 options at the moment.  */
15693	      generate_option (opt, NULL, value, CL_TARGET, &decoded);
15694	      s390_handle_option (opts, new_opts_set, &decoded, input_location);
15695	      set_option (opts, new_opts_set, opt, value,
15696			  p + opt_len, DK_UNSPECIFIED, input_location,
15697			  global_dc);
15698	    }
15699	  else
15700	    {
15701	      error ("attribute(target(\"%s\")) is unknown", orig_p);
15702	      ret = false;
15703	    }
15704	}
15705
15706      else if (cl_options[opt].var_type == CLVC_ENUM)
15707	{
15708	  bool arg_ok;
15709	  int value;
15710
15711	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
15712	  if (arg_ok)
15713	    set_option (opts, new_opts_set, opt, value,
15714			p + opt_len, DK_UNSPECIFIED, input_location,
15715			global_dc);
15716	  else
15717	    {
15718	      error ("attribute(target(\"%s\")) is unknown", orig_p);
15719	      ret = false;
15720	    }
15721	}
15722
15723      else
15724	gcc_unreachable ();
15725    }
15726  return ret;
15727}
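
/* For illustration only: a sketch of the strings accepted by the
   parser above, following the option table, e.g.

     __attribute__ ((target ("arch=z13,no-vx,stack-guard=128")))
     void critical_section (void);

   "arch=" and "stack-guard=" take an argument, "vx" is a flag that
   can be negated with the "no-" prefix, and "zvector" is only
   accepted when it comes from a target pragma.  */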
15728
15729/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
15730
15731tree
15732s390_valid_target_attribute_tree (tree args,
15733				  struct gcc_options *opts,
15734				  const struct gcc_options *opts_set,
15735				  bool force_pragma)
15736{
15737  tree t = NULL_TREE;
15738  struct gcc_options new_opts_set;
15739
15740  memset (&new_opts_set, 0, sizeof (new_opts_set));
15741
15742  /* Process each of the options on the chain.  */
15743  if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
15744					     force_pragma))
15745    return error_mark_node;
15746
15747  /* If some option was set (even if it has not changed), rerun
15748     s390_option_override_internal, and then save the options away.  */
15749  if (new_opts_set.x_target_flags
15750      || new_opts_set.x_s390_arch
15751      || new_opts_set.x_s390_tune
15752      || new_opts_set.x_s390_stack_guard
15753      || new_opts_set.x_s390_stack_size
15754      || new_opts_set.x_s390_branch_cost
15755      || new_opts_set.x_s390_warn_framesize
15756      || new_opts_set.x_s390_warn_dynamicstack_p)
15757    {
15758      const unsigned char *src = (const unsigned char *)opts_set;
15759      unsigned char *dest = (unsigned char *)&new_opts_set;
15760      unsigned int i;
15761
15762      /* Merge the original option flags into the new ones.  */
15763      for (i = 0; i < sizeof(*opts_set); i++)
15764	dest[i] |= src[i];
15765
15766      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
15767      s390_option_override_internal (opts, &new_opts_set);
15768      /* Save the current options unless we are validating options for
15769	 #pragma.  */
15770      t = build_target_option_node (opts);
15771    }
15772  return t;
15773}
15774
15775/* Hook to validate attribute((target("string"))).  */
15776
15777static bool
15778s390_valid_target_attribute_p (tree fndecl,
15779			       tree ARG_UNUSED (name),
15780			       tree args,
15781			       int ARG_UNUSED (flags))
15782{
15783  struct gcc_options func_options;
15784  tree new_target, new_optimize;
15785  bool ret = true;
15786
15787  /* attribute((target("default"))) does nothing, beyond
15788     affecting multi-versioning.  */
15789  if (TREE_VALUE (args)
15790      && TREE_CODE (TREE_VALUE (args)) == STRING_CST
15791      && TREE_CHAIN (args) == NULL_TREE
15792      && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
15793    return true;
15794
15795  tree old_optimize = build_optimization_node (&global_options);
15796
15797  /* Get the optimization options of the current function.  */
15798  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15799
15800  if (!func_optimize)
15801    func_optimize = old_optimize;
15802
15803  /* Init func_options.  */
15804  memset (&func_options, 0, sizeof (func_options));
15805  init_options_struct (&func_options, NULL);
15806  lang_hooks.init_options_struct (&func_options);
15807
15808  cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
15809
15810  /* Initialize func_options to the default before its target options can
15811     be set.  */
15812  cl_target_option_restore (&func_options,
15813			    TREE_TARGET_OPTION (target_option_default_node));
15814
15815  new_target = s390_valid_target_attribute_tree (args, &func_options,
15816						 &global_options_set,
15817						 (args ==
15818						  current_target_pragma));
15819  new_optimize = build_optimization_node (&func_options);
15820  if (new_target == error_mark_node)
15821    ret = false;
15822  else if (fndecl && new_target)
15823    {
15824      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15825      if (old_optimize != new_optimize)
15826	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
15827    }
15828  return ret;
15829}
15830
15831/* Hook to determine if one function can safely inline another.  */
15832
15833static bool
15834s390_can_inline_p (tree caller, tree callee)
15835{
15836  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
15837  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
15838
15839  if (!callee_tree)
15840    callee_tree = target_option_default_node;
15841  if (!caller_tree)
15842    caller_tree = target_option_default_node;
15843  if (callee_tree == caller_tree)
15844    return true;
15845
15846  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
15847  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
15848  bool ret = true;
15849
15850  if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
15851      != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
15852    ret = false;
15853
15854  /* Don't inline functions to be compiled for a more recent arch into a
15855     function for an older arch.  */
15856  else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
15857    ret = false;
15858
15859  /* Inlining a hard float function into a soft float function is only
15860     allowed if the hard float function doesn't actually make use of
15861     floating point.
15862
     We are called from FEs for multi-versioning call optimization, so
     beware that ipa_fn_summaries may not be available.  */
15865  else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
15866	     && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
15867	    || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
15868		&& TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
15869	   && (! ipa_fn_summaries
15870	       || ipa_fn_summaries->get
15871	       (cgraph_node::get (callee))->fp_expressions))
15872    ret = false;
15873
15874  return ret;
15875}
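
/* For illustration only, a case the hook above rejects: a caller
   compiled for an older arch must not inline a callee compiled for a
   newer one, e.g.

     __attribute__ ((target ("arch=z13"))) int callee (int);
     __attribute__ ((target ("arch=z10"))) int caller (int x)
     { return callee (x); }

   Here caller_opts->x_s390_arch < callee_opts->x_s390_arch, so
   s390_can_inline_p returns false.  */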
15876#endif
15877
/* Set VAL to the correct enum value according to the indirect-branch or
   function-return attribute in ATTR.  */
15880
15881static inline void
15882s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val)
15883{
15884  const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
15885  if (strcmp (str, "keep") == 0)
15886    *val = indirect_branch_keep;
15887  else if (strcmp (str, "thunk") == 0)
15888    *val = indirect_branch_thunk;
15889  else if (strcmp (str, "thunk-inline") == 0)
15890    *val = indirect_branch_thunk_inline;
15891  else if (strcmp (str, "thunk-extern") == 0)
15892    *val = indirect_branch_thunk_extern;
15893}
15894
/* Memorize in cfun->machine the settings for -mindirect-branch* and
   -mfunction-return*, taken from either the command line or the
   function attributes.  */
15898
15899static void
15900s390_indirect_branch_settings (tree fndecl)
15901{
15902  tree attr;
15903
15904  if (!fndecl)
15905    return;
15906
15907  /* Initialize with the cmdline options and let the attributes
15908     override it.  */
15909  cfun->machine->indirect_branch_jump = s390_indirect_branch_jump;
15910  cfun->machine->indirect_branch_call = s390_indirect_branch_call;
15911
15912  cfun->machine->function_return_reg = s390_function_return_reg;
15913  cfun->machine->function_return_mem = s390_function_return_mem;
15914
15915  if ((attr = lookup_attribute ("indirect_branch",
15916				DECL_ATTRIBUTES (fndecl))))
15917    {
15918      s390_indirect_branch_attrvalue (attr,
15919				      &cfun->machine->indirect_branch_jump);
15920      s390_indirect_branch_attrvalue (attr,
15921				      &cfun->machine->indirect_branch_call);
15922    }
15923
15924  if ((attr = lookup_attribute ("indirect_branch_jump",
15925				DECL_ATTRIBUTES (fndecl))))
15926    s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump);
15927
15928  if ((attr = lookup_attribute ("indirect_branch_call",
15929				DECL_ATTRIBUTES (fndecl))))
15930    s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call);
15931
15932  if ((attr = lookup_attribute ("function_return",
15933				DECL_ATTRIBUTES (fndecl))))
15934    {
15935      s390_indirect_branch_attrvalue (attr,
15936				      &cfun->machine->function_return_reg);
15937      s390_indirect_branch_attrvalue (attr,
15938				      &cfun->machine->function_return_mem);
15939    }
15940
15941  if ((attr = lookup_attribute ("function_return_reg",
15942				DECL_ATTRIBUTES (fndecl))))
15943    s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg);
15944
15945  if ((attr = lookup_attribute ("function_return_mem",
15946				DECL_ATTRIBUTES (fndecl))))
15947    s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem);
15948}
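
/* For illustration only: the attributes handled above take the same
   values as the corresponding command-line options, e.g.

     __attribute__ ((indirect_branch ("thunk-extern")))
     void dispatch (void (*fn) (void)) { fn (); }

   overrides -mindirect-branch= for this one function; the accepted
   values are "keep", "thunk", "thunk-inline" and "thunk-extern".  */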
15949
15950#if S390_USE_TARGET_ATTRIBUTE
/* Restore target globals from NEW_TREE and invalidate the
   s390_previous_fndecl cache.  */
15953
15954void
15955s390_activate_target_options (tree new_tree)
15956{
15957  cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
15958  if (TREE_TARGET_GLOBALS (new_tree))
15959    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
15960  else if (new_tree == target_option_default_node)
15961    restore_target_globals (&default_target_globals);
15962  else
15963    TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
15964  s390_previous_fndecl = NULL_TREE;
15965}
15966#endif
15967
15968/* Establish appropriate back-end context for processing the function
15969   FNDECL.  The argument might be NULL to indicate processing at top
15970   level, outside of any function scope.  */
15971static void
15972s390_set_current_function (tree fndecl)
15973{
15974#if S390_USE_TARGET_ATTRIBUTE
15975  /* Only change the context if the function changes.  This hook is called
15976     several times in the course of compiling a function, and we don't want to
15977     slow things down too much or call target_reinit when it isn't safe.  */
15978  if (fndecl == s390_previous_fndecl)
15979    {
15980      s390_indirect_branch_settings (fndecl);
15981      return;
15982    }
15983
15984  tree old_tree;
15985  if (s390_previous_fndecl == NULL_TREE)
15986    old_tree = target_option_current_node;
15987  else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
15988    old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
15989  else
15990    old_tree = target_option_default_node;
15991
15992  if (fndecl == NULL_TREE)
15993    {
15994      if (old_tree != target_option_current_node)
15995	s390_activate_target_options (target_option_current_node);
15996      return;
15997    }
15998
15999  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
16000  if (new_tree == NULL_TREE)
16001    new_tree = target_option_default_node;
16002
16003  if (old_tree != new_tree)
16004    s390_activate_target_options (new_tree);
16005  s390_previous_fndecl = fndecl;
16006#endif
16007  s390_indirect_branch_settings (fndecl);
16008}
16009
16010/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
16011
16012static bool
16013s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
16014				     unsigned int align ATTRIBUTE_UNUSED,
16015				     enum by_pieces_operation op ATTRIBUTE_UNUSED,
16016				     bool speed_p ATTRIBUTE_UNUSED)
16017{
16018  return (size == 1 || size == 2
16019	  || size == 4 || (TARGET_ZARCH && size == 8));
16020}
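
/* For illustration only: with the hook above a constant-size copy of
   1, 2, 4 or (on zarch) 8 bytes, e.g.

     memcpy (dst, src, 8);

   is expanded through the by-pieces infrastructure instead of the
   block-move patterns or a library call.  */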
16021
16022/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */
16023
16024static void
16025s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
16026{
16027  tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
16028  tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
16029  tree call_efpc = build_call_expr (efpc, 0);
16030  tree fenv_var = create_tmp_var_raw (unsigned_type_node);
16031
16032#define FPC_EXCEPTION_MASK	 HOST_WIDE_INT_UC (0xf8000000)
16033#define FPC_FLAGS_MASK		 HOST_WIDE_INT_UC (0x00f80000)
16034#define FPC_DXC_MASK		 HOST_WIDE_INT_UC (0x0000ff00)
16035#define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
16036#define FPC_FLAGS_SHIFT		 HOST_WIDE_INT_UC (16)
16037#define FPC_DXC_SHIFT		 HOST_WIDE_INT_UC (8)
16038
16039  /* Generates the equivalent of feholdexcept (&fenv_var)
16040
16041     fenv_var = __builtin_s390_efpc ();
16042     __builtin_s390_sfpc (fenv_var & mask) */
16043  tree old_fpc = build4 (TARGET_EXPR, unsigned_type_node, fenv_var, call_efpc,
16044			 NULL_TREE, NULL_TREE);
16045  tree new_fpc
16046    = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
16047	      build_int_cst (unsigned_type_node,
16048			     ~(FPC_DXC_MASK | FPC_FLAGS_MASK
16049			       | FPC_EXCEPTION_MASK)));
16050  tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
16051  *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
16052
16053  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
16054
16055     __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
16056  new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
16057		    build_int_cst (unsigned_type_node,
16058				   ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
16059  *clear = build_call_expr (sfpc, 1, new_fpc);
16060
16061  /* Generates the equivalent of feupdateenv (fenv_var)
16062
16063  old_fpc = __builtin_s390_efpc ();
16064  __builtin_s390_sfpc (fenv_var);
16065  __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT);  */
16066
16067  old_fpc = create_tmp_var_raw (unsigned_type_node);
16068  tree store_old_fpc = build4 (TARGET_EXPR, void_type_node, old_fpc, call_efpc,
16069			       NULL_TREE, NULL_TREE);
16070
16071  set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
16072
16073  tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
16074				  build_int_cst (unsigned_type_node,
16075						 FPC_FLAGS_MASK));
16076  raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
16077			     build_int_cst (unsigned_type_node,
16078					    FPC_FLAGS_SHIFT));
16079  tree atomic_feraiseexcept
16080    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
16081  raise_old_except = build_call_expr (atomic_feraiseexcept,
16082				      1, raise_old_except);
16083
16084  *update = build2 (COMPOUND_EXPR, void_type_node,
16085		    build2 (COMPOUND_EXPR, void_type_node,
16086			    store_old_fpc, set_new_fpc),
16087		    raise_old_except);
16088
16089#undef FPC_EXCEPTION_MASK
16090#undef FPC_FLAGS_MASK
16091#undef FPC_DXC_MASK
16092#undef FPC_EXCEPTION_MASK_SHIFT
16093#undef FPC_FLAGS_SHIFT
16094#undef FPC_DXC_SHIFT
16095}
16096
16097/* Return the vector mode to be used for inner mode MODE when doing
16098   vectorization.  */
16099static machine_mode
16100s390_preferred_simd_mode (scalar_mode mode)
16101{
16102  if (TARGET_VXE)
16103    switch (mode)
16104      {
16105      case E_SFmode:
16106	return V4SFmode;
16107      default:;
16108      }
16109
16110  if (TARGET_VX)
16111    switch (mode)
16112      {
16113      case E_DFmode:
16114	return V2DFmode;
16115      case E_DImode:
16116	return V2DImode;
16117      case E_SImode:
16118	return V4SImode;
16119      case E_HImode:
16120	return V8HImode;
16121      case E_QImode:
16122	return V16QImode;
16123      default:;
16124      }
16125  return word_mode;
16126}
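
/* For illustration only: with -march=z13 the hook above gives the
   vectorizer V2DFmode for a double loop such as

     void scale (double *a, double b, int n)
     { for (int i = 0; i < n; i++) a[i] *= b; }

   while SFmode only gets a vector mode (V4SFmode) once TARGET_VXE is
   available.  */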
16127
16128/* Our hardware does not require vectors to be strictly aligned.  */
16129static bool
16130s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
16131				  const_tree type ATTRIBUTE_UNUSED,
16132				  int misalignment ATTRIBUTE_UNUSED,
16133				  bool is_packed ATTRIBUTE_UNUSED)
16134{
16135  if (TARGET_VX)
16136    return true;
16137
16138  return default_builtin_support_vector_misalignment (mode, type, misalignment,
16139						      is_packed);
16140}
16141
/* The vector ABI requires vector types to be aligned on an 8 byte
   boundary (our stack alignment).  However, we allow this to be
   overridden by the user, although doing so breaks the ABI.  */
16145static HOST_WIDE_INT
16146s390_vector_alignment (const_tree type)
16147{
16148  tree size = TYPE_SIZE (type);
16149
16150  if (!TARGET_VX_ABI)
16151    return default_vector_alignment (type);
16152
16153  if (TYPE_USER_ALIGN (type))
16154    return TYPE_ALIGN (type);
16155
16156  if (tree_fits_uhwi_p (size)
16157      && tree_to_uhwi (size) < BIGGEST_ALIGNMENT)
16158    return tree_to_uhwi (size);
16159
16160  return BIGGEST_ALIGNMENT;
16161}
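
/* For illustration only: under the vector ABI a 16-byte vector type
   such as

     typedef int v4si __attribute__ ((vector_size (16)));

   is therefore only guaranteed BIGGEST_ALIGNMENT (the 8-byte stack
   alignment mentioned above) rather than its full size, while smaller
   vector types are aligned to their size, unless the user overrides
   the alignment explicitly.  */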
16162
/* Implement TARGET_CONSTANT_ALIGNMENT.  Align constants on even
   addresses so that they can be addressed with the LARL instruction.  */
16165
16166static HOST_WIDE_INT
16167s390_constant_alignment (const_tree, HOST_WIDE_INT align)
16168{
16169  return MAX (align, 16);
16170}
16171
16172#ifdef HAVE_AS_MACHINE_MACHINEMODE
16173/* Implement TARGET_ASM_FILE_START.  */
16174static void
16175s390_asm_file_start (void)
16176{
16177  default_file_start ();
16178  s390_asm_output_machine_for_arch (asm_out_file);
16179}
16180#endif
16181
16182/* Implement TARGET_ASM_FILE_END.  */
16183static void
16184s390_asm_file_end (void)
16185{
16186#ifdef HAVE_AS_GNU_ATTRIBUTE
16187  varpool_node *vnode;
16188  cgraph_node *cnode;
16189
16190  FOR_EACH_VARIABLE (vnode)
16191    if (TREE_PUBLIC (vnode->decl))
16192      s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
16193
16194  FOR_EACH_FUNCTION (cnode)
16195    if (TREE_PUBLIC (cnode->decl))
16196      s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
16197
16198
16199  if (s390_vector_abi != 0)
16200    fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
16201	     s390_vector_abi);
16202#endif
16203  file_end_indicate_exec_stack ();
16204
16205  if (flag_split_stack)
16206    file_end_indicate_split_stack ();
16207}
16208
16209/* Return true if TYPE is a vector bool type.  */
16210static inline bool
16211s390_vector_bool_type_p (const_tree type)
16212{
16213  return TYPE_VECTOR_OPAQUE (type);
16214}
16215
16216/* Return the diagnostic message string if the binary operation OP is
16217   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
16218static const char*
16219s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
16220{
16221  bool bool1_p, bool2_p;
16222  bool plusminus_p;
16223  bool muldiv_p;
16224  bool compare_p;
16225  machine_mode mode1, mode2;
16226
16227  if (!TARGET_ZVECTOR)
16228    return NULL;
16229
16230  if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
16231    return NULL;
16232
16233  bool1_p = s390_vector_bool_type_p (type1);
16234  bool2_p = s390_vector_bool_type_p (type2);
16235
16236  /* Mixing signed and unsigned types is forbidden for all
16237     operators.  */
16238  if (!bool1_p && !bool2_p
16239      && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
16240    return N_("types differ in signedness");
16241
16242  plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
16243  muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
16244	      || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
16245	      || op == ROUND_DIV_EXPR);
16246  compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
16247	       || op == EQ_EXPR || op == NE_EXPR);
16248
16249  if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
16250    return N_("binary operator does not support two vector bool operands");
16251
16252  if (bool1_p != bool2_p && (muldiv_p || compare_p))
16253    return N_("binary operator does not support vector bool operand");
16254
16255  mode1 = TYPE_MODE (type1);
16256  mode2 = TYPE_MODE (type2);
16257
16258  if (bool1_p != bool2_p && plusminus_p
16259      && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
16260	  || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
16261    return N_("binary operator does not support mixing vector "
16262	      "bool with floating point vector operands");
16263
16264  return NULL;
16265}
16266
16267/* Implement TARGET_C_EXCESS_PRECISION.
16268
16269   FIXME: For historical reasons, float_t and double_t are typedef'ed to
16270   double on s390, causing operations on float_t to operate in a higher
16271   precision than is necessary.  However, it is not the case that SFmode
16272   operations have implicit excess precision, and we generate more optimal
16273   code if we let the compiler know no implicit extra precision is added.
16274
16275   That means when we are compiling with -fexcess-precision=fast, the value
16276   we set for FLT_EVAL_METHOD will be out of line with the actual precision of
16277   float_t (though they would be correct for -fexcess-precision=standard).
16278
16279   A complete fix would modify glibc to remove the unnecessary typedef
16280   of float_t to double.  */
16281
16282static enum flt_eval_method
16283s390_excess_precision (enum excess_precision_type type)
16284{
16285  switch (type)
16286    {
16287      case EXCESS_PRECISION_TYPE_IMPLICIT:
16288      case EXCESS_PRECISION_TYPE_FAST:
16289	/* The fastest type to promote to will always be the native type,
16290	   whether that occurs with implicit excess precision or
16291	   otherwise.  */
16292	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
16293      case EXCESS_PRECISION_TYPE_STANDARD:
16294	/* Otherwise, when we are in a standards compliant mode, to
16295	   ensure consistency with the implementation in glibc, report that
16296	   float is evaluated to the range and precision of double.  */
16297	return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
16298      default:
16299	gcc_unreachable ();
16300    }
16301  return FLT_EVAL_METHOD_UNPREDICTABLE;
16302}
16303
16304/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
16305
16306static unsigned HOST_WIDE_INT
16307s390_asan_shadow_offset (void)
16308{
16309  return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
16310}
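
/* For illustration only: with the default shadow scale of 3 the
   address sanitizer maps an address to its shadow byte roughly as

     shadow = (addr >> 3) + s390_asan_shadow_offset ();

   i.e. the constant above is the offset added after the shift.  */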
16311
16312#ifdef HAVE_GAS_HIDDEN
16313# define USE_HIDDEN_LINKONCE 1
16314#else
16315# define USE_HIDDEN_LINKONCE 0
16316#endif
16317
16318/* Output an indirect branch trampoline for target register REGNO.  */
16319
16320static void
16321s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
16322{
16323  tree decl;
16324  char thunk_label[32];
16325  int i;
16326
16327  if (z10_p)
16328    sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
16329  else
16330    sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
16331	     INDIRECT_BRANCH_THUNK_REGNUM, regno);
16332
16333  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
16334		     get_identifier (thunk_label),
16335		     build_function_type_list (void_type_node, NULL_TREE));
16336  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
16337				   NULL_TREE, void_type_node);
16338  TREE_PUBLIC (decl) = 1;
16339  TREE_STATIC (decl) = 1;
16340  DECL_IGNORED_P (decl) = 1;
16341
16342  if (USE_HIDDEN_LINKONCE)
16343    {
16344      cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
16345
16346      targetm.asm_out.unique_section (decl, 0);
16347      switch_to_section (get_named_section (decl, NULL, 0));
16348
16349      targetm.asm_out.globalize_label (asm_out_file, thunk_label);
16350      fputs ("\t.hidden\t", asm_out_file);
16351      assemble_name (asm_out_file, thunk_label);
16352      putc ('\n', asm_out_file);
16353      ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
16354    }
16355  else
16356    {
16357      switch_to_section (text_section);
16358      ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
16359    }
16360
16361  DECL_INITIAL (decl) = make_node (BLOCK);
16362  current_function_decl = decl;
16363  allocate_struct_function (decl, false);
16364  init_function_start (decl);
16365  cfun->is_thunk = true;
16366  first_function_block_is_cold = false;
16367  final_start_function (emit_barrier (), asm_out_file, 1);
16368
16369  /* This makes CFI at least usable for indirect jumps.
16370
     Stopping in the thunk: the backtrace will point to the thunk target
     as if it was interrupted by a signal.  For a call this means that
     the call chain will be: caller->callee->thunk.  */
16374  if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
16375    {
16376      fputs ("\t.cfi_signal_frame\n", asm_out_file);
16377      fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
16378      for (i = 0; i < FPR15_REGNUM; i++)
16379	fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
16380    }
16381
16382  if (z10_p)
16383    {
16384      /* exrl  0,1f  */
16385
      /* The thunk uses the z10-only EXRL instruction even though z10
	 is currently not enabled.  Tell the assembler to accept the
	 instruction.  */
16389      if (!TARGET_CPU_Z10)
16390	{
16391	  fputs ("\t.machine push\n", asm_out_file);
16392	  fputs ("\t.machine z10\n", asm_out_file);
16393	}
16394      /* We use exrl even if -mzarch hasn't been specified on the
16395	 command line so we have to tell the assembler to accept
16396	 it.  */
16397      if (!TARGET_ZARCH)
16398	fputs ("\t.machinemode zarch\n", asm_out_file);
16399
16400      fputs ("\texrl\t0,1f\n", asm_out_file);
16401
16402      if (!TARGET_ZARCH)
16403	fputs ("\t.machinemode esa\n", asm_out_file);
16404
16405      if (!TARGET_CPU_Z10)
16406	fputs ("\t.machine pop\n", asm_out_file);
16407    }
16408  else
16409    {
16410      /* larl %r1,1f  */
16411      fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
16412	       INDIRECT_BRANCH_THUNK_REGNUM);
16413
16414      /* ex 0,0(%r1)  */
16415      fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
16416	       INDIRECT_BRANCH_THUNK_REGNUM);
16417    }
16418
16419  /* 0:    j 0b  */
16420  fputs ("0:\tj\t0b\n", asm_out_file);
16421
16422  /* 1:    br <regno>  */
16423  fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);
16424
16425  final_end_function ();
16426  init_insn_lengths ();
16427  free_after_compilation (cfun);
16428  set_cfun (NULL);
16429  current_function_decl = NULL;
16430}
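
/* For illustration only, a sketch of the text emitted above for the
   z10 (EXRL) variant with target register %r1:

	<thunk label>:
		exrl	0,1f
	0:	j	0b
	1:	br	%r1

   The indirect branch sits behind an execute-type instruction, which
   is intended to keep it out of reach of the branch prediction
   hardware.  */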
16431
16432/* Implement the asm.code_end target hook.  */
16433
16434static void
16435s390_code_end (void)
16436{
16437  int i;
16438
16439  for (i = 1; i < 16; i++)
16440    {
16441      if (indirect_branch_z10thunk_mask & (1 << i))
16442	s390_output_indirect_thunk_function (i, true);
16443
16444      if (indirect_branch_prez10thunk_mask & (1 << i))
16445	s390_output_indirect_thunk_function (i, false);
16446    }
16447
16448  if (TARGET_INDIRECT_BRANCH_TABLE)
16449    {
16450      int o;
16451      int i;
16452
16453      for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
16454	{
16455	  if (indirect_branch_table_label_no[o] == 0)
16456	    continue;
16457
16458	  switch_to_section (get_section (indirect_branch_table_name[o],
16459					  0,
16460					  NULL_TREE));
16461	  for (i = 0; i < indirect_branch_table_label_no[o]; i++)
16462	    {
16463	      char label_start[32];
16464
16465	      ASM_GENERATE_INTERNAL_LABEL (label_start,
16466					   indirect_branch_table_label[o], i);
16467
16468	      fputs ("\t.long\t", asm_out_file);
16469	      assemble_name_raw (asm_out_file, label_start);
16470	      fputs ("-.\n", asm_out_file);
16471	    }
16472	  switch_to_section (current_function_section ());
16473	}
16474    }
16475}
16476
16477/* Implement the TARGET_CASE_VALUES_THRESHOLD target hook.  */
16478
16479unsigned int
16480s390_case_values_threshold (void)
16481{
16482  /* Disabling branch prediction for indirect jumps makes jump tables
16483     much more expensive.  */
16484  if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
16485    return 20;
16486
16487  return default_case_values_threshold ();
16488}
16489
/* Evaluate the insns between HEAD and TAIL and install back-end
   specific dependencies.
16492
16493   Establish an ANTI dependency between r11 and r15 restores from FPRs
16494   to prevent the instructions scheduler from reordering them since
16495   this would break CFI.  No further handling in the sched_reorder
16496   hook is required since the r11 and r15 restore will never appear in
16497   the same ready list with that change.  */
16498void
16499s390_sched_dependencies_evaluation (rtx_insn *head, rtx_insn *tail)
16500{
16501  if (!frame_pointer_needed || !epilogue_completed)
16502    return;
16503
16504  while (head != tail && DEBUG_INSN_P (head))
16505    head = NEXT_INSN (head);
16506
16507  rtx_insn *r15_restore = NULL, *r11_restore = NULL;
16508
16509  for (rtx_insn *insn = tail; insn != head; insn = PREV_INSN (insn))
16510    {
16511      rtx set = single_set (insn);
16512      if (!INSN_P (insn)
16513	  || !RTX_FRAME_RELATED_P (insn)
16514	  || set == NULL_RTX
16515	  || !REG_P (SET_DEST (set))
16516	  || !FP_REG_P (SET_SRC (set)))
16517	continue;
16518
16519      if (REGNO (SET_DEST (set)) == HARD_FRAME_POINTER_REGNUM)
16520	r11_restore = insn;
16521
16522      if (REGNO (SET_DEST (set)) == STACK_POINTER_REGNUM)
16523	r15_restore = insn;
16524    }
16525
16526  if (r11_restore == NULL || r15_restore == NULL)
16527    return;
16528  add_dependence (r11_restore, r15_restore, REG_DEP_ANTI);
16529}
16530
16531/* Implement TARGET_SHIFT_TRUNCATION_MASK for integer shifts.  */
16532
16533static unsigned HOST_WIDE_INT
16534s390_shift_truncation_mask (machine_mode mode)
16535{
16536  return mode == DImode || mode == SImode ? 63 : 0;
16537}
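
/* For illustration only: returning 63 above tells the middle end that
   SImode and DImode shifts only use the low six bits of the shift
   count, so an explicit masking as in

     unsigned long f (unsigned long x, unsigned int n)
     { return x << (n & 63); }

   can be dropped by the optimizers.  */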
16538
16539/* Initialize GCC target structure.  */
16540
16541#undef  TARGET_ASM_ALIGNED_HI_OP
16542#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
16543#undef  TARGET_ASM_ALIGNED_DI_OP
16544#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
16545#undef  TARGET_ASM_INTEGER
16546#define TARGET_ASM_INTEGER s390_assemble_integer
16547
16548#undef  TARGET_ASM_OPEN_PAREN
16549#define TARGET_ASM_OPEN_PAREN ""
16550
16551#undef  TARGET_ASM_CLOSE_PAREN
16552#define TARGET_ASM_CLOSE_PAREN ""
16553
16554#undef TARGET_OPTION_OVERRIDE
16555#define TARGET_OPTION_OVERRIDE s390_option_override
16556
16557#ifdef TARGET_THREAD_SSP_OFFSET
16558#undef TARGET_STACK_PROTECT_GUARD
16559#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
16560#endif
16561
16562#undef	TARGET_ENCODE_SECTION_INFO
16563#define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
16564
16565#undef TARGET_SCALAR_MODE_SUPPORTED_P
16566#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16567
16568#ifdef HAVE_AS_TLS
16569#undef TARGET_HAVE_TLS
16570#define TARGET_HAVE_TLS true
16571#endif
16572#undef TARGET_CANNOT_FORCE_CONST_MEM
16573#define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
16574
16575#undef TARGET_DELEGITIMIZE_ADDRESS
16576#define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
16577
16578#undef TARGET_LEGITIMIZE_ADDRESS
16579#define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
16580
16581#undef TARGET_RETURN_IN_MEMORY
16582#define TARGET_RETURN_IN_MEMORY s390_return_in_memory
16583
16584#undef  TARGET_INIT_BUILTINS
16585#define TARGET_INIT_BUILTINS s390_init_builtins
16586#undef  TARGET_EXPAND_BUILTIN
16587#define TARGET_EXPAND_BUILTIN s390_expand_builtin
16588#undef  TARGET_BUILTIN_DECL
16589#define TARGET_BUILTIN_DECL s390_builtin_decl
16590
16591#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
16592#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
16593
16594#undef TARGET_ASM_OUTPUT_MI_THUNK
16595#define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
16596#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
16597#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
16598
16599#undef TARGET_C_EXCESS_PRECISION
16600#define TARGET_C_EXCESS_PRECISION s390_excess_precision
16601
16602#undef  TARGET_SCHED_ADJUST_PRIORITY
16603#define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
16604#undef TARGET_SCHED_ISSUE_RATE
16605#define TARGET_SCHED_ISSUE_RATE s390_issue_rate
16606#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
16607#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
16608
16609#undef TARGET_SCHED_VARIABLE_ISSUE
16610#define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
16611#undef TARGET_SCHED_REORDER
16612#define TARGET_SCHED_REORDER s390_sched_reorder
16613#undef TARGET_SCHED_INIT
16614#define TARGET_SCHED_INIT s390_sched_init
16615
16616#undef TARGET_CANNOT_COPY_INSN_P
16617#define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
16618#undef TARGET_RTX_COSTS
16619#define TARGET_RTX_COSTS s390_rtx_costs
16620#undef TARGET_ADDRESS_COST
16621#define TARGET_ADDRESS_COST s390_address_cost
16622#undef TARGET_REGISTER_MOVE_COST
16623#define TARGET_REGISTER_MOVE_COST s390_register_move_cost
16624#undef TARGET_MEMORY_MOVE_COST
16625#define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
16626#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
16627#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
16628  s390_builtin_vectorization_cost
16629
16630#undef TARGET_MACHINE_DEPENDENT_REORG
16631#define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
16632
16633#undef TARGET_VALID_POINTER_MODE
16634#define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
16635
16636#undef TARGET_BUILD_BUILTIN_VA_LIST
16637#define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
16638#undef TARGET_EXPAND_BUILTIN_VA_START
16639#define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
16640#undef TARGET_ASAN_SHADOW_OFFSET
16641#define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
16642#undef TARGET_GIMPLIFY_VA_ARG_EXPR
16643#define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
16644
16645#undef TARGET_PROMOTE_FUNCTION_MODE
16646#define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
16647#undef TARGET_PASS_BY_REFERENCE
16648#define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
16649
16650#undef  TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
16651#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change
16652
16653#undef TARGET_FUNCTION_OK_FOR_SIBCALL
16654#define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
16655#undef TARGET_FUNCTION_ARG
16656#define TARGET_FUNCTION_ARG s390_function_arg
16657#undef TARGET_FUNCTION_ARG_ADVANCE
16658#define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
16659#undef TARGET_FUNCTION_ARG_PADDING
16660#define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
16661#undef TARGET_FUNCTION_VALUE
16662#define TARGET_FUNCTION_VALUE s390_function_value
16663#undef TARGET_LIBCALL_VALUE
16664#define TARGET_LIBCALL_VALUE s390_libcall_value
16665#undef TARGET_STRICT_ARGUMENT_NAMING
16666#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
16667
16668#undef TARGET_KEEP_LEAF_WHEN_PROFILED
16669#define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
16670
16671#undef TARGET_FIXED_CONDITION_CODE_REGS
16672#define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
16673
16674#undef TARGET_CC_MODES_COMPATIBLE
16675#define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
16676
16677#undef TARGET_INVALID_WITHIN_DOLOOP
16678#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
16679
16680#ifdef HAVE_AS_TLS
16681#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
16682#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
16683#endif
16684
16685#undef TARGET_DWARF_FRAME_REG_MODE
16686#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
16687
16688#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
16689#undef TARGET_MANGLE_TYPE
16690#define TARGET_MANGLE_TYPE s390_mangle_type
16691#endif
16692
16693#undef TARGET_SCALAR_MODE_SUPPORTED_P
16694#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16695
16696#undef TARGET_VECTOR_MODE_SUPPORTED_P
16697#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
16698
16699#undef  TARGET_PREFERRED_RELOAD_CLASS
16700#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
16701
16702#undef TARGET_SECONDARY_RELOAD
16703#define TARGET_SECONDARY_RELOAD s390_secondary_reload
16704#undef TARGET_SECONDARY_MEMORY_NEEDED
16705#define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
16706#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
16707#define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode
16708
16709#undef TARGET_LIBGCC_CMP_RETURN_MODE
16710#define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
16711
16712#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
16713#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
16714
16715#undef TARGET_LEGITIMATE_ADDRESS_P
16716#define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
16717
16718#undef TARGET_LEGITIMATE_CONSTANT_P
16719#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
16720
16721#undef TARGET_LRA_P
16722#define TARGET_LRA_P s390_lra_p
16723
16724#undef TARGET_CAN_ELIMINATE
16725#define TARGET_CAN_ELIMINATE s390_can_eliminate
16726
16727#undef TARGET_CONDITIONAL_REGISTER_USAGE
16728#define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
16729
16730#undef TARGET_LOOP_UNROLL_ADJUST
16731#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
16732
16733#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
16734#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
16735#undef TARGET_TRAMPOLINE_INIT
16736#define TARGET_TRAMPOLINE_INIT s390_trampoline_init
16737
16738/* PR 79421 */
16739#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
16740#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
16741
16742#undef TARGET_UNWIND_WORD_MODE
16743#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
16744
16745#undef TARGET_CANONICALIZE_COMPARISON
16746#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
16747
16748#undef TARGET_HARD_REGNO_SCRATCH_OK
16749#define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
16750
16751#undef TARGET_HARD_REGNO_NREGS
16752#define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
16753#undef TARGET_HARD_REGNO_MODE_OK
16754#define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
16755#undef TARGET_MODES_TIEABLE_P
16756#define TARGET_MODES_TIEABLE_P s390_modes_tieable_p
16757
16758#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
16759#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
16760  s390_hard_regno_call_part_clobbered
16761
16762#undef TARGET_ATTRIBUTE_TABLE
16763#define TARGET_ATTRIBUTE_TABLE s390_attribute_table
16764
16765#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
16766#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
16767
16768#undef TARGET_SET_UP_BY_PROLOGUE
16769#define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
16770
16771#undef TARGET_EXTRA_LIVE_ON_ENTRY
16772#define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
16773
16774#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
16775#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
16776  s390_use_by_pieces_infrastructure_p
16777
16778#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
16779#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
16780
16781#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
16782#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
16783
16784#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
16785#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
16786
16787#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
16788#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
16789
16790#undef TARGET_VECTOR_ALIGNMENT
16791#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
16792
16793#undef TARGET_INVALID_BINARY_OP
16794#define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
16795
16796#ifdef HAVE_AS_MACHINE_MACHINEMODE
16797#undef TARGET_ASM_FILE_START
16798#define TARGET_ASM_FILE_START s390_asm_file_start
16799#endif
16800
16801#undef TARGET_ASM_FILE_END
16802#define TARGET_ASM_FILE_END s390_asm_file_end
16803
16804#undef TARGET_SET_CURRENT_FUNCTION
16805#define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
16806
16807#if S390_USE_TARGET_ATTRIBUTE
16808#undef TARGET_OPTION_VALID_ATTRIBUTE_P
16809#define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
16810
16811#undef TARGET_CAN_INLINE_P
16812#define TARGET_CAN_INLINE_P s390_can_inline_p
16813#endif
16814
16815#undef TARGET_OPTION_RESTORE
16816#define TARGET_OPTION_RESTORE s390_function_specific_restore
16817
16818#undef TARGET_CAN_CHANGE_MODE_CLASS
16819#define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class
16820
16821#undef TARGET_CONSTANT_ALIGNMENT
16822#define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment
16823
16824#undef TARGET_ASM_CODE_END
16825#define TARGET_ASM_CODE_END s390_code_end
16826
16827#undef TARGET_CASE_VALUES_THRESHOLD
16828#define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold
16829
16830#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
16831#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
16832  s390_sched_dependencies_evaluation
16833
16834#undef TARGET_SHIFT_TRUNCATION_MASK
16835#define TARGET_SHIFT_TRUNCATION_MASK s390_shift_truncation_mask
16836
16837/* Use only short displacement, since long displacement is not available for
16838   the floating point instructions.  */
16839#undef TARGET_MAX_ANCHOR_OFFSET
16840#define TARGET_MAX_ANCHOR_OFFSET 0xfff
16841
16842struct gcc_target targetm = TARGET_INITIALIZER;
16843
16844#include "gt-s390.h"
16845