1/* Subroutines used for code generation on IBM S/390 and zSeries
2   Copyright (C) 1999-2015 Free Software Foundation, Inc.
3   Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4                  Ulrich Weigand (uweigand@de.ibm.com) and
5                  Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3.  If not see
21<http://www.gnu.org/licenses/>.  */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "rtl.h"
28#include "hash-set.h"
29#include "machmode.h"
30#include "vec.h"
31#include "double-int.h"
32#include "input.h"
33#include "alias.h"
34#include "symtab.h"
35#include "wide-int.h"
36#include "inchash.h"
37#include "tree.h"
38#include "fold-const.h"
39#include "print-tree.h"
40#include "stringpool.h"
41#include "stor-layout.h"
42#include "varasm.h"
43#include "calls.h"
44#include "tm_p.h"
45#include "regs.h"
46#include "hard-reg-set.h"
47#include "insn-config.h"
48#include "conditions.h"
49#include "output.h"
50#include "insn-attr.h"
51#include "flags.h"
52#include "except.h"
53#include "function.h"
54#include "recog.h"
55#include "hashtab.h"
56#include "statistics.h"
57#include "real.h"
58#include "fixed-value.h"
59#include "expmed.h"
60#include "dojump.h"
61#include "explow.h"
62#include "emit-rtl.h"
63#include "stmt.h"
64#include "expr.h"
65#include "reload.h"
66#include "diagnostic-core.h"
67#include "predict.h"
68#include "dominance.h"
69#include "cfg.h"
70#include "cfgrtl.h"
71#include "cfganal.h"
72#include "lcm.h"
73#include "cfgbuild.h"
74#include "cfgcleanup.h"
75#include "basic-block.h"
76#include "ggc.h"
77#include "target.h"
78#include "target-def.h"
79#include "debug.h"
80#include "langhooks.h"
81#include "insn-codes.h"
82#include "optabs.h"
83#include "hash-table.h"
84#include "tree-ssa-alias.h"
85#include "internal-fn.h"
86#include "gimple-fold.h"
87#include "tree-eh.h"
88#include "gimple-expr.h"
89#include "is-a.h"
90#include "gimple.h"
91#include "gimplify.h"
92#include "df.h"
93#include "params.h"
94#include "cfgloop.h"
95#include "opts.h"
96#include "tree-pass.h"
97#include "context.h"
98#include "builtins.h"
99#include "rtl-iter.h"
100#include "intl.h"
101#include "plugin-api.h"
102#include "ipa-ref.h"
103#include "cgraph.h"
104#include "tm-constrs.h"
105
106/* Define the specific costs for a given cpu.  */
107
108struct processor_costs
109{
110  /* multiplication */
111  const int m;        /* cost of an M instruction.  */
112  const int mghi;     /* cost of an MGHI instruction.  */
113  const int mh;       /* cost of an MH instruction.  */
114  const int mhi;      /* cost of an MHI instruction.  */
115  const int ml;       /* cost of an ML instruction.  */
116  const int mr;       /* cost of an MR instruction.  */
117  const int ms;       /* cost of an MS instruction.  */
118  const int msg;      /* cost of an MSG instruction.  */
119  const int msgf;     /* cost of an MSGF instruction.  */
120  const int msgfr;    /* cost of an MSGFR instruction.  */
121  const int msgr;     /* cost of an MSGR instruction.  */
122  const int msr;      /* cost of an MSR instruction.  */
123  const int mult_df;  /* cost of multiplication in DFmode.  */
124  const int mxbr;
125  /* square root */
126  const int sqxbr;    /* cost of square root in TFmode.  */
127  const int sqdbr;    /* cost of square root in DFmode.  */
128  const int sqebr;    /* cost of square root in SFmode.  */
129  /* multiply and add */
130  const int madbr;    /* cost of multiply and add in DFmode.  */
131  const int maebr;    /* cost of multiply and add in SFmode.  */
132  /* division */
133  const int dxbr;
134  const int ddbr;
135  const int debr;
136  const int dlgr;
137  const int dlr;
138  const int dr;
139  const int dsgfr;
140  const int dsgr;
141};
142
143const struct processor_costs *s390_cost;
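/* s390_cost is made to point at one of the cost tables below, according
   to the CPU selected for tuning; the selection itself happens in the
   option override code elsewhere in this file (not shown in this
   excerpt).  */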
144
145static const
146struct processor_costs z900_cost =
147{
148  COSTS_N_INSNS (5),     /* M     */
149  COSTS_N_INSNS (10),    /* MGHI  */
150  COSTS_N_INSNS (5),     /* MH    */
151  COSTS_N_INSNS (4),     /* MHI   */
152  COSTS_N_INSNS (5),     /* ML    */
153  COSTS_N_INSNS (5),     /* MR    */
154  COSTS_N_INSNS (4),     /* MS    */
155  COSTS_N_INSNS (15),    /* MSG   */
156  COSTS_N_INSNS (7),     /* MSGF  */
157  COSTS_N_INSNS (7),     /* MSGFR */
158  COSTS_N_INSNS (10),    /* MSGR  */
159  COSTS_N_INSNS (4),     /* MSR   */
160  COSTS_N_INSNS (7),     /* multiplication in DFmode */
161  COSTS_N_INSNS (13),    /* MXBR */
162  COSTS_N_INSNS (136),   /* SQXBR */
163  COSTS_N_INSNS (44),    /* SQDBR */
164  COSTS_N_INSNS (35),    /* SQEBR */
165  COSTS_N_INSNS (18),    /* MADBR */
166  COSTS_N_INSNS (13),    /* MAEBR */
167  COSTS_N_INSNS (134),   /* DXBR */
168  COSTS_N_INSNS (30),    /* DDBR */
169  COSTS_N_INSNS (27),    /* DEBR */
170  COSTS_N_INSNS (220),   /* DLGR */
171  COSTS_N_INSNS (34),    /* DLR */
172  COSTS_N_INSNS (34),    /* DR */
173  COSTS_N_INSNS (32),    /* DSGFR */
174  COSTS_N_INSNS (32),    /* DSGR */
175};
176
177static const
178struct processor_costs z990_cost =
179{
180  COSTS_N_INSNS (4),     /* M     */
181  COSTS_N_INSNS (2),     /* MGHI  */
182  COSTS_N_INSNS (2),     /* MH    */
183  COSTS_N_INSNS (2),     /* MHI   */
184  COSTS_N_INSNS (4),     /* ML    */
185  COSTS_N_INSNS (4),     /* MR    */
186  COSTS_N_INSNS (5),     /* MS    */
187  COSTS_N_INSNS (6),     /* MSG   */
188  COSTS_N_INSNS (4),     /* MSGF  */
189  COSTS_N_INSNS (4),     /* MSGFR */
190  COSTS_N_INSNS (4),     /* MSGR  */
191  COSTS_N_INSNS (4),     /* MSR   */
192  COSTS_N_INSNS (1),     /* multiplication in DFmode */
193  COSTS_N_INSNS (28),    /* MXBR */
194  COSTS_N_INSNS (130),   /* SQXBR */
195  COSTS_N_INSNS (66),    /* SQDBR */
196  COSTS_N_INSNS (38),    /* SQEBR */
197  COSTS_N_INSNS (1),     /* MADBR */
198  COSTS_N_INSNS (1),     /* MAEBR */
199  COSTS_N_INSNS (60),    /* DXBR */
200  COSTS_N_INSNS (40),    /* DDBR */
201  COSTS_N_INSNS (26),    /* DEBR */
202  COSTS_N_INSNS (176),   /* DLGR */
203  COSTS_N_INSNS (31),    /* DLR */
204  COSTS_N_INSNS (31),    /* DR */
205  COSTS_N_INSNS (31),    /* DSGFR */
206  COSTS_N_INSNS (31),    /* DSGR */
207};
208
209static const
210struct processor_costs z9_109_cost =
211{
212  COSTS_N_INSNS (4),     /* M     */
213  COSTS_N_INSNS (2),     /* MGHI  */
214  COSTS_N_INSNS (2),     /* MH    */
215  COSTS_N_INSNS (2),     /* MHI   */
216  COSTS_N_INSNS (4),     /* ML    */
217  COSTS_N_INSNS (4),     /* MR    */
218  COSTS_N_INSNS (5),     /* MS    */
219  COSTS_N_INSNS (6),     /* MSG   */
220  COSTS_N_INSNS (4),     /* MSGF  */
221  COSTS_N_INSNS (4),     /* MSGFR */
222  COSTS_N_INSNS (4),     /* MSGR  */
223  COSTS_N_INSNS (4),     /* MSR   */
224  COSTS_N_INSNS (1),     /* multiplication in DFmode */
225  COSTS_N_INSNS (28),    /* MXBR */
226  COSTS_N_INSNS (130),   /* SQXBR */
227  COSTS_N_INSNS (66),    /* SQDBR */
228  COSTS_N_INSNS (38),    /* SQEBR */
229  COSTS_N_INSNS (1),     /* MADBR */
230  COSTS_N_INSNS (1),     /* MAEBR */
231  COSTS_N_INSNS (60),    /* DXBR */
232  COSTS_N_INSNS (40),    /* DDBR */
233  COSTS_N_INSNS (26),    /* DEBR */
234  COSTS_N_INSNS (30),    /* DLGR */
235  COSTS_N_INSNS (23),    /* DLR */
236  COSTS_N_INSNS (23),    /* DR */
237  COSTS_N_INSNS (24),    /* DSGFR */
238  COSTS_N_INSNS (24),    /* DSGR */
239};
240
241static const
242struct processor_costs z10_cost =
243{
244  COSTS_N_INSNS (10),    /* M     */
245  COSTS_N_INSNS (10),    /* MGHI  */
246  COSTS_N_INSNS (10),    /* MH    */
247  COSTS_N_INSNS (10),    /* MHI   */
248  COSTS_N_INSNS (10),    /* ML    */
249  COSTS_N_INSNS (10),    /* MR    */
250  COSTS_N_INSNS (10),    /* MS    */
251  COSTS_N_INSNS (10),    /* MSG   */
252  COSTS_N_INSNS (10),    /* MSGF  */
253  COSTS_N_INSNS (10),    /* MSGFR */
254  COSTS_N_INSNS (10),    /* MSGR  */
255  COSTS_N_INSNS (10),    /* MSR   */
256  COSTS_N_INSNS (1),     /* multiplication in DFmode */
257  COSTS_N_INSNS (50),    /* MXBR */
258  COSTS_N_INSNS (120),   /* SQXBR */
259  COSTS_N_INSNS (52),    /* SQDBR */
260  COSTS_N_INSNS (38),    /* SQEBR */
261  COSTS_N_INSNS (1),     /* MADBR */
262  COSTS_N_INSNS (1),     /* MAEBR */
263  COSTS_N_INSNS (111),   /* DXBR */
264  COSTS_N_INSNS (39),    /* DDBR */
265  COSTS_N_INSNS (32),    /* DEBR */
266  COSTS_N_INSNS (160),   /* DLGR */
267  COSTS_N_INSNS (71),    /* DLR */
268  COSTS_N_INSNS (71),    /* DR */
269  COSTS_N_INSNS (71),    /* DSGFR */
270  COSTS_N_INSNS (71),    /* DSGR */
271};
272
273static const
274struct processor_costs z196_cost =
275{
276  COSTS_N_INSNS (7),     /* M     */
277  COSTS_N_INSNS (5),     /* MGHI  */
278  COSTS_N_INSNS (5),     /* MH    */
279  COSTS_N_INSNS (5),     /* MHI   */
280  COSTS_N_INSNS (7),     /* ML    */
281  COSTS_N_INSNS (7),     /* MR    */
282  COSTS_N_INSNS (6),     /* MS    */
283  COSTS_N_INSNS (8),     /* MSG   */
284  COSTS_N_INSNS (6),     /* MSGF  */
285  COSTS_N_INSNS (6),     /* MSGFR */
286  COSTS_N_INSNS (8),     /* MSGR  */
287  COSTS_N_INSNS (6),     /* MSR   */
288  COSTS_N_INSNS (1),     /* multiplication in DFmode */
289  COSTS_N_INSNS (40),    /* MXBR B+40 */
290  COSTS_N_INSNS (100),   /* SQXBR B+100 */
291  COSTS_N_INSNS (42),    /* SQDBR B+42 */
292  COSTS_N_INSNS (28),    /* SQEBR B+28 */
293  COSTS_N_INSNS (1),     /* MADBR B */
294  COSTS_N_INSNS (1),     /* MAEBR B */
295  COSTS_N_INSNS (101),   /* DXBR B+101 */
296  COSTS_N_INSNS (29),    /* DDBR */
297  COSTS_N_INSNS (22),    /* DEBR */
298  COSTS_N_INSNS (160),   /* DLGR cracked */
299  COSTS_N_INSNS (160),   /* DLR cracked */
300  COSTS_N_INSNS (160),   /* DR expanded */
301  COSTS_N_INSNS (160),   /* DSGFR cracked */
302  COSTS_N_INSNS (160),   /* DSGR cracked */
303};
304
305static const
306struct processor_costs zEC12_cost =
307{
308  COSTS_N_INSNS (7),     /* M     */
309  COSTS_N_INSNS (5),     /* MGHI  */
310  COSTS_N_INSNS (5),     /* MH    */
311  COSTS_N_INSNS (5),     /* MHI   */
312  COSTS_N_INSNS (7),     /* ML    */
313  COSTS_N_INSNS (7),     /* MR    */
314  COSTS_N_INSNS (6),     /* MS    */
315  COSTS_N_INSNS (8),     /* MSG   */
316  COSTS_N_INSNS (6),     /* MSGF  */
317  COSTS_N_INSNS (6),     /* MSGFR */
318  COSTS_N_INSNS (8),     /* MSGR  */
319  COSTS_N_INSNS (6),     /* MSR   */
320  COSTS_N_INSNS (1),     /* multiplication in DFmode */
321  COSTS_N_INSNS (40),    /* MXBR B+40 */
322  COSTS_N_INSNS (100),   /* SQXBR B+100 */
323  COSTS_N_INSNS (42),    /* SQDBR B+42 */
324  COSTS_N_INSNS (28),    /* SQEBR B+28 */
325  COSTS_N_INSNS (1),     /* MADBR B */
326  COSTS_N_INSNS (1),     /* MAEBR B */
327  COSTS_N_INSNS (131),   /* DXBR B+131 */
328  COSTS_N_INSNS (29),    /* DDBR */
329  COSTS_N_INSNS (22),    /* DEBR */
330  COSTS_N_INSNS (160),   /* DLGR cracked */
331  COSTS_N_INSNS (160),   /* DLR cracked */
332  COSTS_N_INSNS (160),   /* DR expanded */
333  COSTS_N_INSNS (160),   /* DSGFR cracked */
334  COSTS_N_INSNS (160),   /* DSGR cracked */
335};
336
337extern int reload_completed;
338
339/* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
340static rtx_insn *last_scheduled_insn;
341#define MAX_SCHED_UNITS 3
342static int last_scheduled_unit_distance[MAX_SCHED_UNITS];
343
344/* The maximum score added for an instruction whose unit hasn't been
345   in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
346   give instruction mix scheduling more priority over instruction
347   grouping.  */
348#define MAX_SCHED_MIX_SCORE      8
349
350/* The maximum distance up to which individual scores will be
351   calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
352   Increase this with the OOO window size of the machine.  */
353#define MAX_SCHED_MIX_DISTANCE 100
354
355/* Structure used to hold the components of an S/390 memory
356   address.  A legitimate address on S/390 is of the general
357   form
358          base + index + displacement
359   where any of the components is optional.
360
361   base and index are registers of the class ADDR_REGS,
362   displacement is an unsigned 12-bit immediate constant.  */
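/* For example (purely illustrative), the assembler operand 100(%r3,%r15)
   denotes the address %r15 + %r3 + 100: %r15 is the base register, %r3
   the index register, and 100 the displacement.  */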
363
364struct s390_address
365{
366  rtx base;
367  rtx indx;
368  rtx disp;
369  bool pointer;
370  bool literal_pool;
371};
372
373/* The following structure is embedded in the machine
374   specific part of struct function.  */
375
376struct GTY (()) s390_frame_layout
377{
378  /* Offset within stack frame.  */
379  HOST_WIDE_INT gprs_offset;
380  HOST_WIDE_INT f0_offset;
381  HOST_WIDE_INT f4_offset;
382  HOST_WIDE_INT f8_offset;
383  HOST_WIDE_INT backchain_offset;
384
385  /* Numbers of the first and last GPRs for which slots in the
386     register save area are reserved.  */
387  int first_save_gpr_slot;
388  int last_save_gpr_slot;
389
390  /* For each GPR (r0-r15) the slot where it should be saved:
391     the number of an FPR to save it to, or one of:
392      0 - does not need to be saved at all
393     -1 - save to a stack slot  */
394#define SAVE_SLOT_NONE   0
395#define SAVE_SLOT_STACK -1
396  signed char gpr_save_slots[16];
397
398  /* Numbers of the first and last GPRs to be saved and restored.  */
399  int first_save_gpr;
400  int first_restore_gpr;
401  int last_save_gpr;
402  int last_restore_gpr;
403
404  /* Bitmap of floating point registers.  A set bit means that the
405     respective register has to be saved.  Starting with reg 16 (f0)
406     at the rightmost bit.
407     Bit 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
408     fpr 15 13 11  9 14 12 10  8  7  5  3  1  6  4  2  0
409     reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16  */
410  unsigned int fpr_bitmap;
411
412  /* Number of floating point registers f8-f15 which must be saved.  */
413  int high_fprs;
414
415  /* Set if return address needs to be saved.
416     This flag is set by s390_return_addr_rtx if it could not use
417     the initial value of r14 and therefore depends on r14 being
418     saved to the stack.  */
419  bool save_return_addr_p;
420
421  /* Size of stack frame.  */
422  HOST_WIDE_INT frame_size;
423};
424
425/* Define the structure for the machine field in struct function.  */
426
427struct GTY(()) machine_function
428{
429  struct s390_frame_layout frame_layout;
430
431  /* Literal pool base register.  */
432  rtx base_reg;
433
434  /* True if we may need to perform branch splitting.  */
435  bool split_branches_pending_p;
436
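  /* True if the current function has an exception landing pad; set via
     s390_set_has_landing_pad_p (see below).  */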
437  bool has_landing_pad_p;
438
439  /* True if the current function may contain a tbegin clobbering
440     FPRs.  */
441  bool tbegin_p;
442};
443
444/* A few accessor macros for struct cfun->machine->s390_frame_layout.  */
445
446#define cfun_frame_layout (cfun->machine->frame_layout)
447#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
448#define cfun_save_arg_fprs_p (!!(TARGET_64BIT				\
449				 ? cfun_frame_layout.fpr_bitmap & 0x0f	\
450				 : cfun_frame_layout.fpr_bitmap & 0x03))
451#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
452  cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
453#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |=    \
454  (1 << (REGNO - FPR0_REGNUM)))
455#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap &    \
456  (1 << (REGNO - FPR0_REGNUM))))
457#define cfun_gpr_save_slot(REGNO) \
458  cfun->machine->frame_layout.gpr_save_slots[REGNO]
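/* Illustrative use of the accessors above: cfun_fpr_save_p (FPR0_REGNUM)
   tests bit 0 of the fpr_bitmap, i.e. whether %f0 has to be saved.  */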
459
460/* Number of GPRs and FPRs used for argument passing.  */
461#define GP_ARG_NUM_REG 5
462#define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
463#define VEC_ARG_NUM_REG 8
464
465/* A couple of shortcuts.  */
466#define CONST_OK_FOR_J(x) \
467	CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
468#define CONST_OK_FOR_K(x) \
469	CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
470#define CONST_OK_FOR_Os(x) \
471        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
472#define CONST_OK_FOR_Op(x) \
473        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
474#define CONST_OK_FOR_On(x) \
475        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
476
477#define REGNO_PAIR_OK(REGNO, MODE)                               \
478  (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
479
480/* This is the read-ahead, in bytes, of the dynamic branch prediction
481   unit on a z10 (or higher) CPU.  */
482#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
483
484
485/* Indicate which ABI has been used for passing vector args.
486   0 - no vector type arguments have been passed where the ABI is relevant
487   1 - the old ABI has been used
488   2 - a vector type argument has been passed either in a vector register
489       or on the stack by value  */
490static int s390_vector_abi = 0;
491
492/* Set the vector ABI marker if TYPE is subject to the vector ABI
493   switch.  The vector ABI affects only vector data types.  There are
494   two aspects of the vector ABI relevant here:
495
496   1. vectors >= 16 bytes have an alignment of 8 bytes with the new
497   ABI and natural alignment with the old.
498
499   2. vectors <= 16 bytes are passed in VRs or by value on the stack
500   with the new ABI but by reference on the stack with the old.
501
502   If ARG_P is true TYPE is used for a function argument or return
503   value.  The ABI marker then is set for all vector data types.  If
504   ARG_P is false only vectors covered by case 1 above are checked.  */
505
506static void
507s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
508{
509  static hash_set<const_tree> visited_types_hash;
510
511  if (s390_vector_abi)
512    return;
513
514  if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
515    return;
516
517  if (visited_types_hash.contains (type))
518    return;
519
520  visited_types_hash.add (type);
521
522  if (VECTOR_TYPE_P (type))
523    {
524      int type_size = int_size_in_bytes (type);
525
526      /* Outside of arguments only the alignment changes, and this
527	 only happens for vector types >= 16 bytes.  */
528      if (!arg_p && type_size < 16)
529	return;
530
531      /* In arguments vector types > 16 bytes are passed as before (GCC
532	 never enforced the bigger alignment for arguments which was
533	 required by the old vector ABI).  However, it might still be
534	 ABI relevant due to the changed alignment if it is a struct
535	 member.  */
536      if (arg_p && type_size > 16 && !in_struct_p)
537	return;
538
539      s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
540    }
541  else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
542    {
543      /* ARRAY_TYPE: Since neither of the ABIs gives us more than
544	 natural alignment, there will never be ABI-dependent padding
545	 in an array type.  That's why we do not set in_struct_p to
546	 true here.  */
547      s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
548    }
549  else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
550    {
551      tree arg_chain;
552
553      /* Check the return type.  */
554      s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
555
556      for (arg_chain = TYPE_ARG_TYPES (type);
557	   arg_chain;
558	   arg_chain = TREE_CHAIN (arg_chain))
559	s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
560    }
561  else if (RECORD_OR_UNION_TYPE_P (type))
562    {
563      tree field;
564
565      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
566	{
567	  if (TREE_CODE (field) != FIELD_DECL)
568	    continue;
569
570	  s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
571	}
572    }
573}
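/* Illustration: a 16-byte vector type used as a function argument makes
   it past both early returns above, so the marker is set to 2 if the new
   vector ABI is in effect (TARGET_VX_ABI) and to 1 otherwise.  */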
574
575
576/* System z builtins.  */
577
578#include "s390-builtins.h"
579
580const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
581  {
582#undef B_DEF
583#undef OB_DEF
584#undef OB_DEF_VAR
585#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
586#define OB_DEF(...)
587#define OB_DEF_VAR(...)
588#include "s390-builtins.def"
589    0
590  };
591
592const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
593  {
594#undef B_DEF
595#undef OB_DEF
596#undef OB_DEF_VAR
597#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
598#define OB_DEF(...)
599#define OB_DEF_VAR(...)
600#include "s390-builtins.def"
601    0
602  };
603
604const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
605  {
606#undef B_DEF
607#undef OB_DEF
608#undef OB_DEF_VAR
609#define B_DEF(...)
610#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
611#define OB_DEF_VAR(...)
612#include "s390-builtins.def"
613    0
614  };
615
616const unsigned int
617opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
618  {
619#undef B_DEF
620#undef OB_DEF
621#undef OB_DEF_VAR
622#define B_DEF(...)
623#define OB_DEF(...)
624#define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FLAGS,
625#include "s390-builtins.def"
626    0
627  };
628
629tree s390_builtin_types[BT_MAX];
630tree s390_builtin_fn_types[BT_FN_MAX];
631tree s390_builtin_decls[S390_BUILTIN_MAX +
632			S390_OVERLOADED_BUILTIN_MAX +
633			S390_OVERLOADED_BUILTIN_VAR_MAX];
634
635static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
636#undef B_DEF
637#undef OB_DEF
638#undef OB_DEF_VAR
639#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
640#define OB_DEF(...)
641#define OB_DEF_VAR(...)
642
643#include "s390-builtins.def"
644  CODE_FOR_nothing
645};
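/* The tables above are all built with the same X-macro technique: each
   one redefines B_DEF/OB_DEF/OB_DEF_VAR to emit just the field of
   interest and re-includes s390-builtins.def.  As an illustration (a
   hypothetical entry, not taken from the real .def file), a line such as
     B_DEF (s390_foo, foo_pattern, 0, B_VX, O_U4, BT_FN_INT_INT)
   would contribute B_VX to bflags_builtin, O_U4 to opflags_builtin and
   CODE_FOR_foo_pattern to code_for_builtin.  */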
646
647static void
648s390_init_builtins (void)
649{
650  /* These definitions are being used in s390-builtins.def.  */
651  tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
652				       NULL, NULL);
653  tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
654  tree c_uint64_type_node;
655  unsigned int bflags_mask = (BFLAGS_MASK_INIT);
656
657  bflags_mask |= (TARGET_VX)  ? B_VX  : 0;
658  bflags_mask |= (TARGET_HTM) ? B_HTM : 0;
659
660  /* The uint64_type_node from tree.c is not compatible with the C99
661     uint64_t data type.  What we want is c_uint64_type_node from
662     c-common.c.  But since backend code is not supposed to interface
663     with the frontend, we recreate it here.  */
664  if (TARGET_64BIT)
665    c_uint64_type_node = long_unsigned_type_node;
666  else
667    c_uint64_type_node = long_long_unsigned_type_node;
668
669#undef DEF_TYPE
670#define DEF_TYPE(INDEX, BFLAGS, NODE, CONST_P)		\
671  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))	\
672    s390_builtin_types[INDEX] = (!CONST_P) ?		\
673      (NODE) : build_type_variant ((NODE), 1, 0);
674
675#undef DEF_POINTER_TYPE
676#define DEF_POINTER_TYPE(INDEX, BFLAGS, INDEX_BASE)			\
677  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))			\
678    s390_builtin_types[INDEX] =						\
679      build_pointer_type (s390_builtin_types[INDEX_BASE]);
680
681#undef DEF_DISTINCT_TYPE
682#define DEF_DISTINCT_TYPE(INDEX, BFLAGS, INDEX_BASE)			\
683  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))			\
684    s390_builtin_types[INDEX] =						\
685      build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
686
687#undef DEF_VECTOR_TYPE
688#define DEF_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS)		\
689  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))			\
690    s390_builtin_types[INDEX] =						\
691      build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
692
693#undef DEF_OPAQUE_VECTOR_TYPE
694#define DEF_OPAQUE_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS)	\
695  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))			\
696    s390_builtin_types[INDEX] =						\
697      build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
698
699#undef DEF_FN_TYPE
700#define DEF_FN_TYPE(INDEX, BFLAGS, args...)			\
701  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))		\
702    s390_builtin_fn_types[INDEX] =				\
703    build_function_type_list (args, NULL_TREE);
704#undef DEF_OV_TYPE
705#define DEF_OV_TYPE(...)
706#include "s390-builtin-types.def"
707
708#undef B_DEF
709#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE)		\
710  if (((BFLAGS) & ~bflags_mask) == 0)					\
711    s390_builtin_decls[S390_BUILTIN_##NAME] =				\
712      add_builtin_function ("__builtin_" #NAME,				\
713			    s390_builtin_fn_types[FNTYPE],		\
714			    S390_BUILTIN_##NAME,			\
715			    BUILT_IN_MD,				\
716			    NULL,					\
717			    ATTRS);
718#undef OB_DEF
719#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE)	\
720  if (((BFLAGS) & ~bflags_mask) == 0)					\
721    s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
722      add_builtin_function ("__builtin_" #NAME,				\
723			    s390_builtin_fn_types[FNTYPE],		\
724			    S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
725			    BUILT_IN_MD,				\
726			    NULL,					\
727			    0);
728#undef OB_DEF_VAR
729#define OB_DEF_VAR(...)
730#include "s390-builtins.def"
731
732}
733
734/* Return true if ARG is appropriate as argument number ARGNUM of
735   builtin DECL.  The operand flags from s390-builtins.def have to
736   be passed as OP_FLAGS.  */
737bool
738s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
739{
740  if (O_UIMM_P (op_flags))
741    {
742      int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
743      int bitwidth = bitwidths[op_flags - O_U1];
744
745      if (!tree_fits_uhwi_p (arg)
746	  || tree_to_uhwi (arg) > ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1)
747	{
748	  error ("constant argument %d for builtin %qF is out of range (0..
749		HOST_WIDE_INT_PRINT_UNSIGNED ")",
750		argnum, decl,
751		((unsigned HOST_WIDE_INT)1 << bitwidth) - 1);
752	  return false;
753	}
754    }
755
756  if (O_SIMM_P (op_flags))
757    {
758      int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
759      int bitwidth = bitwidths[op_flags - O_S2];
760
761      if (!tree_fits_shwi_p (arg)
762	  || tree_to_shwi (arg) < -((HOST_WIDE_INT)1 << (bitwidth - 1))
763	  || tree_to_shwi (arg) > (((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1))
764	{
765	  error ("constant argument %d for builtin %qF is out of range (
766		HOST_WIDE_INT_PRINT_DEC ".."
767		HOST_WIDE_INT_PRINT_DEC ")",
768		argnum, decl,
769		-((HOST_WIDE_INT)1 << (bitwidth - 1)),
770		((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1);
771	  return false;
772	}
773    }
774  return true;
775}
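/* For instance, assuming the O_Un/O_Sn operand flag names correspond to
   the bit widths listed in the arrays above, an O_U4 operand must be an
   unsigned constant in the range 0..15, and an O_S8 operand a signed
   constant in the range -128..127.  */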
776
777/* Expand an expression EXP that calls a built-in function,
778   with result going to TARGET if that's convenient
779   (and in mode MODE if that's convenient).
780   SUBTARGET may be used as the target for computing one of EXP's operands.
781   IGNORE is nonzero if the value is to be ignored.  */
782
783static rtx
784s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
785		     machine_mode mode ATTRIBUTE_UNUSED,
786		     int ignore ATTRIBUTE_UNUSED)
787{
788#define MAX_ARGS 5
789
790  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
791  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
792  enum insn_code icode;
793  rtx op[MAX_ARGS], pat;
794  int arity;
795  bool nonvoid;
796  tree arg;
797  call_expr_arg_iterator iter;
798  unsigned int all_op_flags = opflags_for_builtin (fcode);
799  machine_mode last_vec_mode = VOIDmode;
800
801  if (TARGET_DEBUG_ARG)
802    {
803      fprintf (stderr,
804	       "s390_expand_builtin, code = %4d, %s\n",
805	       (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)));
806    }
807
808  if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
809      && fcode < S390_ALL_BUILTIN_MAX)
810    {
811      gcc_unreachable ();
812    }
813  else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
814    {
815      icode = code_for_builtin[fcode];
816      /* Set a flag in the machine specific cfun part in order to support
817	 saving/restoring of FPRs.  */
818      if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
819	cfun->machine->tbegin_p = true;
820    }
821  else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
822    {
823      error ("Unresolved overloaded builtin");
824      return const0_rtx;
825    }
826  else
827    internal_error ("bad builtin fcode");
828
829  if (icode == 0)
830    internal_error ("bad builtin icode");
831
832  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
833
834  if (nonvoid)
835    {
836      machine_mode tmode = insn_data[icode].operand[0].mode;
837      if (!target
838	  || GET_MODE (target) != tmode
839	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
840	target = gen_reg_rtx (tmode);
841
842      /* There are builtins (e.g. vec_promote) with no vector
843	 arguments but an element selector.  So we have to also look
844	 at the vector return type when emitting the modulo
845	 operation.  */
846      if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
847	last_vec_mode = insn_data[icode].operand[0].mode;
848    }
849
850  arity = 0;
851  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
852    {
853      const struct insn_operand_data *insn_op;
854      unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
855
856      all_op_flags = all_op_flags >> O_SHIFT;
857
858      if (arg == error_mark_node)
859	return NULL_RTX;
860      if (arity >= MAX_ARGS)
861	return NULL_RTX;
862
863      if (O_IMM_P (op_flags)
864	  && TREE_CODE (arg) != INTEGER_CST)
865	{
866	  error ("constant value required for builtin %qF argument %d",
867		 fndecl, arity + 1);
868	  return const0_rtx;
869	}
870
871      if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
872	return const0_rtx;
873
874      insn_op = &insn_data[icode].operand[arity + nonvoid];
875      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
876
877      /* expand_expr truncates constants to the target mode only if it
878	 is "convenient".  However, our checks below rely on this
879	 being done.  */
880      if (CONST_INT_P (op[arity])
881	  && SCALAR_INT_MODE_P (insn_op->mode)
882	  && GET_MODE (op[arity]) != insn_op->mode)
883	op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
884						 insn_op->mode));
885
886      /* Wrap the expanded RTX for pointer types into a MEM expr with
887	 the proper mode.  This allows us to use e.g. (match_operand
888	 "memory_operand" ...) in the insn patterns instead of (mem
889	 (match_operand "address_operand" ...)).  This is helpful for
890	 patterns that do not just accept MEMs.  */
891      if (POINTER_TYPE_P (TREE_TYPE (arg))
892	  && insn_op->predicate != address_operand)
893	op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
894
895      /* Expand the modulo operation required on element selectors.  */
896      if (op_flags == O_ELEM)
897	{
898	  gcc_assert (last_vec_mode != VOIDmode);
899	  op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
900					     op[arity],
901					     GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
902					     NULL_RTX, 1, OPTAB_DIRECT);
903	}
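      /* E.g. for a V4SI vector the AND above reduces a constant element
	 selector of 5 to 5 & 3 == 1 (illustrative values).  */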
904
905      /* Record the vector mode used for an element selector.  This assumes:
906	 1. There is no builtin with two different vector modes and an element selector
907         2. The element selector comes after the vector type it is referring to.
908	 This currently holds true for all the builtins, but FIXME: we
909	 should check for that explicitly.  */
910      if (VECTOR_MODE_P (insn_op->mode))
911	last_vec_mode = insn_op->mode;
912
913      if (insn_op->predicate (op[arity], insn_op->mode))
914	{
915	  arity++;
916	  continue;
917	}
918
919      if (MEM_P (op[arity])
920	  && insn_op->predicate == memory_operand
921	  && (GET_MODE (XEXP (op[arity], 0)) == Pmode
922	      || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
923	{
924	  op[arity] = replace_equiv_address (op[arity],
925					     copy_to_mode_reg (Pmode,
926					       XEXP (op[arity], 0)));
927	}
928      else if (GET_MODE (op[arity]) == insn_op->mode
929	       || GET_MODE (op[arity]) == VOIDmode
930	       || (insn_op->predicate == address_operand
931		   && GET_MODE (op[arity]) == Pmode))
932	{
933	  /* An address_operand usually has VOIDmode in the expander
934	     so we cannot use this.  */
935	  machine_mode target_mode =
936	    (insn_op->predicate == address_operand
937	     ? Pmode : insn_op->mode);
938	  op[arity] = copy_to_mode_reg (target_mode, op[arity]);
939	}
940
941      if (!insn_op->predicate (op[arity], insn_op->mode))
942	{
943	  error ("Invalid argument %d for builtin %qF", arity + 1, fndecl);
944	  return const0_rtx;
945	}
946      arity++;
947    }
948
949  if (last_vec_mode != VOIDmode && !TARGET_VX)
950    {
951      error ("Vector type builtin %qF is not supported without -mvx "
952	     "(default with -march=z13).",
953	     fndecl);
954      return const0_rtx;
955    }
956
957  switch (arity)
958    {
959    case 0:
960      pat = GEN_FCN (icode) (target);
961      break;
962    case 1:
963      if (nonvoid)
964        pat = GEN_FCN (icode) (target, op[0]);
965      else
966	pat = GEN_FCN (icode) (op[0]);
967      break;
968    case 2:
969      if (nonvoid)
970	pat = GEN_FCN (icode) (target, op[0], op[1]);
971      else
972	pat = GEN_FCN (icode) (op[0], op[1]);
973      break;
974    case 3:
975      if (nonvoid)
976	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
977      else
978	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
979      break;
980    case 4:
981      if (nonvoid)
982	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
983      else
984	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
985      break;
986    case 5:
987      if (nonvoid)
988	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
989      else
990	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
991      break;
992    case 6:
993      if (nonvoid)
994	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
995      else
996	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
997      break;
998    default:
999      gcc_unreachable ();
1000    }
1001  if (!pat)
1002    return NULL_RTX;
1003  emit_insn (pat);
1004
1005  if (nonvoid)
1006    return target;
1007  else
1008    return const0_rtx;
1009}
1010
1011
1012static const int s390_hotpatch_hw_max = 1000000;
1013static int s390_hotpatch_hw_before_label = 0;
1014static int s390_hotpatch_hw_after_label = 0;
1015
1016/* Check whether the hotpatch attribute is applied to a function and, if it
1017   has arguments, whether the arguments are valid.  */
1018
1019static tree
1020s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
1021				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1022{
1023  tree expr;
1024  tree expr2;
1025  int err;
1026
1027  if (TREE_CODE (*node) != FUNCTION_DECL)
1028    {
1029      warning (OPT_Wattributes, "%qE attribute only applies to functions",
1030	       name);
1031      *no_add_attrs = true;
1032    }
1033  if (args != NULL && TREE_CHAIN (args) != NULL)
1034    {
1035      expr = TREE_VALUE (args);
1036      expr2 = TREE_VALUE (TREE_CHAIN (args));
1037    }
1038  if (args == NULL || TREE_CHAIN (args) == NULL)
1039    err = 1;
1040  else if (TREE_CODE (expr) != INTEGER_CST
1041	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1042	   || wi::gtu_p (expr, s390_hotpatch_hw_max))
1043    err = 1;
1044  else if (TREE_CODE (expr2) != INTEGER_CST
1045	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1046	   || wi::gtu_p (expr2, s390_hotpatch_hw_max))
1047    err = 1;
1048  else
1049    err = 0;
1050  if (err)
1051    {
1052      error ("requested %qE attribute is not a comma separated pair of"
1053	     " non-negative integer constants or too large (max. %d)", name,
1054	     s390_hotpatch_hw_max);
1055      *no_add_attrs = true;
1056    }
1057
1058  return NULL_TREE;
1059}
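/* Illustrative use of the attribute; the two arguments give the number of
   halfwords of hotpatching space before and after the function label (at
   most s390_hotpatch_hw_max each):

     void foo (void) __attribute__ ((hotpatch (1, 2)));  */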
1060
1061/* Expand the s390_vector_bool type attribute.  */
1062
1063static tree
1064s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1065				  tree args ATTRIBUTE_UNUSED,
1066				  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1067{
1068  tree type = *node, result = NULL_TREE;
1069  machine_mode mode;
1070
1071  while (POINTER_TYPE_P (type)
1072	 || TREE_CODE (type) == FUNCTION_TYPE
1073	 || TREE_CODE (type) == METHOD_TYPE
1074	 || TREE_CODE (type) == ARRAY_TYPE)
1075    type = TREE_TYPE (type);
1076
1077  mode = TYPE_MODE (type);
1078  switch (mode)
1079    {
1080    case DImode: case V2DImode: result = s390_builtin_types[BT_BV2DI]; break;
1081    case SImode: case V4SImode: result = s390_builtin_types[BT_BV4SI]; break;
1082    case HImode: case V8HImode: result = s390_builtin_types[BT_BV8HI]; break;
1083    case QImode: case V16QImode: result = s390_builtin_types[BT_BV16QI]; break;
1084    default: break;
1085    }
1086
1087  *no_add_attrs = true;  /* No need to hang on to the attribute.  */
1088
1089  if (result)
1090    *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1091
1092  return NULL_TREE;
1093}
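/* E.g. applied to a type with SImode or V4SImode mode, the handler above
   rewrites the type to the BT_BV4SI boolean vector type; the attribute
   itself is not retained (*no_add_attrs is set).  */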
1094
1095static const struct attribute_spec s390_attribute_table[] = {
1096  { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
1097  { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
1098  /* End element.  */
1099  { NULL,        0, 0, false, false, false, NULL, false }
1100};
1101
1102/* Return the alignment for LABEL.  We default to the -falign-labels
1103   value except for the literal pool base label.  */
1104int
1105s390_label_align (rtx label)
1106{
1107  rtx_insn *prev_insn = prev_active_insn (label);
1108  rtx set, src;
1109
1110  if (prev_insn == NULL_RTX)
1111    goto old;
1112
1113  set = single_set (prev_insn);
1114
1115  if (set == NULL_RTX)
1116    goto old;
1117
1118  src = SET_SRC (set);
1119
1120  /* Don't align literal pool base labels.  */
1121  if (GET_CODE (src) == UNSPEC
1122      && XINT (src, 1) == UNSPEC_MAIN_BASE)
1123    return 0;
1124
1125 old:
1126  return align_labels_log;
1127}
1128
1129static machine_mode
1130s390_libgcc_cmp_return_mode (void)
1131{
1132  return TARGET_64BIT ? DImode : SImode;
1133}
1134
1135static machine_mode
1136s390_libgcc_shift_count_mode (void)
1137{
1138  return TARGET_64BIT ? DImode : SImode;
1139}
1140
1141static machine_mode
1142s390_unwind_word_mode (void)
1143{
1144  return TARGET_64BIT ? DImode : SImode;
1145}
1146
1147/* Return true if the back end supports mode MODE.  */
1148static bool
1149s390_scalar_mode_supported_p (machine_mode mode)
1150{
1151  /* In contrast to the default implementation, reject TImode constants on
1152     31-bit TARGET_ZARCH for ABI compliance.  */
1153  if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1154    return false;
1155
1156  if (DECIMAL_FLOAT_MODE_P (mode))
1157    return default_decimal_float_supported_p ();
1158
1159  return default_scalar_mode_supported_p (mode);
1160}
1161
1162/* Return true if the back end supports vector mode MODE.  */
1163static bool
1164s390_vector_mode_supported_p (machine_mode mode)
1165{
1166  machine_mode inner;
1167
1168  if (!VECTOR_MODE_P (mode)
1169      || !TARGET_VX
1170      || GET_MODE_SIZE (mode) > 16)
1171    return false;
1172
1173  inner = GET_MODE_INNER (mode);
1174
1175  switch (inner)
1176    {
1177    case QImode:
1178    case HImode:
1179    case SImode:
1180    case DImode:
1181    case TImode:
1182    case SFmode:
1183    case DFmode:
1184    case TFmode:
1185      return true;
1186    default:
1187      return false;
1188    }
1189}
1190
1191/* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */
1192
1193void
1194s390_set_has_landing_pad_p (bool value)
1195{
1196  cfun->machine->has_landing_pad_p = value;
1197}
1198
1199/* If two condition code modes are compatible, return a condition code
1200   mode which is compatible with both.  Otherwise, return
1201   VOIDmode.  */
1202
1203static machine_mode
1204s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1205{
1206  if (m1 == m2)
1207    return m1;
1208
1209  switch (m1)
1210    {
1211    case CCZmode:
1212      if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1213	  || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1214        return m2;
1215      return VOIDmode;
1216
1217    case CCSmode:
1218    case CCUmode:
1219    case CCTmode:
1220    case CCSRmode:
1221    case CCURmode:
1222    case CCZ1mode:
1223      if (m2 == CCZmode)
1224	return m1;
1225
1226      return VOIDmode;
1227
1228    default:
1229      return VOIDmode;
1230    }
1231  return VOIDmode;
1232}
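/* For example, CCZmode (only a zero/non-zero distinction is needed) is
   compatible with CCUmode; the richer CCUmode is returned in that
   case.  */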
1233
1234/* Return true if SET either doesn't set the CC register, or else
1235   the source and destination have matching CC modes and that
1236   CC mode is at least as constrained as REQ_MODE.  */
1237
1238static bool
1239s390_match_ccmode_set (rtx set, machine_mode req_mode)
1240{
1241  machine_mode set_mode;
1242
1243  gcc_assert (GET_CODE (set) == SET);
1244
1245  if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1246    return 1;
1247
1248  set_mode = GET_MODE (SET_DEST (set));
1249  switch (set_mode)
1250    {
1251    case CCSmode:
1252    case CCSRmode:
1253    case CCUmode:
1254    case CCURmode:
1255    case CCLmode:
1256    case CCL1mode:
1257    case CCL2mode:
1258    case CCL3mode:
1259    case CCT1mode:
1260    case CCT2mode:
1261    case CCT3mode:
1262    case CCVEQmode:
1263    case CCVHmode:
1264    case CCVHUmode:
1265    case CCVFHmode:
1266    case CCVFHEmode:
1267      if (req_mode != set_mode)
1268        return 0;
1269      break;
1270
1271    case CCZmode:
1272      if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1273	  && req_mode != CCSRmode && req_mode != CCURmode)
1274        return 0;
1275      break;
1276
1277    case CCAPmode:
1278    case CCANmode:
1279      if (req_mode != CCAmode)
1280        return 0;
1281      break;
1282
1283    default:
1284      gcc_unreachable ();
1285    }
1286
1287  return (GET_MODE (SET_SRC (set)) == set_mode);
1288}
1289
1290/* Return true if every SET in INSN that sets the CC register
1291   has source and destination with matching CC modes and that
1292   CC mode is at least as constrained as REQ_MODE.
1293   If REQ_MODE is VOIDmode, always return false.  */
1294
1295bool
1296s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1297{
1298  int i;
1299
1300  /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
1301  if (req_mode == VOIDmode)
1302    return false;
1303
1304  if (GET_CODE (PATTERN (insn)) == SET)
1305    return s390_match_ccmode_set (PATTERN (insn), req_mode);
1306
1307  if (GET_CODE (PATTERN (insn)) == PARALLEL)
1308      for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1309        {
1310          rtx set = XVECEXP (PATTERN (insn), 0, i);
1311          if (GET_CODE (set) == SET)
1312            if (!s390_match_ccmode_set (set, req_mode))
1313              return false;
1314        }
1315
1316  return true;
1317}
1318
1319/* If a test-under-mask instruction can be used to implement
1320   (compare (and ... OP1) OP2), return the CC mode required
1321   to do that.  Otherwise, return VOIDmode.
1322   MIXED is true if the instruction can distinguish between
1323   CC1 and CC2 for mixed selected bits (TMxx), it is false
1324   if the instruction cannot (TM).  */
1325
1326machine_mode
1327s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1328{
1329  int bit0, bit1;
1330
1331  /* ??? Fixme: should work on CONST_DOUBLE as well.  */
1332  if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1333    return VOIDmode;
1334
1335  /* Selected bits all zero: CC0.
1336     e.g.: int a; if ((a & (16 + 128)) == 0) */
1337  if (INTVAL (op2) == 0)
1338    return CCTmode;
1339
1340  /* Selected bits all one: CC3.
1341     e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1342  if (INTVAL (op2) == INTVAL (op1))
1343    return CCT3mode;
1344
1345  /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1346     int a;
1347     if ((a & (16 + 128)) == 16)         -> CCT1
1348     if ((a & (16 + 128)) == 128)        -> CCT2  */
1349  if (mixed)
1350    {
1351      bit1 = exact_log2 (INTVAL (op2));
1352      bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1353      if (bit0 != -1 && bit1 != -1)
1354        return bit0 > bit1 ? CCT1mode : CCT2mode;
1355    }
1356
1357  return VOIDmode;
1358}
1359
1360/* Given a comparison code OP (EQ, NE, etc.) and the operands
1361   OP0 and OP1 of a COMPARE, return the mode to be used for the
1362   comparison.  */
1363
1364machine_mode
1365s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1366{
1367  if (TARGET_VX
1368      && register_operand (op0, DFmode)
1369      && register_operand (op1, DFmode))
1370    {
1371      /* LT, LE, UNGT, UNGE require swapping OP0 and OP1.  Either
1372	 s390_emit_compare or s390_canonicalize_comparison will take
1373	 care of it.  */
1374      switch (code)
1375	{
1376	case EQ:
1377	case NE:
1378	  return CCVEQmode;
1379	case GT:
1380	case UNLE:
1381	  return CCVFHmode;
1382	case GE:
1383	case UNLT:
1384	  return CCVFHEmode;
1385	default:
1386	  ;
1387	}
1388    }
1389
1390  switch (code)
1391    {
1392      case EQ:
1393      case NE:
1394	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1395	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1396	  return CCAPmode;
1397	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1398	    && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1399	  return CCAPmode;
1400	if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1401	     || GET_CODE (op1) == NEG)
1402	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1403	  return CCLmode;
1404
1405	if (GET_CODE (op0) == AND)
1406	  {
1407	    /* Check whether we can potentially do it via TM.  */
1408	    machine_mode ccmode;
1409	    ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1410	    if (ccmode != VOIDmode)
1411	      {
1412		/* Relax CCTmode to CCZmode to allow fall-back to AND
1413		   if that turns out to be beneficial.  */
1414	        return ccmode == CCTmode ? CCZmode : ccmode;
1415	      }
1416	  }
1417
1418	if (register_operand (op0, HImode)
1419	    && GET_CODE (op1) == CONST_INT
1420	    && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1421	  return CCT3mode;
1422	if (register_operand (op0, QImode)
1423	    && GET_CODE (op1) == CONST_INT
1424	    && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1425	  return CCT3mode;
1426
1427	return CCZmode;
1428
1429      case LE:
1430      case LT:
1431      case GE:
1432      case GT:
1433	/* The only overflow condition of NEG and ABS happens when
1434	   INT_MIN (i.e. -INT_MAX - 1) is used as parameter: the result
1435	   stays negative although it would be positive mathematically.
1436	   Using CCAP mode the resulting cc can be used for comparisons.  */
1437	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1438	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1439	  return CCAPmode;
1440
1441 	/* If constants are involved in an add instruction it is possible to use
1442 	   the resulting cc for comparisons with zero. Knowing the sign of the
1443	   constant the overflow behavior gets predictable. e.g.:
1444 	     int a, b; if ((b = a + c) > 0)
1445 	   with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
1446	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1447	    && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1448		|| (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1449		    /* Avoid INT32_MIN on 32 bit.  */
1450		    && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1451	  {
1452	    if (INTVAL (XEXP((op0), 1)) < 0)
1453	      return CCANmode;
1454	    else
1455	      return CCAPmode;
1456	  }
1457	/* Fall through.  */
1458      case UNORDERED:
1459      case ORDERED:
1460      case UNEQ:
1461      case UNLE:
1462      case UNLT:
1463      case UNGE:
1464      case UNGT:
1465      case LTGT:
1466	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1467	    && GET_CODE (op1) != CONST_INT)
1468	  return CCSRmode;
1469	return CCSmode;
1470
1471      case LTU:
1472      case GEU:
1473	if (GET_CODE (op0) == PLUS
1474	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1475	  return CCL1mode;
1476
1477	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1478	    && GET_CODE (op1) != CONST_INT)
1479	  return CCURmode;
1480	return CCUmode;
1481
1482      case LEU:
1483      case GTU:
1484	if (GET_CODE (op0) == MINUS
1485	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1486	  return CCL2mode;
1487
1488	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1489	    && GET_CODE (op1) != CONST_INT)
1490	  return CCURmode;
1491	return CCUmode;
1492
1493      default:
1494	gcc_unreachable ();
1495    }
1496}
1497
1498/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1499   that we can implement more efficiently.  */
1500
1501static void
1502s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1503			      bool op0_preserve_value)
1504{
1505  if (op0_preserve_value)
1506    return;
1507
1508  /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
1509  if ((*code == EQ || *code == NE)
1510      && *op1 == const0_rtx
1511      && GET_CODE (*op0) == ZERO_EXTRACT
1512      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1513      && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1514      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1515    {
1516      rtx inner = XEXP (*op0, 0);
1517      HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1518      HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1519      HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1520
1521      if (len > 0 && len < modesize
1522	  && pos >= 0 && pos + len <= modesize
1523	  && modesize <= HOST_BITS_PER_WIDE_INT)
1524	{
1525	  unsigned HOST_WIDE_INT block;
1526	  block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
1527	  block <<= modesize - pos - len;
1528
1529	  *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1530			      gen_int_mode (block, GET_MODE (inner)));
1531	}
1532    }
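  /* Illustration: for an SImode inner operand with len == 2 and pos == 4
     the mask computed above is 0x0c000000, i.e. bit positions are
     counted from the most significant bit.  */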
1533
1534  /* Narrow AND of memory against immediate to enable TM.  */
1535  if ((*code == EQ || *code == NE)
1536      && *op1 == const0_rtx
1537      && GET_CODE (*op0) == AND
1538      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1539      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1540    {
1541      rtx inner = XEXP (*op0, 0);
1542      rtx mask = XEXP (*op0, 1);
1543
1544      /* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
1545      if (GET_CODE (inner) == SUBREG
1546	  && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1547	  && (GET_MODE_SIZE (GET_MODE (inner))
1548	      >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1549	  && ((INTVAL (mask)
1550               & GET_MODE_MASK (GET_MODE (inner))
1551               & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1552	      == 0))
1553	inner = SUBREG_REG (inner);
1554
1555      /* Do not change volatile MEMs.  */
1556      if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1557	{
1558	  int part = s390_single_part (XEXP (*op0, 1),
1559				       GET_MODE (inner), QImode, 0);
1560	  if (part >= 0)
1561	    {
1562	      mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1563	      inner = adjust_address_nv (inner, QImode, part);
1564	      *op0 = gen_rtx_AND (QImode, inner, mask);
1565	    }
1566	}
1567    }
1568
1569  /* Narrow comparisons against 0xffff to HImode if possible.  */
1570  if ((*code == EQ || *code == NE)
1571      && GET_CODE (*op1) == CONST_INT
1572      && INTVAL (*op1) == 0xffff
1573      && SCALAR_INT_MODE_P (GET_MODE (*op0))
1574      && (nonzero_bits (*op0, GET_MODE (*op0))
1575	  & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
1576    {
1577      *op0 = gen_lowpart (HImode, *op0);
1578      *op1 = constm1_rtx;
1579    }
1580
1581  /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
1582  if (GET_CODE (*op0) == UNSPEC
1583      && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1584      && XVECLEN (*op0, 0) == 1
1585      && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1586      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1587      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1588      && *op1 == const0_rtx)
1589    {
1590      enum rtx_code new_code = UNKNOWN;
1591      switch (*code)
1592	{
1593	  case EQ: new_code = EQ;  break;
1594	  case NE: new_code = NE;  break;
1595	  case LT: new_code = GTU; break;
1596	  case GT: new_code = LTU; break;
1597	  case LE: new_code = GEU; break;
1598	  case GE: new_code = LEU; break;
1599	  default: break;
1600	}
1601
1602      if (new_code != UNKNOWN)
1603	{
1604	  *op0 = XVECEXP (*op0, 0, 0);
1605	  *code = new_code;
1606	}
1607    }
1608
1609  /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
1610  if (GET_CODE (*op0) == UNSPEC
1611      && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1612      && XVECLEN (*op0, 0) == 1
1613      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1614      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1615      && CONST_INT_P (*op1))
1616    {
1617      enum rtx_code new_code = UNKNOWN;
1618      switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1619	{
1620	case CCZmode:
1621	case CCRAWmode:
1622	  switch (*code)
1623	    {
1624	    case EQ: new_code = EQ;  break;
1625	    case NE: new_code = NE;  break;
1626	    default: break;
1627	    }
1628	  break;
1629	default: break;
1630	}
1631
1632      if (new_code != UNKNOWN)
1633	{
1634	  /* For CCRAWmode put the required cc mask into the second
1635	     operand.  */
1636        if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1637            && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1638	    *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1639	  *op0 = XVECEXP (*op0, 0, 0);
1640	  *code = new_code;
1641	}
1642    }
1643
1644  /* Simplify cascaded EQ, NE with const0_rtx.  */
1645  if ((*code == NE || *code == EQ)
1646      && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1647      && GET_MODE (*op0) == SImode
1648      && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1649      && REG_P (XEXP (*op0, 0))
1650      && XEXP (*op0, 1) == const0_rtx
1651      && *op1 == const0_rtx)
1652    {
1653      if ((*code == EQ && GET_CODE (*op0) == NE)
1654          || (*code == NE && GET_CODE (*op0) == EQ))
1655	*code = EQ;
1656      else
1657	*code = NE;
1658      *op0 = XEXP (*op0, 0);
1659    }
1660
1661  /* Prefer register over memory as first operand.  */
1662  if (MEM_P (*op0) && REG_P (*op1))
1663    {
1664      rtx tem = *op0; *op0 = *op1; *op1 = tem;
1665      *code = (int)swap_condition ((enum rtx_code)*code);
1666    }
1667
1668  /* Using the scalar variants of vector instructions for 64 bit FP
1669     comparisons might require swapping the operands.  */
1670  if (TARGET_VX
1671      && register_operand (*op0, DFmode)
1672      && register_operand (*op1, DFmode)
1673      && (*code == LT || *code == LE || *code == UNGT || *code == UNGE))
1674    {
1675      rtx tmp;
1676
1677      switch (*code)
1678	{
1679	case LT:   *code = GT; break;
1680	case LE:   *code = GE; break;
1681	case UNGT: *code = UNLE; break;
1682	case UNGE: *code = UNLT; break;
1683	default: ;
1684	}
1685      tmp = *op0; *op0 = *op1; *op1 = tmp;
1686    }
1687}
1688
1689/* Helper function for s390_emit_compare.  If possible emit a 64 bit
1690   FP compare using the single element variant of vector instructions.
1691   Replace CODE with the comparison code to be used in the CC reg
1692   compare and return the condition code register RTX in CC.  */
1693
1694static bool
1695s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
1696				rtx *cc)
1697{
1698  machine_mode cmp_mode;
1699  bool swap_p = false;
1700
1701  switch (*code)
1702    {
1703    case EQ:   cmp_mode = CCVEQmode;  break;
1704    case NE:   cmp_mode = CCVEQmode;  break;
1705    case GT:   cmp_mode = CCVFHmode;  break;
1706    case GE:   cmp_mode = CCVFHEmode; break;
1707    case UNLE: cmp_mode = CCVFHmode;  break;
1708    case UNLT: cmp_mode = CCVFHEmode; break;
1709    case LT:   cmp_mode = CCVFHmode;  *code = GT;   swap_p = true; break;
1710    case LE:   cmp_mode = CCVFHEmode; *code = GE;   swap_p = true; break;
1711    case UNGE: cmp_mode = CCVFHmode;  *code = UNLE; swap_p = true; break;
1712    case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break;
1713    default: return false;
1714    }
1715
1716  if (swap_p)
1717    {
1718      rtx tmp = cmp2;
1719      cmp2 = cmp1;
1720      cmp1 = tmp;
1721    }
1722  *cc = gen_rtx_REG (cmp_mode, CC_REGNUM);
1723  emit_insn (gen_rtx_PARALLEL (VOIDmode,
1724	       gen_rtvec (2,
1725			  gen_rtx_SET (VOIDmode, *cc,
1726				       gen_rtx_COMPARE (cmp_mode, cmp1,
1727							cmp2)),
1728			  gen_rtx_CLOBBER (VOIDmode,
1729					   gen_rtx_SCRATCH (V2DImode)))));
1730  return true;
1731}
1732
1733
1734/* Emit a compare instruction suitable to implement the comparison
1735   OP0 CODE OP1.  Return the correct condition RTL to be placed in
1736   the IF_THEN_ELSE of the conditional branch testing the result.  */
1737
1738rtx
1739s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1740{
1741  machine_mode mode = s390_select_ccmode (code, op0, op1);
1742  rtx cc;
1743
1744  if (TARGET_VX
1745      && register_operand (op0, DFmode)
1746      && register_operand (op1, DFmode)
1747      && s390_expand_vec_compare_scalar (&code, op0, op1, &cc))
1748    {
1749      /* Work has been done by s390_expand_vec_compare_scalar already.  */
1750    }
1751  else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1752    {
1753      /* Do not output a redundant compare instruction if a
1754	 compare_and_swap pattern already computed the result and the
1755	 machine modes are compatible.  */
1756      gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1757		  == GET_MODE (op0));
1758      cc = op0;
1759    }
1760  else
1761    {
1762      cc = gen_rtx_REG (mode, CC_REGNUM);
1763      emit_insn (gen_rtx_SET (VOIDmode, cc, gen_rtx_COMPARE (mode, op0, op1)));
1764    }
1765
1766  return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1767}
1768
/* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if
   the current contents of MEM match CMP; OLD receives the previous
   contents of MEM.
1771   Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1772   conditional branch testing the result.  */
1773
1774static rtx
1775s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1776			    rtx cmp, rtx new_rtx)
1777{
1778  emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
1779  return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
1780			    const0_rtx);
1781}
1782
1783/* Emit a jump instruction to TARGET and return it.  If COND is
1784   NULL_RTX, emit an unconditional jump, else a conditional jump under
1785   condition COND.  */
1786
1787rtx_insn *
1788s390_emit_jump (rtx target, rtx cond)
1789{
1790  rtx insn;
1791
1792  target = gen_rtx_LABEL_REF (VOIDmode, target);
1793  if (cond)
1794    target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1795
1796  insn = gen_rtx_SET (VOIDmode, pc_rtx, target);
1797  return emit_jump_insn (insn);
1798}
1799
1800/* Return branch condition mask to implement a branch
1801   specified by CODE.  Return -1 for invalid comparisons.  */
1802
1803int
1804s390_branch_condition_mask (rtx code)
1805{
1806  const int CC0 = 1 << 3;
1807  const int CC1 = 1 << 2;
1808  const int CC2 = 1 << 1;
1809  const int CC3 = 1 << 0;
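
  /* The returned value is a four bit mask with the bit for CC0 in the
     most significant position.  For example, EQ on CCZmode below
     yields CC0 = 8 (binary 1000), while NE yields
     CC1 | CC2 | CC3 = 7 (binary 0111).  */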
1810
1811  gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1812  gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1813  gcc_assert (XEXP (code, 1) == const0_rtx
1814	      || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1815		  && CONST_INT_P (XEXP (code, 1))));
1816
1817
1818  switch (GET_MODE (XEXP (code, 0)))
1819    {
1820    case CCZmode:
1821    case CCZ1mode:
1822      switch (GET_CODE (code))
1823        {
1824        case EQ:	return CC0;
1825	case NE:	return CC1 | CC2 | CC3;
1826	default:	return -1;
1827        }
1828      break;
1829
1830    case CCT1mode:
1831      switch (GET_CODE (code))
1832        {
1833        case EQ:	return CC1;
1834	case NE:	return CC0 | CC2 | CC3;
1835	default:	return -1;
1836        }
1837      break;
1838
1839    case CCT2mode:
1840      switch (GET_CODE (code))
1841        {
1842        case EQ:	return CC2;
1843	case NE:	return CC0 | CC1 | CC3;
1844	default:	return -1;
1845        }
1846      break;
1847
1848    case CCT3mode:
1849      switch (GET_CODE (code))
1850        {
1851        case EQ:	return CC3;
1852	case NE:	return CC0 | CC1 | CC2;
1853	default:	return -1;
1854        }
1855      break;
1856
1857    case CCLmode:
1858      switch (GET_CODE (code))
1859        {
1860        case EQ:	return CC0 | CC2;
1861	case NE:	return CC1 | CC3;
1862	default:	return -1;
1863        }
1864      break;
1865
1866    case CCL1mode:
1867      switch (GET_CODE (code))
1868        {
1869	case LTU:	return CC2 | CC3;  /* carry */
1870	case GEU:	return CC0 | CC1;  /* no carry */
1871	default:	return -1;
1872        }
1873      break;
1874
1875    case CCL2mode:
1876      switch (GET_CODE (code))
1877        {
1878	case GTU:	return CC0 | CC1;  /* borrow */
1879	case LEU:	return CC2 | CC3;  /* no borrow */
1880	default:	return -1;
1881        }
1882      break;
1883
1884    case CCL3mode:
1885      switch (GET_CODE (code))
1886	{
1887	case EQ:	return CC0 | CC2;
1888	case NE:	return CC1 | CC3;
1889	case LTU:	return CC1;
1890	case GTU:	return CC3;
1891	case LEU:	return CC1 | CC2;
1892	case GEU:	return CC2 | CC3;
1893	default:	return -1;
1894	}
1895
1896    case CCUmode:
1897      switch (GET_CODE (code))
1898        {
1899        case EQ:	return CC0;
1900        case NE:	return CC1 | CC2 | CC3;
1901        case LTU:	return CC1;
1902        case GTU:	return CC2;
1903        case LEU:	return CC0 | CC1;
1904        case GEU:	return CC0 | CC2;
1905	default:	return -1;
1906        }
1907      break;
1908
1909    case CCURmode:
1910      switch (GET_CODE (code))
1911        {
1912        case EQ:	return CC0;
1913        case NE:	return CC2 | CC1 | CC3;
1914        case LTU:	return CC2;
1915        case GTU:	return CC1;
1916        case LEU:	return CC0 | CC2;
1917        case GEU:	return CC0 | CC1;
1918	default:	return -1;
1919        }
1920      break;
1921
1922    case CCAPmode:
1923      switch (GET_CODE (code))
1924        {
1925        case EQ:	return CC0;
1926        case NE:	return CC1 | CC2 | CC3;
1927        case LT:	return CC1 | CC3;
1928        case GT:	return CC2;
1929        case LE:	return CC0 | CC1 | CC3;
1930        case GE:	return CC0 | CC2;
1931	default:	return -1;
1932        }
1933      break;
1934
1935    case CCANmode:
1936      switch (GET_CODE (code))
1937        {
1938        case EQ:	return CC0;
1939        case NE:	return CC1 | CC2 | CC3;
1940        case LT:	return CC1;
1941        case GT:	return CC2 | CC3;
1942        case LE:	return CC0 | CC1;
1943        case GE:	return CC0 | CC2 | CC3;
1944	default:	return -1;
1945        }
1946      break;
1947
1948    case CCSmode:
1949      switch (GET_CODE (code))
1950        {
1951        case EQ:	return CC0;
1952        case NE:	return CC1 | CC2 | CC3;
1953        case LT:	return CC1;
1954        case GT:	return CC2;
1955        case LE:	return CC0 | CC1;
1956        case GE:	return CC0 | CC2;
1957	case UNORDERED:	return CC3;
1958	case ORDERED:	return CC0 | CC1 | CC2;
1959	case UNEQ:	return CC0 | CC3;
1960        case UNLT:	return CC1 | CC3;
1961        case UNGT:	return CC2 | CC3;
1962        case UNLE:	return CC0 | CC1 | CC3;
1963        case UNGE:	return CC0 | CC2 | CC3;
1964	case LTGT:	return CC1 | CC2;
1965	default:	return -1;
1966        }
1967      break;
1968
1969    case CCSRmode:
1970      switch (GET_CODE (code))
1971        {
1972        case EQ:	return CC0;
1973        case NE:	return CC2 | CC1 | CC3;
1974        case LT:	return CC2;
1975        case GT:	return CC1;
1976        case LE:	return CC0 | CC2;
1977        case GE:	return CC0 | CC1;
1978	case UNORDERED:	return CC3;
1979	case ORDERED:	return CC0 | CC2 | CC1;
1980	case UNEQ:	return CC0 | CC3;
1981        case UNLT:	return CC2 | CC3;
1982        case UNGT:	return CC1 | CC3;
1983        case UNLE:	return CC0 | CC2 | CC3;
1984        case UNGE:	return CC0 | CC1 | CC3;
1985	case LTGT:	return CC2 | CC1;
1986	default:	return -1;
1987        }
1988      break;
1989
1990      /* Vector comparison modes.  */
1991
1992    case CCVEQmode:
1993      switch (GET_CODE (code))
1994	{
1995	case EQ:        return CC0;
1996	case NE:        return CC3;
1997	default:        return -1;
1998	}
1999
2000    case CCVEQANYmode:
2001      switch (GET_CODE (code))
2002	{
2003	case EQ:        return CC0 | CC1;
2004	case NE:        return CC3 | CC1;
2005	default:        return -1;
2006	}
2007
2008      /* Integer vector compare modes.  */
2009
2010    case CCVHmode:
2011      switch (GET_CODE (code))
2012	{
2013	case GT:        return CC0;
2014	case LE:        return CC3;
2015	default:        return -1;
2016	}
2017
2018    case CCVHANYmode:
2019      switch (GET_CODE (code))
2020	{
2021	case GT:        return CC0 | CC1;
2022	case LE:        return CC3 | CC1;
2023	default:        return -1;
2024	}
2025
2026    case CCVHUmode:
2027      switch (GET_CODE (code))
2028	{
2029	case GTU:       return CC0;
2030	case LEU:       return CC3;
2031	default:        return -1;
2032	}
2033
2034    case CCVHUANYmode:
2035      switch (GET_CODE (code))
2036	{
2037	case GTU:       return CC0 | CC1;
2038	case LEU:       return CC3 | CC1;
2039	default:        return -1;
2040	}
2041
2042      /* FP vector compare modes.  */
2043
2044    case CCVFHmode:
2045      switch (GET_CODE (code))
2046	{
2047	case GT:        return CC0;
2048	case UNLE:      return CC3;
2049	default:        return -1;
2050	}
2051
2052    case CCVFHANYmode:
2053      switch (GET_CODE (code))
2054	{
2055	case GT:        return CC0 | CC1;
2056	case UNLE:      return CC3 | CC1;
2057	default:        return -1;
2058	}
2059
2060    case CCVFHEmode:
2061      switch (GET_CODE (code))
2062	{
2063	case GE:        return CC0;
2064	case UNLT:      return CC3;
2065	default:        return -1;
2066	}
2067
2068    case CCVFHEANYmode:
2069      switch (GET_CODE (code))
2070	{
2071	case GE:        return CC0 | CC1;
2072	case UNLT:      return CC3 | CC1;
2073	default:        return -1;
2074	}
2075
2076
2077    case CCRAWmode:
2078      switch (GET_CODE (code))
2079	{
2080	case EQ:
2081	  return INTVAL (XEXP (code, 1));
2082	case NE:
2083	  return (INTVAL (XEXP (code, 1))) ^ 0xf;
2084	default:
2085	  gcc_unreachable ();
2086	}
2087
2088    default:
2089      return -1;
2090    }
2091}
2092
2093
2094/* Return branch condition mask to implement a compare and branch
2095   specified by CODE.  Return -1 for invalid comparisons.  */
2096
2097int
2098s390_compare_and_branch_condition_mask (rtx code)
2099{
2100  const int CC0 = 1 << 3;
2101  const int CC1 = 1 << 2;
2102  const int CC2 = 1 << 1;
2103
2104  switch (GET_CODE (code))
2105    {
2106    case EQ:
2107      return CC0;
2108    case NE:
2109      return CC1 | CC2;
2110    case LT:
2111    case LTU:
2112      return CC1;
2113    case GT:
2114    case GTU:
2115      return CC2;
2116    case LE:
2117    case LEU:
2118      return CC0 | CC1;
2119    case GE:
2120    case GEU:
2121      return CC0 | CC2;
2122    default:
2123      gcc_unreachable ();
2124    }
2125  return -1;
2126}
2127
2128/* If INV is false, return assembler mnemonic string to implement
2129   a branch specified by CODE.  If INV is true, return mnemonic
2130   for the corresponding inverted branch.  */
2131
2132static const char *
2133s390_branch_condition_mnemonic (rtx code, int inv)
2134{
2135  int mask;
2136
2137  static const char *const mnemonic[16] =
2138    {
2139      NULL, "o", "h", "nle",
2140      "l", "nhe", "lh", "ne",
2141      "e", "nlh", "he", "nl",
2142      "le", "nh", "no", NULL
2143    };
2144
2145  if (GET_CODE (XEXP (code, 0)) == REG
2146      && REGNO (XEXP (code, 0)) == CC_REGNUM
2147      && (XEXP (code, 1) == const0_rtx
2148	  || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2149	      && CONST_INT_P (XEXP (code, 1)))))
2150    mask = s390_branch_condition_mask (code);
2151  else
2152    mask = s390_compare_and_branch_condition_mask (code);
2153
2154  gcc_assert (mask >= 0);
2155
2156  if (inv)
2157    mask ^= 15;
2158
2159  gcc_assert (mask >= 1 && mask <= 14);
2160
2161  return mnemonic[mask];
2162}
2163
/* Return the part of OP which has a value different from DEF.
   The size of the part is determined by MODE.
   Use this function only if you already know that OP really
   contains such a part.  */
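
/* For example (assuming a 64-bit HOST_WIDE_INT), extracting the HImode
   part of 0x0000ffff00000000 with DEF being zero scans the 16-bit
   chunks starting at the least significant one; chunks 0 and 1 equal
   DEF, chunk 2 is 0xffff, so 0xffff is returned.  */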
2168
2169unsigned HOST_WIDE_INT
2170s390_extract_part (rtx op, machine_mode mode, int def)
2171{
2172  unsigned HOST_WIDE_INT value = 0;
2173  int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2174  int part_bits = GET_MODE_BITSIZE (mode);
2175  unsigned HOST_WIDE_INT part_mask
2176    = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1;
2177  int i;
2178
2179  for (i = 0; i < max_parts; i++)
2180    {
2181      if (i == 0)
2182	value = (unsigned HOST_WIDE_INT) INTVAL (op);
2183      else
2184	value >>= part_bits;
2185
2186      if ((value & part_mask) != (def & part_mask))
2187	return value & part_mask;
2188    }
2189
2190  gcc_unreachable ();
2191}
2192
2193/* If OP is an integer constant of mode MODE with exactly one
2194   part of mode PART_MODE unequal to DEF, return the number of that
2195   part. Otherwise, return -1.  */
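
/* For example, the DImode value 0x00000000ffff0000 has exactly one
   HImode part that differs from DEF = 0.  The loop finds it as the
   second lowest 16-bit chunk (i == 1) and the function returns
   n_parts - 1 - i = 2, i.e. the part number counted from the most
   significant part downwards.  */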
2196
2197int
2198s390_single_part (rtx op,
2199		  machine_mode mode,
2200		  machine_mode part_mode,
2201		  int def)
2202{
2203  unsigned HOST_WIDE_INT value = 0;
2204  int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2205  unsigned HOST_WIDE_INT part_mask
2206    = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1;
2207  int i, part = -1;
2208
2209  if (GET_CODE (op) != CONST_INT)
2210    return -1;
2211
2212  for (i = 0; i < n_parts; i++)
2213    {
2214      if (i == 0)
2215	value = (unsigned HOST_WIDE_INT) INTVAL (op);
2216      else
2217	value >>= GET_MODE_BITSIZE (part_mode);
2218
2219      if ((value & part_mask) != (def & part_mask))
2220	{
2221	  if (part != -1)
2222	    return -1;
2223	  else
2224	    part = i;
2225	}
2226    }
2227  return part == -1 ? -1 : n_parts - 1 - part;
2228}
2229
2230/* Return true if IN contains a contiguous bitfield in the lower SIZE
2231   bits and no other bits are set in IN.  POS and LENGTH can be used
2232   to obtain the start position and the length of the bitfield.
2233
2234   POS gives the position of the first bit of the bitfield counting
2235   from the lowest order bit starting with zero.  In order to use this
2236   value for S/390 instructions this has to be converted to "bits big
2237   endian" style.  */
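
/* For example, IN = 0x0ff0 with SIZE = 16 yields *POS = 4 and
   *LENGTH = 8, while IN = 0x0f0f is rejected because its set bits are
   not contiguous.  */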
2238
2239bool
2240s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
2241			   int *pos, int *length)
2242{
2243  int tmp_pos = 0;
2244  int tmp_length = 0;
2245  int i;
2246  unsigned HOST_WIDE_INT mask = 1ULL;
2247  bool contiguous = false;
2248
2249  for (i = 0; i < size; mask <<= 1, i++)
2250    {
2251      if (contiguous)
2252	{
2253	  if (mask & in)
2254	    tmp_length++;
2255	  else
2256	    break;
2257	}
2258      else
2259	{
2260	  if (mask & in)
2261	    {
2262	      contiguous = true;
2263	      tmp_length++;
2264	    }
2265	  else
2266	    tmp_pos++;
2267	}
2268    }
2269
2270  if (!tmp_length)
2271    return false;
2272
2273  /* Calculate a mask for all bits beyond the contiguous bits.  */
2274  mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));
2275
2276  if ((unsigned)size < sizeof (HOST_WIDE_INT) * BITS_PER_UNIT)
2277    mask &= (HOST_WIDE_INT_1U << size) - 1;
2278
2279  if (mask & in)
2280    return false;
2281
2282  if (tmp_length + tmp_pos - 1 > size)
2283    return false;
2284
2285  if (length)
2286    *length = tmp_length;
2287
2288  if (pos)
2289    *pos = tmp_pos;
2290
2291  return true;
2292}
2293
/* Return true if OP is a CONST_VECTOR in which all elements are
   equal integer constants.  */

bool
s390_const_vec_duplicate_p (rtx op)
2296{
  if (!VECTOR_MODE_P (GET_MODE (op))
2298      || GET_CODE (op) != CONST_VECTOR
2299      || !CONST_INT_P (XVECEXP (op, 0, 0)))
2300    return false;
2301
2302  if (GET_MODE_NUNITS (GET_MODE (op)) > 1)
2303    {
2304      int i;
2305
2306      for (i = 1; i < GET_MODE_NUNITS (GET_MODE (op)); ++i)
2307	if (!rtx_equal_p (XVECEXP (op, 0, i), XVECEXP (op, 0, 0)))
2308	  return false;
2309    }
2310  return true;
2311}

/* Return true if OP contains the same contiguous bitfield in *all*
2313   its elements.  START and END can be used to obtain the start and
2314   end position of the bitfield.
2315
   START/END give the position of the first/last bit of the bitfield
2317   counting from the lowest order bit starting with zero.  In order to
2318   use these values for S/390 instructions this has to be converted to
2319   "bits big endian" style.  */
2320
2321bool
2322s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2323{
2324  unsigned HOST_WIDE_INT mask;
2325  int length, size;
2326
2327  if (!s390_const_vec_duplicate_p (op))
2328    return false;
2329
2330  size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2331  mask = UINTVAL (XVECEXP (op, 0, 0));
2332  if (s390_contiguous_bitmask_p (mask, size, start,
2333				 end != NULL ? &length : NULL))
2334    {
2335      if (end != NULL)
2336	*end = *start + length - 1;
2337      return true;
2338    }
2339  /* 0xff00000f style immediates can be covered by swapping start and
2340     end indices in vgm.  */
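  /* For example, for a V4SI constant with all elements 0xff00000f the
     complement 0x00fffff0 is contiguous with position 4 and length 20,
     so *START becomes 24 and *END becomes 3, describing the
     wrap-around bit range.  */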
2341  if (s390_contiguous_bitmask_p (~mask, size, start,
2342				 end != NULL ? &length : NULL))
2343    {
2344      if (end != NULL)
2345	*end = *start - 1;
2346      if (start != NULL)
2347	*start = *start + length;
2348      return true;
2349    }
2350  return false;
2351}
2352
/* Return true if OP is a constant vector whose byte chunks are all
   either 0 or 0xff.  If MASK is non-NULL, a byte mask appropriate for
   the vector generate byte mask instruction is stored there.  */
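
/* For example, a V16QI constant with all bytes equal to 0xff yields a
   mask of 0xffff (one mask bit per byte, element 0 corresponding to
   the most significant mask bit), while the all-zero vector yields a
   mask of 0.  */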
2356
2357bool
2358s390_bytemask_vector_p (rtx op, unsigned *mask)
2359{
2360  int i;
2361  unsigned tmp_mask = 0;
2362  int nunit, unit_size;
2363
2364  if (!VECTOR_MODE_P (GET_MODE (op))
2365      || GET_CODE (op) != CONST_VECTOR
2366      || !CONST_INT_P (XVECEXP (op, 0, 0)))
2367    return false;
2368
2369  nunit = GET_MODE_NUNITS (GET_MODE (op));
2370  unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2371
2372  for (i = 0; i < nunit; i++)
2373    {
2374      unsigned HOST_WIDE_INT c;
2375      int j;
2376
2377      if (!CONST_INT_P (XVECEXP (op, 0, i)))
2378	return false;
2379
2380      c = UINTVAL (XVECEXP (op, 0, i));
2381      for (j = 0; j < unit_size; j++)
2382	{
2383	  if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2384	    return false;
2385	  tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2386	  c = c >> BITS_PER_UNIT;
2387	}
2388    }
2389
2390  if (mask != NULL)
2391    *mask = tmp_mask;
2392
2393  return true;
2394}
2395
2396/* Check whether a rotate of ROTL followed by an AND of CONTIG is
2397   equivalent to a shift followed by the AND.  In particular, CONTIG
2398   should not overlap the (rotated) bit 0/bit 63 gap.  Negative values
2399   for ROTL indicate a rotate to the right.  */
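
/* For example, with BITSIZE = 32 and CONTIG = 0x0000ff00 (position 8,
   length 8), a rotate left by at most 8 bits or a rotate right by at
   most 16 bits keeps the field away from the gap; larger rotate
   amounts are rejected.  */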
2400
2401bool
2402s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2403{
2404  int pos, len;
2405  bool ok;
2406
2407  ok = s390_contiguous_bitmask_p (contig, bitsize, &pos, &len);
2408  gcc_assert (ok);
2409
2410  return ((rotl >= 0 && rotl <= pos)
2411	  || (rotl < 0 && -rotl <= bitsize - len - pos));
2412}
2413
2414/* Check whether we can (and want to) split a double-word
2415   move in mode MODE from SRC to DST into two single-word
2416   moves, moving the subword FIRST_SUBWORD first.  */
2417
2418bool
2419s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2420{
2421  /* Floating point and vector registers cannot be split.  */
2422  if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2423    return false;
2424
2425  /* We don't need to split if operands are directly accessible.  */
2426  if (s_operand (src, mode) || s_operand (dst, mode))
2427    return false;
2428
2429  /* Non-offsettable memory references cannot be split.  */
2430  if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2431      || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2432    return false;
2433
2434  /* Moving the first subword must not clobber a register
2435     needed to move the second subword.  */
2436  if (register_operand (dst, mode))
2437    {
2438      rtx subreg = operand_subword (dst, first_subword, 0, mode);
2439      if (reg_overlap_mentioned_p (subreg, src))
2440        return false;
2441    }
2442
2443  return true;
2444}
2445
2446/* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2447   and [MEM2, MEM2 + SIZE] do overlap and false
2448   otherwise.  */
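
/* For example, if the two addresses differ by a constant delta of 8
   bytes and SIZE is 16, the blocks are proven to overlap and true is
   returned; with SIZE = 8 the blocks are merely adjacent and false is
   returned.  */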
2449
2450bool
2451s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2452{
2453  rtx addr1, addr2, addr_delta;
2454  HOST_WIDE_INT delta;
2455
2456  if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2457    return true;
2458
2459  if (size == 0)
2460    return false;
2461
2462  addr1 = XEXP (mem1, 0);
2463  addr2 = XEXP (mem2, 0);
2464
2465  addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2466
2467  /* This overlapping check is used by peepholes merging memory block operations.
2468     Overlapping operations would otherwise be recognized by the S/390 hardware
2469     and would fall back to a slower implementation. Allowing overlapping
2470     operations would lead to slow code but not to wrong code. Therefore we are
2471     somewhat optimistic if we cannot prove that the memory blocks are
2472     overlapping.
2473     That's why we return false here although this may accept operations on
2474     overlapping memory areas.  */
2475  if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2476    return false;
2477
2478  delta = INTVAL (addr_delta);
2479
2480  if (delta == 0
2481      || (delta > 0 && delta < size)
2482      || (delta < 0 && -delta < size))
2483    return true;
2484
2485  return false;
2486}
2487
2488/* Check whether the address of memory reference MEM2 equals exactly
2489   the address of memory reference MEM1 plus DELTA.  Return true if
2490   we can prove this to be the case, false otherwise.  */
2491
2492bool
2493s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2494{
2495  rtx addr1, addr2, addr_delta;
2496
2497  if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2498    return false;
2499
2500  addr1 = XEXP (mem1, 0);
2501  addr2 = XEXP (mem2, 0);
2502
2503  addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2504  if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2505    return false;
2506
2507  return true;
2508}
2509
2510/* Expand logical operator CODE in mode MODE with operands OPERANDS.  */
2511
2512void
2513s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2514			      rtx *operands)
2515{
2516  machine_mode wmode = mode;
2517  rtx dst = operands[0];
2518  rtx src1 = operands[1];
2519  rtx src2 = operands[2];
2520  rtx op, clob, tem;
2521
2522  /* If we cannot handle the operation directly, use a temp register.  */
2523  if (!s390_logical_operator_ok_p (operands))
2524    dst = gen_reg_rtx (mode);
2525
2526  /* QImode and HImode patterns make sense only if we have a destination
2527     in memory.  Otherwise perform the operation in SImode.  */
2528  if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2529    wmode = SImode;
2530
2531  /* Widen operands if required.  */
2532  if (mode != wmode)
2533    {
2534      if (GET_CODE (dst) == SUBREG
2535	  && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2536	dst = tem;
2537      else if (REG_P (dst))
2538	dst = gen_rtx_SUBREG (wmode, dst, 0);
2539      else
2540        dst = gen_reg_rtx (wmode);
2541
2542      if (GET_CODE (src1) == SUBREG
2543	  && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2544	src1 = tem;
2545      else if (GET_MODE (src1) != VOIDmode)
2546	src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2547
2548      if (GET_CODE (src2) == SUBREG
2549	  && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2550	src2 = tem;
2551      else if (GET_MODE (src2) != VOIDmode)
2552	src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2553    }
2554
2555  /* Emit the instruction.  */
2556  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2557  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2558  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2559
2560  /* Fix up the destination if needed.  */
2561  if (dst != operands[0])
2562    emit_move_insn (operands[0], gen_lowpart (mode, dst));
2563}
2564
2565/* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR).  */
2566
2567bool
2568s390_logical_operator_ok_p (rtx *operands)
2569{
2570  /* If the destination operand is in memory, it needs to coincide
2571     with one of the source operands.  After reload, it has to be
2572     the first source operand.  */
2573  if (GET_CODE (operands[0]) == MEM)
2574    return rtx_equal_p (operands[0], operands[1])
2575	   || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2576
2577  return true;
2578}
2579
2580/* Narrow logical operation CODE of memory operand MEMOP with immediate
2581   operand IMMOP to switch from SS to SI type instructions.  */
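
/* For example, an AND of a HImode MEM with the immediate 0x00ff only
   affects the most significant byte (the AND default being -1): the
   operation is narrowed to a QImode AND of byte 0 with the immediate
   0x00, which fits an SI-type instruction such as NI.  */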
2582
2583void
2584s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2585{
2586  int def = code == AND ? -1 : 0;
2587  HOST_WIDE_INT mask;
2588  int part;
2589
2590  gcc_assert (GET_CODE (*memop) == MEM);
2591  gcc_assert (!MEM_VOLATILE_P (*memop));
2592
2593  mask = s390_extract_part (*immop, QImode, def);
2594  part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2595  gcc_assert (part >= 0);
2596
2597  *memop = adjust_address (*memop, QImode, part);
2598  *immop = gen_int_mode (mask, QImode);
2599}
2600
2601
2602/* How to allocate a 'struct machine_function'.  */
2603
2604static struct machine_function *
2605s390_init_machine_status (void)
2606{
2607  return ggc_cleared_alloc<machine_function> ();
2608}
2609
2610/* Map for smallest class containing reg regno.  */
2611
2612const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2613{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  0 */
2614  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  4 */
2615  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  8 */
2616  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /* 12 */
2617  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 16 */
2618  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 20 */
2619  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 24 */
2620  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 28 */
2621  ADDR_REGS,    CC_REGS,   ADDR_REGS, ADDR_REGS,  /* 32 */
2622  ACCESS_REGS,	ACCESS_REGS, VEC_REGS, VEC_REGS,  /* 36 */
2623  VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 40 */
2624  VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 44 */
2625  VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 48 */
2626  VEC_REGS, VEC_REGS                              /* 52 */
2627};
2628
2629/* Return attribute type of insn.  */
2630
2631static enum attr_type
2632s390_safe_attr_type (rtx_insn *insn)
2633{
2634  if (recog_memoized (insn) >= 0)
2635    return get_attr_type (insn);
2636  else
2637    return TYPE_NONE;
2638}
2639
2640/* Return true if DISP is a valid short displacement.  */
2641
2642static bool
2643s390_short_displacement (rtx disp)
2644{
2645  /* No displacement is OK.  */
2646  if (!disp)
2647    return true;
2648
2649  /* Without the long displacement facility we don't need to
     distinguish between long and short displacements.  */
2651  if (!TARGET_LONG_DISPLACEMENT)
2652    return true;
2653
2654  /* Integer displacement in range.  */
2655  if (GET_CODE (disp) == CONST_INT)
2656    return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2657
2658  /* GOT offset is not OK, the GOT can be large.  */
2659  if (GET_CODE (disp) == CONST
2660      && GET_CODE (XEXP (disp, 0)) == UNSPEC
2661      && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2662          || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2663    return false;
2664
2665  /* All other symbolic constants are literal pool references,
2666     which are OK as the literal pool must be small.  */
2667  if (GET_CODE (disp) == CONST)
2668    return true;
2669
2670  return false;
2671}
2672
2673/* Decompose a RTL expression ADDR for a memory address into
2674   its components, returned in OUT.
2675
2676   Returns false if ADDR is not a valid memory address, true
2677   otherwise.  If OUT is NULL, don't return the components,
2678   but check for validity only.
2679
2680   Note: Only addresses in canonical form are recognized.
2681   LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2682   canonical form so that they will be recognized.  */
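
/* For example, the canonical address
   (plus (plus (reg %r2) (reg %r3)) (const_int 16)) is decomposed into
   index %r2, base %r3 and displacement 16, subject to the pointer
   preference and the validation performed below.  */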
2683
2684static int
2685s390_decompose_address (rtx addr, struct s390_address *out)
2686{
2687  HOST_WIDE_INT offset = 0;
2688  rtx base = NULL_RTX;
2689  rtx indx = NULL_RTX;
2690  rtx disp = NULL_RTX;
2691  rtx orig_disp;
2692  bool pointer = false;
2693  bool base_ptr = false;
2694  bool indx_ptr = false;
2695  bool literal_pool = false;
2696
2697  /* We may need to substitute the literal pool base register into the address
2698     below.  However, at this point we do not know which register is going to
2699     be used as base, so we substitute the arg pointer register.  This is going
2700     to be treated as holding a pointer below -- it shouldn't be used for any
2701     other purpose.  */
2702  rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2703
2704  /* Decompose address into base + index + displacement.  */
2705
2706  if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2707    base = addr;
2708
2709  else if (GET_CODE (addr) == PLUS)
2710    {
2711      rtx op0 = XEXP (addr, 0);
2712      rtx op1 = XEXP (addr, 1);
2713      enum rtx_code code0 = GET_CODE (op0);
2714      enum rtx_code code1 = GET_CODE (op1);
2715
2716      if (code0 == REG || code0 == UNSPEC)
2717	{
2718	  if (code1 == REG || code1 == UNSPEC)
2719	    {
2720	      indx = op0;	/* index + base */
2721	      base = op1;
2722	    }
2723
2724	  else
2725	    {
2726	      base = op0;	/* base + displacement */
2727	      disp = op1;
2728	    }
2729	}
2730
2731      else if (code0 == PLUS)
2732	{
2733	  indx = XEXP (op0, 0);	/* index + base + disp */
2734	  base = XEXP (op0, 1);
2735	  disp = op1;
2736	}
2737
2738      else
2739	{
2740	  return false;
2741	}
2742    }
2743
2744  else
2745    disp = addr;		/* displacement */
2746
2747  /* Extract integer part of displacement.  */
2748  orig_disp = disp;
2749  if (disp)
2750    {
2751      if (GET_CODE (disp) == CONST_INT)
2752	{
2753	  offset = INTVAL (disp);
2754	  disp = NULL_RTX;
2755	}
2756      else if (GET_CODE (disp) == CONST
2757	       && GET_CODE (XEXP (disp, 0)) == PLUS
2758	       && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2759	{
2760	  offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2761	  disp = XEXP (XEXP (disp, 0), 0);
2762	}
2763    }
2764
2765  /* Strip off CONST here to avoid special case tests later.  */
2766  if (disp && GET_CODE (disp) == CONST)
2767    disp = XEXP (disp, 0);
2768
2769  /* We can convert literal pool addresses to
2770     displacements by basing them off the base register.  */
2771  if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2772    {
2773      /* Either base or index must be free to hold the base register.  */
2774      if (!base)
2775        base = fake_pool_base, literal_pool = true;
2776      else if (!indx)
2777        indx = fake_pool_base, literal_pool = true;
2778      else
2779        return false;
2780
2781      /* Mark up the displacement.  */
2782      disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2783			     UNSPEC_LTREL_OFFSET);
2784    }
2785
2786  /* Validate base register.  */
2787  if (base)
2788    {
2789      if (GET_CODE (base) == UNSPEC)
2790	switch (XINT (base, 1))
2791	  {
2792	  case UNSPEC_LTREF:
2793	    if (!disp)
2794	      disp = gen_rtx_UNSPEC (Pmode,
2795				     gen_rtvec (1, XVECEXP (base, 0, 0)),
2796				     UNSPEC_LTREL_OFFSET);
2797	    else
2798	      return false;
2799
2800	    base = XVECEXP (base, 0, 1);
2801	    break;
2802
2803	  case UNSPEC_LTREL_BASE:
2804	    if (XVECLEN (base, 0) == 1)
2805	      base = fake_pool_base, literal_pool = true;
2806	    else
2807	      base = XVECEXP (base, 0, 1);
2808	    break;
2809
2810	  default:
2811	    return false;
2812	  }
2813
2814      if (!REG_P (base)
2815	  || (GET_MODE (base) != SImode
2816	      && GET_MODE (base) != Pmode))
2817	return false;
2818
2819      if (REGNO (base) == STACK_POINTER_REGNUM
2820	  || REGNO (base) == FRAME_POINTER_REGNUM
2821	  || ((reload_completed || reload_in_progress)
2822	      && frame_pointer_needed
2823	      && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2824	  || REGNO (base) == ARG_POINTER_REGNUM
2825          || (flag_pic
2826              && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2827        pointer = base_ptr = true;
2828
2829      if ((reload_completed || reload_in_progress)
2830	  && base == cfun->machine->base_reg)
2831        pointer = base_ptr = literal_pool = true;
2832    }
2833
2834  /* Validate index register.  */
2835  if (indx)
2836    {
2837      if (GET_CODE (indx) == UNSPEC)
2838	switch (XINT (indx, 1))
2839	  {
2840	  case UNSPEC_LTREF:
2841	    if (!disp)
2842	      disp = gen_rtx_UNSPEC (Pmode,
2843				     gen_rtvec (1, XVECEXP (indx, 0, 0)),
2844				     UNSPEC_LTREL_OFFSET);
2845	    else
2846	      return false;
2847
2848	    indx = XVECEXP (indx, 0, 1);
2849	    break;
2850
2851	  case UNSPEC_LTREL_BASE:
2852	    if (XVECLEN (indx, 0) == 1)
2853	      indx = fake_pool_base, literal_pool = true;
2854	    else
2855	      indx = XVECEXP (indx, 0, 1);
2856	    break;
2857
2858	  default:
2859	    return false;
2860	  }
2861
2862      if (!REG_P (indx)
2863	  || (GET_MODE (indx) != SImode
2864	      && GET_MODE (indx) != Pmode))
2865	return false;
2866
2867      if (REGNO (indx) == STACK_POINTER_REGNUM
2868	  || REGNO (indx) == FRAME_POINTER_REGNUM
2869	  || ((reload_completed || reload_in_progress)
2870	      && frame_pointer_needed
2871	      && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2872	  || REGNO (indx) == ARG_POINTER_REGNUM
2873          || (flag_pic
2874              && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2875        pointer = indx_ptr = true;
2876
2877      if ((reload_completed || reload_in_progress)
2878	  && indx == cfun->machine->base_reg)
2879        pointer = indx_ptr = literal_pool = true;
2880    }
2881
2882  /* Prefer to use pointer as base, not index.  */
2883  if (base && indx && !base_ptr
2884      && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2885    {
2886      rtx tmp = base;
2887      base = indx;
2888      indx = tmp;
2889    }
2890
2891  /* Validate displacement.  */
2892  if (!disp)
2893    {
2894      /* If virtual registers are involved, the displacement will change later
2895	 anyway as the virtual registers get eliminated.  This could make a
2896	 valid displacement invalid, but it is more likely to make an invalid
2897	 displacement valid, because we sometimes access the register save area
2898	 via negative offsets to one of those registers.
2899	 Thus we don't check the displacement for validity here.  If after
2900	 elimination the displacement turns out to be invalid after all,
2901	 this is fixed up by reload in any case.  */
      /* LRA always keeps displacements up to date and we need to
	 know that the displacement is right during all of LRA, not
	 only at the final elimination.  */
2905      if (lra_in_progress
2906	  || (base != arg_pointer_rtx
2907	      && indx != arg_pointer_rtx
2908	      && base != return_address_pointer_rtx
2909	      && indx != return_address_pointer_rtx
2910	      && base != frame_pointer_rtx
2911	      && indx != frame_pointer_rtx
2912	      && base != virtual_stack_vars_rtx
2913	      && indx != virtual_stack_vars_rtx))
2914	if (!DISP_IN_RANGE (offset))
2915	  return false;
2916    }
2917  else
2918    {
2919      /* All the special cases are pointers.  */
2920      pointer = true;
2921
2922      /* In the small-PIC case, the linker converts @GOT
2923         and @GOTNTPOFF offsets to possible displacements.  */
2924      if (GET_CODE (disp) == UNSPEC
2925          && (XINT (disp, 1) == UNSPEC_GOT
2926	      || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2927	  && flag_pic == 1)
2928        {
2929	  ;
2930        }
2931
2932      /* Accept pool label offsets.  */
2933      else if (GET_CODE (disp) == UNSPEC
2934	       && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2935	;
2936
2937      /* Accept literal pool references.  */
2938      else if (GET_CODE (disp) == UNSPEC
2939	       && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
2940        {
2941	  /* In case CSE pulled a non literal pool reference out of
2942	     the pool we have to reject the address.  This is
2943	     especially important when loading the GOT pointer on non
2944	     zarch CPUs.  In this case the literal pool contains an lt
2945	     relative offset to the _GLOBAL_OFFSET_TABLE_ label which
2946	     will most likely exceed the displacement.  */
2947	  if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2948	      || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
2949	    return false;
2950
2951	  orig_disp = gen_rtx_CONST (Pmode, disp);
2952	  if (offset)
2953	    {
2954	      /* If we have an offset, make sure it does not
2955		 exceed the size of the constant pool entry.  */
2956	      rtx sym = XVECEXP (disp, 0, 0);
2957	      if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
2958		return false;
2959
2960              orig_disp = plus_constant (Pmode, orig_disp, offset);
2961	    }
2962        }
2963
2964      else
2965	return false;
2966    }
2967
2968  if (!base && !indx)
2969    pointer = true;
2970
2971  if (out)
2972    {
2973      out->base = base;
2974      out->indx = indx;
2975      out->disp = orig_disp;
2976      out->pointer = pointer;
2977      out->literal_pool = literal_pool;
2978    }
2979
2980  return true;
2981}
2982
2983/* Decompose a RTL expression OP for a shift count into its components,
2984   and return the base register in BASE and the offset in OFFSET.
2985
2986   Return true if OP is a valid shift count, false if not.  */
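
/* For example, (plus (reg %r1) (const_int 5)) decomposes into
   *BASE = %r1 and *OFFSET = 5, while a plain CONST_INT yields a NULL
   base and the constant as offset.  */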
2987
2988bool
2989s390_decompose_shift_count (rtx op, rtx *base, HOST_WIDE_INT *offset)
2990{
2991  HOST_WIDE_INT off = 0;
2992
2993  /* We can have an integer constant, an address register,
2994     or a sum of the two.  */
2995  if (GET_CODE (op) == CONST_INT)
2996    {
2997      off = INTVAL (op);
2998      op = NULL_RTX;
2999    }
3000  if (op && GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT)
3001    {
3002      off = INTVAL (XEXP (op, 1));
3003      op = XEXP (op, 0);
3004    }
3005  while (op && GET_CODE (op) == SUBREG)
3006    op = SUBREG_REG (op);
3007
3008  if (op && GET_CODE (op) != REG)
3009    return false;
3010
3011  if (offset)
3012    *offset = off;
3013  if (base)
3014    *base = op;
3015
3016   return true;
3017}
3018
3019
/* Return true if OP is a memory operand whose address does not
   contain an index register.  */
3021
3022bool
3023s390_legitimate_address_without_index_p (rtx op)
3024{
3025  struct s390_address addr;
3026
3027  if (!s390_decompose_address (XEXP (op, 0), &addr))
3028    return false;
3029  if (addr.indx)
3030    return false;
3031
3032  return true;
3033}
3034
3035
3036/* Return TRUE if ADDR is an operand valid for a load/store relative
3037   instruction.  Be aware that the alignment of the operand needs to
3038   be checked separately.
3039   Valid addresses are single references or a sum of a reference and a
3040   constant integer. Return these parts in SYMREF and ADDEND.  You can
   pass NULL in SYMREF and/or ADDEND if you are not interested in these
3042   values.  Literal pool references are *not* considered symbol
3043   references.  */
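
/* For example, (const (plus (symbol_ref "x") (const_int 8))) is
   accepted with *SYMREF = (symbol_ref "x") and *ADDEND = 8, provided
   "x" is not a literal pool entry.  */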
3044
3045static bool
3046s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3047{
3048  HOST_WIDE_INT tmpaddend = 0;
3049
3050  if (GET_CODE (addr) == CONST)
3051    addr = XEXP (addr, 0);
3052
3053  if (GET_CODE (addr) == PLUS)
3054    {
3055      if (!CONST_INT_P (XEXP (addr, 1)))
3056	return false;
3057
3058      tmpaddend = INTVAL (XEXP (addr, 1));
3059      addr = XEXP (addr, 0);
3060    }
3061
3062  if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
3063      || (GET_CODE (addr) == UNSPEC
3064	  && (XINT (addr, 1) == UNSPEC_GOTENT
3065	      || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3066    {
3067      if (symref)
3068	*symref = addr;
3069      if (addend)
3070	*addend = tmpaddend;
3071
3072      return true;
3073    }
3074  return false;
3075}
3076
3077/* Return true if the address in OP is valid for constraint letter C
   if wrapped in a MEM rtx.  Set LIT_POOL_OK to true if literal
   pool MEMs should be accepted.  Only the Q, R, S, T constraint
3080   letters are allowed for C.  */
3081
3082static int
3083s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3084{
3085  struct s390_address addr;
3086  bool decomposed = false;
3087
3088  /* This check makes sure that no symbolic address (except literal
3089     pool references) are accepted by the R or T constraints.  */
3090  if (s390_loadrelative_operand_p (op, NULL, NULL))
3091    return 0;
3092
3093  /* Ensure literal pool references are only accepted if LIT_POOL_OK.  */
3094  if (!lit_pool_ok)
3095    {
3096      if (!s390_decompose_address (op, &addr))
3097	return 0;
3098      if (addr.literal_pool)
3099	return 0;
3100      decomposed = true;
3101    }
3102
3103  switch (c)
3104    {
3105    case 'Q': /* no index short displacement */
3106      if (!decomposed && !s390_decompose_address (op, &addr))
3107	return 0;
3108      if (addr.indx)
3109	return 0;
3110      if (!s390_short_displacement (addr.disp))
3111	return 0;
3112      break;
3113
3114    case 'R': /* with index short displacement */
3115      if (TARGET_LONG_DISPLACEMENT)
3116	{
3117	  if (!decomposed && !s390_decompose_address (op, &addr))
3118	    return 0;
3119	  if (!s390_short_displacement (addr.disp))
3120	    return 0;
3121	}
3122      /* Any invalid address here will be fixed up by reload,
3123	 so accept it for the most generic constraint.  */
3124      break;
3125
3126    case 'S': /* no index long displacement */
3127      if (!TARGET_LONG_DISPLACEMENT)
3128	return 0;
3129      if (!decomposed && !s390_decompose_address (op, &addr))
3130	return 0;
3131      if (addr.indx)
3132	return 0;
3133      if (s390_short_displacement (addr.disp))
3134	return 0;
3135      break;
3136
3137    case 'T': /* with index long displacement */
3138      if (!TARGET_LONG_DISPLACEMENT)
3139	return 0;
3140      /* Any invalid address here will be fixed up by reload,
3141	 so accept it for the most generic constraint.  */
3142      if ((decomposed || s390_decompose_address (op, &addr))
3143	  && s390_short_displacement (addr.disp))
3144	return 0;
3145      break;
3146    default:
3147      return 0;
3148    }
3149  return 1;
3150}
3151
3152
3153/* Evaluates constraint strings described by the regular expression
3154   ([A|B|Z](Q|R|S|T))|U|W|Y and returns 1 if OP is a valid operand for
   the constraint given in STR, and 0 otherwise.  */
3156
3157int
3158s390_mem_constraint (const char *str, rtx op)
3159{
3160  char c = str[0];
3161
3162  switch (c)
3163    {
3164    case 'A':
3165      /* Check for offsettable variants of memory constraints.  */
3166      if (!MEM_P (op) || MEM_VOLATILE_P (op))
3167	return 0;
3168      if ((reload_completed || reload_in_progress)
3169	  ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3170	return 0;
3171      return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3172    case 'B':
3173      /* Check for non-literal-pool variants of memory constraints.  */
3174      if (!MEM_P (op))
3175	return 0;
3176      return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3177    case 'Q':
3178    case 'R':
3179    case 'S':
3180    case 'T':
3181      if (GET_CODE (op) != MEM)
3182	return 0;
3183      return s390_check_qrst_address (c, XEXP (op, 0), true);
3184    case 'U':
3185      return (s390_check_qrst_address ('Q', op, true)
3186	      || s390_check_qrst_address ('R', op, true));
3187    case 'W':
3188      return (s390_check_qrst_address ('S', op, true)
3189	      || s390_check_qrst_address ('T', op, true));
3190    case 'Y':
3191      /* Simply check for the basic form of a shift count.  Reload will
3192	 take care of making sure we have a proper base register.  */
3193      if (!s390_decompose_shift_count (op, NULL, NULL))
3194	return 0;
3195      break;
3196    case 'Z':
3197      return s390_check_qrst_address (str[1], op, true);
3198    default:
3199      return 0;
3200    }
3201  return 1;
3202}
3203
3204
3205/* Evaluates constraint strings starting with letter O.  Input
   parameter C is the letter following the "O" in the constraint
   string.  Returns 1 if VALUE meets the respective constraint and 0
3208   otherwise.  */
3209
3210int
3211s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3212{
3213  if (!TARGET_EXTIMM)
3214    return 0;
3215
3216  switch (c)
3217    {
3218    case 's':
3219      return trunc_int_for_mode (value, SImode) == value;
3220
3221    case 'p':
3222      return value == 0
3223	|| s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3224
3225    case 'n':
3226      return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3227
3228    default:
3229      gcc_unreachable ();
3230    }
3231}
3232
3233
3234/* Evaluates constraint strings starting with letter N.  Parameter STR
3235   contains the letters following letter "N" in the constraint string.
3236   Returns true if VALUE matches the constraint.  */
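
/* As a hypothetical example, a constraint suffix of "2QS0" would ask
   for an SImode value whose only QImode part different from zero is
   part 2; the value 0x0000ff00 satisfies it since s390_single_part
   returns 2 for it.  */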
3237
3238int
3239s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3240{
3241  machine_mode mode, part_mode;
3242  int def;
3243  int part, part_goal;
3244
3245
3246  if (str[0] == 'x')
3247    part_goal = -1;
3248  else
3249    part_goal = str[0] - '0';
3250
3251  switch (str[1])
3252    {
3253    case 'Q':
3254      part_mode = QImode;
3255      break;
3256    case 'H':
3257      part_mode = HImode;
3258      break;
3259    case 'S':
3260      part_mode = SImode;
3261      break;
3262    default:
3263      return 0;
3264    }
3265
3266  switch (str[2])
3267    {
3268    case 'H':
3269      mode = HImode;
3270      break;
3271    case 'S':
3272      mode = SImode;
3273      break;
3274    case 'D':
3275      mode = DImode;
3276      break;
3277    default:
3278      return 0;
3279    }
3280
3281  switch (str[3])
3282    {
3283    case '0':
3284      def = 0;
3285      break;
3286    case 'F':
3287      def = -1;
3288      break;
3289    default:
3290      return 0;
3291    }
3292
3293  if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3294    return 0;
3295
3296  part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3297  if (part < 0)
3298    return 0;
3299  if (part_goal != -1 && part_goal != part)
3300    return 0;
3301
3302  return 1;
3303}
3304
3305
3306/* Returns true if the input parameter VALUE is a float zero.  */
3307
3308int
3309s390_float_const_zero_p (rtx value)
3310{
3311  return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3312	  && value == CONST0_RTX (GET_MODE (value)));
3313}
3314
3315/* Implement TARGET_REGISTER_MOVE_COST.  */
3316
3317static int
3318s390_register_move_cost (machine_mode mode,
3319                         reg_class_t from, reg_class_t to)
3320{
3321  /* On s390, copy between fprs and gprs is expensive.  */
3322
3323  /* It becomes somewhat faster having ldgr/lgdr.  */
3324  if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3325    {
3326      /* ldgr is single cycle. */
3327      if (reg_classes_intersect_p (from, GENERAL_REGS)
3328	  && reg_classes_intersect_p (to, FP_REGS))
3329	return 1;
3330      /* lgdr needs 3 cycles. */
3331      if (reg_classes_intersect_p (to, GENERAL_REGS)
3332	  && reg_classes_intersect_p (from, FP_REGS))
3333	return 3;
3334    }
3335
3336  /* Otherwise copying is done via memory.  */
3337  if ((reg_classes_intersect_p (from, GENERAL_REGS)
3338       && reg_classes_intersect_p (to, FP_REGS))
3339      || (reg_classes_intersect_p (from, FP_REGS)
3340	  && reg_classes_intersect_p (to, GENERAL_REGS)))
3341    return 10;
3342
3343  return 1;
3344}
3345
3346/* Implement TARGET_MEMORY_MOVE_COST.  */
3347
3348static int
3349s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3350		       reg_class_t rclass ATTRIBUTE_UNUSED,
3351		       bool in ATTRIBUTE_UNUSED)
3352{
3353  return 2;
3354}
3355
3356/* Compute a (partial) cost for rtx X.  Return true if the complete
3357   cost has been computed, and false if subexpressions should be
3358   scanned.  In either case, *TOTAL contains the cost result.
3359   CODE contains GET_CODE (x), OUTER_CODE contains the code
3360   of the superexpression of x.  */
3361
3362static bool
3363s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3364		int *total, bool speed ATTRIBUTE_UNUSED)
3365{
3366  switch (code)
3367    {
3368    case CONST:
3369    case CONST_INT:
3370    case LABEL_REF:
3371    case SYMBOL_REF:
3372    case CONST_DOUBLE:
3373    case MEM:
3374      *total = 0;
3375      return true;
3376
3377    case ASHIFT:
3378    case ASHIFTRT:
3379    case LSHIFTRT:
3380    case ROTATE:
3381    case ROTATERT:
3382    case AND:
3383    case IOR:
3384    case XOR:
3385    case NEG:
3386    case NOT:
3387      *total = COSTS_N_INSNS (1);
3388      return false;
3389
3390    case PLUS:
3391    case MINUS:
3392      *total = COSTS_N_INSNS (1);
3393      return false;
3394
3395    case MULT:
3396      switch (GET_MODE (x))
3397	{
3398	case SImode:
3399	  {
3400	    rtx left = XEXP (x, 0);
3401	    rtx right = XEXP (x, 1);
3402	    if (GET_CODE (right) == CONST_INT
3403		&& CONST_OK_FOR_K (INTVAL (right)))
3404	      *total = s390_cost->mhi;
3405	    else if (GET_CODE (left) == SIGN_EXTEND)
3406	      *total = s390_cost->mh;
3407	    else
3408	      *total = s390_cost->ms;  /* msr, ms, msy */
3409	    break;
3410	  }
3411	case DImode:
3412	  {
3413	    rtx left = XEXP (x, 0);
3414	    rtx right = XEXP (x, 1);
3415	    if (TARGET_ZARCH)
3416	      {
3417		if (GET_CODE (right) == CONST_INT
3418		    && CONST_OK_FOR_K (INTVAL (right)))
3419		  *total = s390_cost->mghi;
3420		else if (GET_CODE (left) == SIGN_EXTEND)
3421		  *total = s390_cost->msgf;
3422		else
3423		  *total = s390_cost->msg;  /* msgr, msg */
3424	      }
3425	    else /* TARGET_31BIT */
3426	      {
3427		if (GET_CODE (left) == SIGN_EXTEND
3428		    && GET_CODE (right) == SIGN_EXTEND)
3429		  /* mulsidi case: mr, m */
3430		  *total = s390_cost->m;
3431		else if (GET_CODE (left) == ZERO_EXTEND
3432			 && GET_CODE (right) == ZERO_EXTEND
3433			 && TARGET_CPU_ZARCH)
3434		  /* umulsidi case: ml, mlr */
3435		  *total = s390_cost->ml;
3436		else
3437		  /* Complex calculation is required.  */
3438		  *total = COSTS_N_INSNS (40);
3439	      }
3440	    break;
3441	  }
3442	case SFmode:
3443	case DFmode:
3444	  *total = s390_cost->mult_df;
3445	  break;
3446	case TFmode:
3447	  *total = s390_cost->mxbr;
3448	  break;
3449	default:
3450	  return false;
3451	}
3452      return false;
3453
3454    case FMA:
3455      switch (GET_MODE (x))
3456	{
3457	case DFmode:
3458	  *total = s390_cost->madbr;
3459	  break;
3460	case SFmode:
3461	  *total = s390_cost->maebr;
3462	  break;
3463	default:
3464	  return false;
3465	}
3466      /* Negate in the third argument is free: FMSUB.  */
3467      if (GET_CODE (XEXP (x, 2)) == NEG)
3468	{
3469	  *total += (rtx_cost (XEXP (x, 0), FMA, 0, speed)
3470		     + rtx_cost (XEXP (x, 1), FMA, 1, speed)
3471		     + rtx_cost (XEXP (XEXP (x, 2), 0), FMA, 2, speed));
3472	  return true;
3473	}
3474      return false;
3475
3476    case UDIV:
3477    case UMOD:
3478      if (GET_MODE (x) == TImode) 	       /* 128 bit division */
3479	*total = s390_cost->dlgr;
3480      else if (GET_MODE (x) == DImode)
3481	{
3482	  rtx right = XEXP (x, 1);
3483	  if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3484	    *total = s390_cost->dlr;
3485	  else 	                               /* 64 by 64 bit division */
3486	    *total = s390_cost->dlgr;
3487	}
3488      else if (GET_MODE (x) == SImode)         /* 32 bit division */
3489	*total = s390_cost->dlr;
3490      return false;
3491
3492    case DIV:
3493    case MOD:
3494      if (GET_MODE (x) == DImode)
3495	{
3496	  rtx right = XEXP (x, 1);
3497	  if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3498	    if (TARGET_ZARCH)
3499	      *total = s390_cost->dsgfr;
3500	    else
3501	      *total = s390_cost->dr;
3502	  else 	                               /* 64 by 64 bit division */
3503	    *total = s390_cost->dsgr;
3504	}
3505      else if (GET_MODE (x) == SImode)         /* 32 bit division */
3506	*total = s390_cost->dlr;
3507      else if (GET_MODE (x) == SFmode)
3508	{
3509	  *total = s390_cost->debr;
3510	}
3511      else if (GET_MODE (x) == DFmode)
3512	{
3513	  *total = s390_cost->ddbr;
3514	}
3515      else if (GET_MODE (x) == TFmode)
3516	{
3517	  *total = s390_cost->dxbr;
3518	}
3519      return false;
3520
3521    case SQRT:
3522      if (GET_MODE (x) == SFmode)
3523	*total = s390_cost->sqebr;
3524      else if (GET_MODE (x) == DFmode)
3525	*total = s390_cost->sqdbr;
3526      else /* TFmode */
3527	*total = s390_cost->sqxbr;
3528      return false;
3529
3530    case SIGN_EXTEND:
3531    case ZERO_EXTEND:
3532      if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3533	  || outer_code == PLUS || outer_code == MINUS
3534	  || outer_code == COMPARE)
3535	*total = 0;
3536      return false;
3537
3538    case COMPARE:
3539      *total = COSTS_N_INSNS (1);
3540      if (GET_CODE (XEXP (x, 0)) == AND
3541	  && GET_CODE (XEXP (x, 1)) == CONST_INT
3542	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3543	{
3544	  rtx op0 = XEXP (XEXP (x, 0), 0);
3545	  rtx op1 = XEXP (XEXP (x, 0), 1);
3546	  rtx op2 = XEXP (x, 1);
3547
3548	  if (memory_operand (op0, GET_MODE (op0))
3549	      && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3550	    return true;
3551	  if (register_operand (op0, GET_MODE (op0))
3552	      && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3553	    return true;
3554	}
3555      return false;
3556
3557    default:
3558      return false;
3559    }
3560}
3561
3562/* Return the cost of an address rtx ADDR.  */
3563
3564static int
3565s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3566		   addr_space_t as ATTRIBUTE_UNUSED,
3567		   bool speed ATTRIBUTE_UNUSED)
3568{
3569  struct s390_address ad;
3570  if (!s390_decompose_address (addr, &ad))
3571    return 1000;
3572
  return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3574}
3575
3576/* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3577   otherwise return 0.  */
3578
3579int
3580tls_symbolic_operand (rtx op)
3581{
3582  if (GET_CODE (op) != SYMBOL_REF)
3583    return 0;
3584  return SYMBOL_REF_TLS_MODEL (op);
3585}
3586
3587/* Split DImode access register reference REG (on 64-bit) into its constituent
3588   low and high parts, and store them into LO and HI.  Note that gen_lowpart/
3589   gen_highpart cannot be used as they assume all registers are word-sized,
3590   while our access registers have only half that size.  */
3591
3592void
3593s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3594{
3595  gcc_assert (TARGET_64BIT);
3596  gcc_assert (ACCESS_REG_P (reg));
3597  gcc_assert (GET_MODE (reg) == DImode);
3598  gcc_assert (!(REGNO (reg) & 1));
3599
3600  *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3601  *hi = gen_rtx_REG (SImode, REGNO (reg));
3602}
3603
/* Return true if OP contains a symbol reference.  */
3605
3606bool
3607symbolic_reference_mentioned_p (rtx op)
3608{
3609  const char *fmt;
3610  int i;
3611
3612  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3613    return 1;
3614
3615  fmt = GET_RTX_FORMAT (GET_CODE (op));
3616  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3617    {
3618      if (fmt[i] == 'E')
3619	{
3620	  int j;
3621
3622	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3623	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3624	      return 1;
3625	}
3626
3627      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3628	return 1;
3629    }
3630
3631  return 0;
3632}
3633
3634/* Return true if OP contains a reference to a thread-local symbol.  */
3635
3636bool
3637tls_symbolic_reference_mentioned_p (rtx op)
3638{
3639  const char *fmt;
3640  int i;
3641
3642  if (GET_CODE (op) == SYMBOL_REF)
3643    return tls_symbolic_operand (op);
3644
3645  fmt = GET_RTX_FORMAT (GET_CODE (op));
3646  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3647    {
3648      if (fmt[i] == 'E')
3649	{
3650	  int j;
3651
3652	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3653	    if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3654	      return true;
3655	}
3656
3657      else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3658	return true;
3659    }
3660
3661  return false;
3662}
3663
3664
3665/* Return true if OP is a legitimate general operand when
3666   generating PIC code.  It is given that flag_pic is on
3667   and that OP satisfies CONSTANT_P or is a CONST_DOUBLE.  */
3668
3669int
3670legitimate_pic_operand_p (rtx op)
3671{
3672  /* Accept all non-symbolic constants.  */
3673  if (!SYMBOLIC_CONST (op))
3674    return 1;
3675
3676  /* Reject everything else; must be handled
3677     via emit_symbolic_move.  */
3678  return 0;
3679}
3680
3681/* Returns true if the constant value OP is a legitimate general operand.
3682   It is given that OP satisfies CONSTANT_P or is a CONST_DOUBLE.  */
3683
3684static bool
3685s390_legitimate_constant_p (machine_mode mode, rtx op)
3686{
3687  if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3688    {
3689      if (GET_MODE_SIZE (mode) != 16)
3690	return 0;
3691
3692      if (!satisfies_constraint_j00 (op)
3693	  && !satisfies_constraint_jm1 (op)
3694	  && !satisfies_constraint_jKK (op)
3695	  && !satisfies_constraint_jxx (op)
3696	  && !satisfies_constraint_jyy (op))
3697	return 0;
3698    }
3699
3700  /* Accept all non-symbolic constants.  */
3701  if (!SYMBOLIC_CONST (op))
3702    return 1;
3703
3704  /* Accept immediate LARL operands.  */
3705  if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3706    return 1;
3707
3708  /* Thread-local symbols are never legal constants.  This is
3709     so that emit_call knows that computing such addresses
3710     might require a function call.  */
3711  if (TLS_SYMBOLIC_CONST (op))
3712    return 0;
3713
3714  /* In the PIC case, symbolic constants must *not* be
3715     forced into the literal pool.  We accept them here,
3716     so that they will be handled by emit_symbolic_move.  */
3717  if (flag_pic)
3718    return 1;
3719
3720  /* All remaining non-PIC symbolic constants are
3721     forced into the literal pool.  */
3722  return 0;
3723}
3724
3725/* Determine if it's legal to put X into the constant pool.  This
3726   is not possible if X contains the address of a symbol that is
3727   not constant (TLS) or not known at final link time (PIC).  */
3728
3729static bool
3730s390_cannot_force_const_mem (machine_mode mode, rtx x)
3731{
3732  switch (GET_CODE (x))
3733    {
3734    case CONST_INT:
3735    case CONST_DOUBLE:
3736    case CONST_VECTOR:
3737      /* Accept all non-symbolic constants.  */
3738      return false;
3739
3740    case LABEL_REF:
3741      /* Labels are OK iff we are non-PIC.  */
3742      return flag_pic != 0;
3743
3744    case SYMBOL_REF:
3745      /* 'Naked' TLS symbol references are never OK,
3746         non-TLS symbols are OK iff we are non-PIC.  */
3747      if (tls_symbolic_operand (x))
3748	return true;
3749      else
3750	return flag_pic != 0;
3751
3752    case CONST:
3753      return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3754    case PLUS:
3755    case MINUS:
3756      return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3757	     || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3758
3759    case UNSPEC:
3760      switch (XINT (x, 1))
3761	{
3762	/* Only lt-relative or GOT-relative UNSPECs are OK.  */
3763	case UNSPEC_LTREL_OFFSET:
3764	case UNSPEC_GOT:
3765	case UNSPEC_GOTOFF:
3766	case UNSPEC_PLTOFF:
3767	case UNSPEC_TLSGD:
3768	case UNSPEC_TLSLDM:
3769	case UNSPEC_NTPOFF:
3770	case UNSPEC_DTPOFF:
3771	case UNSPEC_GOTNTPOFF:
3772	case UNSPEC_INDNTPOFF:
3773	  return false;
3774
	/* If the literal pool shares the code section, execute
	   template placeholders may be put into the pool as well.  */
3777	case UNSPEC_INSN:
3778	  return TARGET_CPU_ZARCH;
3779
3780	default:
3781	  return true;
3782	}
3783      break;
3784
3785    default:
3786      gcc_unreachable ();
3787    }
3788}
3789
3790/* Returns true if the constant value OP is a legitimate general
3791   operand during and after reload.  The difference to
3792   legitimate_constant_p is that this function will not accept
3793   a constant that would need to be forced to the literal pool
3794   before it can be used as operand.
3795   This function accepts all constants which can be loaded directly
3796   into a GPR.  */
3797
3798bool
3799legitimate_reload_constant_p (rtx op)
3800{
3801  /* Accept la(y) operands.  */
3802  if (GET_CODE (op) == CONST_INT
3803      && DISP_IN_RANGE (INTVAL (op)))
3804    return true;
3805
3806  /* Accept l(g)hi/l(g)fi operands.  */
3807  if (GET_CODE (op) == CONST_INT
3808      && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
3809    return true;
3810
3811  /* Accept lliXX operands.  */
3812  if (TARGET_ZARCH
3813      && GET_CODE (op) == CONST_INT
3814      && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3815      && s390_single_part (op, word_mode, HImode, 0) >= 0)
    return true;
3817
3818  if (TARGET_EXTIMM
3819      && GET_CODE (op) == CONST_INT
3820      && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3821      && s390_single_part (op, word_mode, SImode, 0) >= 0)
3822    return true;
3823
3824  /* Accept larl operands.  */
3825  if (TARGET_CPU_ZARCH
3826      && larl_operand (op, VOIDmode))
3827    return true;
3828
3829  /* Accept floating-point zero operands that fit into a single GPR.  */
3830  if (GET_CODE (op) == CONST_DOUBLE
3831      && s390_float_const_zero_p (op)
3832      && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
3833    return true;
3834
3835  /* Accept double-word operands that can be split.  */
3836  if (GET_CODE (op) == CONST_INT
3837      && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op))
3838    {
3839      machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
3840      rtx hi = operand_subword (op, 0, 0, dword_mode);
3841      rtx lo = operand_subword (op, 1, 0, dword_mode);
3842      return legitimate_reload_constant_p (hi)
3843	     && legitimate_reload_constant_p (lo);
3844    }
3845
3846  /* Everything else cannot be handled without reload.  */
3847  return false;
3848}
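
/* A worked example for the double-word case above (illustrative only):
   with 31-bit word_mode (SImode) the DImode constant 0x100000001 does
   not fit a single word, so it is split via operand_subword into the
   most significant word 0x1 and the least significant word 0x1 (s390 is
   big-endian).  Both parts satisfy CONST_OK_FOR_K, so the constant is
   accepted.  */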
3849
3850/* Returns true if the constant value OP is a legitimate fp operand
3851   during and after reload.
3852   This function accepts all constants which can be loaded directly
3853   into an FPR.  */
3854
3855static bool
3856legitimate_reload_fp_constant_p (rtx op)
3857{
3858  /* Accept floating-point zero operands if the load zero instruction
3859     can be used.  Prior to z196 the load fp zero instruction caused a
     performance penalty if the result is used as a BFP number.  */
3861  if (TARGET_Z196
3862      && GET_CODE (op) == CONST_DOUBLE
3863      && s390_float_const_zero_p (op))
3864    return true;
3865
3866  return false;
3867}
3868
3869/* Returns true if the constant value OP is a legitimate vector operand
3870   during and after reload.
3871   This function accepts all constants which can be loaded directly
   into a VR.  */
3873
3874static bool
3875legitimate_reload_vector_constant_p (rtx op)
3876{
3877  if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
3878      && (satisfies_constraint_j00 (op)
3879	  || satisfies_constraint_jm1 (op)
3880	  || satisfies_constraint_jKK (op)
3881	  || satisfies_constraint_jxx (op)
3882	  || satisfies_constraint_jyy (op)))
3883    return true;
3884
3885  return false;
3886}
3887
3888/* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
3889   return the class of reg to actually use.  */
3890
3891static reg_class_t
3892s390_preferred_reload_class (rtx op, reg_class_t rclass)
3893{
3894  switch (GET_CODE (op))
3895    {
3896      /* Constants we cannot reload into general registers
3897	 must be forced into the literal pool.  */
3898      case CONST_VECTOR:
3899      case CONST_DOUBLE:
3900      case CONST_INT:
3901	if (reg_class_subset_p (GENERAL_REGS, rclass)
3902	    && legitimate_reload_constant_p (op))
3903	  return GENERAL_REGS;
3904	else if (reg_class_subset_p (ADDR_REGS, rclass)
3905		 && legitimate_reload_constant_p (op))
3906	  return ADDR_REGS;
3907	else if (reg_class_subset_p (FP_REGS, rclass)
3908		 && legitimate_reload_fp_constant_p (op))
3909	  return FP_REGS;
3910	else if (reg_class_subset_p (VEC_REGS, rclass)
3911		 && legitimate_reload_vector_constant_p (op))
3912	  return VEC_REGS;
3913
3914	return NO_REGS;
3915
3916      /* If a symbolic constant or a PLUS is reloaded,
3917	 it is most likely being used as an address, so
3918	 prefer ADDR_REGS.  If 'class' is not a superset
3919	 of ADDR_REGS, e.g. FP_REGS, reject this reload.  */
3920      case CONST:
3921	/* Symrefs cannot be pushed into the literal pool with -fPIC
3922	   so we *MUST NOT* return NO_REGS for these cases
3923	   (s390_cannot_force_const_mem will return true).
3924
3925	   On the other hand we MUST return NO_REGS for symrefs with
3926	   invalid addend which might have been pushed to the literal
3927	   pool (no -fPIC).  Usually we would expect them to be
3928	   handled via secondary reload but this does not happen if
3929	   they are used as literal pool slot replacement in reload
3930	   inheritance (see emit_input_reload_insns).  */
3931	if (TARGET_CPU_ZARCH
3932	    && GET_CODE (XEXP (op, 0)) == PLUS
3933	    && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
3934	    && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
3935	  {
3936	    if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
3937	      return ADDR_REGS;
3938	    else
3939	      return NO_REGS;
3940	  }
3941	/* fallthrough */
3942      case LABEL_REF:
3943      case SYMBOL_REF:
3944	if (!legitimate_reload_constant_p (op))
3945          return NO_REGS;
3946	/* fallthrough */
3947      case PLUS:
3948	/* load address will be used.  */
3949	if (reg_class_subset_p (ADDR_REGS, rclass))
3950	  return ADDR_REGS;
3951	else
3952	  return NO_REGS;
3953
3954      default:
3955	break;
3956    }
3957
3958  return rclass;
3959}
3960
3961/* Return true if ADDR is SYMBOL_REF + addend with addend being a
3962   multiple of ALIGNMENT and the SYMBOL_REF being naturally
3963   aligned.  */
3964
3965bool
3966s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
3967{
3968  HOST_WIDE_INT addend;
3969  rtx symref;
3970
3971  if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3972    return false;
3973
3974  if (addend & (alignment - 1))
3975    return false;
3976
3977  if (GET_CODE (symref) == SYMBOL_REF
3978      && !SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref))
3979    return true;
3980
3981  if (GET_CODE (symref) == UNSPEC
3982      && alignment <= UNITS_PER_LONG)
3983    return true;
3984
3985  return false;
3986}
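
/* For instance (illustrative): with ADDR = (const (plus (symbol_ref "x")
   (const_int 6))) and ALIGNMENT = 4, the check 6 & 3 != 0 makes the
   function return false; with an addend of 8 it returns true, provided
   the symbol itself is naturally aligned.  */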
3987
/* ADDR is moved into REG using larl.  If ADDR isn't a valid larl
   operand, SCRATCH is used to load the even part of the address, and
   one is then added to form the final value.  */
3991
3992void
3993s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
3994{
3995  HOST_WIDE_INT addend;
3996  rtx symref;
3997
3998  if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3999    gcc_unreachable ();
4000
4001  if (!(addend & 1))
4002    /* Easy case.  The addend is even so larl will do fine.  */
4003    emit_move_insn (reg, addr);
4004  else
4005    {
4006      /* We can leave the scratch register untouched if the target
4007	 register is a valid base register.  */
4008      if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4009	  && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4010	scratch = reg;
4011
4012      gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4013      gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4014
4015      if (addend != 1)
4016	emit_move_insn (scratch,
4017			gen_rtx_CONST (Pmode,
4018				       gen_rtx_PLUS (Pmode, symref,
4019						     GEN_INT (addend - 1))));
4020      else
4021	emit_move_insn (scratch, symref);
4022
4023      /* Increment the address using la in order to avoid clobbering cc.  */
4024      s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4025    }
4026}
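
/* Illustrative example of the sequence produced above (a sketch, not
   literal assembler output): reloading the odd larl operand sym + 5
   first loads sym + 4 into SCRATCH via larl and then adds one with
   "la reg, 1(scratch)"; for sym + 1 the even part is just sym itself,
   so only the la increment is needed.  */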
4027
4028/* Generate what is necessary to move between REG and MEM using
4029   SCRATCH.  The direction is given by TOMEM.  */
4030
4031void
4032s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4033{
4034  /* Reload might have pulled a constant out of the literal pool.
4035     Force it back in.  */
4036  if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4037      || GET_CODE (mem) == CONST_VECTOR
4038      || GET_CODE (mem) == CONST)
4039    mem = force_const_mem (GET_MODE (reg), mem);
4040
4041  gcc_assert (MEM_P (mem));
4042
4043  /* For a load from memory we can leave the scratch register
4044     untouched if the target register is a valid base register.  */
4045  if (!tomem
4046      && REGNO (reg) < FIRST_PSEUDO_REGISTER
4047      && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4048      && GET_MODE (reg) == GET_MODE (scratch))
4049    scratch = reg;
4050
4051  /* Load address into scratch register.  Since we can't have a
4052     secondary reload for a secondary reload we have to cover the case
4053     where larl would need a secondary reload here as well.  */
4054  s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4055
4056  /* Now we can use a standard load/store to do the move.  */
4057  if (tomem)
4058    emit_move_insn (replace_equiv_address (mem, scratch), reg);
4059  else
4060    emit_move_insn (reg, replace_equiv_address (mem, scratch));
4061}
4062
4063/* Inform reload about cases where moving X with a mode MODE to a register in
4064   RCLASS requires an extra scratch or immediate register.  Return the class
4065   needed for the immediate register.  */
4066
4067static reg_class_t
4068s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4069		       machine_mode mode, secondary_reload_info *sri)
4070{
4071  enum reg_class rclass = (enum reg_class) rclass_i;
4072
4073  /* Intermediate register needed.  */
4074  if (reg_classes_intersect_p (CC_REGS, rclass))
4075    return GENERAL_REGS;
4076
4077  if (TARGET_VX)
4078    {
4079      /* The vst/vl vector move instructions allow only for short
4080	 displacements.  */
4081      if (MEM_P (x)
4082	  && GET_CODE (XEXP (x, 0)) == PLUS
4083	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4084	  && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4085	  && reg_class_subset_p (rclass, VEC_REGS)
4086	  && (!reg_class_subset_p (rclass, FP_REGS)
4087	      || (GET_MODE_SIZE (mode) > 8
4088		  && s390_class_max_nregs (FP_REGS, mode) == 1)))
4089	{
4090	  if (in_p)
4091	    sri->icode = (TARGET_64BIT ?
4092			  CODE_FOR_reloaddi_la_in :
4093			  CODE_FOR_reloadsi_la_in);
4094	  else
4095	    sri->icode = (TARGET_64BIT ?
4096			  CODE_FOR_reloaddi_la_out :
4097			  CODE_FOR_reloadsi_la_out);
4098	}
4099    }
4100
4101  if (TARGET_Z10)
4102    {
4103      HOST_WIDE_INT offset;
4104      rtx symref;
4105
4106      /* On z10 several optimizer steps may generate larl operands with
4107	 an odd addend.  */
4108      if (in_p
4109	  && s390_loadrelative_operand_p (x, &symref, &offset)
4110	  && mode == Pmode
4111	  && !SYMBOL_REF_ALIGN1_P (symref)
4112	  && (offset & 1) == 1)
4113	sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4114		      : CODE_FOR_reloadsi_larl_odd_addend_z10);
4115
4116      /* Handle all the (mem (symref)) accesses we cannot use the z10
4117	 instructions for.  */
4118      if (MEM_P (x)
4119	  && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4120	  && (mode == QImode
4121	      || !reg_class_subset_p (rclass, GENERAL_REGS)
4122	      || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4123	      || !s390_check_symref_alignment (XEXP (x, 0),
4124					       GET_MODE_SIZE (mode))))
4125	{
4126#define __SECONDARY_RELOAD_CASE(M,m)					\
4127	  case M##mode:							\
4128	    if (TARGET_64BIT)						\
4129	      sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 :	\
4130                                  CODE_FOR_reload##m##di_tomem_z10;	\
4131	    else							\
4132  	      sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 :	\
4133                                  CODE_FOR_reload##m##si_tomem_z10;	\
4134	  break;
4135
4136	  switch (GET_MODE (x))
4137	    {
4138	      __SECONDARY_RELOAD_CASE (QI, qi);
4139	      __SECONDARY_RELOAD_CASE (HI, hi);
4140	      __SECONDARY_RELOAD_CASE (SI, si);
4141	      __SECONDARY_RELOAD_CASE (DI, di);
4142	      __SECONDARY_RELOAD_CASE (TI, ti);
4143	      __SECONDARY_RELOAD_CASE (SF, sf);
4144	      __SECONDARY_RELOAD_CASE (DF, df);
4145	      __SECONDARY_RELOAD_CASE (TF, tf);
4146	      __SECONDARY_RELOAD_CASE (SD, sd);
4147	      __SECONDARY_RELOAD_CASE (DD, dd);
4148	      __SECONDARY_RELOAD_CASE (TD, td);
4149	      __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4150	      __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4151	      __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4152	      __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4153	      __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4154	      __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4155	      __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4156	      __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4157	      __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4158	      __SECONDARY_RELOAD_CASE (V1SI, v1si);
4159	      __SECONDARY_RELOAD_CASE (V2SI, v2si);
4160	      __SECONDARY_RELOAD_CASE (V4SI, v4si);
4161	      __SECONDARY_RELOAD_CASE (V1DI, v1di);
4162	      __SECONDARY_RELOAD_CASE (V2DI, v2di);
4163	      __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4164	      __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4165	      __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4166	      __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4167	      __SECONDARY_RELOAD_CASE (V1DF, v1df);
4168	      __SECONDARY_RELOAD_CASE (V2DF, v2df);
4169	      __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4170	    default:
4171	      gcc_unreachable ();
4172	    }
4173#undef __SECONDARY_RELOAD_CASE
4174	}
4175    }
4176
4177  /* We need a scratch register when loading a PLUS expression which
4178     is not a legitimate operand of the LOAD ADDRESS instruction.  */
  /* LRA can deal with the transformation of a PLUS operand on its
     own, so we don't need to prompt it in this case.  */
4181  if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4182    sri->icode = (TARGET_64BIT ?
4183		  CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4184
4185  /* Performing a multiword move from or to memory we have to make sure the
4186     second chunk in memory is addressable without causing a displacement
4187     overflow.  If that would be the case we calculate the address in
4188     a scratch register.  */
4189  if (MEM_P (x)
4190      && GET_CODE (XEXP (x, 0)) == PLUS
4191      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4192      && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4193			 + GET_MODE_SIZE (mode) - 1))
4194    {
      /* For GENERAL_REGS a displacement overflow is no problem if it occurs
	 in an s_operand address, since we may fall back to lm/stm.  So we only
4197	 have to care about overflows in the b+i+d case.  */
4198      if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4199	   && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4200	   && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4201	  /* For FP_REGS no lm/stm is available so this check is triggered
4202	     for displacement overflows in b+i+d and b+d like addresses.  */
4203	  || (reg_classes_intersect_p (FP_REGS, rclass)
4204	      && s390_class_max_nregs (FP_REGS, mode) > 1))
4205	{
4206	  if (in_p)
4207	    sri->icode = (TARGET_64BIT ?
4208			  CODE_FOR_reloaddi_la_in :
4209			  CODE_FOR_reloadsi_la_in);
4210	  else
4211	    sri->icode = (TARGET_64BIT ?
4212			  CODE_FOR_reloaddi_la_out :
4213			  CODE_FOR_reloadsi_la_out);
4214	}
4215    }
4216
4217  /* A scratch address register is needed when a symbolic constant is
4218     copied to r0 compiling with -fPIC.  In other cases the target
4219     register might be used as temporary (see legitimize_pic_address).  */
4220  if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4221    sri->icode = (TARGET_64BIT ?
4222		  CODE_FOR_reloaddi_PIC_addr :
4223		  CODE_FOR_reloadsi_PIC_addr);
4224
4225  /* Either scratch or no register needed.  */
4226  return NO_REGS;
4227}
4228
/* Generate code to load SRC, which is a PLUS that is not a
4230   legitimate operand for the LA instruction, into TARGET.
4231   SCRATCH may be used as scratch register.  */
4232
4233void
4234s390_expand_plus_operand (rtx target, rtx src,
4235			  rtx scratch)
4236{
4237  rtx sum1, sum2;
4238  struct s390_address ad;
4239
4240  /* src must be a PLUS; get its two operands.  */
4241  gcc_assert (GET_CODE (src) == PLUS);
4242  gcc_assert (GET_MODE (src) == Pmode);
4243
4244  /* Check if any of the two operands is already scheduled
4245     for replacement by reload.  This can happen e.g. when
4246     float registers occur in an address.  */
4247  sum1 = find_replacement (&XEXP (src, 0));
4248  sum2 = find_replacement (&XEXP (src, 1));
4249  src = gen_rtx_PLUS (Pmode, sum1, sum2);
4250
4251  /* If the address is already strictly valid, there's nothing to do.  */
4252  if (!s390_decompose_address (src, &ad)
4253      || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4254      || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4255    {
4256      /* Otherwise, one of the operands cannot be an address register;
4257         we reload its value into the scratch register.  */
4258      if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4259	{
4260	  emit_move_insn (scratch, sum1);
4261	  sum1 = scratch;
4262	}
4263      if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4264	{
4265	  emit_move_insn (scratch, sum2);
4266	  sum2 = scratch;
4267	}
4268
4269      /* According to the way these invalid addresses are generated
4270         in reload.c, it should never happen (at least on s390) that
4271         *neither* of the PLUS components, after find_replacements
4272         was applied, is an address register.  */
4273      if (sum1 == scratch && sum2 == scratch)
4274	{
4275	  debug_rtx (src);
4276	  gcc_unreachable ();
4277	}
4278
4279      src = gen_rtx_PLUS (Pmode, sum1, sum2);
4280    }
4281
4282  /* Emit the LOAD ADDRESS pattern.  Note that reload of PLUS
4283     is only ever performed on addresses, so we can mark the
4284     sum as legitimate for LA in any case.  */
4285  s390_load_address (target, src);
4286}
4287
4288
4289/* Return true if ADDR is a valid memory address.
4290   STRICT specifies whether strict register checking applies.  */
4291
4292static bool
4293s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4294{
4295  struct s390_address ad;
4296
4297  if (TARGET_Z10
4298      && larl_operand (addr, VOIDmode)
4299      && (mode == VOIDmode
4300	  || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4301    return true;
4302
4303  if (!s390_decompose_address (addr, &ad))
4304    return false;
4305
4306  if (strict)
4307    {
4308      if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4309	return false;
4310
4311      if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4312	return false;
4313    }
4314  else
4315    {
4316      if (ad.base
4317	  && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4318	       || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4319	return false;
4320
4321      if (ad.indx
4322	  && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4323	       || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
	return false;
4325    }
4326  return true;
4327}
4328
4329/* Return true if OP is a valid operand for the LA instruction.
4330   In 31-bit, we need to prove that the result is used as an
4331   address, as LA performs only a 31-bit addition.  */
4332
4333bool
4334legitimate_la_operand_p (rtx op)
4335{
4336  struct s390_address addr;
4337  if (!s390_decompose_address (op, &addr))
4338    return false;
4339
4340  return (TARGET_64BIT || addr.pointer);
4341}
4342
4343/* Return true if it is valid *and* preferable to use LA to
4344   compute the sum of OP1 and OP2.  */
4345
4346bool
4347preferred_la_operand_p (rtx op1, rtx op2)
4348{
4349  struct s390_address addr;
4350
4351  if (op2 != const0_rtx)
4352    op1 = gen_rtx_PLUS (Pmode, op1, op2);
4353
4354  if (!s390_decompose_address (op1, &addr))
4355    return false;
4356  if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4357    return false;
4358  if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4359    return false;
4360
4361  /* Avoid LA instructions with index register on z196; it is
4362     preferable to use regular add instructions when possible.
4363     Starting with zEC12 the la with index register is "uncracked"
4364     again.  */
4365  if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4366    return false;
4367
4368  if (!TARGET_64BIT && !addr.pointer)
4369    return false;
4370
4371  if (addr.pointer)
4372    return true;
4373
4374  if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4375      || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4376    return true;
4377
4378  return false;
4379}
4380
4381/* Emit a forced load-address operation to load SRC into DST.
4382   This will use the LOAD ADDRESS instruction even in situations
4383   where legitimate_la_operand_p (SRC) returns false.  */
4384
4385void
4386s390_load_address (rtx dst, rtx src)
4387{
4388  if (TARGET_64BIT)
4389    emit_move_insn (dst, src);
4390  else
4391    emit_insn (gen_force_la_31 (dst, src));
4392}
4393
4394/* Return a legitimate reference for ORIG (an address) using the
4395   register REG.  If REG is 0, a new pseudo is generated.
4396
4397   There are two types of references that must be handled:
4398
4399   1. Global data references must load the address from the GOT, via
4400      the PIC reg.  An insn is emitted to do this load, and the reg is
4401      returned.
4402
4403   2. Static data references, constant pool addresses, and code labels
4404      compute the address as an offset from the GOT, whose base is in
4405      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
4406      differentiate them from global data objects.  The returned
4407      address is the PIC reg + an unspec constant.
4408
4409   TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4410   reg also appears in the address.  */
4411
4412rtx
4413legitimize_pic_address (rtx orig, rtx reg)
4414{
4415  rtx addr = orig;
4416  rtx addend = const0_rtx;
4417  rtx new_rtx = orig;
4418
4419  gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4420
4421  if (GET_CODE (addr) == CONST)
4422    addr = XEXP (addr, 0);
4423
4424  if (GET_CODE (addr) == PLUS)
4425    {
4426      addend = XEXP (addr, 1);
4427      addr = XEXP (addr, 0);
4428    }
4429
4430  if ((GET_CODE (addr) == LABEL_REF
4431       || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
4432       || (GET_CODE (addr) == UNSPEC &&
4433	   (XINT (addr, 1) == UNSPEC_GOTENT
4434	    || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4435      && GET_CODE (addend) == CONST_INT)
4436    {
4437      /* This can be locally addressed.  */
4438
4439      /* larl_operand requires UNSPECs to be wrapped in a const rtx.  */
4440      rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4441			gen_rtx_CONST (Pmode, addr) : addr);
4442
4443      if (TARGET_CPU_ZARCH
4444	  && larl_operand (const_addr, VOIDmode)
4445	  && INTVAL (addend) < (HOST_WIDE_INT)1 << 31
4446	  && INTVAL (addend) >= -((HOST_WIDE_INT)1 << 31))
4447	{
4448	  if (INTVAL (addend) & 1)
4449	    {
4450	      /* LARL can't handle odd offsets, so emit a pair of LARL
4451		 and LA.  */
4452	      rtx temp = reg? reg : gen_reg_rtx (Pmode);
4453
4454	      if (!DISP_IN_RANGE (INTVAL (addend)))
4455		{
4456		  HOST_WIDE_INT even = INTVAL (addend) - 1;
4457		  addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4458		  addr = gen_rtx_CONST (Pmode, addr);
4459		  addend = const1_rtx;
4460		}
4461
4462	      emit_move_insn (temp, addr);
4463	      new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4464
4465	      if (reg != 0)
4466		{
4467		  s390_load_address (reg, new_rtx);
4468		  new_rtx = reg;
4469		}
4470	    }
4471	  else
4472	    {
4473	      /* If the offset is even, we can just use LARL.  This
4474		 will happen automatically.  */
4475	    }
4476	}
4477      else
4478	{
4479	  /* No larl - Access local symbols relative to the GOT.  */
4480
4481	  rtx temp = reg? reg : gen_reg_rtx (Pmode);
4482
4483	  if (reload_in_progress || reload_completed)
4484	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4485
4486	  addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4487	  if (addend != const0_rtx)
4488	    addr = gen_rtx_PLUS (Pmode, addr, addend);
4489	  addr = gen_rtx_CONST (Pmode, addr);
4490	  addr = force_const_mem (Pmode, addr);
4491	  emit_move_insn (temp, addr);
4492
4493	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4494	  if (reg != 0)
4495	    {
4496	      s390_load_address (reg, new_rtx);
4497	      new_rtx = reg;
4498	    }
4499	}
4500    }
4501  else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4502    {
4503      /* A non-local symbol reference without addend.
4504
4505	 The symbol ref is wrapped into an UNSPEC to make sure the
4506	 proper operand modifier (@GOT or @GOTENT) will be emitted.
4507	 This will tell the linker to put the symbol into the GOT.
4508
4509	 Additionally the code dereferencing the GOT slot is emitted here.
4510
4511	 An addend to the symref needs to be added afterwards.
4512	 legitimize_pic_address calls itself recursively to handle
4513	 that case.  So no need to do it here.  */
4514
4515      if (reg == 0)
4516        reg = gen_reg_rtx (Pmode);
4517
4518      if (TARGET_Z10)
4519	{
4520	  /* Use load relative if possible.
4521	     lgrl <target>, sym@GOTENT  */
4522	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4523	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4524	  new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4525
4526	  emit_move_insn (reg, new_rtx);
4527	  new_rtx = reg;
4528	}
4529      else if (flag_pic == 1)
4530        {
4531          /* Assume GOT offset is a valid displacement operand (< 4k
4532             or < 512k with z990).  This is handled the same way in
4533             both 31- and 64-bit code (@GOT).
4534             lg <target>, sym@GOT(r12)  */
4535
4536	  if (reload_in_progress || reload_completed)
4537	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4538
4539          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4540          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4541          new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4542          new_rtx = gen_const_mem (Pmode, new_rtx);
4543          emit_move_insn (reg, new_rtx);
4544          new_rtx = reg;
4545        }
4546      else if (TARGET_CPU_ZARCH)
4547        {
4548          /* If the GOT offset might be >= 4k, we determine the position
4549             of the GOT entry via a PC-relative LARL (@GOTENT).
4550	     larl temp, sym@GOTENT
4551             lg   <target>, 0(temp) */
4552
4553          rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4554
4555	  gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4556		      || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4557
4558          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4559          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4560	  emit_move_insn (temp, new_rtx);
4561
4562	  new_rtx = gen_const_mem (Pmode, temp);
4563          emit_move_insn (reg, new_rtx);
4564
4565          new_rtx = reg;
4566        }
4567      else
4568        {
4569          /* If the GOT offset might be >= 4k, we have to load it
4570             from the literal pool (@GOT).
4571
4572	     lg temp, lit-litbase(r13)
4573             lg <target>, 0(temp)
4574	     lit:  .long sym@GOT  */
4575
4576          rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4577
4578	  gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4579		      || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4580
4581	  if (reload_in_progress || reload_completed)
4582	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4583
4584          addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4585          addr = gen_rtx_CONST (Pmode, addr);
4586          addr = force_const_mem (Pmode, addr);
4587          emit_move_insn (temp, addr);
4588
4589          new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4590          new_rtx = gen_const_mem (Pmode, new_rtx);
4591          emit_move_insn (reg, new_rtx);
4592          new_rtx = reg;
4593        }
4594    }
4595  else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4596    {
4597      gcc_assert (XVECLEN (addr, 0) == 1);
4598      switch (XINT (addr, 1))
4599	{
	  /* These UNSPECs address symbols (or PLT slots) relative to
	     the GOT (not GOT slots!).  In general this will exceed the
	     displacement range, so these values belong in the literal
	     pool.  */
4604	case UNSPEC_GOTOFF:
4605	case UNSPEC_PLTOFF:
4606	  new_rtx = force_const_mem (Pmode, orig);
4607	  break;
4608
4609	  /* For -fPIC the GOT size might exceed the displacement
4610	     range so make sure the value is in the literal pool.  */
4611	case UNSPEC_GOT:
4612	  if (flag_pic == 2)
4613	    new_rtx = force_const_mem (Pmode, orig);
4614	  break;
4615
4616	  /* For @GOTENT larl is used.  This is handled like local
4617	     symbol refs.  */
4618	case UNSPEC_GOTENT:
4619	  gcc_unreachable ();
4620	  break;
4621
4622	  /* @PLT is OK as is on 64-bit, must be converted to
4623	     GOT-relative @PLTOFF on 31-bit.  */
4624	case UNSPEC_PLT:
4625	  if (!TARGET_CPU_ZARCH)
4626	    {
4627	      rtx temp = reg? reg : gen_reg_rtx (Pmode);
4628
4629	      if (reload_in_progress || reload_completed)
4630		df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4631
4632	      addr = XVECEXP (addr, 0, 0);
4633	      addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4634				     UNSPEC_PLTOFF);
4635	      if (addend != const0_rtx)
4636		addr = gen_rtx_PLUS (Pmode, addr, addend);
4637	      addr = gen_rtx_CONST (Pmode, addr);
4638	      addr = force_const_mem (Pmode, addr);
4639	      emit_move_insn (temp, addr);
4640
4641	      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4642	      if (reg != 0)
4643		{
4644		  s390_load_address (reg, new_rtx);
4645		  new_rtx = reg;
4646		}
4647	    }
4648	  else
	    /* On 64-bit, larl can be used.  This case is handled like
4650	       local symbol refs.  */
4651	    gcc_unreachable ();
4652	  break;
4653
4654	  /* Everything else cannot happen.  */
4655	default:
4656	  gcc_unreachable ();
4657	}
4658    }
4659  else if (addend != const0_rtx)
4660    {
4661      /* Otherwise, compute the sum.  */
4662
4663      rtx base = legitimize_pic_address (addr, reg);
4664      new_rtx  = legitimize_pic_address (addend,
4665					 base == reg ? NULL_RTX : reg);
4666      if (GET_CODE (new_rtx) == CONST_INT)
4667	new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4668      else
4669	{
4670	  if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4671	    {
4672	      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4673	      new_rtx = XEXP (new_rtx, 1);
4674	    }
4675	  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4676	}
4677
4678      if (GET_CODE (new_rtx) == CONST)
4679	new_rtx = XEXP (new_rtx, 0);
4680      new_rtx = force_operand (new_rtx, 0);
4681    }
4682
4683  return new_rtx;
4684}
4685
4686/* Load the thread pointer into a register.  */
4687
4688rtx
4689s390_get_thread_pointer (void)
4690{
4691  rtx tp = gen_reg_rtx (Pmode);
4692
4693  emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4694  mark_reg_pointer (tp, BITS_PER_WORD);
4695
4696  return tp;
4697}
4698
/* Emit a TLS call insn.  The call target is the SYMBOL_REF stored
   in s390_tls_symbol, which always refers to __tls_get_offset.
   The returned offset is written to RESULT_REG and a USE rtx is
   generated for TLS_CALL.  */
4703
4704static GTY(()) rtx s390_tls_symbol;
4705
4706static void
4707s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4708{
4709  rtx insn;
4710
4711  if (!flag_pic)
4712    emit_insn (s390_load_got ());
4713
4714  if (!s390_tls_symbol)
4715    s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
4716
4717  insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
4718			 gen_rtx_REG (Pmode, RETURN_REGNUM));
4719
4720  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
4721  RTL_CONST_CALL_P (insn) = 1;
4722}
4723
4724/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
4725   this (thread-local) address.  REG may be used as temporary.  */
4726
4727static rtx
4728legitimize_tls_address (rtx addr, rtx reg)
4729{
4730  rtx new_rtx, tls_call, temp, base, r2, insn;
4731
4732  if (GET_CODE (addr) == SYMBOL_REF)
4733    switch (tls_symbolic_operand (addr))
4734      {
4735      case TLS_MODEL_GLOBAL_DYNAMIC:
4736	start_sequence ();
4737	r2 = gen_rtx_REG (Pmode, 2);
4738	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
4739	new_rtx = gen_rtx_CONST (Pmode, tls_call);
4740	new_rtx = force_const_mem (Pmode, new_rtx);
4741	emit_move_insn (r2, new_rtx);
4742	s390_emit_tls_call_insn (r2, tls_call);
4743	insn = get_insns ();
4744	end_sequence ();
4745
4746	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4747	temp = gen_reg_rtx (Pmode);
4748	emit_libcall_block (insn, temp, r2, new_rtx);
4749
4750	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4751	if (reg != 0)
4752	  {
4753	    s390_load_address (reg, new_rtx);
4754	    new_rtx = reg;
4755	  }
4756	break;
4757
4758      case TLS_MODEL_LOCAL_DYNAMIC:
4759	start_sequence ();
4760	r2 = gen_rtx_REG (Pmode, 2);
4761	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
4762	new_rtx = gen_rtx_CONST (Pmode, tls_call);
4763	new_rtx = force_const_mem (Pmode, new_rtx);
4764	emit_move_insn (r2, new_rtx);
4765	s390_emit_tls_call_insn (r2, tls_call);
4766	insn = get_insns ();
4767	end_sequence ();
4768
4769	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
4770	temp = gen_reg_rtx (Pmode);
4771	emit_libcall_block (insn, temp, r2, new_rtx);
4772
4773	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4774	base = gen_reg_rtx (Pmode);
4775	s390_load_address (base, new_rtx);
4776
4777	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
4778	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4779	new_rtx = force_const_mem (Pmode, new_rtx);
4780	temp = gen_reg_rtx (Pmode);
4781	emit_move_insn (temp, new_rtx);
4782
4783	new_rtx = gen_rtx_PLUS (Pmode, base, temp);
4784	if (reg != 0)
4785	  {
4786	    s390_load_address (reg, new_rtx);
4787	    new_rtx = reg;
4788	  }
4789	break;
4790
4791      case TLS_MODEL_INITIAL_EXEC:
4792	if (flag_pic == 1)
4793	  {
4794	    /* Assume GOT offset < 4k.  This is handled the same way
4795	       in both 31- and 64-bit code.  */
4796
4797	    if (reload_in_progress || reload_completed)
4798	      df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4799
4800	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4801	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4802	    new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4803	    new_rtx = gen_const_mem (Pmode, new_rtx);
4804	    temp = gen_reg_rtx (Pmode);
4805	    emit_move_insn (temp, new_rtx);
4806	  }
4807	else if (TARGET_CPU_ZARCH)
4808	  {
4809	    /* If the GOT offset might be >= 4k, we determine the position
4810	       of the GOT entry via a PC-relative LARL.  */
4811
4812	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4813	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4814	    temp = gen_reg_rtx (Pmode);
4815	    emit_move_insn (temp, new_rtx);
4816
4817	    new_rtx = gen_const_mem (Pmode, temp);
4818	    temp = gen_reg_rtx (Pmode);
4819	    emit_move_insn (temp, new_rtx);
4820	  }
4821	else if (flag_pic)
4822	  {
4823	    /* If the GOT offset might be >= 4k, we have to load it
4824	       from the literal pool.  */
4825
4826	    if (reload_in_progress || reload_completed)
4827	      df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4828
4829	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4830	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4831	    new_rtx = force_const_mem (Pmode, new_rtx);
4832	    temp = gen_reg_rtx (Pmode);
4833	    emit_move_insn (temp, new_rtx);
4834
4835            new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4836	    new_rtx = gen_const_mem (Pmode, new_rtx);
4837
4838	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4839	    temp = gen_reg_rtx (Pmode);
4840	    emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
4841	  }
4842	else
4843	  {
4844	    /* In position-dependent code, load the absolute address of
4845	       the GOT entry from the literal pool.  */
4846
4847	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4848	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4849	    new_rtx = force_const_mem (Pmode, new_rtx);
4850	    temp = gen_reg_rtx (Pmode);
4851	    emit_move_insn (temp, new_rtx);
4852
4853	    new_rtx = temp;
4854	    new_rtx = gen_const_mem (Pmode, new_rtx);
4855	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4856	    temp = gen_reg_rtx (Pmode);
4857	    emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
4858	  }
4859
4860	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4861	if (reg != 0)
4862	  {
4863	    s390_load_address (reg, new_rtx);
4864	    new_rtx = reg;
4865	  }
4866	break;
4867
4868      case TLS_MODEL_LOCAL_EXEC:
4869	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4870	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4871	new_rtx = force_const_mem (Pmode, new_rtx);
4872        temp = gen_reg_rtx (Pmode);
4873	emit_move_insn (temp, new_rtx);
4874
4875	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4876	if (reg != 0)
4877	  {
4878	    s390_load_address (reg, new_rtx);
4879	    new_rtx = reg;
4880	  }
4881	break;
4882
4883      default:
4884	gcc_unreachable ();
4885      }
4886
4887  else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
4888    {
4889      switch (XINT (XEXP (addr, 0), 1))
4890	{
4891	case UNSPEC_INDNTPOFF:
4892	  gcc_assert (TARGET_CPU_ZARCH);
4893	  new_rtx = addr;
4894	  break;
4895
4896	default:
4897	  gcc_unreachable ();
4898	}
4899    }
4900
4901  else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
4902	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4903    {
4904      new_rtx = XEXP (XEXP (addr, 0), 0);
4905      if (GET_CODE (new_rtx) != SYMBOL_REF)
4906	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4907
4908      new_rtx = legitimize_tls_address (new_rtx, reg);
4909      new_rtx = plus_constant (Pmode, new_rtx,
4910			       INTVAL (XEXP (XEXP (addr, 0), 1)));
4911      new_rtx = force_operand (new_rtx, 0);
4912    }
4913
4914  else
4915    gcc_unreachable ();  /* for now ... */
4916
4917  return new_rtx;
4918}
4919
4920/* Emit insns making the address in operands[1] valid for a standard
4921   move to operands[0].  operands[1] is replaced by an address which
4922   should be used instead of the former RTX to emit the move
4923   pattern.  */
4924
4925void
4926emit_symbolic_move (rtx *operands)
4927{
4928  rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
4929
4930  if (GET_CODE (operands[0]) == MEM)
4931    operands[1] = force_reg (Pmode, operands[1]);
4932  else if (TLS_SYMBOLIC_CONST (operands[1]))
4933    operands[1] = legitimize_tls_address (operands[1], temp);
4934  else if (flag_pic)
4935    operands[1] = legitimize_pic_address (operands[1], temp);
4936}
4937
4938/* Try machine-dependent ways of modifying an illegitimate address X
4939   to be legitimate.  If we find one, return the new, valid address.
4940
4941   OLDX is the address as it was before break_out_memory_refs was called.
4942   In some cases it is useful to look at this to decide what needs to be done.
4943
4944   MODE is the mode of the operand pointed to by X.
4945
4946   When -fpic is used, special handling is needed for symbolic references.
4947   See comments by legitimize_pic_address for details.  */
4948
4949static rtx
4950s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4951			 machine_mode mode ATTRIBUTE_UNUSED)
4952{
4953  rtx constant_term = const0_rtx;
4954
4955  if (TLS_SYMBOLIC_CONST (x))
4956    {
4957      x = legitimize_tls_address (x, 0);
4958
4959      if (s390_legitimate_address_p (mode, x, FALSE))
4960	return x;
4961    }
4962  else if (GET_CODE (x) == PLUS
4963	   && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
4964	       || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
4965    {
4966      return x;
4967    }
4968  else if (flag_pic)
4969    {
4970      if (SYMBOLIC_CONST (x)
4971          || (GET_CODE (x) == PLUS
4972              && (SYMBOLIC_CONST (XEXP (x, 0))
4973                  || SYMBOLIC_CONST (XEXP (x, 1)))))
4974	  x = legitimize_pic_address (x, 0);
4975
4976      if (s390_legitimate_address_p (mode, x, FALSE))
4977	return x;
4978    }
4979
4980  x = eliminate_constant_term (x, &constant_term);
4981
4982  /* Optimize loading of large displacements by splitting them
4983     into the multiple of 4K and the rest; this allows the
4984     former to be CSE'd if possible.
4985
4986     Don't do this if the displacement is added to a register
4987     pointing into the stack frame, as the offsets will
4988     change later anyway.  */
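
  /* Worked example (illustrative): for a constant term of 0x12345 the
     code below computes lower = 0x12345 & 0xfff = 0x345 and
     upper = 0x12345 ^ 0x345 = 0x12000; the 4K-aligned part 0x12000 is
     loaded into a register (and may be CSE'd), while 0x345 remains as
     the displacement.  */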
4989
4990  if (GET_CODE (constant_term) == CONST_INT
4991      && !TARGET_LONG_DISPLACEMENT
4992      && !DISP_IN_RANGE (INTVAL (constant_term))
4993      && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
4994    {
4995      HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
4996      HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
4997
4998      rtx temp = gen_reg_rtx (Pmode);
4999      rtx val  = force_operand (GEN_INT (upper), temp);
5000      if (val != temp)
5001	emit_move_insn (temp, val);
5002
5003      x = gen_rtx_PLUS (Pmode, x, temp);
5004      constant_term = GEN_INT (lower);
5005    }
5006
5007  if (GET_CODE (x) == PLUS)
5008    {
5009      if (GET_CODE (XEXP (x, 0)) == REG)
5010	{
5011	  rtx temp = gen_reg_rtx (Pmode);
5012	  rtx val  = force_operand (XEXP (x, 1), temp);
5013	  if (val != temp)
5014	    emit_move_insn (temp, val);
5015
5016	  x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5017	}
5018
5019      else if (GET_CODE (XEXP (x, 1)) == REG)
5020	{
5021	  rtx temp = gen_reg_rtx (Pmode);
5022	  rtx val  = force_operand (XEXP (x, 0), temp);
5023	  if (val != temp)
5024	    emit_move_insn (temp, val);
5025
5026	  x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5027	}
5028    }
5029
5030  if (constant_term != const0_rtx)
5031    x = gen_rtx_PLUS (Pmode, x, constant_term);
5032
5033  return x;
5034}
5035
5036/* Try a machine-dependent way of reloading an illegitimate address AD
5037   operand.  If we find one, push the reload and return the new address.
5038
5039   MODE is the mode of the enclosing MEM.  OPNUM is the operand number
5040   and TYPE is the reload type of the current reload.  */
5041
5042rtx
5043legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5044			   int opnum, int type)
5045{
5046  if (!optimize || TARGET_LONG_DISPLACEMENT)
5047    return NULL_RTX;
5048
5049  if (GET_CODE (ad) == PLUS)
5050    {
5051      rtx tem = simplify_binary_operation (PLUS, Pmode,
5052					   XEXP (ad, 0), XEXP (ad, 1));
5053      if (tem)
5054	ad = tem;
5055    }
5056
5057  if (GET_CODE (ad) == PLUS
5058      && GET_CODE (XEXP (ad, 0)) == REG
5059      && GET_CODE (XEXP (ad, 1)) == CONST_INT
5060      && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5061    {
5062      HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5063      HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5064      rtx cst, tem, new_rtx;
5065
5066      cst = GEN_INT (upper);
5067      if (!legitimate_reload_constant_p (cst))
5068	cst = force_const_mem (Pmode, cst);
5069
5070      tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5071      new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5072
5073      push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5074		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5075		   opnum, (enum reload_type) type);
5076      return new_rtx;
5077    }
5078
5079  return NULL_RTX;
5080}
5081
/* Emit code to move LEN bytes from SRC to DST.  */
5083
5084bool
5085s390_expand_movmem (rtx dst, rtx src, rtx len)
5086{
  /* When tuning for z10 or higher we rely on the Glibc functions to
     do the right thing.  Only for constant lengths below 64k do we
     generate inline code.  */
5090  if (s390_tune >= PROCESSOR_2097_Z10
5091      && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5092    return false;
5093
5094  if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5095    {
5096      if (INTVAL (len) > 0)
5097        emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
5098    }
5099
5100  else if (TARGET_MVCLE)
5101    {
5102      emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5103    }
5104
5105  else
5106    {
5107      rtx dst_addr, src_addr, count, blocks, temp;
5108      rtx_code_label *loop_start_label = gen_label_rtx ();
5109      rtx_code_label *loop_end_label = gen_label_rtx ();
5110      rtx_code_label *end_label = gen_label_rtx ();
5111      machine_mode mode;
5112
5113      mode = GET_MODE (len);
5114      if (mode == VOIDmode)
5115        mode = Pmode;
5116
5117      dst_addr = gen_reg_rtx (Pmode);
5118      src_addr = gen_reg_rtx (Pmode);
5119      count = gen_reg_rtx (mode);
5120      blocks = gen_reg_rtx (mode);
5121
5122      convert_move (count, len, 1);
5123      emit_cmp_and_jump_insns (count, const0_rtx,
5124			       EQ, NULL_RTX, mode, 1, end_label);
5125
5126      emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5127      emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5128      dst = change_address (dst, VOIDmode, dst_addr);
5129      src = change_address (src, VOIDmode, src_addr);
5130
5131      temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5132			   OPTAB_DIRECT);
5133      if (temp != count)
5134        emit_move_insn (count, temp);
5135
5136      temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5137			   OPTAB_DIRECT);
5138      if (temp != blocks)
5139        emit_move_insn (blocks, temp);
5140
5141      emit_cmp_and_jump_insns (blocks, const0_rtx,
5142			       EQ, NULL_RTX, mode, 1, loop_end_label);
5143
5144      emit_label (loop_start_label);
5145
5146      if (TARGET_Z10
5147	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5148	{
5149	  rtx prefetch;
5150
5151	  /* Issue a read prefetch for the +3 cache line.  */
5152	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5153				   const0_rtx, const0_rtx);
5154	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5155	  emit_insn (prefetch);
5156
5157	  /* Issue a write prefetch for the +3 cache line.  */
5158	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5159				   const1_rtx, const0_rtx);
5160	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5161	  emit_insn (prefetch);
5162	}
5163
5164      emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5165      s390_load_address (dst_addr,
5166			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5167      s390_load_address (src_addr,
5168			 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5169
5170      temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5171			   OPTAB_DIRECT);
5172      if (temp != blocks)
5173        emit_move_insn (blocks, temp);
5174
5175      emit_cmp_and_jump_insns (blocks, const0_rtx,
5176			       EQ, NULL_RTX, mode, 1, loop_end_label);
5177
5178      emit_jump (loop_start_label);
5179      emit_label (loop_end_label);
5180
5181      emit_insn (gen_movmem_short (dst, src,
5182				   convert_to_mode (Pmode, count, 1)));
5183      emit_label (end_label);
5184    }
5185  return true;
5186}
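
/* Illustrative trace of the loop above (an informal sketch; it assumes
   that movmem_short moves one byte more than its length operand and,
   when given a register length, uses only the low byte of that register
   via EXECUTE): for LEN = 600 we get count = 599 and blocks = 2, so the
   loop copies 2 * 256 = 512 bytes and the trailing movmem_short copies
   (599 & 0xff) + 1 = 88 bytes, 600 bytes in total.  */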
5187
5188/* Emit code to set LEN bytes at DST to VAL.
5189   Make use of clrmem if VAL is zero.  */
5190
5191void
5192s390_expand_setmem (rtx dst, rtx len, rtx val)
5193{
5194  if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
5195    return;
5196
5197  gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5198
5199  if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
5200    {
5201      if (val == const0_rtx && INTVAL (len) <= 256)
5202        emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
5203      else
5204	{
5205	  /* Initialize memory by storing the first byte.  */
5206	  emit_move_insn (adjust_address (dst, QImode, 0), val);
5207
5208	  if (INTVAL (len) > 1)
5209	    {
5210	      /* Initiate 1 byte overlap move.
5211	         The first byte of DST is propagated through DSTP1.
5212		 Prepare a movmem for:  DST+1 = DST (length = LEN - 1).
5213		 DST is set to size 1 so the rest of the memory location
		 does not count as a source operand.  */
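	      /* For instance (illustrative): with LEN = 10 and VAL = 0x42
		 the first byte store puts 0x42 at DST[0], and the
		 overlapping movmem_short below copies DST[0..8] to
		 DST[1..9] one byte at a time, propagating 0x42 through
		 the remaining nine bytes.  */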
5215	      rtx dstp1 = adjust_address (dst, VOIDmode, 1);
5216	      set_mem_size (dst, 1);
5217
5218	      emit_insn (gen_movmem_short (dstp1, dst,
5219					   GEN_INT (INTVAL (len) - 2)));
5220	    }
5221	}
5222    }
5223
5224  else if (TARGET_MVCLE)
5225    {
5226      val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5227      emit_insn (gen_setmem_long (dst, convert_to_mode (Pmode, len, 1), val));
5228    }
5229
5230  else
5231    {
5232      rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5233      rtx_code_label *loop_start_label = gen_label_rtx ();
5234      rtx_code_label *loop_end_label = gen_label_rtx ();
5235      rtx_code_label *end_label = gen_label_rtx ();
5236      machine_mode mode;
5237
5238      mode = GET_MODE (len);
5239      if (mode == VOIDmode)
5240        mode = Pmode;
5241
5242      dst_addr = gen_reg_rtx (Pmode);
5243      count = gen_reg_rtx (mode);
5244      blocks = gen_reg_rtx (mode);
5245
5246      convert_move (count, len, 1);
5247      emit_cmp_and_jump_insns (count, const0_rtx,
5248			       EQ, NULL_RTX, mode, 1, end_label);
5249
5250      emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5251      dst = change_address (dst, VOIDmode, dst_addr);
5252
5253      if (val == const0_rtx)
5254        temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5255			     OPTAB_DIRECT);
5256      else
5257	{
5258	  dstp1 = adjust_address (dst, VOIDmode, 1);
5259	  set_mem_size (dst, 1);
5260
5261	  /* Initialize memory by storing the first byte.  */
5262	  emit_move_insn (adjust_address (dst, QImode, 0), val);
5263
5264	  /* If count is 1 we are done.  */
5265	  emit_cmp_and_jump_insns (count, const1_rtx,
5266				   EQ, NULL_RTX, mode, 1, end_label);
5267
5268	  temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
5269			       OPTAB_DIRECT);
5270	}
5271      if (temp != count)
5272        emit_move_insn (count, temp);
5273
5274      temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5275			   OPTAB_DIRECT);
5276      if (temp != blocks)
5277        emit_move_insn (blocks, temp);
5278
5279      emit_cmp_and_jump_insns (blocks, const0_rtx,
5280			       EQ, NULL_RTX, mode, 1, loop_end_label);
5281
5282      emit_label (loop_start_label);
5283
5284      if (TARGET_Z10
5285	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
5286	{
5287	  /* Issue a write prefetch for the +4 cache line.  */
5288	  rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
5289						     GEN_INT (1024)),
5290				       const1_rtx, const0_rtx);
5291	  emit_insn (prefetch);
5292	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5293	}
5294
5295      if (val == const0_rtx)
5296	emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5297      else
5298	emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
5299      s390_load_address (dst_addr,
5300			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5301
5302      temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5303			   OPTAB_DIRECT);
5304      if (temp != blocks)
5305        emit_move_insn (blocks, temp);
5306
5307      emit_cmp_and_jump_insns (blocks, const0_rtx,
5308			       EQ, NULL_RTX, mode, 1, loop_end_label);
5309
5310      emit_jump (loop_start_label);
5311      emit_label (loop_end_label);
5312
5313      if (val == const0_rtx)
5314        emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5315      else
5316        emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
5317      emit_label (end_label);
5318    }
5319}
5320
5321/* Emit code to compare LEN bytes at OP0 with those at OP1,
5322   and return the result in TARGET.  */
5323
5324bool
5325s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5326{
5327  rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5328  rtx tmp;
5329
  /* When tuning for z10 or higher we rely on the Glibc functions to
     do the right thing.  Only for constant lengths below 64k do we
     generate inline code.  */
5333  if (s390_tune >= PROCESSOR_2097_Z10
5334      && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5335    return false;
5336
5337  /* As the result of CMPINT is inverted compared to what we need,
5338     we have to swap the operands.  */
5339  tmp = op0; op0 = op1; op1 = tmp;
5340
5341  if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5342    {
5343      if (INTVAL (len) > 0)
5344        {
5345          emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5346          emit_insn (gen_cmpint (target, ccreg));
5347        }
5348      else
5349        emit_move_insn (target, const0_rtx);
5350    }
5351  else if (TARGET_MVCLE)
5352    {
5353      emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5354      emit_insn (gen_cmpint (target, ccreg));
5355    }
5356  else
5357    {
5358      rtx addr0, addr1, count, blocks, temp;
5359      rtx_code_label *loop_start_label = gen_label_rtx ();
5360      rtx_code_label *loop_end_label = gen_label_rtx ();
5361      rtx_code_label *end_label = gen_label_rtx ();
5362      machine_mode mode;
5363
5364      mode = GET_MODE (len);
5365      if (mode == VOIDmode)
5366        mode = Pmode;
5367
5368      addr0 = gen_reg_rtx (Pmode);
5369      addr1 = gen_reg_rtx (Pmode);
5370      count = gen_reg_rtx (mode);
5371      blocks = gen_reg_rtx (mode);
5372
5373      convert_move (count, len, 1);
5374      emit_cmp_and_jump_insns (count, const0_rtx,
5375			       EQ, NULL_RTX, mode, 1, end_label);
5376
5377      emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5378      emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5379      op0 = change_address (op0, VOIDmode, addr0);
5380      op1 = change_address (op1, VOIDmode, addr1);
5381
5382      temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5383			   OPTAB_DIRECT);
5384      if (temp != count)
5385        emit_move_insn (count, temp);
5386
5387      temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5388			   OPTAB_DIRECT);
5389      if (temp != blocks)
5390        emit_move_insn (blocks, temp);
5391
5392      emit_cmp_and_jump_insns (blocks, const0_rtx,
5393			       EQ, NULL_RTX, mode, 1, loop_end_label);
5394
5395      emit_label (loop_start_label);
5396
5397      if (TARGET_Z10
5398	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5399	{
5400	  rtx prefetch;
5401
5402	  /* Issue a read prefetch for the +2 cache line of operand 1.  */
5403	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5404				   const0_rtx, const0_rtx);
5405	  emit_insn (prefetch);
5406	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5407
5408	  /* Issue a read prefetch for the +2 cache line of operand 2.  */
5409	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5410				   const0_rtx, const0_rtx);
5411	  emit_insn (prefetch);
5412	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5413	}
5414
5415      emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
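      /* If this 256-byte block already differs (CC != 0), leave the
	 loop early; the CC is turned into the final result by the
	 CMPINT at end_label.  */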
5416      temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5417      temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5418			gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5419      temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
5420      emit_jump_insn (temp);
5421
5422      s390_load_address (addr0,
5423			 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5424      s390_load_address (addr1,
5425			 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5426
5427      temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5428			   OPTAB_DIRECT);
5429      if (temp != blocks)
5430        emit_move_insn (blocks, temp);
5431
5432      emit_cmp_and_jump_insns (blocks, const0_rtx,
5433			       EQ, NULL_RTX, mode, 1, loop_end_label);
5434
5435      emit_jump (loop_start_label);
5436      emit_label (loop_end_label);
5437
5438      emit_insn (gen_cmpmem_short (op0, op1,
5439				   convert_to_mode (Pmode, count, 1)));
5440      emit_label (end_label);
5441
5442      emit_insn (gen_cmpint (target, ccreg));
5443    }
5444  return true;
5445}
5446
5447/* Emit a conditional jump to LABEL for condition code mask MASK using
5448   comparison operator COMPARISON.  Return the emitted jump insn.  */
5449
5450static rtx
5451s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5452{
5453  rtx temp;
5454
5455  gcc_assert (comparison == EQ || comparison == NE);
5456  gcc_assert (mask > 0 && mask < 15);
5457
5458  temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5459			 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5460  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5461			       gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5462  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
5463  return emit_jump_insn (temp);
5464}
5465
5466/* Emit the instructions to implement strlen of STRING and store the
5467   result in TARGET.  The string has the known ALIGNMENT.  This
5468   version uses vector instructions and is therefore not appropriate
5469   for targets prior to z13.  */
5470
5471void
5472s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5473{
5474  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5475  int very_likely = REG_BR_PROB_BASE - 1;
5476  rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5477  rtx str_reg = gen_reg_rtx (V16QImode);
5478  rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5479  rtx str_idx_reg = gen_reg_rtx (Pmode);
5480  rtx result_reg = gen_reg_rtx (V16QImode);
5481  rtx is_aligned_label = gen_label_rtx ();
5482  rtx into_loop_label = NULL_RTX;
5483  rtx loop_start_label = gen_label_rtx ();
5484  rtx temp;
5485  rtx len = gen_reg_rtx (QImode);
5486  rtx cond;
5487
5488  s390_load_address (str_addr_base_reg, XEXP (string, 0));
5489  emit_move_insn (str_idx_reg, const0_rtx);
5490
5491  if (INTVAL (alignment) < 16)
5492    {
5493      /* Check whether the address happens to be aligned properly so
5494	 jump directly to the aligned loop.  */
5495      emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5496					    str_addr_base_reg, GEN_INT (15)),
5497			       const0_rtx, EQ, NULL_RTX,
5498			       Pmode, 1, is_aligned_label);
5499
5500      temp = gen_reg_rtx (Pmode);
5501      temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5502			   GEN_INT (15), temp, 1, OPTAB_DIRECT);
5503      gcc_assert (REG_P (temp));
5504      highest_index_to_load_reg =
5505	expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5506		      highest_index_to_load_reg, 1, OPTAB_DIRECT);
5507      gcc_assert (REG_P (highest_index_to_load_reg));
5508      emit_insn (gen_vllv16qi (str_reg,
5509		   convert_to_mode (SImode, highest_index_to_load_reg, 1),
5510		   gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5511
5512      into_loop_label = gen_label_rtx ();
5513      s390_emit_jump (into_loop_label, NULL_RTX);
5514      emit_barrier ();
5515    }
5516
5517  emit_label (is_aligned_label);
5518  LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5519
5520  /* When we reach this point we are only performing 16-byte aligned
5521     loads.  */
5522  emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5523
5524  emit_label (loop_start_label);
5525  LABEL_NUSES (loop_start_label) = 1;
5526
5527  /* Load 16 bytes of the string into VR.  */
5528  emit_move_insn (str_reg,
5529		  gen_rtx_MEM (V16QImode,
5530			       gen_rtx_PLUS (Pmode, str_idx_reg,
5531					     str_addr_base_reg)));
5532  if (into_loop_label != NULL_RTX)
5533    {
5534      emit_label (into_loop_label);
5535      LABEL_NUSES (into_loop_label) = 1;
5536    }
5537
5538  /* Increment string index by 16 bytes.  */
5539  expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5540		str_idx_reg, 1, OPTAB_DIRECT);
5541
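  /* Search the 16 loaded bytes for a zero byte (ZS flag) and set the
     condition code (CS flag).  Both vector operands are STR_REG, so
     the element-mismatch part of VFENE never triggers and only the
     zero search matters.  The CC-based jump below keeps looping while
     no terminator has been found yet.  */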
5542  emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5543				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5544
5545  add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5546		    REG_BR_PROB, very_likely);
5547  emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));
5548
5549  /* If the string pointer wasn't aligned we have loaded less than 16
5550     bytes and the remaining bytes got filled with zeros (by vll).
5551     Now we have to check whether the resulting index lies within the
5552     bytes actually part of the string.  */
5553
5554  cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5555			    highest_index_to_load_reg);
5556  s390_load_address (highest_index_to_load_reg,
5557		     gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5558				   const1_rtx));
5559  if (TARGET_64BIT)
5560    emit_insn (gen_movdicc (str_idx_reg, cond,
5561			    highest_index_to_load_reg, str_idx_reg));
5562  else
5563    emit_insn (gen_movsicc (str_idx_reg, cond,
5564			    highest_index_to_load_reg, str_idx_reg));
5565
5566  add_int_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB,
5567		    very_unlikely);
5568
5569  expand_binop (Pmode, add_optab, str_idx_reg,
5570		GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5571  /* FIXME: len is already zero extended - so avoid the llgcr emitted
5572     here.  */
5573  temp = expand_binop (Pmode, add_optab, str_idx_reg,
5574		       convert_to_mode (Pmode, len, 1),
5575		       target, 1, OPTAB_DIRECT);
5576  if (temp != target)
5577    emit_move_insn (target, temp);
5578}
5579
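/* Expand a string copy (movstr) of the zero-terminated string at SRC
   to DST using the vector string instructions available with the z13
   vector facility.  RESULT is set to the address of the copied zero
   terminator within DST.  */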
5580void
5581s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
5582{
5583  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5584  rtx temp = gen_reg_rtx (Pmode);
5585  rtx src_addr = XEXP (src, 0);
5586  rtx dst_addr = XEXP (dst, 0);
5587  rtx src_addr_reg = gen_reg_rtx (Pmode);
5588  rtx dst_addr_reg = gen_reg_rtx (Pmode);
5589  rtx offset = gen_reg_rtx (Pmode);
5590  rtx vsrc = gen_reg_rtx (V16QImode);
5591  rtx vpos = gen_reg_rtx (V16QImode);
5592  rtx loadlen = gen_reg_rtx (SImode);
5593  rtx gpos_qi = gen_reg_rtx(QImode);
5594  rtx gpos = gen_reg_rtx (SImode);
5595  rtx done_label = gen_label_rtx ();
5596  rtx loop_label = gen_label_rtx ();
5597  rtx exit_label = gen_label_rtx ();
5598  rtx full_label = gen_label_rtx ();
5599
5600  /* Perform a quick check for a string ending within the first (at
5601     most) 16 bytes and exit early if successful.  */
5602
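  /* Load up to 16 bytes of the string without crossing the block
     boundary selected by operand 6 (a 4K block, so the speculative
     load cannot cross a page), and let LCBB compute how many bytes
     were actually loadable.  */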
5603  emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
5604  emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
5605  emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
5606  emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
5607  emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5608  /* gpos is the byte index if a zero was found and 16 otherwise.
5609     So if it is lower than the number of loaded bytes we have a hit.  */
5610  emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
5611			   full_label);
5612  emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
5613
5614  force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
5615		      1, OPTAB_DIRECT);
5616  emit_jump (exit_label);
5617  emit_barrier ();
5618
5619  emit_label (full_label);
5620  LABEL_NUSES (full_label) = 1;
5621
5622  /* Calculate `offset' so that src + offset points to the last byte
5623     before the next 16-byte boundary.  */
5624
5625  /* temp = src_addr & 0xf */
5626  force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
5627		      1, OPTAB_DIRECT);
5628
5629  /* offset = 0xf - temp */
5630  emit_move_insn (offset, GEN_INT (15));
5631  force_expand_binop (Pmode, sub_optab, offset, temp, offset,
5632		      1, OPTAB_DIRECT);
5633
5634  /* Store `offset' bytes in the destination string.  The quick check
5635     has loaded at least `offset' bytes into vsrc.  */
5636
5637  emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
5638
5639  /* Advance to the next byte to be loaded.  */
5640  force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
5641		      1, OPTAB_DIRECT);
5642
5643  /* Make sure the addresses are single regs which can be used as a
5644     base.  */
5645  emit_move_insn (src_addr_reg, src_addr);
5646  emit_move_insn (dst_addr_reg, dst_addr);
5647
5648  /* MAIN LOOP */
5649
5650  emit_label (loop_label);
5651  LABEL_NUSES (loop_label) = 1;
5652
5653  emit_move_insn (vsrc,
5654		  gen_rtx_MEM (V16QImode,
5655			       gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
5656
5657  emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
5658				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5659  add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
5660		    REG_BR_PROB, very_unlikely);
5661
5662  emit_move_insn (gen_rtx_MEM (V16QImode,
5663			       gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
5664		  vsrc);
5665  /* offset += 16 */
5666  force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
5667		      offset,  1, OPTAB_DIRECT);
5668
5669  emit_jump (loop_label);
5670  emit_barrier ();
5671
5672  /* REGULAR EXIT */
5673
5674  /* We are done.  Add the offset of the zero character to the dst_addr
5675     pointer to get the result.  */
5676
5677  emit_label (done_label);
5678  LABEL_NUSES (done_label) = 1;
5679
5680  force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
5681		      1, OPTAB_DIRECT);
5682
5683  emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
5684  emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5685
5686  emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
5687
5688  force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
5689		      1, OPTAB_DIRECT);
5690
5691  /* EARLY EXIT */
5692
5693  emit_label (exit_label);
5694  LABEL_NUSES (exit_label) = 1;
5695}
5696
5697
5698/* Expand conditional increment or decrement using alc/slb instructions.
5699   Should generate code setting DST to either SRC or SRC + INCREMENT,
5700   depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
5701   Returns true if successful, false otherwise.
5702
5703   That makes it possible to implement some if-constructs without jumps e.g.:
5704   (borrow = CC0 | CC1 and carry = CC2 | CC3)
5705   unsigned int a, b, c;
5706   if (a < b)  c++; -> CCU  b > a  -> CC2;    c += carry;
5707   if (a < b)  c--; -> CCL3 a - b  -> borrow; c -= borrow;
5708   if (a <= b) c++; -> CCL3 b - a  -> borrow; c += carry;
5709   if (a <= b) c--; -> CCU  a <= b -> borrow; c -= borrow;
5710
5711   Checks for EQ and NE with a nonzero value need an additional xor e.g.:
5712   if (a == b) c++; -> CCL3 a ^= b; 0 - a  -> borrow;    c += carry;
5713   if (a == b) c--; -> CCU  a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
5714   if (a != b) c++; -> CCU  a ^= b; a > 0  -> CC2;       c += carry;
5715   if (a != b) c--; -> CCL3 a ^= b; 0 - a  -> borrow;    c -= borrow; */
5716
5717bool
5718s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
5719		   rtx dst, rtx src, rtx increment)
5720{
5721  machine_mode cmp_mode;
5722  machine_mode cc_mode;
5723  rtx op_res;
5724  rtx insn;
5725  rtvec p;
5726  int ret;
5727
5728  if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
5729      && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
5730    cmp_mode = SImode;
5731  else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
5732	   && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
5733    cmp_mode = DImode;
5734  else
5735    return false;
5736
5737  /* Try ADD LOGICAL WITH CARRY.  */
5738  if (increment == const1_rtx)
5739    {
5740      /* Determine CC mode to use.  */
5741      if (cmp_code == EQ || cmp_code == NE)
5742	{
5743	  if (cmp_op1 != const0_rtx)
5744	    {
5745	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5746					     NULL_RTX, 0, OPTAB_WIDEN);
5747	      cmp_op1 = const0_rtx;
5748	    }
5749
5750	  cmp_code = cmp_code == EQ ? LEU : GTU;
5751	}
5752
5753      if (cmp_code == LTU || cmp_code == LEU)
5754	{
5755	  rtx tem = cmp_op0;
5756	  cmp_op0 = cmp_op1;
5757	  cmp_op1 = tem;
5758	  cmp_code = swap_condition (cmp_code);
5759	}
5760
5761      switch (cmp_code)
5762	{
5763	  case GTU:
5764	    cc_mode = CCUmode;
5765	    break;
5766
5767	  case GEU:
5768	    cc_mode = CCL3mode;
5769	    break;
5770
5771	  default:
5772	    return false;
5773	}
5774
5775      /* Emit comparison instruction pattern. */
5776      if (!register_operand (cmp_op0, cmp_mode))
5777	cmp_op0 = force_reg (cmp_mode, cmp_op0);
5778
5779      insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
5780			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5781      /* We use insn_invalid_p here to add clobbers if required.  */
5782      ret = insn_invalid_p (emit_insn (insn), false);
5783      gcc_assert (!ret);
5784
5785      /* Emit ALC instruction pattern.  */
5786      op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5787			       gen_rtx_REG (cc_mode, CC_REGNUM),
5788			       const0_rtx);
5789
5790      if (src != const0_rtx)
5791	{
5792	  if (!register_operand (src, GET_MODE (dst)))
5793	    src = force_reg (GET_MODE (dst), src);
5794
5795	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
5796	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
5797	}
5798
5799      p = rtvec_alloc (2);
5800      RTVEC_ELT (p, 0) =
5801        gen_rtx_SET (VOIDmode, dst, op_res);
5802      RTVEC_ELT (p, 1) =
5803	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5804      emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5805
5806      return true;
5807    }
5808
5809  /* Try SUBTRACT LOGICAL WITH BORROW.  */
5810  if (increment == constm1_rtx)
5811    {
5812      /* Determine CC mode to use.  */
5813      if (cmp_code == EQ || cmp_code == NE)
5814	{
5815	  if (cmp_op1 != const0_rtx)
5816	    {
5817	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5818					     NULL_RTX, 0, OPTAB_WIDEN);
5819	      cmp_op1 = const0_rtx;
5820	    }
5821
5822	  cmp_code = cmp_code == EQ ? LEU : GTU;
5823	}
5824
5825      if (cmp_code == GTU || cmp_code == GEU)
5826	{
5827	  rtx tem = cmp_op0;
5828	  cmp_op0 = cmp_op1;
5829	  cmp_op1 = tem;
5830	  cmp_code = swap_condition (cmp_code);
5831	}
5832
5833      switch (cmp_code)
5834	{
5835	  case LEU:
5836	    cc_mode = CCUmode;
5837	    break;
5838
5839	  case LTU:
5840	    cc_mode = CCL3mode;
5841	    break;
5842
5843	  default:
5844	    return false;
5845	}
5846
5847      /* Emit comparison instruction pattern. */
5848      if (!register_operand (cmp_op0, cmp_mode))
5849	cmp_op0 = force_reg (cmp_mode, cmp_op0);
5850
5851      insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
5852			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5853      /* We use insn_invalid_p here to add clobbers if required.  */
5854      ret = insn_invalid_p (emit_insn (insn), false);
5855      gcc_assert (!ret);
5856
5857      /* Emit SLB instruction pattern.  */
5858      if (!register_operand (src, GET_MODE (dst)))
5859	src = force_reg (GET_MODE (dst), src);
5860
5861      op_res = gen_rtx_MINUS (GET_MODE (dst),
5862			      gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
5863			      gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5864					      gen_rtx_REG (cc_mode, CC_REGNUM),
5865					      const0_rtx));
5866      p = rtvec_alloc (2);
5867      RTVEC_ELT (p, 0) =
5868        gen_rtx_SET (VOIDmode, dst, op_res);
5869      RTVEC_ELT (p, 1) =
5870	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5871      emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5872
5873      return true;
5874    }
5875
5876  return false;
5877}
5878
5879/* Expand code for the insv template. Return true if successful.  */
5880
5881bool
5882s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
5883{
5884  int bitsize = INTVAL (op1);
5885  int bitpos = INTVAL (op2);
5886  machine_mode mode = GET_MODE (dest);
5887  machine_mode smode;
5888  int smode_bsize, mode_bsize;
5889  rtx op, clobber;
5890
5891  if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
5892    return false;
5893
5894  /* Generate INSERT IMMEDIATE (IILL et al).  */
5895  /* (set (ze (reg)) (const_int)).  */
5896  if (TARGET_ZARCH
5897      && register_operand (dest, word_mode)
5898      && (bitpos % 16) == 0
5899      && (bitsize % 16) == 0
5900      && const_int_operand (src, VOIDmode))
5901    {
5902      HOST_WIDE_INT val = INTVAL (src);
5903      int regpos = bitpos + bitsize;
5904
5905      while (regpos > bitpos)
5906	{
5907	  machine_mode putmode;
5908	  int putsize;
5909
5910	  if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
5911	    putmode = SImode;
5912	  else
5913	    putmode = HImode;
5914
5915	  putsize = GET_MODE_BITSIZE (putmode);
5916	  regpos -= putsize;
5917	  emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
5918						GEN_INT (putsize),
5919						GEN_INT (regpos)),
5920			  gen_int_mode (val, putmode));
5921	  val >>= putsize;
5922	}
5923      gcc_assert (regpos == bitpos);
5924      return true;
5925    }
5926
5927  smode = smallest_mode_for_size (bitsize, MODE_INT);
5928  smode_bsize = GET_MODE_BITSIZE (smode);
5929  mode_bsize = GET_MODE_BITSIZE (mode);
5930
5931  /* Generate STORE CHARACTERS UNDER MASK (STCM et al).  */
5932  if (bitpos == 0
5933      && (bitsize % BITS_PER_UNIT) == 0
5934      && MEM_P (dest)
5935      && (register_operand (src, word_mode)
5936	  || const_int_operand (src, VOIDmode)))
5937    {
5938      /* Emit standard pattern if possible.  */
5939      if (smode_bsize == bitsize)
5940	{
5941	  emit_move_insn (adjust_address (dest, smode, 0),
5942			  gen_lowpart (smode, src));
5943	  return true;
5944	}
5945
5946      /* (set (ze (mem)) (const_int)).  */
5947      else if (const_int_operand (src, VOIDmode))
5948	{
5949	  int size = bitsize / BITS_PER_UNIT;
5950	  rtx src_mem = adjust_address (force_const_mem (word_mode, src),
5951					BLKmode,
5952					UNITS_PER_WORD - size);
5953
5954	  dest = adjust_address (dest, BLKmode, 0);
5955	  set_mem_size (dest, size);
5956	  s390_expand_movmem (dest, src_mem, GEN_INT (size));
5957	  return true;
5958	}
5959
5960      /* (set (ze (mem)) (reg)).  */
5961      else if (register_operand (src, word_mode))
5962	{
5963	  if (bitsize <= 32)
5964	    emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
5965						  const0_rtx), src);
5966	  else
5967	    {
5968	      /* Emit st,stcmh sequence.  */
5969	      int stcmh_width = bitsize - 32;
5970	      int size = stcmh_width / BITS_PER_UNIT;
5971
5972	      emit_move_insn (adjust_address (dest, SImode, size),
5973			      gen_lowpart (SImode, src));
5974	      set_mem_size (dest, size);
5975	      emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
5976						    GEN_INT (stcmh_width),
5977						    const0_rtx),
5978			      gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
5979	    }
5980	  return true;
5981	}
5982    }
5983
5984  /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al).  */
5985  if ((bitpos % BITS_PER_UNIT) == 0
5986      && (bitsize % BITS_PER_UNIT) == 0
5987      && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
5988      && MEM_P (src)
5989      && (mode == DImode || mode == SImode)
5990      && register_operand (dest, mode))
5991    {
5992      /* Emit a strict_low_part pattern if possible.  */
5993      if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
5994	{
5995	  op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
5996	  op = gen_rtx_SET (VOIDmode, op, gen_lowpart (smode, src));
5997	  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5998	  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
5999	  return true;
6000	}
6001
6002      /* ??? There are more powerful versions of ICM that are not
6003	 completely represented in the md file.  */
6004    }
6005
6006  /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al).  */
6007  if (TARGET_Z10 && (mode == DImode || mode == SImode))
6008    {
6009      machine_mode mode_s = GET_MODE (src);
6010
6011      if (mode_s == VOIDmode)
6012	{
6013	  /* Assume const_int etc. is already in the proper mode.  */
6014	  src = force_reg (mode, src);
6015	}
6016      else if (mode_s != mode)
6017	{
6018	  gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6019	  src = force_reg (mode_s, src);
6020	  src = gen_lowpart (mode, src);
6021	}
6022
6023      op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
6024      op = gen_rtx_SET (VOIDmode, op, src);
6025
6026      if (!TARGET_ZEC12)
6027	{
6028	  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6029	  op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6030	}
6031      emit_insn (op);
6032
6033      return true;
6034    }
6035
6036  return false;
6037}
6038
6039/* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6040   register that holds VAL of mode MODE shifted by COUNT bits.  */
6041
6042static inline rtx
6043s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6044{
6045  val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6046			     NULL_RTX, 1, OPTAB_DIRECT);
6047  return expand_simple_binop (SImode, ASHIFT, val, count,
6048			      NULL_RTX, 1, OPTAB_DIRECT);
6049}
6050
6051/* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6052   the result in TARGET.  */
6053
6054void
6055s390_expand_vec_compare (rtx target, enum rtx_code cond,
6056			 rtx cmp_op1, rtx cmp_op2)
6057{
6058  machine_mode mode = GET_MODE (target);
6059  bool neg_p = false, swap_p = false;
6060  rtx tmp;
6061
6062  if (GET_MODE (cmp_op1) == V2DFmode)
6063    {
6064      switch (cond)
6065	{
6066	  /* NE a != b -> !(a == b) */
6067	case NE:   cond = EQ; neg_p = true;                break;
6068	  /* UNGT a u> b -> !(b >= a) */
6069	case UNGT: cond = GE; neg_p = true; swap_p = true; break;
6070	  /* UNGE a u>= b -> !(b > a) */
6071	case UNGE: cond = GT; neg_p = true; swap_p = true; break;
6072	  /* LE: a <= b -> b >= a */
6073	case LE:   cond = GE;               swap_p = true; break;
6074	  /* UNLE: a u<= b -> !(a > b) */
6075	case UNLE: cond = GT; neg_p = true;                break;
6076	  /* LT: a < b -> b > a */
6077	case LT:   cond = GT;               swap_p = true; break;
6078	  /* UNLT: a u< b -> !(a >= b) */
6079	case UNLT: cond = GE; neg_p = true;                break;
6080	case UNEQ:
6081	  emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
6082	  return;
6083	case LTGT:
6084	  emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
6085	  return;
6086	case ORDERED:
6087	  emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
6088	  return;
6089	case UNORDERED:
6090	  emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
6091	  return;
6092	default: break;
6093	}
6094    }
6095  else
6096    {
6097      switch (cond)
6098	{
6099	  /* NE: a != b -> !(a == b) */
6100	case NE:  cond = EQ;  neg_p = true;                break;
6101	  /* GE: a >= b -> !(b > a) */
6102	case GE:  cond = GT;  neg_p = true; swap_p = true; break;
6103	  /* GEU: a >= b -> !(b > a) */
6104	case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6105	  /* LE: a <= b -> !(a > b) */
6106	case LE:  cond = GT;  neg_p = true;                break;
6107	  /* LEU: a <= b -> !(a > b) */
6108	case LEU: cond = GTU; neg_p = true;                break;
6109	  /* LT: a < b -> b > a */
6110	case LT:  cond = GT;                swap_p = true; break;
6111	  /* LTU: a < b -> b > a */
6112	case LTU: cond = GTU;               swap_p = true; break;
6113	default: break;
6114	}
6115    }
6116
6117  if (swap_p)
6118    {
6119      tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6120    }
6121
6122  emit_insn (gen_rtx_SET (VOIDmode, target, gen_rtx_fmt_ee (cond,
6123						  mode,
6124						  cmp_op1, cmp_op2)));
6125  if (neg_p)
6126    emit_insn (gen_rtx_SET (VOIDmode, target, gen_rtx_NOT (mode, target)));
6127}
6128
6129/* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6130   TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6131   elements in CMP1 and CMP2 fulfill the comparison.  */
6132void
6133s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6134			    rtx cmp1, rtx cmp2, bool all_p)
6135{
6136  enum rtx_code new_code = code;
6137  machine_mode cmp_mode, full_cmp_mode, scratch_mode;
6138  rtx tmp_reg = gen_reg_rtx (SImode);
6139  bool swap_p = false;
6140
6141  if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6142    {
6143      switch (code)
6144	{
6145	case EQ:  cmp_mode = CCVEQmode; break;
6146	case NE:  cmp_mode = CCVEQmode; break;
6147	case GT:  cmp_mode = CCVHmode;  break;
6148	case GE:  cmp_mode = CCVHmode;  new_code = LE; swap_p = true; break;
6149	case LT:  cmp_mode = CCVHmode;  new_code = GT; swap_p = true; break;
6150	case LE:  cmp_mode = CCVHmode;  new_code = LE; break;
6151	case GTU: cmp_mode = CCVHUmode; break;
6152	case GEU: cmp_mode = CCVHUmode; new_code = LEU; swap_p = true; break;
6153	case LTU: cmp_mode = CCVHUmode; new_code = GTU; swap_p = true; break;
6154	case LEU: cmp_mode = CCVHUmode; new_code = LEU; break;
6155	default: gcc_unreachable ();
6156	}
6157      scratch_mode = GET_MODE (cmp1);
6158    }
6159  else if (GET_MODE (cmp1) == V2DFmode)
6160    {
6161      switch (code)
6162	{
6163	case EQ:   cmp_mode = CCVEQmode;  break;
6164	case NE:   cmp_mode = CCVEQmode;  break;
6165	case GT:   cmp_mode = CCVFHmode;  break;
6166	case GE:   cmp_mode = CCVFHEmode; break;
6167	case UNLE: cmp_mode = CCVFHmode;  break;
6168	case UNLT: cmp_mode = CCVFHEmode; break;
6169	case LT:   cmp_mode = CCVFHmode;  new_code = GT; swap_p = true; break;
6170	case LE:   cmp_mode = CCVFHEmode; new_code = GE; swap_p = true; break;
6171	default: gcc_unreachable ();
6172	}
6173      scratch_mode = V2DImode;
6174    }
6175  else
6176    gcc_unreachable ();
6177
6178  if (!all_p)
6179    switch (cmp_mode)
6180      {
6181      case CCVEQmode:  full_cmp_mode = CCVEQANYmode;  break;
6182      case CCVHmode:   full_cmp_mode = CCVHANYmode;   break;
6183      case CCVHUmode:  full_cmp_mode = CCVHUANYmode;  break;
6184      case CCVFHmode:  full_cmp_mode = CCVFHANYmode;  break;
6185      case CCVFHEmode: full_cmp_mode = CCVFHEANYmode; break;
6186      default: gcc_unreachable ();
6187      }
6188  else
6189    /* The modes without ANY match the ALL modes.  */
6190    full_cmp_mode = cmp_mode;
6191
6192  if (swap_p)
6193    {
6194      rtx tmp = cmp2;
6195      cmp2 = cmp1;
6196      cmp1 = tmp;
6197    }
6198
6199  emit_insn (gen_rtx_PARALLEL (VOIDmode,
6200	       gen_rtvec (2, gen_rtx_SET (VOIDmode,
6201			       gen_rtx_REG (cmp_mode, CC_REGNUM),
6202			       gen_rtx_COMPARE (cmp_mode, cmp1, cmp2)),
6203			  gen_rtx_CLOBBER (VOIDmode,
6204					   gen_rtx_SCRATCH (scratch_mode)))));
6205  emit_move_insn (target, const0_rtx);
6206  emit_move_insn (tmp_reg, const1_rtx);
6207
6208  emit_move_insn (target,
6209		  gen_rtx_IF_THEN_ELSE (SImode,
6210		    gen_rtx_fmt_ee (new_code, VOIDmode,
6211				    gen_rtx_REG (full_cmp_mode, CC_REGNUM),
6212				    const0_rtx),
6213		      target, tmp_reg));
6214}
6215
6216/* Generate a vector comparison expression loading either elements of
6217   THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6218   and CMP_OP2.  */
6219
6220void
6221s390_expand_vcond (rtx target, rtx then, rtx els,
6222		   enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6223{
6224  rtx tmp;
6225  machine_mode result_mode;
6226  rtx result_target;
6227
6228  /* We always use an integral type vector to hold the comparison
6229     result.  */
6230  result_mode = GET_MODE (cmp_op1) == V2DFmode ? V2DImode : GET_MODE (cmp_op1);
6231  result_target = gen_reg_rtx (result_mode);
6232
6233  /* Alternatively this could be done by reload by lowering the cmp*
6234     predicates.  But it appears to be better for scheduling etc. to
6235     have it done early.  */
6236  if (!REG_P (cmp_op1))
6237    cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6238
6239  if (!REG_P (cmp_op2))
6240    cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6241
6242  s390_expand_vec_compare (result_target, cond,
6243			   cmp_op1, cmp_op2);
6244
6245  /* If the results are supposed to be either -1 or 0 we are done
6246     since this is what our compare instructions generate anyway.  */
6247  if (all_ones_operand (then, GET_MODE (then))
6248      && const0_operand (els, GET_MODE (els)))
6249    {
6250      emit_move_insn (target, gen_rtx_SUBREG (GET_MODE (target),
6251					      result_target, 0));
6252      return;
6253    }
6254
6255  /* Otherwise we will do a vsel afterwards.  */
6256  /* This gets triggered e.g.
6257     with gcc.c-torture/compile/pr53410-1.c */
6258  if (!REG_P (then))
6259    then = force_reg (GET_MODE (target), then);
6260
6261  if (!REG_P (els))
6262    els = force_reg (GET_MODE (target), els);
6263
6264  tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6265			result_target,
6266			CONST0_RTX (result_mode));
6267
6268  /* We compared the result against zero above so we have to swap then
6269     and els here.  */
6270  tmp = gen_rtx_IF_THEN_ELSE (GET_MODE (target), tmp, els, then);
6271
6272  gcc_assert (GET_MODE (target) == GET_MODE (then));
6273  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
6274}
6275
6276/* Emit the RTX necessary to initialize the vector TARGET with values
6277   in VALS.  */
6278void
6279s390_expand_vec_init (rtx target, rtx vals)
6280{
6281  machine_mode mode = GET_MODE (target);
6282  machine_mode inner_mode = GET_MODE_INNER (mode);
6283  int n_elts = GET_MODE_NUNITS (mode);
6284  bool all_same = true, all_regs = true, all_const_int = true;
6285  rtx x;
6286  int i;
6287
6288  for (i = 0; i < n_elts; ++i)
6289    {
6290      x = XVECEXP (vals, 0, i);
6291
6292      if (!CONST_INT_P (x))
6293	all_const_int = false;
6294
6295      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6296	all_same = false;
6297
6298      if (!REG_P (x))
6299	all_regs = false;
6300    }
6301
6302  /* Use vector gen mask or vector gen byte mask if possible.  */
6303  if (all_same && all_const_int
6304      && (XVECEXP (vals, 0, 0) == const0_rtx
6305	  || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6306					       NULL, NULL)
6307	  || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6308    {
6309      emit_insn (gen_rtx_SET (VOIDmode, target,
6310			      gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6311      return;
6312    }
6313
6314  if (all_same)
6315    {
6316      emit_insn (gen_rtx_SET (VOIDmode, target,
6317			      gen_rtx_VEC_DUPLICATE (mode,
6318						     XVECEXP (vals, 0, 0))));
6319      return;
6320    }
6321
6322  if (all_regs && REG_P (target) && n_elts == 2 && inner_mode == DImode)
6323    {
6324      /* Use vector load pair.  */
6325      emit_insn (gen_rtx_SET (VOIDmode, target,
6326			      gen_rtx_VEC_CONCAT (mode,
6327						  XVECEXP (vals, 0, 0),
6328						  XVECEXP (vals, 0, 1))));
6329      return;
6330    }
6331
6332  /* We are about to set the vector elements one by one.  Zero out the
6333     full register first in order to help the data flow framework to
6334     detect it as a full VR set.  */
6335  emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
6336
6337  /* Unfortunately the vec_init expander is not allowed to fail.  So
6338     we have to implement the fallback ourselves.  */
6339  for (i = 0; i < n_elts; i++)
6340    emit_insn (gen_rtx_SET (VOIDmode, target,
6341			    gen_rtx_UNSPEC (mode,
6342					    gen_rtvec (3, XVECEXP (vals, 0, i),
6343						       GEN_INT (i), target),
6344					    UNSPEC_VEC_SET)));
6345}
6346
6347/* Structure to hold the initial parameters for a compare_and_swap operation
6348   in HImode and QImode.  */
6349
6350struct alignment_context
6351{
6352  rtx memsi;	  /* SI aligned memory location.  */
6353  rtx shift;	  /* Bit offset with regard to lsb.  */
6354  rtx modemask;	  /* Mask of the HQImode shifted by SHIFT bits.  */
6355  rtx modemaski;  /* ~modemask */
6356  bool aligned;	  /* True if memory is aligned, false otherwise.  */
6357};
6358
6359/* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6360   the structure AC for transparent simplification, if the memory alignment
6361   is known to be at least 32 bit.  MEM is the memory location for the
6362   actual operation and MODE its mode.  */
6363
6364static void
6365init_alignment_context (struct alignment_context *ac, rtx mem,
6366			machine_mode mode)
6367{
6368  ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6369  ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6370
6371  if (ac->aligned)
6372    ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned.  */
6373  else
6374    {
6375      /* Alignment is unknown.  */
6376      rtx byteoffset, addr, align;
6377
6378      /* Force the address into a register.  */
6379      addr = force_reg (Pmode, XEXP (mem, 0));
6380
6381      /* Align it to SImode.  */
6382      align = expand_simple_binop (Pmode, AND, addr,
6383				   GEN_INT (-GET_MODE_SIZE (SImode)),
6384				   NULL_RTX, 1, OPTAB_DIRECT);
6385      /* Generate MEM.  */
6386      ac->memsi = gen_rtx_MEM (SImode, align);
6387      MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6388      set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6389      set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6390
6391      /* Calculate shiftcount.  */
6392      byteoffset = expand_simple_binop (Pmode, AND, addr,
6393					GEN_INT (GET_MODE_SIZE (SImode) - 1),
6394					NULL_RTX, 1, OPTAB_DIRECT);
6395      /* As we already have some offset, evaluate the remaining distance.  */
6396      ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6397				      NULL_RTX, 1, OPTAB_DIRECT);
6398    }
6399
6400  /* Shift is the byte count, but we need the bitcount.  */
6401  ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6402				   NULL_RTX, 1, OPTAB_DIRECT);
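  /* As an example for the unaligned case on this big-endian target: a
     QImode value located one byte past an SImode boundary starts with
     shift = 3, gets byteoffset = 1 and ends up with
     shift = (3 - 1) * 8 = 16, i.e. the field occupies bits 16..23 of
     the SImode word counted from the least significant bit.  */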
6403
6404  /* Calculate masks.  */
6405  ac->modemask = expand_simple_binop (SImode, ASHIFT,
6406				      GEN_INT (GET_MODE_MASK (mode)),
6407				      ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6408  ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6409				      NULL_RTX, 1);
6410}
6411
6412/* A subroutine of s390_expand_cs_hqi.  Insert INS into VAL.  If possible,
6413   use a single insv insn into SEQ2.  Otherwise, put prep insns in SEQ1 and
6414   perform the merge in SEQ2.  */
6415
6416static rtx
6417s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6418		    machine_mode mode, rtx val, rtx ins)
6419{
6420  rtx tmp;
6421
6422  if (ac->aligned)
6423    {
6424      start_sequence ();
6425      tmp = copy_to_mode_reg (SImode, val);
6426      if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6427			    const0_rtx, ins))
6428	{
6429	  *seq1 = NULL;
6430	  *seq2 = get_insns ();
6431	  end_sequence ();
6432	  return tmp;
6433	}
6434      end_sequence ();
6435    }
6436
6437  /* Failed to use insv.  Generate a two part shift and mask.  */
6438  start_sequence ();
6439  tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6440  *seq1 = get_insns ();
6441  end_sequence ();
6442
6443  start_sequence ();
6444  tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6445  *seq2 = get_insns ();
6446  end_sequence ();
6447
6448  return tmp;
6449}
6450
6451/* Expand an atomic compare and swap operation for HImode and QImode.  MEM is
6452   the memory location, CMP the old value to compare MEM with and NEW_RTX the
6453   value to set if CMP == MEM.  */
6454
6455void
6456s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6457		    rtx cmp, rtx new_rtx, bool is_weak)
6458{
6459  struct alignment_context ac;
6460  rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6461  rtx res = gen_reg_rtx (SImode);
6462  rtx_code_label *csloop = NULL, *csend = NULL;
6463
6464  gcc_assert (MEM_P (mem));
6465
6466  init_alignment_context (&ac, mem, mode);
6467
6468  /* Load full word.  Subsequent loads are performed by CS.  */
6469  val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6470			     NULL_RTX, 1, OPTAB_DIRECT);
6471
6472  /* Prepare insertions of cmp and new_rtx into the loaded value.  When
6473     possible, we try to use insv to make this happen efficiently.  If
6474     that fails we'll generate code both inside and outside the loop.  */
6475  cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6476  newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6477
6478  if (seq0)
6479    emit_insn (seq0);
6480  if (seq1)
6481    emit_insn (seq1);
6482
6483  /* Start CS loop.  */
6484  if (!is_weak)
6485    {
6486      /* Begin assuming success.  */
6487      emit_move_insn (btarget, const1_rtx);
6488
6489      csloop = gen_label_rtx ();
6490      csend = gen_label_rtx ();
6491      emit_label (csloop);
6492    }
6493
6494  /* val = "<mem>00..0<mem>"
6495   * cmp = "00..0<cmp>00..0"
6496   * new = "00..0<new>00..0"
6497   */
6498
6499  emit_insn (seq2);
6500  emit_insn (seq3);
6501
6502  cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
6503  if (is_weak)
6504    emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6505  else
6506    {
6507      rtx tmp;
6508
6509      /* Jump to end if we're done (likely?).  */
6510      s390_emit_jump (csend, cc);
6511
6512      /* Check for changes outside the mode and loop internally if so.
6513	 Arrange the moves so that the compare is adjacent to the
6514	 branch so that we can generate CRJ.  */
6515      tmp = copy_to_reg (val);
6516      force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6517			  1, OPTAB_DIRECT);
6518      cc = s390_emit_compare (NE, val, tmp);
6519      s390_emit_jump (csloop, cc);
6520
6521      /* Failed.  */
6522      emit_move_insn (btarget, const0_rtx);
6523      emit_label (csend);
6524    }
6525
6526  /* Return the correct part of the bitfield.  */
6527  convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
6528					      NULL_RTX, 1, OPTAB_DIRECT), 1);
6529}
6530
6531/* Expand an atomic operation CODE of mode MODE.  MEM is the memory location
6532   and VAL the value to play with.  If AFTER is true then store the value
6533   MEM holds after the operation, if AFTER is false then store the value MEM
6534   holds before the operation.  If TARGET is zero then discard that value, else
6535   store it to TARGET.  */
6536
6537void
6538s390_expand_atomic (machine_mode mode, enum rtx_code code,
6539		    rtx target, rtx mem, rtx val, bool after)
6540{
6541  struct alignment_context ac;
6542  rtx cmp;
6543  rtx new_rtx = gen_reg_rtx (SImode);
6544  rtx orig = gen_reg_rtx (SImode);
6545  rtx_code_label *csloop = gen_label_rtx ();
6546
6547  gcc_assert (!target || register_operand (target, VOIDmode));
6548  gcc_assert (MEM_P (mem));
6549
6550  init_alignment_context (&ac, mem, mode);
6551
6552  /* Shift val to the correct bit positions.
6553     Preserve "icm", but prevent "ex icm".  */
6554  if (!(ac.aligned && code == SET && MEM_P (val)))
6555    val = s390_expand_mask_and_shift (val, mode, ac.shift);
6556
6557  /* Further preparation insns.  */
6558  if (code == PLUS || code == MINUS)
6559    emit_move_insn (orig, val);
6560  else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
6561    val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
6562			       NULL_RTX, 1, OPTAB_DIRECT);
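  /* For AND (and the NAND emulated via MULT) the bits outside the
     HI/QImode field must be ones so that the full-word operation in
     the CS loop leaves the surrounding bytes untouched; XORing VAL,
     which has zeros outside the field, with modemaski sets exactly
     those bits.  */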
6563
6564  /* Load full word.  Subsequent loads are performed by CS.  */
6565  cmp = force_reg (SImode, ac.memsi);
6566
6567  /* Start CS loop.  */
6568  emit_label (csloop);
6569  emit_move_insn (new_rtx, cmp);
6570
6571  /* Patch new with val at correct position.  */
6572  switch (code)
6573    {
6574    case PLUS:
6575    case MINUS:
6576      val = expand_simple_binop (SImode, code, new_rtx, orig,
6577				 NULL_RTX, 1, OPTAB_DIRECT);
6578      val = expand_simple_binop (SImode, AND, val, ac.modemask,
6579				 NULL_RTX, 1, OPTAB_DIRECT);
6580      /* FALLTHRU */
6581    case SET:
6582      if (ac.aligned && MEM_P (val))
6583	store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
6584			 0, 0, SImode, val);
6585      else
6586	{
6587	  new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
6588				     NULL_RTX, 1, OPTAB_DIRECT);
6589	  new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
6590				     NULL_RTX, 1, OPTAB_DIRECT);
6591	}
6592      break;
6593    case AND:
6594    case IOR:
6595    case XOR:
6596      new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
6597				 NULL_RTX, 1, OPTAB_DIRECT);
6598      break;
6599    case MULT: /* NAND */
6600      new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
6601				 NULL_RTX, 1, OPTAB_DIRECT);
6602      new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
6603				 NULL_RTX, 1, OPTAB_DIRECT);
6604      break;
6605    default:
6606      gcc_unreachable ();
6607    }
6608
6609  s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
6610						      ac.memsi, cmp, new_rtx));
6611
6612  /* Return the correct part of the bitfield.  */
6613  if (target)
6614    convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
6615					       after ? new_rtx : cmp, ac.shift,
6616					       NULL_RTX, 1, OPTAB_DIRECT), 1);
6617}
6618
6619/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6620   We need to emit DTP-relative relocations.  */
6621
6622static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
6623
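/* For example, with SIZE == 8 and X a SYMBOL_REF for "foo" this
   prints "\t.quad\tfoo@DTPOFF".  */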
6624static void
6625s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
6626{
6627  switch (size)
6628    {
6629    case 4:
6630      fputs ("\t.long\t", file);
6631      break;
6632    case 8:
6633      fputs ("\t.quad\t", file);
6634      break;
6635    default:
6636      gcc_unreachable ();
6637    }
6638  output_addr_const (file, x);
6639  fputs ("@DTPOFF", file);
6640}
6641
6642/* Return the proper mode for REGNO being represented in the dwarf
6643   unwind table.  */
6644machine_mode
6645s390_dwarf_frame_reg_mode (int regno)
6646{
6647  machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
6648
6649  /* The rightmost 64 bits of vector registers are call-clobbered.  */
6650  if (GET_MODE_SIZE (save_mode) > 8)
6651    save_mode = DImode;
6652
6653  return save_mode;
6654}
6655
6656#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
6657/* Implement TARGET_MANGLE_TYPE.  */
6658
6659static const char *
6660s390_mangle_type (const_tree type)
6661{
6662  type = TYPE_MAIN_VARIANT (type);
6663
6664  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
6665      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
6666    return NULL;
6667
6668  if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
6669  if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
6670  if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
6671  if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
6672
6673  if (TYPE_MAIN_VARIANT (type) == long_double_type_node
6674      && TARGET_LONG_DOUBLE_128)
6675    return "g";
6676
6677  /* For all other types, use normal C++ mangling.  */
6678  return NULL;
6679}
6680#endif
6681
6682/* In the name of slightly smaller debug output, and to cater to
6683   general assembler lossage, recognize various UNSPEC sequences
6684   and turn them back into a direct symbol reference.  */
6685
6686static rtx
6687s390_delegitimize_address (rtx orig_x)
6688{
6689  rtx x, y;
6690
6691  orig_x = delegitimize_mem_from_attrs (orig_x);
6692  x = orig_x;
6693
6694  /* Extract the symbol ref from:
6695     (plus:SI (reg:SI 12 %r12)
6696              (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
6697	                            UNSPEC_GOTOFF/PLTOFF)))
6698     and
6699     (plus:SI (reg:SI 12 %r12)
6700              (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
6701                                             UNSPEC_GOTOFF/PLTOFF)
6702				 (const_int 4 [0x4]))))  */
6703  if (GET_CODE (x) == PLUS
6704      && REG_P (XEXP (x, 0))
6705      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
6706      && GET_CODE (XEXP (x, 1)) == CONST)
6707    {
6708      HOST_WIDE_INT offset = 0;
6709
6710      /* The const operand.  */
6711      y = XEXP (XEXP (x, 1), 0);
6712
6713      if (GET_CODE (y) == PLUS
6714	  && GET_CODE (XEXP (y, 1)) == CONST_INT)
6715	{
6716	  offset = INTVAL (XEXP (y, 1));
6717	  y = XEXP (y, 0);
6718	}
6719
6720      if (GET_CODE (y) == UNSPEC
6721	  && (XINT (y, 1) == UNSPEC_GOTOFF
6722	      || XINT (y, 1) == UNSPEC_PLTOFF))
6723	return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
6724    }
6725
6726  if (GET_CODE (x) != MEM)
6727    return orig_x;
6728
6729  x = XEXP (x, 0);
6730  if (GET_CODE (x) == PLUS
6731      && GET_CODE (XEXP (x, 1)) == CONST
6732      && GET_CODE (XEXP (x, 0)) == REG
6733      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6734    {
6735      y = XEXP (XEXP (x, 1), 0);
6736      if (GET_CODE (y) == UNSPEC
6737	  && XINT (y, 1) == UNSPEC_GOT)
6738	y = XVECEXP (y, 0, 0);
6739      else
6740	return orig_x;
6741    }
6742  else if (GET_CODE (x) == CONST)
6743    {
6744      /* Extract the symbol ref from:
6745	 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
6746	                               UNSPEC_PLT/GOTENT)))  */
6747
6748      y = XEXP (x, 0);
6749      if (GET_CODE (y) == UNSPEC
6750	  && (XINT (y, 1) == UNSPEC_GOTENT
6751	      || XINT (y, 1) == UNSPEC_PLT))
6752	y = XVECEXP (y, 0, 0);
6753      else
6754	return orig_x;
6755    }
6756  else
6757    return orig_x;
6758
6759  if (GET_MODE (orig_x) != Pmode)
6760    {
6761      if (GET_MODE (orig_x) == BLKmode)
6762	return orig_x;
6763      y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
6764      if (y == NULL_RTX)
6765	return orig_x;
6766    }
6767  return y;
6768}
6769
6770/* Output operand OP to stdio stream FILE.
6771   OP is an address (register + offset) which is not used to address data;
6772   instead the rightmost bits are interpreted as the value.  */
6773
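/* For example, an operand (plus (reg %r3) (const_int 46)) is printed
   as "46(%r3)".  */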
6774static void
6775print_shift_count_operand (FILE *file, rtx op)
6776{
6777  HOST_WIDE_INT offset;
6778  rtx base;
6779
6780  /* Extract base register and offset.  */
6781  if (!s390_decompose_shift_count (op, &base, &offset))
6782    gcc_unreachable ();
6783
6784  /* Sanity check.  */
6785  if (base)
6786    {
6787      gcc_assert (GET_CODE (base) == REG);
6788      gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
6789      gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
6790    }
6791
6792  /* Offsets are restricted to twelve bits.  */
6793  fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
6794  if (base)
6795    fprintf (file, "(%s)", reg_names[REGNO (base)]);
6796}
6797
6798/* Assigns the number of NOP halfwords to be emitted before and after the
6799   function label to *HW_BEFORE and *HW_AFTER.  Both pointers must not be NULL.
6800   If hotpatching is disabled for the function, the values are set to
6801   zero.  */
6802
6803static void
6804s390_function_num_hotpatch_hw (tree decl,
6805			       int *hw_before,
6806			       int *hw_after)
6807{
6808  tree attr;
6809
6810  attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
6811
6812  /* Handle the arguments of the hotpatch attribute.  The values
6813     specified via attribute might override the cmdline argument
6814     values.  */
6815  if (attr)
6816    {
6817      tree args = TREE_VALUE (attr);
6818
6819      *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
6820      *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
6821    }
6822  else
6823    {
6824      /* Use the values specified by the cmdline arguments.  */
6825      *hw_before = s390_hotpatch_hw_before_label;
6826      *hw_after = s390_hotpatch_hw_after_label;
6827    }
6828}
6829
6830/* Write the extra assembler code needed to declare a function properly.  */
6831
6832void
6833s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
6834				tree decl)
6835{
6836  int hw_before, hw_after;
6837
6838  s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
6839  if (hw_before > 0)
6840    {
6841      unsigned int function_alignment;
6842      int i;
6843
6844      /* Add a trampoline code area before the function label and initialize it
6845	 with two-byte nop instructions.  This area can be overwritten with code
6846	 that jumps to a patched version of the function.  */
6847      asm_fprintf (asm_out_file, "\tnopr\t%%r7"
6848		   "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
6849		   hw_before);
6850      for (i = 1; i < hw_before; i++)
6851	fputs ("\tnopr\t%r7\n", asm_out_file);
6852
6853      /* Note:  The function label must be aligned so that (a) the bytes of the
6854	 following nop do not cross a cacheline boundary, and (b) a jump address
6855	 (8 bytes for 64-bit targets, 4 bytes for 32-bit targets) can be
6856	 stored directly before the label without crossing a cacheline
6857	 boundary.  All this is necessary to make sure the trampoline code can
6858	 be changed atomically.
6859	 This alignment is done automatically using the FUNCTION_BOUNDARY, but
6860	 if there are NOPs before the function label, the alignment is placed
6861	 before them.  So it is necessary to duplicate the alignment after the
6862	 NOPs.  */
6863      function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
6864      if (! DECL_USER_ALIGN (decl))
6865	function_alignment = MAX (function_alignment,
6866				  (unsigned int) align_functions);
6867      fputs ("\t# alignment for hotpatch\n", asm_out_file);
6868      ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
6869    }
6870
6871  ASM_OUTPUT_LABEL (asm_out_file, fname);
6872  if (hw_after > 0)
6873    asm_fprintf (asm_out_file,
6874		 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
6875		 hw_after);
6876}
6877
6878/* Output machine-dependent UNSPECs occurring in address constant X
6879   in assembler syntax to stdio stream FILE.  Returns true if the
6880   constant X could be recognized, false otherwise.  */
6881
6882static bool
6883s390_output_addr_const_extra (FILE *file, rtx x)
6884{
6885  if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
6886    switch (XINT (x, 1))
6887      {
6888      case UNSPEC_GOTENT:
6889	output_addr_const (file, XVECEXP (x, 0, 0));
6890	fprintf (file, "@GOTENT");
6891	return true;
6892      case UNSPEC_GOT:
6893	output_addr_const (file, XVECEXP (x, 0, 0));
6894	fprintf (file, "@GOT");
6895	return true;
6896      case UNSPEC_GOTOFF:
6897	output_addr_const (file, XVECEXP (x, 0, 0));
6898	fprintf (file, "@GOTOFF");
6899	return true;
6900      case UNSPEC_PLT:
6901	output_addr_const (file, XVECEXP (x, 0, 0));
6902	fprintf (file, "@PLT");
6903	return true;
6904      case UNSPEC_PLTOFF:
6905	output_addr_const (file, XVECEXP (x, 0, 0));
6906	fprintf (file, "@PLTOFF");
6907	return true;
6908      case UNSPEC_TLSGD:
6909	output_addr_const (file, XVECEXP (x, 0, 0));
6910	fprintf (file, "@TLSGD");
6911	return true;
6912      case UNSPEC_TLSLDM:
6913	assemble_name (file, get_some_local_dynamic_name ());
6914	fprintf (file, "@TLSLDM");
6915	return true;
6916      case UNSPEC_DTPOFF:
6917	output_addr_const (file, XVECEXP (x, 0, 0));
6918	fprintf (file, "@DTPOFF");
6919	return true;
6920      case UNSPEC_NTPOFF:
6921	output_addr_const (file, XVECEXP (x, 0, 0));
6922	fprintf (file, "@NTPOFF");
6923	return true;
6924      case UNSPEC_GOTNTPOFF:
6925	output_addr_const (file, XVECEXP (x, 0, 0));
6926	fprintf (file, "@GOTNTPOFF");
6927	return true;
6928      case UNSPEC_INDNTPOFF:
6929	output_addr_const (file, XVECEXP (x, 0, 0));
6930	fprintf (file, "@INDNTPOFF");
6931	return true;
6932      }
6933
6934  if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
6935    switch (XINT (x, 1))
6936      {
6937      case UNSPEC_POOL_OFFSET:
6938	x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
6939	output_addr_const (file, x);
6940	return true;
6941      }
6942  return false;
6943}
6944
6945/* Output address operand ADDR in assembler syntax to
6946   stdio stream FILE.  */
6947
6948void
6949print_operand_address (FILE *file, rtx addr)
6950{
6951  struct s390_address ad;
6952
6953  if (s390_loadrelative_operand_p (addr, NULL, NULL))
6954    {
6955      if (!TARGET_Z10)
6956	{
6957	  output_operand_lossage ("symbolic memory references are "
6958				  "only supported on z10 or later");
6959	  return;
6960	}
6961      output_addr_const (file, addr);
6962      return;
6963    }
6964
6965  if (!s390_decompose_address (addr, &ad)
6966      || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6967      || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
6968    output_operand_lossage ("cannot decompose address");
6969
6970  if (ad.disp)
6971    output_addr_const (file, ad.disp);
6972  else
6973    fprintf (file, "0");
6974
6975  if (ad.base && ad.indx)
6976    fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
6977                              reg_names[REGNO (ad.base)]);
6978  else if (ad.base)
6979    fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
6980}
6981
6982/* Output operand X in assembler syntax to stdio stream FILE.
6983   CODE specifies the format flag.  The following format flags
6984   are recognized:
6985
6986    'C': print opcode suffix for branch condition.
6987    'D': print opcode suffix for inverse branch condition.
6988    'E': print opcode suffix for branch on index instruction.
6989    'G': print the size of the operand in bytes.
6990    'J': print tls_load/tls_gdcall/tls_ldcall suffix.
6991    'M': print the second word of a TImode operand.
6992    'N': print the second word of a DImode operand.
6993    'O': print only the displacement of a memory reference or address.
6994    'R': print only the base register of a memory reference or address.
6995    'S': print S-type memory reference (base+displacement).
6996    'Y': print shift count operand.
6997
6998    'b': print integer X as if it's an unsigned byte.
6999    'c': print integer X as if it's a signed byte.
7000    'e': "end" of contiguous bitmask X in either DImode or vector inner mode.
7001    'f': "end" of contiguous bitmask X in SImode.
7002    'h': print integer X as if it's a signed halfword.
7003    'i': print the first nonzero HImode part of X.
7004    'j': print the first HImode part unequal to -1 of X.
7005    'k': print the first nonzero SImode part of X.
7006    'm': print the first SImode part unequal to -1 of X.
7007    'o': print integer X as if it's an unsigned 32-bit word.
7008    's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7009    't': CONST_INT: "start" of contiguous bitmask X in SImode.
7010         CONST_VECTOR: Generate a bitmask for vgbm instruction.
7011    'x': print integer X as if it's an unsigned halfword.
7012    'v': print register number as vector register (v1 instead of f1).
7013*/
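
/* As an illustration of the bitmask modifiers: for a CONST_INT of
   0x000000000000fff0 (twelve contiguous one bits), 's' prints 48 and
   'e' prints 59, i.e. the start and end bit positions in the IBM
   numbering (bit 0 = most significant bit) as used, for example, by
   the RISBG-style rotate-and-insert instructions.  */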
7014
7015void
7016print_operand (FILE *file, rtx x, int code)
7017{
7018  HOST_WIDE_INT ival;
7019
7020  switch (code)
7021    {
7022    case 'C':
7023      fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7024      return;
7025
7026    case 'D':
7027      fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7028      return;
7029
7030    case 'E':
7031      if (GET_CODE (x) == LE)
7032	fprintf (file, "l");
7033      else if (GET_CODE (x) == GT)
7034	fprintf (file, "h");
7035      else
7036	output_operand_lossage ("invalid comparison operator "
7037				"for 'E' output modifier");
7038      return;
7039
7040    case 'J':
7041      if (GET_CODE (x) == SYMBOL_REF)
7042	{
7043	  fprintf (file, "%s", ":tls_load:");
7044	  output_addr_const (file, x);
7045	}
7046      else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7047	{
7048	  fprintf (file, "%s", ":tls_gdcall:");
7049	  output_addr_const (file, XVECEXP (x, 0, 0));
7050	}
7051      else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7052	{
7053	  fprintf (file, "%s", ":tls_ldcall:");
7054	  const char *name = get_some_local_dynamic_name ();
7055	  gcc_assert (name);
7056	  assemble_name (file, name);
7057	}
7058      else
7059	output_operand_lossage ("invalid reference for 'J' output modifier");
7060      return;
7061
7062    case 'G':
7063      fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7064      return;
7065
7066    case 'O':
7067      {
7068        struct s390_address ad;
7069	int ret;
7070
7071	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7072
7073	if (!ret
7074	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7075	    || ad.indx)
7076	  {
7077	    output_operand_lossage ("invalid address for 'O' output modifier");
7078	    return;
7079	  }
7080
7081        if (ad.disp)
7082          output_addr_const (file, ad.disp);
7083        else
7084          fprintf (file, "0");
7085      }
7086      return;
7087
7088    case 'R':
7089      {
7090        struct s390_address ad;
7091	int ret;
7092
7093	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7094
7095	if (!ret
7096	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7097	    || ad.indx)
7098	  {
7099	    output_operand_lossage ("invalid address for 'R' output modifier");
7100	    return;
7101	  }
7102
7103        if (ad.base)
7104          fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7105        else
7106          fprintf (file, "0");
7107      }
7108      return;
7109
7110    case 'S':
7111      {
7112	struct s390_address ad;
7113	int ret;
7114
7115	if (!MEM_P (x))
7116	  {
7117	    output_operand_lossage ("memory reference expected for "
7118				    "'S' output modifier");
7119	    return;
7120	  }
7121	ret = s390_decompose_address (XEXP (x, 0), &ad);
7122
7123	if (!ret
7124	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7125	    || ad.indx)
7126	  {
7127	    output_operand_lossage ("invalid address for 'S' output modifier");
7128	    return;
7129	  }
7130
7131	if (ad.disp)
7132	  output_addr_const (file, ad.disp);
7133	else
7134	  fprintf (file, "0");
7135
7136	if (ad.base)
7137	  fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7138      }
7139      return;
7140
7141    case 'N':
7142      if (GET_CODE (x) == REG)
7143	x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7144      else if (GET_CODE (x) == MEM)
7145	x = change_address (x, VOIDmode,
7146			    plus_constant (Pmode, XEXP (x, 0), 4));
7147      else
7148	output_operand_lossage ("register or memory expression expected "
7149				"for 'N' output modifier");
7150      break;
7151
7152    case 'M':
7153      if (GET_CODE (x) == REG)
7154	x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7155      else if (GET_CODE (x) == MEM)
7156	x = change_address (x, VOIDmode,
7157			    plus_constant (Pmode, XEXP (x, 0), 8));
7158      else
7159	output_operand_lossage ("register or memory expression expected "
7160				"for 'M' output modifier");
7161      break;
7162
7163    case 'Y':
7164      print_shift_count_operand (file, x);
7165      return;
7166    }
7167
7168  switch (GET_CODE (x))
7169    {
7170    case REG:
7171      /* Print FP regs as fx instead of vx when they are accessed
7172	 through non-vector mode.  */
7173      if (code == 'v'
7174	  || VECTOR_NOFP_REG_P (x)
7175	  || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7176	  || (VECTOR_REG_P (x)
7177	      && (GET_MODE_SIZE (GET_MODE (x)) /
7178		  s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7179	fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7180      else
7181	fprintf (file, "%s", reg_names[REGNO (x)]);
7182      break;
7183
7184    case MEM:
7185      output_address (XEXP (x, 0));
7186      break;
7187
7188    case CONST:
7189    case CODE_LABEL:
7190    case LABEL_REF:
7191    case SYMBOL_REF:
7192      output_addr_const (file, x);
7193      break;
7194
7195    case CONST_INT:
7196      ival = INTVAL (x);
7197      switch (code)
7198	{
7199	case 0:
7200	  break;
7201	case 'b':
7202	  ival &= 0xff;
7203	  break;
7204	case 'c':
7205	  ival = ((ival & 0xff) ^ 0x80) - 0x80;
7206	  break;
7207	case 'x':
7208	  ival &= 0xffff;
7209	  break;
7210	case 'h':
7211	  ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7212	  break;
7213	case 'i':
7214	  ival = s390_extract_part (x, HImode, 0);
7215	  break;
7216	case 'j':
7217	  ival = s390_extract_part (x, HImode, -1);
7218	  break;
7219	case 'k':
7220	  ival = s390_extract_part (x, SImode, 0);
7221	  break;
7222	case 'm':
7223	  ival = s390_extract_part (x, SImode, -1);
7224	  break;
7225	case 'o':
7226	  ival &= 0xffffffff;
7227	  break;
7228	case 'e': case 'f':
7229	case 's': case 't':
7230	  {
7231	    int pos, len;
7232	    bool ok;
7233
7234	    len = (code == 's' || code == 'e' ? 64 : 32);
7235	    ok = s390_contiguous_bitmask_p (ival, len, &pos, &len);
7236	    gcc_assert (ok);
7237	    if (code == 's' || code == 't')
7238	      ival = 64 - pos - len;
7239	    else
7240	      ival = 64 - 1 - pos;
7241	  }
7242	  break;
7243	default:
7244	  output_operand_lossage ("invalid constant for output modifier '%c'", code);
7245	}
7246      fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7247      break;
7248
7249    case CONST_DOUBLE:
7250      gcc_assert (GET_MODE (x) == VOIDmode);
7251      if (code == 'b')
7252        fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xff);
7253      else if (code == 'x')
7254        fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xffff);
7255      else if (code == 'h')
7256        fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7257		 ((CONST_DOUBLE_LOW (x) & 0xffff) ^ 0x8000) - 0x8000);
7258      else
7259	{
7260	  if (code == 0)
7261	    output_operand_lossage ("invalid constant - try using "
7262				    "an output modifier");
7263	  else
7264	    output_operand_lossage ("invalid constant for output modifier '%c'",
7265				    code);
7266	}
7267      break;
7268    case CONST_VECTOR:
7269      switch (code)
7270	{
7271	case 'h':
7272	  gcc_assert (s390_const_vec_duplicate_p (x));
7273	  fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7274		   ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
7275	  break;
7276	case 'e':
7277	case 's':
7278	  {
7279	    int start, stop, inner_len;
7280	    bool ok;
7281
7282	    inner_len = GET_MODE_UNIT_BITSIZE (GET_MODE (x));
7283	    ok = s390_contiguous_bitmask_vector_p (x, &start, &stop);
7284	    gcc_assert (ok);
7285	    if (code == 's' || code == 't')
7286	      ival = inner_len - stop - 1;
7287	    else
7288	      ival = inner_len - start - 1;
7289	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7290	  }
7291	  break;
7292	case 't':
7293	  {
7294	    unsigned mask;
7295	    bool ok = s390_bytemask_vector_p (x, &mask);
7296	    gcc_assert (ok);
7297	    fprintf (file, "%u", mask);
7298	  }
7299	  break;
7300
7301	default:
7302	  output_operand_lossage ("invalid constant vector for output "
7303				  "modifier '%c'", code);
7304	}
7305      break;
7306
7307    default:
7308      if (code == 0)
7309	output_operand_lossage ("invalid expression - try using "
7310				"an output modifier");
7311      else
7312	output_operand_lossage ("invalid expression for output "
7313				"modifier '%c'", code);
7314      break;
7315    }
7316}
7317
7318/* Target hook for assembling integer objects.  We need to define it
7319   here to work around a bug in some versions of GAS, which couldn't
7320   handle values smaller than INT_MIN when printed in decimal.  */
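
/* For example, (const_int -4294967296) is emitted as
   "\t.quad\t0xffffffff00000000" rather than in decimal.  */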
7321
7322static bool
7323s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
7324{
7325  if (size == 8 && aligned_p
7326      && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
7327    {
7328      fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
7329	       INTVAL (x));
7330      return true;
7331    }
7332  return default_assemble_integer (x, size, aligned_p);
7333}
7334
7335/* Returns true if register REGNO is used for forming
7336   a memory address in expression X.  */
7337
7338static bool
7339reg_used_in_mem_p (int regno, rtx x)
7340{
7341  enum rtx_code code = GET_CODE (x);
7342  int i, j;
7343  const char *fmt;
7344
7345  if (code == MEM)
7346    {
7347      if (refers_to_regno_p (regno, XEXP (x, 0)))
7348	return true;
7349    }
7350  else if (code == SET
7351	   && GET_CODE (SET_DEST (x)) == PC)
7352    {
7353      if (refers_to_regno_p (regno, SET_SRC (x)))
7354	return true;
7355    }
7356
7357  fmt = GET_RTX_FORMAT (code);
7358  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7359    {
7360      if (fmt[i] == 'e'
7361	  && reg_used_in_mem_p (regno, XEXP (x, i)))
7362	return true;
7363
7364      else if (fmt[i] == 'E')
7365	for (j = 0; j < XVECLEN (x, i); j++)
7366	  if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
7367	    return true;
7368    }
7369  return false;
7370}
7371
7372/* Returns true if expression DEP_RTX sets an address register
7373   used by instruction INSN to address memory.  */
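
/* For illustration: on older in-order pipelines a sequence such as

     lr   %r1,%r2
     l    %r3,0(%r1)

   suffers an address-generation interlock because %r1 is needed for
   address generation immediately after being set.  This predicate
   feeds s390_agen_dep_p, which the pipeline descriptions can use to
   model the extra latency.  */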
7374
7375static bool
7376addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
7377{
7378  rtx target, pat;
7379
7380  if (NONJUMP_INSN_P (dep_rtx))
7381    dep_rtx = PATTERN (dep_rtx);
7382
7383  if (GET_CODE (dep_rtx) == SET)
7384    {
7385      target = SET_DEST (dep_rtx);
7386      if (GET_CODE (target) == STRICT_LOW_PART)
7387	target = XEXP (target, 0);
7388      while (GET_CODE (target) == SUBREG)
7389	target = SUBREG_REG (target);
7390
7391      if (GET_CODE (target) == REG)
7392	{
7393	  int regno = REGNO (target);
7394
7395	  if (s390_safe_attr_type (insn) == TYPE_LA)
7396	    {
7397	      pat = PATTERN (insn);
7398	      if (GET_CODE (pat) == PARALLEL)
7399		{
7400		  gcc_assert (XVECLEN (pat, 0) == 2);
7401		  pat = XVECEXP (pat, 0, 0);
7402		}
7403	      gcc_assert (GET_CODE (pat) == SET);
7404	      return refers_to_regno_p (regno, SET_SRC (pat));
7405	    }
7406	  else if (get_attr_atype (insn) == ATYPE_AGEN)
7407	    return reg_used_in_mem_p (regno, PATTERN (insn));
7408	}
7409    }
7410  return false;
7411}
7412
7413/* Return 1 if DEP_INSN sets a register used by INSN in the agen unit.  */
7414
7415int
7416s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
7417{
7418  rtx dep_rtx = PATTERN (dep_insn);
7419  int i;
7420
7421  if (GET_CODE (dep_rtx) == SET
7422      && addr_generation_dependency_p (dep_rtx, insn))
7423    return 1;
7424  else if (GET_CODE (dep_rtx) == PARALLEL)
7425    {
7426      for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
7427	{
7428	  if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
7429	    return 1;
7430	}
7431    }
7432  return 0;
7433}
7434
7435
7436/* A C statement (sans semicolon) to update the integer scheduling priority
7437   INSN_PRIORITY (INSN).  Increase the priority to execute the INSN earlier,
7438   reduce the priority to execute INSN later.  Do not define this macro if
7439   you do not need to adjust the scheduling priorities of insns.
7440
7441   A STD instruction should be scheduled earlier,
7442   in order to use the bypass.  */
7443static int
7444s390_adjust_priority (rtx_insn *insn, int priority)
7445{
7446  if (! INSN_P (insn))
7447    return priority;
7448
7449  if (s390_tune != PROCESSOR_2084_Z990
7450      && s390_tune != PROCESSOR_2094_Z9_109
7451      && s390_tune != PROCESSOR_2097_Z10
7452      && s390_tune != PROCESSOR_2817_Z196
7453      && s390_tune != PROCESSOR_2827_ZEC12
7454      && s390_tune != PROCESSOR_2964_Z13)
7455    return priority;
7456
7457  switch (s390_safe_attr_type (insn))
7458    {
7459      case TYPE_FSTOREDF:
7460      case TYPE_FSTORESF:
7461	priority = priority << 3;
7462	break;
7463      case TYPE_STORE:
7464      case TYPE_STM:
7465	priority = priority << 1;
7466	break;
7467      default:
7468        break;
7469    }
7470  return priority;
7471}
7472
7473
7474/* The number of instructions that can be issued per cycle.  */
7475
7476static int
7477s390_issue_rate (void)
7478{
7479  switch (s390_tune)
7480    {
7481    case PROCESSOR_2084_Z990:
7482    case PROCESSOR_2094_Z9_109:
7483    case PROCESSOR_2817_Z196:
7484      return 3;
7485    case PROCESSOR_2097_Z10:
7486      return 2;
7487      /* Starting with EC12 we use the sched_reorder hook to take care
7488	 of instruction dispatch constraints.  The algorithm only
7489	 picks the best instruction and assumes only a single
7490	 instruction gets issued per cycle.  */
7491    case PROCESSOR_2827_ZEC12:
7492    default:
7493      return 1;
7494    }
7495}
7496
7497static int
7498s390_first_cycle_multipass_dfa_lookahead (void)
7499{
7500  return 4;
7501}
7502
7503/* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
7504   Fix up MEMs as required.  */
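
/* Roughly, a pool reference of the form

     (mem (symbol_ref <pool constant>))

   is rewritten as

     (mem (unspec [(symbol_ref <pool constant>) (reg <base>)] UNSPEC_LTREF))

   so the base register used for the access stays explicit in the RTL.  */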
7505
7506static void
7507annotate_constant_pool_refs (rtx *x)
7508{
7509  int i, j;
7510  const char *fmt;
7511
7512  gcc_assert (GET_CODE (*x) != SYMBOL_REF
7513	      || !CONSTANT_POOL_ADDRESS_P (*x));
7514
7515  /* Literal pool references can only occur inside a MEM ...  */
7516  if (GET_CODE (*x) == MEM)
7517    {
7518      rtx memref = XEXP (*x, 0);
7519
7520      if (GET_CODE (memref) == SYMBOL_REF
7521	  && CONSTANT_POOL_ADDRESS_P (memref))
7522	{
7523	  rtx base = cfun->machine->base_reg;
7524	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
7525				     UNSPEC_LTREF);
7526
7527	  *x = replace_equiv_address (*x, addr);
7528	  return;
7529	}
7530
7531      if (GET_CODE (memref) == CONST
7532	  && GET_CODE (XEXP (memref, 0)) == PLUS
7533	  && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
7534	  && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
7535	  && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
7536	{
7537	  HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
7538	  rtx sym = XEXP (XEXP (memref, 0), 0);
7539	  rtx base = cfun->machine->base_reg;
7540	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7541				     UNSPEC_LTREF);
7542
7543	  *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
7544	  return;
7545	}
7546    }
7547
7548  /* ... or a load-address type pattern.  */
7549  if (GET_CODE (*x) == SET)
7550    {
7551      rtx addrref = SET_SRC (*x);
7552
7553      if (GET_CODE (addrref) == SYMBOL_REF
7554	  && CONSTANT_POOL_ADDRESS_P (addrref))
7555	{
7556	  rtx base = cfun->machine->base_reg;
7557	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
7558				     UNSPEC_LTREF);
7559
7560	  SET_SRC (*x) = addr;
7561	  return;
7562	}
7563
7564      if (GET_CODE (addrref) == CONST
7565	  && GET_CODE (XEXP (addrref, 0)) == PLUS
7566	  && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
7567	  && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
7568	  && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
7569	{
7570	  HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
7571	  rtx sym = XEXP (XEXP (addrref, 0), 0);
7572	  rtx base = cfun->machine->base_reg;
7573	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7574				     UNSPEC_LTREF);
7575
7576	  SET_SRC (*x) = plus_constant (Pmode, addr, off);
7577	  return;
7578	}
7579    }
7580
7581  /* Annotate LTREL_BASE as well.  */
7582  if (GET_CODE (*x) == UNSPEC
7583      && XINT (*x, 1) == UNSPEC_LTREL_BASE)
7584    {
7585      rtx base = cfun->machine->base_reg;
7586      *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
7587				  UNSPEC_LTREL_BASE);
7588      return;
7589    }
7590
7591  fmt = GET_RTX_FORMAT (GET_CODE (*x));
7592  for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7593    {
7594      if (fmt[i] == 'e')
7595        {
7596          annotate_constant_pool_refs (&XEXP (*x, i));
7597        }
7598      else if (fmt[i] == 'E')
7599        {
7600          for (j = 0; j < XVECLEN (*x, i); j++)
7601            annotate_constant_pool_refs (&XVECEXP (*x, i, j));
7602        }
7603    }
7604}
7605
7606/* Split all branches that exceed the maximum distance.
7607   Returns true if this created a new literal pool entry.  */
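
/* Roughly speaking, a branch whose target is out of range is rewritten
   to load the target address into the return register -- either
   directly from the literal pool, or (for PIC code) as a pool-relative
   LTREL_OFFSET added to the pool base -- and to branch via that
   register instead.  */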
7608
7609static int
7610s390_split_branches (void)
7611{
7612  rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
7613  int new_literal = 0, ret;
7614  rtx_insn *insn;
7615  rtx pat, target;
7616  rtx *label;
7617
7618  /* We need correct insn addresses.  */
7619
7620  shorten_branches (get_insns ());
7621
7622  /* Find all branches that exceed 64KB, and split them.  */
7623
7624  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7625    {
7626      if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
7627	continue;
7628
7629      pat = PATTERN (insn);
7630      if (GET_CODE (pat) == PARALLEL)
7631	pat = XVECEXP (pat, 0, 0);
7632      if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
7633	continue;
7634
7635      if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
7636	{
7637	  label = &SET_SRC (pat);
7638	}
7639      else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
7640	{
7641	  if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
7642	    label = &XEXP (SET_SRC (pat), 1);
7643          else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
7644            label = &XEXP (SET_SRC (pat), 2);
7645	  else
7646	    continue;
7647        }
7648      else
7649	continue;
7650
7651      if (get_attr_length (insn) <= 4)
7652	continue;
7653
7654      /* We are going to use the return register as a scratch register;
7655	 make sure it will be saved/restored by the prologue/epilogue.  */
7656      cfun_frame_layout.save_return_addr_p = 1;
7657
7658      if (!flag_pic)
7659	{
7660	  new_literal = 1;
7661	  rtx mem = force_const_mem (Pmode, *label);
7662	  rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, mem), insn);
7663	  INSN_ADDRESSES_NEW (set_insn, -1);
7664	  annotate_constant_pool_refs (&PATTERN (set_insn));
7665
7666	  target = temp_reg;
7667	}
7668      else
7669	{
7670	  new_literal = 1;
7671	  target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
7672				   UNSPEC_LTREL_OFFSET);
7673	  target = gen_rtx_CONST (Pmode, target);
7674	  target = force_const_mem (Pmode, target);
7675	  rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, target), insn);
7676	  INSN_ADDRESSES_NEW (set_insn, -1);
7677	  annotate_constant_pool_refs (&PATTERN (set_insn));
7678
7679          target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
7680							cfun->machine->base_reg),
7681				   UNSPEC_LTREL_BASE);
7682	  target = gen_rtx_PLUS (Pmode, temp_reg, target);
7683	}
7684
7685      ret = validate_change (insn, label, target, 0);
7686      gcc_assert (ret);
7687    }
7688
7689  return new_literal;
7690}
7691
7692
7693/* Find an annotated literal pool symbol referenced in RTX X,
7694   and store it at REF.  Will abort if X contains references to
7695   more than one such pool symbol; multiple references to the same
7696   symbol are allowed, however.
7697
7698   The rtx pointed to by REF must be initialized to NULL_RTX
7699   by the caller before calling this routine.  */
7700
7701static void
7702find_constant_pool_ref (rtx x, rtx *ref)
7703{
7704  int i, j;
7705  const char *fmt;
7706
7707  /* Ignore LTREL_BASE references.  */
7708  if (GET_CODE (x) == UNSPEC
7709      && XINT (x, 1) == UNSPEC_LTREL_BASE)
7710    return;
7711  /* Likewise POOL_ENTRY insns.  */
7712  if (GET_CODE (x) == UNSPEC_VOLATILE
7713      && XINT (x, 1) == UNSPECV_POOL_ENTRY)
7714    return;
7715
7716  gcc_assert (GET_CODE (x) != SYMBOL_REF
7717              || !CONSTANT_POOL_ADDRESS_P (x));
7718
7719  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
7720    {
7721      rtx sym = XVECEXP (x, 0, 0);
7722      gcc_assert (GET_CODE (sym) == SYMBOL_REF
7723	          && CONSTANT_POOL_ADDRESS_P (sym));
7724
7725      if (*ref == NULL_RTX)
7726	*ref = sym;
7727      else
7728	gcc_assert (*ref == sym);
7729
7730      return;
7731    }
7732
7733  fmt = GET_RTX_FORMAT (GET_CODE (x));
7734  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7735    {
7736      if (fmt[i] == 'e')
7737        {
7738          find_constant_pool_ref (XEXP (x, i), ref);
7739        }
7740      else if (fmt[i] == 'E')
7741        {
7742          for (j = 0; j < XVECLEN (x, i); j++)
7743            find_constant_pool_ref (XVECEXP (x, i, j), ref);
7744        }
7745    }
7746}
7747
7748/* Replace every reference to the annotated literal pool
7749   symbol REF in X by its base plus OFFSET.  */
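
/* E.g. (unspec [(symbol_ref REF) (reg <base>)] UNSPEC_LTREF) becomes
   (plus (reg <base>) OFFSET), with any constant displacement that was
   already applied to the reference folded into the result.  */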
7750
7751static void
7752replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
7753{
7754  int i, j;
7755  const char *fmt;
7756
7757  gcc_assert (*x != ref);
7758
7759  if (GET_CODE (*x) == UNSPEC
7760      && XINT (*x, 1) == UNSPEC_LTREF
7761      && XVECEXP (*x, 0, 0) == ref)
7762    {
7763      *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
7764      return;
7765    }
7766
7767  if (GET_CODE (*x) == PLUS
7768      && GET_CODE (XEXP (*x, 1)) == CONST_INT
7769      && GET_CODE (XEXP (*x, 0)) == UNSPEC
7770      && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
7771      && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
7772    {
7773      rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
7774      *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
7775      return;
7776    }
7777
7778  fmt = GET_RTX_FORMAT (GET_CODE (*x));
7779  for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7780    {
7781      if (fmt[i] == 'e')
7782        {
7783          replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
7784        }
7785      else if (fmt[i] == 'E')
7786        {
7787          for (j = 0; j < XVECLEN (*x, i); j++)
7788            replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
7789        }
7790    }
7791}
7792
7793/* Check whether X contains an UNSPEC_LTREL_BASE.
7794   Return its constant pool symbol if found, NULL_RTX otherwise.  */
7795
7796static rtx
7797find_ltrel_base (rtx x)
7798{
7799  int i, j;
7800  const char *fmt;
7801
7802  if (GET_CODE (x) == UNSPEC
7803      && XINT (x, 1) == UNSPEC_LTREL_BASE)
7804    return XVECEXP (x, 0, 0);
7805
7806  fmt = GET_RTX_FORMAT (GET_CODE (x));
7807  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7808    {
7809      if (fmt[i] == 'e')
7810        {
7811          rtx fnd = find_ltrel_base (XEXP (x, i));
7812	  if (fnd)
7813	    return fnd;
7814        }
7815      else if (fmt[i] == 'E')
7816        {
7817          for (j = 0; j < XVECLEN (x, i); j++)
7818	    {
7819              rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
7820	      if (fnd)
7821		return fnd;
7822	    }
7823        }
7824    }
7825
7826  return NULL_RTX;
7827}
7828
7829/* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base.  */
7830
7831static void
7832replace_ltrel_base (rtx *x)
7833{
7834  int i, j;
7835  const char *fmt;
7836
7837  if (GET_CODE (*x) == UNSPEC
7838      && XINT (*x, 1) == UNSPEC_LTREL_BASE)
7839    {
7840      *x = XVECEXP (*x, 0, 1);
7841      return;
7842    }
7843
7844  fmt = GET_RTX_FORMAT (GET_CODE (*x));
7845  for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7846    {
7847      if (fmt[i] == 'e')
7848        {
7849          replace_ltrel_base (&XEXP (*x, i));
7850        }
7851      else if (fmt[i] == 'E')
7852        {
7853          for (j = 0; j < XVECLEN (*x, i); j++)
7854            replace_ltrel_base (&XVECEXP (*x, i, j));
7855        }
7856    }
7857}
7858
7859
7860/* We keep a list of constants which we have to add to internal
7861   constant tables in the middle of large functions.  */
7862
7863#define NR_C_MODES 32
7864machine_mode constant_modes[NR_C_MODES] =
7865{
7866  TFmode, TImode, TDmode,
7867  V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
7868  V4SFmode, V2DFmode, V1TFmode,
7869  DFmode, DImode, DDmode,
7870  V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
7871  SFmode, SImode, SDmode,
7872  V4QImode, V2HImode, V1SImode,  V1SFmode,
7873  HImode,
7874  V2QImode, V1HImode,
7875  QImode,
7876  V1QImode
7877};
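
/* The array above is ordered by decreasing size and alignment
   requirement; s390_dump_pool walks it in order, so the most strictly
   aligned constants are emitted first.  */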
7878
7879struct constant
7880{
7881  struct constant *next;
7882  rtx value;
7883  rtx_code_label *label;
7884};
7885
7886struct constant_pool
7887{
7888  struct constant_pool *next;
7889  rtx_insn *first_insn;
7890  rtx_insn *pool_insn;
7891  bitmap insns;
7892  rtx_insn *emit_pool_after;
7893
7894  struct constant *constants[NR_C_MODES];
7895  struct constant *execute;
7896  rtx_code_label *label;
7897  int size;
7898};
7899
7900/* Allocate new constant_pool structure.  */
7901
7902static struct constant_pool *
7903s390_alloc_pool (void)
7904{
7905  struct constant_pool *pool;
7906  int i;
7907
7908  pool = (struct constant_pool *) xmalloc (sizeof *pool);
7909  pool->next = NULL;
7910  for (i = 0; i < NR_C_MODES; i++)
7911    pool->constants[i] = NULL;
7912
7913  pool->execute = NULL;
7914  pool->label = gen_label_rtx ();
7915  pool->first_insn = NULL;
7916  pool->pool_insn = NULL;
7917  pool->insns = BITMAP_ALLOC (NULL);
7918  pool->size = 0;
7919  pool->emit_pool_after = NULL;
7920
7921  return pool;
7922}
7923
7924/* Create new constant pool covering instructions starting at INSN
7925   and chain it to the end of POOL_LIST.  */
7926
7927static struct constant_pool *
7928s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
7929{
7930  struct constant_pool *pool, **prev;
7931
7932  pool = s390_alloc_pool ();
7933  pool->first_insn = insn;
7934
7935  for (prev = pool_list; *prev; prev = &(*prev)->next)
7936    ;
7937  *prev = pool;
7938
7939  return pool;
7940}
7941
7942/* End the range of instructions covered by POOL at INSN and emit
7943   a placeholder insn representing the pool.  */
7944
7945static void
7946s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
7947{
7948  rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
7949
7950  if (!insn)
7951    insn = get_last_insn ();
7952
7953  pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
7954  INSN_ADDRESSES_NEW (pool->pool_insn, -1);
7955}
7956
7957/* Add INSN to the list of insns covered by POOL.  */
7958
7959static void
7960s390_add_pool_insn (struct constant_pool *pool, rtx insn)
7961{
7962  bitmap_set_bit (pool->insns, INSN_UID (insn));
7963}
7964
7965/* Return pool out of POOL_LIST that covers INSN.  */
7966
7967static struct constant_pool *
7968s390_find_pool (struct constant_pool *pool_list, rtx insn)
7969{
7970  struct constant_pool *pool;
7971
7972  for (pool = pool_list; pool; pool = pool->next)
7973    if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
7974      break;
7975
7976  return pool;
7977}
7978
7979/* Add constant VAL of mode MODE to the constant pool POOL.  */
7980
7981static void
7982s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
7983{
7984  struct constant *c;
7985  int i;
7986
7987  for (i = 0; i < NR_C_MODES; i++)
7988    if (constant_modes[i] == mode)
7989      break;
7990  gcc_assert (i != NR_C_MODES);
7991
7992  for (c = pool->constants[i]; c != NULL; c = c->next)
7993    if (rtx_equal_p (val, c->value))
7994      break;
7995
7996  if (c == NULL)
7997    {
7998      c = (struct constant *) xmalloc (sizeof *c);
7999      c->value = val;
8000      c->label = gen_label_rtx ();
8001      c->next = pool->constants[i];
8002      pool->constants[i] = c;
8003      pool->size += GET_MODE_SIZE (mode);
8004    }
8005}
8006
8007/* Return an rtx that represents the offset of X from the start of
8008   pool POOL.  */
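
/* The resulting (const (unspec [...] UNSPEC_POOL_OFFSET)) is later
   printed by s390_output_addr_const_extra as the difference
   "X - <pool label>".  */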
8009
8010static rtx
8011s390_pool_offset (struct constant_pool *pool, rtx x)
8012{
8013  rtx label;
8014
8015  label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8016  x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8017		      UNSPEC_POOL_OFFSET);
8018  return gen_rtx_CONST (GET_MODE (x), x);
8019}
8020
8021/* Find constant VAL of mode MODE in the constant pool POOL.
8022   Return an RTX describing the distance from the start of
8023   the pool to the location of the constant.  */
8024
8025static rtx
8026s390_find_constant (struct constant_pool *pool, rtx val,
8027		    machine_mode mode)
8028{
8029  struct constant *c;
8030  int i;
8031
8032  for (i = 0; i < NR_C_MODES; i++)
8033    if (constant_modes[i] == mode)
8034      break;
8035  gcc_assert (i != NR_C_MODES);
8036
8037  for (c = pool->constants[i]; c != NULL; c = c->next)
8038    if (rtx_equal_p (val, c->value))
8039      break;
8040
8041  gcc_assert (c);
8042
8043  return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8044}
8045
8046/* Check whether INSN is an execute.  Return the label_ref to its
8047   execute target template if so, NULL_RTX otherwise.  */
8048
8049static rtx
8050s390_execute_label (rtx insn)
8051{
8052  if (NONJUMP_INSN_P (insn)
8053      && GET_CODE (PATTERN (insn)) == PARALLEL
8054      && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8055      && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8056    return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8057
8058  return NULL_RTX;
8059}
8060
8061/* Add execute target for INSN to the constant pool POOL.  */
8062
8063static void
8064s390_add_execute (struct constant_pool *pool, rtx insn)
8065{
8066  struct constant *c;
8067
8068  for (c = pool->execute; c != NULL; c = c->next)
8069    if (INSN_UID (insn) == INSN_UID (c->value))
8070      break;
8071
8072  if (c == NULL)
8073    {
8074      c = (struct constant *) xmalloc (sizeof *c);
8075      c->value = insn;
8076      c->label = gen_label_rtx ();
8077      c->next = pool->execute;
8078      pool->execute = c;
8079      pool->size += 6;
8080    }
8081}
8082
8083/* Find execute target for INSN in the constant pool POOL.
8084   Return an RTX describing the distance from the start of
8085   the pool to the location of the execute target.  */
8086
8087static rtx
8088s390_find_execute (struct constant_pool *pool, rtx insn)
8089{
8090  struct constant *c;
8091
8092  for (c = pool->execute; c != NULL; c = c->next)
8093    if (INSN_UID (insn) == INSN_UID (c->value))
8094      break;
8095
8096  gcc_assert (c);
8097
8098  return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8099}
8100
8101/* For an execute INSN, extract the execute target template.  */
8102
8103static rtx
8104s390_execute_target (rtx insn)
8105{
8106  rtx pattern = PATTERN (insn);
8107  gcc_assert (s390_execute_label (insn));
8108
8109  if (XVECLEN (pattern, 0) == 2)
8110    {
8111      pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8112    }
8113  else
8114    {
8115      rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8116      int i;
8117
8118      for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8119	RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8120
8121      pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8122    }
8123
8124  return pattern;
8125}
8126
8127/* Indicate that INSN cannot be duplicated.  This is the case for
8128   execute insns that carry a unique label.  */
8129
8130static bool
8131s390_cannot_copy_insn_p (rtx_insn *insn)
8132{
8133  rtx label = s390_execute_label (insn);
8134  return label && label != const0_rtx;
8135}
8136
8137/* Dump out the constants in POOL.  If REMOTE_LABEL is true,
8138   do not emit the pool base label.  */
8139
8140static void
8141s390_dump_pool (struct constant_pool *pool, bool remote_label)
8142{
8143  struct constant *c;
8144  rtx_insn *insn = pool->pool_insn;
8145  int i;
8146
8147  /* Switch to rodata section.  */
8148  if (TARGET_CPU_ZARCH)
8149    {
8150      insn = emit_insn_after (gen_pool_section_start (), insn);
8151      INSN_ADDRESSES_NEW (insn, -1);
8152    }
8153
8154  /* Ensure minimum pool alignment.  */
8155  if (TARGET_CPU_ZARCH)
8156    insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8157  else
8158    insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
8159  INSN_ADDRESSES_NEW (insn, -1);
8160
8161  /* Emit pool base label.  */
8162  if (!remote_label)
8163    {
8164      insn = emit_label_after (pool->label, insn);
8165      INSN_ADDRESSES_NEW (insn, -1);
8166    }
8167
8168  /* Dump constants in descending alignment requirement order,
8169     ensuring proper alignment for every constant.  */
8170  for (i = 0; i < NR_C_MODES; i++)
8171    for (c = pool->constants[i]; c; c = c->next)
8172      {
8173	/* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references.  */
8174	rtx value = copy_rtx (c->value);
8175	if (GET_CODE (value) == CONST
8176	    && GET_CODE (XEXP (value, 0)) == UNSPEC
8177	    && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8178	    && XVECLEN (XEXP (value, 0), 0) == 1)
8179	  value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8180
8181	insn = emit_label_after (c->label, insn);
8182	INSN_ADDRESSES_NEW (insn, -1);
8183
8184	value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8185					 gen_rtvec (1, value),
8186					 UNSPECV_POOL_ENTRY);
8187	insn = emit_insn_after (value, insn);
8188	INSN_ADDRESSES_NEW (insn, -1);
8189      }
8190
8191  /* Ensure minimum alignment for instructions.  */
8192  insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8193  INSN_ADDRESSES_NEW (insn, -1);
8194
8195  /* Output in-pool execute template insns.  */
8196  for (c = pool->execute; c; c = c->next)
8197    {
8198      insn = emit_label_after (c->label, insn);
8199      INSN_ADDRESSES_NEW (insn, -1);
8200
8201      insn = emit_insn_after (s390_execute_target (c->value), insn);
8202      INSN_ADDRESSES_NEW (insn, -1);
8203    }
8204
8205  /* Switch back to previous section.  */
8206  if (TARGET_CPU_ZARCH)
8207    {
8208      insn = emit_insn_after (gen_pool_section_end (), insn);
8209      INSN_ADDRESSES_NEW (insn, -1);
8210    }
8211
8212  insn = emit_barrier_after (insn);
8213  INSN_ADDRESSES_NEW (insn, -1);
8214
8215  /* Remove placeholder insn.  */
8216  remove_insn (pool->pool_insn);
8217}
8218
8219/* Free all memory used by POOL.  */
8220
8221static void
8222s390_free_pool (struct constant_pool *pool)
8223{
8224  struct constant *c, *next;
8225  int i;
8226
8227  for (i = 0; i < NR_C_MODES; i++)
8228    for (c = pool->constants[i]; c; c = next)
8229      {
8230	next = c->next;
8231	free (c);
8232      }
8233
8234  for (c = pool->execute; c; c = next)
8235    {
8236      next = c->next;
8237      free (c);
8238    }
8239
8240  BITMAP_FREE (pool->insns);
8241  free (pool);
8242}
8243
8244
8245/* Collect main literal pool.  Return NULL on overflow.  */
8246
8247static struct constant_pool *
8248s390_mainpool_start (void)
8249{
8250  struct constant_pool *pool;
8251  rtx_insn *insn;
8252
8253  pool = s390_alloc_pool ();
8254
8255  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8256    {
8257      if (NONJUMP_INSN_P (insn)
8258	  && GET_CODE (PATTERN (insn)) == SET
8259	  && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8260	  && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8261	{
8262	  /* There might be two main_pool instructions if base_reg
8263	     is call-clobbered; one for shrink-wrapped code and one
8264	     for the rest.  We want to keep the first.  */
8265	  if (pool->pool_insn)
8266	    {
8267	      insn = PREV_INSN (insn);
8268	      delete_insn (NEXT_INSN (insn));
8269	      continue;
8270	    }
8271	  pool->pool_insn = insn;
8272	}
8273
8274      if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8275	{
8276	  s390_add_execute (pool, insn);
8277	}
8278      else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8279	{
8280	  rtx pool_ref = NULL_RTX;
8281	  find_constant_pool_ref (PATTERN (insn), &pool_ref);
8282	  if (pool_ref)
8283	    {
8284	      rtx constant = get_pool_constant (pool_ref);
8285	      machine_mode mode = get_pool_mode (pool_ref);
8286	      s390_add_constant (pool, constant, mode);
8287	    }
8288	}
8289
8290      /* If hot/cold partitioning is enabled we have to make sure that
8291	 the literal pool is emitted in the same section where the
8292	 initialization of the literal pool base pointer takes place.
8293	 emit_pool_after is only used in the non-overflow case on
8294	 non-z/Architecture CPUs where we can emit the literal pool at
8295	 the end of the function body within the text section.  */
8296      if (NOTE_P (insn)
8297	  && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8298	  && !pool->emit_pool_after)
8299	pool->emit_pool_after = PREV_INSN (insn);
8300    }
8301
8302  gcc_assert (pool->pool_insn || pool->size == 0);
8303
8304  if (pool->size >= 4096)
8305    {
8306      /* We're going to chunkify the pool, so remove the main
8307	 pool placeholder insn.  */
8308      remove_insn (pool->pool_insn);
8309
8310      s390_free_pool (pool);
8311      pool = NULL;
8312    }
8313
8314  /* If the function ends with the section where the literal pool
8315     should be emitted, set the marker to its end.  */
8316  if (pool && !pool->emit_pool_after)
8317    pool->emit_pool_after = get_last_insn ();
8318
8319  return pool;
8320}
8321
8322/* POOL holds the main literal pool as collected by s390_mainpool_start.
8323   Modify the current function to output the pool constants as well as
8324   the pool register setup instruction.  */
8325
8326static void
8327s390_mainpool_finish (struct constant_pool *pool)
8328{
8329  rtx base_reg = cfun->machine->base_reg;
8330
8331  /* If the pool is empty, we're done.  */
8332  if (pool->size == 0)
8333    {
8334      /* We don't actually need a base register after all.  */
8335      cfun->machine->base_reg = NULL_RTX;
8336
8337      if (pool->pool_insn)
8338	remove_insn (pool->pool_insn);
8339      s390_free_pool (pool);
8340      return;
8341    }
8342
8343  /* We need correct insn addresses.  */
8344  shorten_branches (get_insns ());
8345
8346  /* On zSeries, we use a LARL to load the pool register.  The pool is
8347     located in the .rodata section, so we emit it after the function.  */
8348  if (TARGET_CPU_ZARCH)
8349    {
8350      rtx set = gen_main_base_64 (base_reg, pool->label);
8351      rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8352      INSN_ADDRESSES_NEW (insn, -1);
8353      remove_insn (pool->pool_insn);
8354
8355      insn = get_last_insn ();
8356      pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8357      INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8358
8359      s390_dump_pool (pool, 0);
8360    }
8361
8362  /* On S/390, if the total size of the function's code plus literal pool
8363     does not exceed 4096 bytes, we use BASR to set up a function base
8364     pointer, and emit the literal pool at the end of the function.  */
8365  else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
8366	   + pool->size + 8 /* alignment slop */ < 4096)
8367    {
8368      rtx set = gen_main_base_31_small (base_reg, pool->label);
8369      rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8370      INSN_ADDRESSES_NEW (insn, -1);
8371      remove_insn (pool->pool_insn);
8372
8373      insn = emit_label_after (pool->label, insn);
8374      INSN_ADDRESSES_NEW (insn, -1);
8375
8376      /* emit_pool_after will be set by s390_mainpool_start to the
8377	 last insn of the section where the literal pool should be
8378	 emitted.  */
8379      insn = pool->emit_pool_after;
8380
8381      pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8382      INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8383
8384      s390_dump_pool (pool, 1);
8385    }
8386
8387  /* Otherwise, we emit an inline literal pool and use BASR to branch
8388     over it, setting up the pool register at the same time.  */
8389  else
8390    {
8391      rtx_code_label *pool_end = gen_label_rtx ();
8392
8393      rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
8394      rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
8395      JUMP_LABEL (insn) = pool_end;
8396      INSN_ADDRESSES_NEW (insn, -1);
8397      remove_insn (pool->pool_insn);
8398
8399      insn = emit_label_after (pool->label, insn);
8400      INSN_ADDRESSES_NEW (insn, -1);
8401
8402      pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8403      INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8404
8405      insn = emit_label_after (pool_end, pool->pool_insn);
8406      INSN_ADDRESSES_NEW (insn, -1);
8407
8408      s390_dump_pool (pool, 1);
8409    }
8410
8411
8412  /* Replace all literal pool references.  */
8413
8414  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8415    {
8416      if (INSN_P (insn))
8417	replace_ltrel_base (&PATTERN (insn));
8418
8419      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8420        {
8421          rtx addr, pool_ref = NULL_RTX;
8422          find_constant_pool_ref (PATTERN (insn), &pool_ref);
8423          if (pool_ref)
8424            {
8425	      if (s390_execute_label (insn))
8426		addr = s390_find_execute (pool, insn);
8427	      else
8428		addr = s390_find_constant (pool, get_pool_constant (pool_ref),
8429						 get_pool_mode (pool_ref));
8430
8431              replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8432              INSN_CODE (insn) = -1;
8433            }
8434        }
8435    }
8436
8437
8438  /* Free the pool.  */
8439  s390_free_pool (pool);
8440}
8441
8442/* POOL holds the main literal pool as collected by s390_mainpool_start.
8443   We have decided we cannot use this pool, so revert all changes
8444   to the current function that were done by s390_mainpool_start.  */
8445static void
8446s390_mainpool_cancel (struct constant_pool *pool)
8447{
8448  /* We didn't actually change the instruction stream, so simply
8449     free the pool memory.  */
8450  s390_free_pool (pool);
8451}
8452
8453
8454/* Chunkify the literal pool.  */
8455
8456#define S390_POOL_CHUNK_MIN	0xc00
8457#define S390_POOL_CHUNK_MAX	0xe00
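
/* Both limits stay well below the 4 KB reach of the unsigned 12-bit
   displacement used to address pool entries, leaving headroom for
   alignment padding and for the base-register reload insns inserted
   later.  */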
8458
8459static struct constant_pool *
8460s390_chunkify_start (void)
8461{
8462  struct constant_pool *curr_pool = NULL, *pool_list = NULL;
8463  int extra_size = 0;
8464  bitmap far_labels;
8465  rtx pending_ltrel = NULL_RTX;
8466  rtx_insn *insn;
8467
8468  rtx (*gen_reload_base) (rtx, rtx) =
8469    TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
8470
8471
8472  /* We need correct insn addresses.  */
8473
8474  shorten_branches (get_insns ());
8475
8476  /* Scan all insns and move literals to pool chunks.  */
8477
8478  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8479    {
8480      bool section_switch_p = false;
8481
8482      /* Check for pending LTREL_BASE.  */
8483      if (INSN_P (insn))
8484	{
8485	  rtx ltrel_base = find_ltrel_base (PATTERN (insn));
8486	  if (ltrel_base)
8487	    {
8488	      gcc_assert (ltrel_base == pending_ltrel);
8489	      pending_ltrel = NULL_RTX;
8490	    }
8491	}
8492
8493      if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8494	{
8495	  if (!curr_pool)
8496	    curr_pool = s390_start_pool (&pool_list, insn);
8497
8498	  s390_add_execute (curr_pool, insn);
8499	  s390_add_pool_insn (curr_pool, insn);
8500	}
8501      else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8502	{
8503	  rtx pool_ref = NULL_RTX;
8504	  find_constant_pool_ref (PATTERN (insn), &pool_ref);
8505	  if (pool_ref)
8506	    {
8507	      rtx constant = get_pool_constant (pool_ref);
8508	      machine_mode mode = get_pool_mode (pool_ref);
8509
8510	      if (!curr_pool)
8511		curr_pool = s390_start_pool (&pool_list, insn);
8512
8513	      s390_add_constant (curr_pool, constant, mode);
8514	      s390_add_pool_insn (curr_pool, insn);
8515
8516	      /* Don't split the pool chunk between a LTREL_OFFSET load
8517		 and the corresponding LTREL_BASE.  */
8518	      if (GET_CODE (constant) == CONST
8519		  && GET_CODE (XEXP (constant, 0)) == UNSPEC
8520		  && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
8521		{
8522		  gcc_assert (!pending_ltrel);
8523		  pending_ltrel = pool_ref;
8524		}
8525	    }
8526	}
8527
8528      if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
8529	{
8530	  if (curr_pool)
8531	    s390_add_pool_insn (curr_pool, insn);
8532	  /* An LTREL_BASE must follow within the same basic block.  */
8533	  gcc_assert (!pending_ltrel);
8534	}
8535
8536      if (NOTE_P (insn))
8537	switch (NOTE_KIND (insn))
8538	  {
8539	  case NOTE_INSN_SWITCH_TEXT_SECTIONS:
8540	    section_switch_p = true;
8541	    break;
8542	  case NOTE_INSN_VAR_LOCATION:
8543	  case NOTE_INSN_CALL_ARG_LOCATION:
8544	    continue;
8545	  default:
8546	    break;
8547	  }
8548
8549      if (!curr_pool
8550	  || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
8551          || INSN_ADDRESSES (INSN_UID (insn)) == -1)
8552	continue;
8553
8554      if (TARGET_CPU_ZARCH)
8555	{
8556	  if (curr_pool->size < S390_POOL_CHUNK_MAX)
8557	    continue;
8558
8559	  s390_end_pool (curr_pool, NULL);
8560	  curr_pool = NULL;
8561	}
8562      else
8563	{
8564          int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
8565			   - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
8566			 + extra_size;
8567
8568	  /* We will later have to insert base register reload insns.
8569	     Those will have an effect on code size, which we need to
8570	     consider here.  This calculation makes rather pessimistic
8571	     worst-case assumptions.  */
8572	  if (LABEL_P (insn))
8573	    extra_size += 6;
8574
8575	  if (chunk_size < S390_POOL_CHUNK_MIN
8576	      && curr_pool->size < S390_POOL_CHUNK_MIN
8577	      && !section_switch_p)
8578	    continue;
8579
8580	  /* Pool chunks can only be inserted after BARRIERs ...  */
8581	  if (BARRIER_P (insn))
8582	    {
8583	      s390_end_pool (curr_pool, insn);
8584	      curr_pool = NULL;
8585	      extra_size = 0;
8586	    }
8587
8588	  /* ... so if we don't find one in time, create one.  */
8589          else if (chunk_size > S390_POOL_CHUNK_MAX
8590	           || curr_pool->size > S390_POOL_CHUNK_MAX
8591		   || section_switch_p)
8592	    {
8593	      rtx_insn *label, *jump, *barrier, *next, *prev;
8594
8595	      if (!section_switch_p)
8596		{
8597		  /* We can insert the barrier only after a 'real' insn.  */
8598		  if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
8599		    continue;
8600		  if (get_attr_length (insn) == 0)
8601		    continue;
8602		  /* Don't separate LTREL_BASE from the corresponding
8603		     LTREL_OFFSET load.  */
8604		  if (pending_ltrel)
8605		    continue;
8606		  next = insn;
8607		  do
8608		    {
8609		      insn = next;
8610		      next = NEXT_INSN (insn);
8611		    }
8612		  while (next
8613			 && NOTE_P (next)
8614			 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
8615			     || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
8616		}
8617	      else
8618		{
8619		  gcc_assert (!pending_ltrel);
8620
8621		  /* The old pool has to end before the section switch
8622		     note in order to make it part of the current
8623		     section.  */
8624		  insn = PREV_INSN (insn);
8625		}
8626
8627	      label = gen_label_rtx ();
8628	      prev = insn;
8629	      if (prev && NOTE_P (prev))
8630		prev = prev_nonnote_insn (prev);
8631	      if (prev)
8632		jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
8633						    INSN_LOCATION (prev));
8634	      else
8635		jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
8636	      barrier = emit_barrier_after (jump);
8637	      insn = emit_label_after (label, barrier);
8638	      JUMP_LABEL (jump) = label;
8639	      LABEL_NUSES (label) = 1;
8640
8641	      INSN_ADDRESSES_NEW (jump, -1);
8642	      INSN_ADDRESSES_NEW (barrier, -1);
8643	      INSN_ADDRESSES_NEW (insn, -1);
8644
8645	      s390_end_pool (curr_pool, barrier);
8646	      curr_pool = NULL;
8647	      extra_size = 0;
8648	    }
8649	}
8650    }
8651
8652  if (curr_pool)
8653    s390_end_pool (curr_pool, NULL);
8654  gcc_assert (!pending_ltrel);
8655
8656  /* Find all labels that are branched into
8657     from an insn belonging to a different chunk.  */
8658
8659  far_labels = BITMAP_ALLOC (NULL);
8660
8661  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8662    {
8663      rtx_jump_table_data *table;
8664
8665      /* Labels marked with LABEL_PRESERVE_P can be the target
8666	 of non-local jumps, so we have to mark them.
8667	 The same holds for named labels.
8668
8669	 Don't do that, however, if it is the label before
8670	 a jump table.  */
8671
8672      if (LABEL_P (insn)
8673	  && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
8674	{
8675	  rtx_insn *vec_insn = NEXT_INSN (insn);
8676	  if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
8677	    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
8678	}
8679      /* Check potential targets in a table jump (casesi_jump).  */
8680      else if (tablejump_p (insn, NULL, &table))
8681	{
8682	  rtx vec_pat = PATTERN (table);
8683	  int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
8684
8685	  for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
8686	    {
8687	      rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
8688
8689	      if (s390_find_pool (pool_list, label)
8690		  != s390_find_pool (pool_list, insn))
8691		bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8692	    }
8693	}
8694      /* If we have a direct jump (conditional or unconditional),
8695	 check all potential targets.  */
8696      else if (JUMP_P (insn))
8697	{
8698	  rtx pat = PATTERN (insn);
8699
8700	  if (GET_CODE (pat) == PARALLEL)
8701	    pat = XVECEXP (pat, 0, 0);
8702
8703	  if (GET_CODE (pat) == SET)
8704	    {
8705	      rtx label = JUMP_LABEL (insn);
8706	      if (label && !ANY_RETURN_P (label))
8707		{
8708		  if (s390_find_pool (pool_list, label)
8709		      != s390_find_pool (pool_list, insn))
8710		    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8711		}
8712	    }
8713	}
8714    }
8715
8716  /* Insert base register reload insns before every pool.  */
8717
8718  for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8719    {
8720      rtx new_insn = gen_reload_base (cfun->machine->base_reg,
8721				      curr_pool->label);
8722      rtx_insn *insn = curr_pool->first_insn;
8723      INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
8724    }
8725
8726  /* Insert base register reload insns at every far label.  */
8727
8728  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8729    if (LABEL_P (insn)
8730        && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
8731      {
8732	struct constant_pool *pool = s390_find_pool (pool_list, insn);
8733	if (pool)
8734	  {
8735	    rtx new_insn = gen_reload_base (cfun->machine->base_reg,
8736					    pool->label);
8737	    INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
8738	  }
8739      }
8740
8741
8742  BITMAP_FREE (far_labels);
8743
8744
8745  /* Recompute insn addresses.  */
8746
8747  init_insn_lengths ();
8748  shorten_branches (get_insns ());
8749
8750  return pool_list;
8751}
8752
8753/* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
8754   After we have decided to use this list, finish implementing
8755   all changes to the current function as required.  */
8756
8757static void
8758s390_chunkify_finish (struct constant_pool *pool_list)
8759{
8760  struct constant_pool *curr_pool = NULL;
8761  rtx_insn *insn;
8762
8763
8764  /* Replace all literal pool references.  */
8765
8766  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8767    {
8768      if (INSN_P (insn))
8769	replace_ltrel_base (&PATTERN (insn));
8770
8771      curr_pool = s390_find_pool (pool_list, insn);
8772      if (!curr_pool)
8773	continue;
8774
8775      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8776        {
8777          rtx addr, pool_ref = NULL_RTX;
8778          find_constant_pool_ref (PATTERN (insn), &pool_ref);
8779          if (pool_ref)
8780            {
8781	      if (s390_execute_label (insn))
8782		addr = s390_find_execute (curr_pool, insn);
8783	      else
8784		addr = s390_find_constant (curr_pool,
8785					   get_pool_constant (pool_ref),
8786					   get_pool_mode (pool_ref));
8787
8788              replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8789              INSN_CODE (insn) = -1;
8790            }
8791        }
8792    }
8793
8794  /* Dump out all literal pools.  */
8795
8796  for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8797    s390_dump_pool (curr_pool, 0);
8798
8799  /* Free pool list.  */
8800
8801  while (pool_list)
8802    {
8803      struct constant_pool *next = pool_list->next;
8804      s390_free_pool (pool_list);
8805      pool_list = next;
8806    }
8807}
8808
8809/* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
8810   We have decided we cannot use this list, so revert all changes
8811   to the current function that were done by s390_chunkify_start.  */
8812
8813static void
8814s390_chunkify_cancel (struct constant_pool *pool_list)
8815{
8816  struct constant_pool *curr_pool = NULL;
8817  rtx_insn *insn;
8818
8819  /* Remove all pool placeholder insns.  */
8820
8821  for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8822    {
8823      /* Did we insert an extra barrier?  Remove it.  */
8824      rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
8825      rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
8826      rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
8827
8828      if (jump && JUMP_P (jump)
8829	  && barrier && BARRIER_P (barrier)
8830	  && label && LABEL_P (label)
8831	  && GET_CODE (PATTERN (jump)) == SET
8832	  && SET_DEST (PATTERN (jump)) == pc_rtx
8833	  && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
8834	  && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
8835	{
8836	  remove_insn (jump);
8837	  remove_insn (barrier);
8838	  remove_insn (label);
8839	}
8840
8841      remove_insn (curr_pool->pool_insn);
8842    }
8843
8844  /* Remove all base register reload insns.  */
8845
8846  for (insn = get_insns (); insn; )
8847    {
8848      rtx_insn *next_insn = NEXT_INSN (insn);
8849
8850      if (NONJUMP_INSN_P (insn)
8851	  && GET_CODE (PATTERN (insn)) == SET
8852	  && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
8853	  && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
8854	remove_insn (insn);
8855
8856      insn = next_insn;
8857    }
8858
8859  /* Free pool list.  */
8860
8861  while (pool_list)
8862    {
8863      struct constant_pool *next = pool_list->next;
8864      s390_free_pool (pool_list);
8865      pool_list = next;
8866    }
8867}
8868
8869/* Output the constant pool entry EXP in mode MODE with alignment ALIGN.  */
8870
8871void
8872s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
8873{
8874  REAL_VALUE_TYPE r;
8875
8876  switch (GET_MODE_CLASS (mode))
8877    {
8878    case MODE_FLOAT:
8879    case MODE_DECIMAL_FLOAT:
8880      gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
8881
8882      REAL_VALUE_FROM_CONST_DOUBLE (r, exp);
8883      assemble_real (r, mode, align);
8884      break;
8885
8886    case MODE_INT:
8887      assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
8888      mark_symbol_refs_as_used (exp);
8889      break;
8890
8891    case MODE_VECTOR_INT:
8892    case MODE_VECTOR_FLOAT:
8893      {
8894	int i;
8895	machine_mode inner_mode;
8896	gcc_assert (GET_CODE (exp) == CONST_VECTOR);
8897
8898	inner_mode = GET_MODE_INNER (GET_MODE (exp));
8899	for (i = 0; i < XVECLEN (exp, 0); i++)
8900	  s390_output_pool_entry (XVECEXP (exp, 0, i),
8901				  inner_mode,
8902				  i == 0
8903				  ? align
8904				  : GET_MODE_BITSIZE (inner_mode));
8905      }
8906      break;
8907
8908    default:
8909      gcc_unreachable ();
8910    }
8911}
8912
8913
8914/* Return an RTL expression representing the value of the return address
8915   for the frame COUNT steps up from the current frame.  FRAME is the
8916   frame pointer of that frame.  */
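/* For illustration (assuming the default 64-bit layout): UNITS_PER_LONG is 8
   and RETURN_REGNUM is r14, so for COUNT > 0 the slot read below sits at
   FRAME + 14 * 8 = FRAME + 112, or at FRAME - 16 with -mpacked-stack.  */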
8917
8918rtx
8919s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
8920{
8921  int offset;
8922  rtx addr;
8923
8924  /* Without backchain, we fail for all but the current frame.  */
8925
8926  if (!TARGET_BACKCHAIN && count > 0)
8927    return NULL_RTX;
8928
8929  /* For the current frame, we need to make sure the initial
8930     value of RETURN_REGNUM is actually saved.  */
8931
8932  if (count == 0)
8933    {
8934      /* On non-z architectures branch splitting could overwrite r14.  */
8935      if (TARGET_CPU_ZARCH)
8936	return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
8937      else
8938	{
8939	  cfun_frame_layout.save_return_addr_p = true;
8940	  return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
8941	}
8942    }
8943
8944  if (TARGET_PACKED_STACK)
8945    offset = -2 * UNITS_PER_LONG;
8946  else
8947    offset = RETURN_REGNUM * UNITS_PER_LONG;
8948
8949  addr = plus_constant (Pmode, frame, offset);
8950  addr = memory_address (Pmode, addr);
8951  return gen_rtx_MEM (Pmode, addr);
8952}
8953
8954/* Return an RTL expression representing the back chain stored in
8955   the current stack frame.  */
8956
8957rtx
8958s390_back_chain_rtx (void)
8959{
8960  rtx chain;
8961
8962  gcc_assert (TARGET_BACKCHAIN);
8963
8964  if (TARGET_PACKED_STACK)
8965    chain = plus_constant (Pmode, stack_pointer_rtx,
8966			   STACK_POINTER_OFFSET - UNITS_PER_LONG);
8967  else
8968    chain = stack_pointer_rtx;
8969
8970  chain = gen_rtx_MEM (Pmode, chain);
8971  return chain;
8972}
8973
8974/* Find first call clobbered register unused in a function.
8975   This could be used as base register in a leaf function
8976   or for holding the return address before epilogue.  */
8977
8978static int
8979find_unused_clobbered_reg (void)
8980{
8981  int i;
8982  for (i = 0; i < 6; i++)
8983    if (!df_regs_ever_live_p (i))
8984      return i;
8985  return 0;
8986}
8987
8988
8989/* Helper function for s390_regs_ever_clobbered.  Sets the fields in DATA for all
8990   clobbered hard regs in SETREG.  */
8991
8992static void
8993s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
8994{
8995  char *regs_ever_clobbered = (char *)data;
8996  unsigned int i, regno;
8997  machine_mode mode = GET_MODE (setreg);
8998
8999  if (GET_CODE (setreg) == SUBREG)
9000    {
9001      rtx inner = SUBREG_REG (setreg);
9002      if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9003	return;
9004      regno = subreg_regno (setreg);
9005    }
9006  else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9007    regno = REGNO (setreg);
9008  else
9009    return;
9010
9011  for (i = regno;
9012       i < regno + HARD_REGNO_NREGS (regno, mode);
9013       i++)
9014    regs_ever_clobbered[i] = 1;
9015}
9016
9017/* Walks through all basic blocks of the current function looking
9018   for clobbered hard regs using s390_reg_clobbered_rtx.  The fields
9019   of the passed char array REGS_EVER_CLOBBERED are set to one for
9020   each of those regs.  */
9021
9022static void
9023s390_regs_ever_clobbered (char regs_ever_clobbered[])
9024{
9025  basic_block cur_bb;
9026  rtx_insn *cur_insn;
9027  unsigned int i;
9028
9029  memset (regs_ever_clobbered, 0, 32);
9030
9031  /* For non-leaf functions we have to consider all call clobbered regs to be
9032     clobbered.  */
9033  if (!crtl->is_leaf)
9034    {
9035      for (i = 0; i < 32; i++)
9036	regs_ever_clobbered[i] = call_really_used_regs[i];
9037    }
9038
9039  /* Make the "magic" eh_return registers live if necessary.  For regs_ever_live
9040     this work is done by liveness analysis (mark_regs_live_at_end).
9041     Special care is needed for functions containing landing pads.  Landing pads
9042     may use the eh registers, but the code which sets these registers is not
9043     contained in that function.  Hence s390_regs_ever_clobbered is not able to
9044     deal with this automatically.  */
9045  if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9046    for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9047      if (crtl->calls_eh_return
9048	  || (cfun->machine->has_landing_pad_p
9049	      && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9050	regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9051
9052  /* For nonlocal gotos all call-saved registers have to be saved.
9053     This flag is also set for the unwinding code in libgcc.
9054     See expand_builtin_unwind_init.  For regs_ever_live this is done by
9055     reload.  */
9056  if (crtl->saves_all_registers)
9057    for (i = 0; i < 32; i++)
9058      if (!call_really_used_regs[i])
9059	regs_ever_clobbered[i] = 1;
9060
9061  FOR_EACH_BB_FN (cur_bb, cfun)
9062    {
9063      FOR_BB_INSNS (cur_bb, cur_insn)
9064	{
9065	  rtx pat;
9066
9067	  if (!INSN_P (cur_insn))
9068	    continue;
9069
9070	  pat = PATTERN (cur_insn);
9071
9072	  /* Ignore GPR restore insns.  */
9073	  if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9074	    {
9075	      if (GET_CODE (pat) == SET
9076		  && GENERAL_REG_P (SET_DEST (pat)))
9077		{
9078		  /* lgdr  */
9079		  if (GET_MODE (SET_SRC (pat)) == DImode
9080		      && FP_REG_P (SET_SRC (pat)))
9081		    continue;
9082
9083		  /* l / lg  */
9084		  if (GET_CODE (SET_SRC (pat)) == MEM)
9085		    continue;
9086		}
9087
9088	      /* lm / lmg */
9089	      if (GET_CODE (pat) == PARALLEL
9090		  && load_multiple_operation (pat, VOIDmode))
9091		continue;
9092	    }
9093
9094	  note_stores (pat,
9095		       s390_reg_clobbered_rtx,
9096		       regs_ever_clobbered);
9097	}
9098    }
9099}
9100
9101/* Determine the frame area which actually has to be accessed
9102   in the function epilogue. The values are stored at the
9103   given pointers AREA_BOTTOM (address of the lowest used stack
9104   address) and AREA_TOP (address of the first item which does
9105   not belong to the stack frame).  */
9106
9107static void
9108s390_frame_area (int *area_bottom, int *area_top)
9109{
9110  int b, t;
9111
9112  b = INT_MAX;
9113  t = INT_MIN;
9114
9115  if (cfun_frame_layout.first_restore_gpr != -1)
9116    {
9117      b = (cfun_frame_layout.gprs_offset
9118	   + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9119      t = b + (cfun_frame_layout.last_restore_gpr
9120	       - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9121    }
9122
9123  if (TARGET_64BIT && cfun_save_high_fprs_p)
9124    {
9125      b = MIN (b, cfun_frame_layout.f8_offset);
9126      t = MAX (t, (cfun_frame_layout.f8_offset
9127		   + cfun_frame_layout.high_fprs * 8));
9128    }
9129
9130  if (!TARGET_64BIT)
9131    {
9132      if (cfun_fpr_save_p (FPR4_REGNUM))
9133	{
9134	  b = MIN (b, cfun_frame_layout.f4_offset);
9135	  t = MAX (t, cfun_frame_layout.f4_offset + 8);
9136	}
9137      if (cfun_fpr_save_p (FPR6_REGNUM))
9138	{
9139	  b = MIN (b, cfun_frame_layout.f4_offset + 8);
9140	  t = MAX (t, cfun_frame_layout.f4_offset + 16);
9141	}
9142    }
9143  *area_bottom = b;
9144  *area_top = t;
9145}

9146/* Update gpr_save_slots in the frame layout trying to make use of
9147   FPRs as GPR save slots.
9148   This is a helper routine of s390_register_info.  */
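/* For illustration: in a leaf function on z10 or later that has to save,
   say, r11 and r12 and still has enough unused call-clobbered FPRs, both
   GPRs get an FPR assigned as their save slot, so prologue and epilogue can
   use ldgr/lgdr moves instead of touching the stack at all.  */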
9149
9150static void
9151s390_register_info_gprtofpr ()
9152{
9153  int save_reg_slot = FPR0_REGNUM;
9154  int i, j;
9155
9156  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9157    return;
9158
9159  for (i = 15; i >= 6; i--)
9160    {
9161      if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9162	continue;
9163
9164      /* Advance to the next FP register which can be used as a
9165	 GPR save slot.  */
9166      while ((!call_really_used_regs[save_reg_slot]
9167	      || df_regs_ever_live_p (save_reg_slot)
9168	      || cfun_fpr_save_p (save_reg_slot))
9169	     && FP_REGNO_P (save_reg_slot))
9170	save_reg_slot++;
9171      if (!FP_REGNO_P (save_reg_slot))
9172	{
9173	  /* We only want to use ldgr/lgdr if we can get rid of
9174	     stm/lm entirely.  So undo the gpr slot allocation in
9175	     case we ran out of FPR save slots.  */
9176	  for (j = 6; j <= 15; j++)
9177	    if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9178	      cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9179	  break;
9180	}
9181      cfun_gpr_save_slot (i) = save_reg_slot++;
9182    }
9183}
9184
9185/* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9186   stdarg.
9187   This is a helper routine for s390_register_info.  */
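/* A small worked example (assuming the 64-bit ABI, i.e. FP_ARG_NUM_REG == 4):
   if one FP argument register has already been consumed by named arguments
   (crtl->args.info.fprs == 1) and va_list_fpr_size is large enough, the code
   below marks the remaining three FP argument registers for saving.  */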
9188
9189static void
9190s390_register_info_stdarg_fpr ()
9191{
9192  int i;
9193  int min_fpr;
9194  int max_fpr;
9195
9196  /* Save the FP argument regs for stdarg: f0 and f2 for 31 bit,
9197     f0, f2, f4 and f6 for 64 bit.  */
9198  if (!cfun->stdarg
9199      || !TARGET_HARD_FLOAT
9200      || !cfun->va_list_fpr_size
9201      || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9202    return;
9203
9204  min_fpr = crtl->args.info.fprs;
9205  max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9206  if (max_fpr >= FP_ARG_NUM_REG)
9207    max_fpr = FP_ARG_NUM_REG - 1;
9208
9209  /* FPR argument regs start at f0.  */
9210  min_fpr += FPR0_REGNUM;
9211  max_fpr += FPR0_REGNUM;
9212
9213  for (i = min_fpr; i <= max_fpr; i++)
9214    cfun_set_fpr_save (i);
9215}
9216
9217/* Reserve the GPR save slots for GPRs which need to be saved due to
9218   stdarg.
9219   This is a helper routine for s390_register_info.  */
9220
9221static void
9222s390_register_info_stdarg_gpr ()
9223{
9224  int i;
9225  int min_gpr;
9226  int max_gpr;
9227
9228  if (!cfun->stdarg
9229      || !cfun->va_list_gpr_size
9230      || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9231    return;
9232
9233  min_gpr = crtl->args.info.gprs;
9234  max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9235  if (max_gpr >= GP_ARG_NUM_REG)
9236    max_gpr = GP_ARG_NUM_REG - 1;
9237
9238  /* GPR argument regs start at r2.  */
9239  min_gpr += GPR2_REGNUM;
9240  max_gpr += GPR2_REGNUM;
9241
9242  /* If r6 was supposed to be saved into an FPR and now needs to go to
9243     the stack for vararg we have to adjust the restore range to make
9244     sure that the restore is done from stack as well.  */
9245  if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9246      && min_gpr <= GPR6_REGNUM
9247      && max_gpr >= GPR6_REGNUM)
9248    {
9249      if (cfun_frame_layout.first_restore_gpr == -1
9250	  || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9251	cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9252      if (cfun_frame_layout.last_restore_gpr == -1
9253	  || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9254	cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9255    }
9256
9257  if (cfun_frame_layout.first_save_gpr == -1
9258      || cfun_frame_layout.first_save_gpr > min_gpr)
9259    cfun_frame_layout.first_save_gpr = min_gpr;
9260
9261  if (cfun_frame_layout.last_save_gpr == -1
9262      || cfun_frame_layout.last_save_gpr < max_gpr)
9263    cfun_frame_layout.last_save_gpr = max_gpr;
9264
9265  for (i = min_gpr; i <= max_gpr; i++)
9266    cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9267}
9268
9269/* Calculate the save and restore ranges for stm(g) and lm(g) in the
9270   prologue and epilogue.  */
9271
9272static void
9273s390_register_info_set_ranges ()
9274{
9275  int i, j;
9276
9277  /* Find the first and the last save slot supposed to use the stack
9278     to set the restore range.
9279     Vararg regs might be marked as save to stack but only the
9280     call-saved regs really need restoring (i.e. r6).  This code
9281     assumes that the vararg regs have not yet been recorded in
9282     cfun_gpr_save_slot.  */
9283  for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9284  for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9285  cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9286  cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9287  cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9288  cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9289}
9290
9291/* The GPR and FPR save slots in cfun->machine->frame_layout are set
9292   for registers which need to be saved in function prologue.
9293   This function can be used until the insns emitted for save/restore
9294   of the regs are visible in the RTL stream.  */
9295
9296static void
9297s390_register_info ()
9298{
9299  int i;
9300  char clobbered_regs[32];
9301
9302  gcc_assert (!epilogue_completed);
9303
9304  if (reload_completed)
9305    /* After reload we rely on our own routine to determine which
9306       registers need saving.  */
9307    s390_regs_ever_clobbered (clobbered_regs);
9308  else
9309    /* During reload we use regs_ever_live as a base since reload
9310       does changes in there which we otherwise would not be aware
9311       of.  */
9312    for (i = 0; i < 32; i++)
9313      clobbered_regs[i] = df_regs_ever_live_p (i);
9314
9315  for (i = 0; i < 32; i++)
9316    clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9317
9318  /* Mark the call-saved FPRs which need to be saved.
9319     This needs to be done before checking the special GPRs since the
9320     stack pointer usage depends on whether high FPRs have to be saved
9321     or not.  */
9322  cfun_frame_layout.fpr_bitmap = 0;
9323  cfun_frame_layout.high_fprs = 0;
9324  for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9325    if (clobbered_regs[i] && !call_really_used_regs[i])
9326      {
9327	cfun_set_fpr_save (i);
9328	if (i >= FPR8_REGNUM)
9329	  cfun_frame_layout.high_fprs++;
9330      }
9331
9332  if (flag_pic)
9333    clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
9334      |= !!df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
9335
9336  clobbered_regs[BASE_REGNUM]
9337    |= (cfun->machine->base_reg
9338	&& REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9339
9340  clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9341    |= !!frame_pointer_needed;
9342
9343  /* On pre z900 machines this might take until machine dependent
9344     reorg to decide.
9345     save_return_addr_p will only be set on non-zarch machines so
9346     there is no risk that r14 goes into an FPR instead of a stack
9347     slot.  */
9348  clobbered_regs[RETURN_REGNUM]
9349    |= (!crtl->is_leaf
9350	|| TARGET_TPF_PROFILING
9351	|| cfun->machine->split_branches_pending_p
9352	|| cfun_frame_layout.save_return_addr_p
9353	|| crtl->calls_eh_return);
9354
9355  clobbered_regs[STACK_POINTER_REGNUM]
9356    |= (!crtl->is_leaf
9357	|| TARGET_TPF_PROFILING
9358	|| cfun_save_high_fprs_p
9359	|| get_frame_size () > 0
9360	|| (reload_completed && cfun_frame_layout.frame_size > 0)
9361	|| cfun->calls_alloca);
9362
9363  memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
9364
9365  for (i = 6; i < 16; i++)
9366    if (clobbered_regs[i])
9367      cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9368
9369  s390_register_info_stdarg_fpr ();
9370  s390_register_info_gprtofpr ();
9371  s390_register_info_set_ranges ();
9372  /* stdarg functions might need to save GPRs 2 to 6.  This might
9373     override the GPR->FPR save decision made by
9374     s390_register_info_gprtofpr for r6 since vararg regs must go to
9375     the stack.  */
9376  s390_register_info_stdarg_gpr ();
9377}
9378
9379/* This function is called by s390_optimize_prologue in order to get
9380   rid of unnecessary GPR save/restore instructions.  The register info
9381   for the GPRs is re-computed and the ranges are re-calculated.  */
9382
9383static void
9384s390_optimize_register_info ()
9385{
9386  char clobbered_regs[32];
9387  int i;
9388
9389  gcc_assert (epilogue_completed);
9390  gcc_assert (!cfun->machine->split_branches_pending_p);
9391
9392  s390_regs_ever_clobbered (clobbered_regs);
9393
9394  for (i = 0; i < 32; i++)
9395    clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9396
9397  /* There is still special treatment needed for cases invisible to
9398     s390_regs_ever_clobbered.  */
9399  clobbered_regs[RETURN_REGNUM]
9400    |= (TARGET_TPF_PROFILING
9401	/* When expanding builtin_return_addr in ESA mode we do not
9402	   know whether r14 will later be needed as scratch reg when
9403	   doing branch splitting.  So the builtin always accesses the
9404	   r14 save slot and we need to stick to the save/restore
9405	   decision for r14 even if it turns out that it didn't get
9406	   clobbered.  */
9407	|| cfun_frame_layout.save_return_addr_p
9408	|| crtl->calls_eh_return);
9409
9410  memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
9411
9412  for (i = 6; i < 16; i++)
9413    if (!clobbered_regs[i])
9414      cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
9415
9416  s390_register_info_set_ranges ();
9417  s390_register_info_stdarg_gpr ();
9418}
9419
9420/* Fill cfun->machine with info about frame of current function.  */
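/* As a rough orientation (assuming the default, non-packed 64-bit layout):
   the offsets computed below are relative to the stack pointer at function
   entry and describe the ABI-defined 160-byte register save area, with the
   back chain at offset 0, the slot of GPR N at N * 8 (r6 at 48, ..., r15 at
   120) and the slots of f0/f2/f4/f6 at offsets 128 to 152.  The function's
   own frame is then obtained by decrementing the stack pointer by
   frame_size.  */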
9421
9422static void
9423s390_frame_info (void)
9424{
9425  HOST_WIDE_INT lowest_offset;
9426
9427  cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9428  cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9429
9430  /* The va_arg builtin uses a constant distance of 16 *
9431     UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9432     pointer.  So even if we are going to save the stack pointer in an
9433     FPR we need the stack space in order to keep the offsets
9434     correct.  */
9435  if (cfun->stdarg && cfun_save_arg_fprs_p)
9436    {
9437      cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9438
9439      if (cfun_frame_layout.first_save_gpr_slot == -1)
9440	cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9441    }
9442
9443  cfun_frame_layout.frame_size = get_frame_size ();
9444  if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9445    fatal_error (input_location,
9446		 "total size of local variables exceeds architecture limit");
9447
9448  if (!TARGET_PACKED_STACK)
9449    {
9450      /* Fixed stack layout.  */
9451      cfun_frame_layout.backchain_offset = 0;
9452      cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9453      cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9454      cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9455      cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9456				       * UNITS_PER_LONG);
9457    }
9458  else if (TARGET_BACKCHAIN)
9459    {
9460      /* Kernel stack layout - packed stack, backchain, no float  */
9461      gcc_assert (TARGET_SOFT_FLOAT);
9462      cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9463					    - UNITS_PER_LONG);
9464
9465      /* The distance between the backchain and the return address
9466	 save slot must not change.  So we always need a slot for the
9467	 stack pointer which resides in between.  */
9468      cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9469
9470      cfun_frame_layout.gprs_offset
9471	= cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9472
9473      /* FPRs will not be saved.  Nevertheless pick sane values to
9474	 keep area calculations valid.  */
9475      cfun_frame_layout.f0_offset =
9476	cfun_frame_layout.f4_offset =
9477	cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9478    }
9479  else
9480    {
9481      int num_fprs;
9482
9483      /* Packed stack layout without backchain.  */
9484
9485      /* With stdarg FPRs need their dedicated slots.  */
9486      num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9487		  : (cfun_fpr_save_p (FPR4_REGNUM) +
9488		     cfun_fpr_save_p (FPR6_REGNUM)));
9489      cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9490
9491      num_fprs = (cfun->stdarg ? 2
9492		  : (cfun_fpr_save_p (FPR0_REGNUM)
9493		     + cfun_fpr_save_p (FPR2_REGNUM)));
9494      cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9495
9496      cfun_frame_layout.gprs_offset
9497	= cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9498
9499      cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9500				     - cfun_frame_layout.high_fprs * 8);
9501    }
9502
9503  if (cfun_save_high_fprs_p)
9504    cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9505
9506  if (!crtl->is_leaf)
9507    cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9508
9509  /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9510     sized area at the bottom of the stack.  This is required also for
9511     leaf functions.  When GCC generates a local stack reference it
9512     will always add STACK_POINTER_OFFSET to all these references.  */
9513  if (crtl->is_leaf
9514      && !TARGET_TPF_PROFILING
9515      && cfun_frame_layout.frame_size == 0
9516      && !cfun->calls_alloca)
9517    return;
9518
9519  /* Calculate the number of bytes we have used in our own register
9520     save area.  With the packed stack layout we can re-use the
9521     remaining bytes for normal stack elements.  */
9522
9523  if (TARGET_PACKED_STACK)
9524    lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
9525			      cfun_frame_layout.f4_offset),
9526			 cfun_frame_layout.gprs_offset);
9527  else
9528    lowest_offset = 0;
9529
9530  if (TARGET_BACKCHAIN)
9531    lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
9532
9533  cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
9534
9535  /* If under 31 bit an odd number of GPRs has to be saved, we have to
9536     adjust the frame size to maintain the 8-byte alignment of stack
9537     frames.  */
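  /* For instance, with STACK_BOUNDARY == 64 a raw size of 92 bytes is
     rounded up to 96.  */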
9538  cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
9539				   STACK_BOUNDARY / BITS_PER_UNIT - 1)
9540				  & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
9541}
9542
9543/* Generate frame layout.  Fills in register and frame data for the current
9544   function in cfun->machine.  This routine can be called multiple times;
9545   it will re-do the complete frame layout every time.  */
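/* The iteration below is needed because the decisions are mutually
   dependent: whether a literal pool base register is required depends on
   the frame size, while the frame size in turn depends on which registers
   have to be saved.  The loop simply recomputes until the size settles.  */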
9546
9547static void
9548s390_init_frame_layout (void)
9549{
9550  HOST_WIDE_INT frame_size;
9551  int base_used;
9552
9553  gcc_assert (!reload_completed);
9554
9555  /* On S/390 machines, we may need to perform branch splitting, which
9556     will require both base and return address register.  We have no
9557     choice but to assume we're going to need them until right at the
9558     end of the machine dependent reorg phase.  */
9559  if (!TARGET_CPU_ZARCH)
9560    cfun->machine->split_branches_pending_p = true;
9561
9562  do
9563    {
9564      frame_size = cfun_frame_layout.frame_size;
9565
9566      /* Try to predict whether we'll need the base register.  */
9567      base_used = cfun->machine->split_branches_pending_p
9568		  || crtl->uses_const_pool
9569		  || (!DISP_IN_RANGE (frame_size)
9570		      && !CONST_OK_FOR_K (frame_size));
9571
9572      /* Decide which register to use as literal pool base.  In small
9573	 leaf functions, try to use an unused call-clobbered register
9574	 as base register to avoid save/restore overhead.  */
9575      if (!base_used)
9576	cfun->machine->base_reg = NULL_RTX;
9577      else if (crtl->is_leaf && !df_regs_ever_live_p (5))
9578	cfun->machine->base_reg = gen_rtx_REG (Pmode, 5);
9579      else
9580	cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM);
9581
9582      s390_register_info ();
9583      s390_frame_info ();
9584    }
9585  while (frame_size != cfun_frame_layout.frame_size);
9586}
9587
9588/* Remove the FPR clobbers from a tbegin insn if it can be proven that
9589   the TX is nonescaping.  A transaction is considered escaping if
9590   there is at least one path from tbegin returning CC0 to the
9591   function exit block without an tend.
9592
9593   The check so far has some limitations:
9594   - only single tbegin/tend BBs are supported
9595   - the first cond jump after tbegin must separate the CC0 path from ~CC0
9596   - when CC is copied to a GPR and the CC0 check is done with the GPR,
9597     this is not supported
9598*/
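/* Roughly, the source pattern this looks for corresponds to

     if (__builtin_tbegin (0) == _HTM_TBEGIN_STARTED)
       {
         ...
         __builtin_tend ();
       }

   where _HTM_TBEGIN_STARTED is the CC0 case.  If every path on which the
   tbegin reported CC0 reaches a tend before leaving the function, the
   conservative FPR clobbers on the tbegin can be dropped.  */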
9599
9600static void
9601s390_optimize_nonescaping_tx (void)
9602{
9603  const unsigned int CC0 = 1 << 3;
9604  basic_block tbegin_bb = NULL;
9605  basic_block tend_bb = NULL;
9606  basic_block bb;
9607  rtx_insn *insn;
9608  bool result = true;
9609  int bb_index;
9610  rtx_insn *tbegin_insn = NULL;
9611
9612  if (!cfun->machine->tbegin_p)
9613    return;
9614
9615  for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
9616    {
9617      bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
9618
9619      if (!bb)
9620	continue;
9621
9622      FOR_BB_INSNS (bb, insn)
9623	{
9624	  rtx ite, cc, pat, target;
9625	  unsigned HOST_WIDE_INT mask;
9626
9627	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
9628	    continue;
9629
9630	  pat = PATTERN (insn);
9631
9632	  if (GET_CODE (pat) == PARALLEL)
9633	    pat = XVECEXP (pat, 0, 0);
9634
9635	  if (GET_CODE (pat) != SET
9636	      || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
9637	    continue;
9638
9639	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
9640	    {
9641	      rtx_insn *tmp;
9642
9643	      tbegin_insn = insn;
9644
9645	      /* Just return if the tbegin doesn't have clobbers.  */
9646	      if (GET_CODE (PATTERN (insn)) != PARALLEL)
9647		return;
9648
9649	      if (tbegin_bb != NULL)
9650		return;
9651
9652	      /* Find the next conditional jump.  */
9653	      for (tmp = NEXT_INSN (insn);
9654		   tmp != NULL_RTX;
9655		   tmp = NEXT_INSN (tmp))
9656		{
9657		  if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
9658		    return;
9659		  if (!JUMP_P (tmp))
9660		    continue;
9661
9662		  ite = SET_SRC (PATTERN (tmp));
9663		  if (GET_CODE (ite) != IF_THEN_ELSE)
9664		    continue;
9665
9666		  cc = XEXP (XEXP (ite, 0), 0);
9667		  if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
9668		      || GET_MODE (cc) != CCRAWmode
9669		      || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
9670		    return;
9671
9672		  if (bb->succs->length () != 2)
9673		    return;
9674
9675		  mask = INTVAL (XEXP (XEXP (ite, 0), 1));
9676		  if (GET_CODE (XEXP (ite, 0)) == NE)
9677		    mask ^= 0xf;
9678
9679		  if (mask == CC0)
9680		    target = XEXP (ite, 1);
9681		  else if (mask == (CC0 ^ 0xf))
9682		    target = XEXP (ite, 2);
9683		  else
9684		    return;
9685
9686		  {
9687		    edge_iterator ei;
9688		    edge e1, e2;
9689
9690		    ei = ei_start (bb->succs);
9691		    e1 = ei_safe_edge (ei);
9692		    ei_next (&ei);
9693		    e2 = ei_safe_edge (ei);
9694
9695		    if (e2->flags & EDGE_FALLTHRU)
9696		      {
9697			e2 = e1;
9698			e1 = ei_safe_edge (ei);
9699		      }
9700
9701		    if (!(e1->flags & EDGE_FALLTHRU))
9702		      return;
9703
9704		    tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
9705		  }
9706		  if (tmp == BB_END (bb))
9707		    break;
9708		}
9709	    }
9710
9711	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
9712	    {
9713	      if (tend_bb != NULL)
9714		return;
9715	      tend_bb = bb;
9716	    }
9717	}
9718    }
9719
9720  /* Either we successfully remove the FPR clobbers here or we are not
9721     able to do anything for this TX.  Both cases don't qualify for
9722     another look.  */
9723  cfun->machine->tbegin_p = false;
9724
9725  if (tbegin_bb == NULL || tend_bb == NULL)
9726    return;
9727
9728  calculate_dominance_info (CDI_POST_DOMINATORS);
9729  result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
9730  free_dominance_info (CDI_POST_DOMINATORS);
9731
9732  if (!result)
9733    return;
9734
9735  PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
9736			    gen_rtvec (2,
9737				       XVECEXP (PATTERN (tbegin_insn), 0, 0),
9738				       XVECEXP (PATTERN (tbegin_insn), 0, 1)));
9739  INSN_CODE (tbegin_insn) = -1;
9740  df_insn_rescan (tbegin_insn);
9741
9742  return;
9743}
9744
9745/* Return true if it is legal to put a value with MODE into REGNO.  */
9746
9747bool
9748s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
9749{
9750  if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
9751    return false;
9752
9753  switch (REGNO_REG_CLASS (regno))
9754    {
9755    case VEC_REGS:
9756      return ((GET_MODE_CLASS (mode) == MODE_INT
9757	       && s390_class_max_nregs (VEC_REGS, mode) == 1)
9758	      || mode == DFmode
9759	      || s390_vector_mode_supported_p (mode));
9760      break;
9761    case FP_REGS:
9762      if (TARGET_VX
9763	  && ((GET_MODE_CLASS (mode) == MODE_INT
9764	       && s390_class_max_nregs (FP_REGS, mode) == 1)
9765	      || mode == DFmode
9766	      || s390_vector_mode_supported_p (mode)))
9767	return true;
9768
9769      if (REGNO_PAIR_OK (regno, mode))
9770	{
9771	  if (mode == SImode || mode == DImode)
9772	    return true;
9773
9774	  if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
9775	    return true;
9776	}
9777      break;
9778    case ADDR_REGS:
9779      if (FRAME_REGNO_P (regno) && mode == Pmode)
9780	return true;
9781
9782      /* fallthrough */
9783    case GENERAL_REGS:
9784      if (REGNO_PAIR_OK (regno, mode))
9785	{
9786	  if (TARGET_ZARCH
9787	      || (mode != TFmode && mode != TCmode && mode != TDmode))
9788	    return true;
9789	}
9790      break;
9791    case CC_REGS:
9792      if (GET_MODE_CLASS (mode) == MODE_CC)
9793	return true;
9794      break;
9795    case ACCESS_REGS:
9796      if (REGNO_PAIR_OK (regno, mode))
9797	{
9798	  if (mode == SImode || mode == Pmode)
9799	    return true;
9800	}
9801      break;
9802    default:
9803      return false;
9804    }
9805
9806  return false;
9807}
9808
9809/* Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */
9810
9811bool
9812s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
9813{
9814   /* Once we've decided upon a register to use as base register, it must
9815      no longer be used for any other purpose.  */
9816  if (cfun->machine->base_reg)
9817    if (REGNO (cfun->machine->base_reg) == old_reg
9818	|| REGNO (cfun->machine->base_reg) == new_reg)
9819      return false;
9820
9821  /* Prevent regrename from using call-saved regs which haven't
9822     actually been saved.  This is necessary since regrename assumes
9823     the backend save/restore decisions are based on
9824     df_regs_ever_live.  Since we have our own routine we have to tell
9825     regrename manually about it.  */
9826  if (GENERAL_REGNO_P (new_reg)
9827      && !call_really_used_regs[new_reg]
9828      && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
9829    return false;
9830
9831  return true;
9832}
9833
9834/* Return nonzero if register REGNO can be used as a scratch register
9835   in peephole2.  */
9836
9837static bool
9838s390_hard_regno_scratch_ok (unsigned int regno)
9839{
9840  /* See s390_hard_regno_rename_ok.  */
9841  if (GENERAL_REGNO_P (regno)
9842      && !call_really_used_regs[regno]
9843      && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
9844    return false;
9845
9846  return true;
9847}
9848
9849/* Maximum number of registers to represent a value of mode MODE
9850   in a register of class RCLASS.  */
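/* For example, TFmode (16 bytes) in FP_REGS needs two registers both with
   and without the vector facility, since the 128-bit FP instructions only
   operate on register pairs; SImode in ACCESS_REGS needs a single
   register.  */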
9851
9852int
9853s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
9854{
9855  int reg_size;
9856  bool reg_pair_required_p = false;
9857
9858  switch (rclass)
9859    {
9860    case FP_REGS:
9861    case VEC_REGS:
9862      reg_size = TARGET_VX ? 16 : 8;
9863
9864      /* TF and TD modes would fit into a VR but we put them into a
9865	 register pair since we do not have 128bit FP instructions on
9866	 full VRs.  */
9867      if (TARGET_VX
9868	  && SCALAR_FLOAT_MODE_P (mode)
9869	  && GET_MODE_SIZE (mode) >= 16)
9870	reg_pair_required_p = true;
9871
9872      /* Even if complex types would fit into a single FPR/VR we force
9873	 them into a register pair to deal with the parts more easily.
9874	 (FIXME: What about complex ints?)  */
9875      if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
9876	reg_pair_required_p = true;
9877      break;
9878    case ACCESS_REGS:
9879      reg_size = 4;
9880      break;
9881    default:
9882      reg_size = UNITS_PER_WORD;
9883      break;
9884    }
9885
9886  if (reg_pair_required_p)
9887    return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
9888
9889  return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
9890}
9891
9892/* Return nonzero if changing mode from FROM_MODE to TO_MODE should not
9893   be allowed for register class RCLASS.  */
9894
9895int
9896s390_cannot_change_mode_class (machine_mode from_mode,
9897			       machine_mode to_mode,
9898			       enum reg_class rclass)
9899{
9900  machine_mode small_mode;
9901  machine_mode big_mode;
9902
9903  if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
9904    return 0;
9905
9906  if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
9907    {
9908      small_mode = from_mode;
9909      big_mode = to_mode;
9910    }
9911  else
9912    {
9913      small_mode = to_mode;
9914      big_mode = from_mode;
9915    }
9916
9917  /* Values residing in VRs are little-endian style.  All modes are
9918     placed left-aligned in a VR.  This means that we cannot allow
9919     switching between modes with differing sizes.  Also if the vector
9920     facility is available we still place TFmode values in VR register
9921     pairs, since the only instructions we have operating on TFmodes
9922     only deal with register pairs.  Therefore we have to allow DFmode
9923     subregs of TFmodes to enable the TFmode splitters.  */
9924  if (reg_classes_intersect_p (VEC_REGS, rclass)
9925      && (GET_MODE_SIZE (small_mode) < 8
9926	  || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
9927    return 1;
9928
9929  /* Likewise for access registers, since they have only half the
9930     word size on 64-bit.  */
9931  if (reg_classes_intersect_p (ACCESS_REGS, rclass))
9932    return 1;
9933
9934  return 0;
9935}
9936
9937/* Return true if we use LRA instead of reload pass.  */
9938static bool
9939s390_lra_p (void)
9940{
9941  return s390_lra_flag;
9942}
9943
9944/* Return true if register FROM can be eliminated via register TO.  */
9945
9946static bool
9947s390_can_eliminate (const int from, const int to)
9948{
9949  /* On zSeries machines, we have not marked the base register as fixed.
9950     Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
9951     If a function requires the base register, we say here that this
9952     elimination cannot be performed.  This will cause reload to free
9953     up the base register (as if it were fixed).  On the other hand,
9954     if the current function does *not* require the base register, we
9955     say here the elimination succeeds, which in turn allows reload
9956     to allocate the base register for any other purpose.  */
9957  if (from == BASE_REGNUM && to == BASE_REGNUM)
9958    {
9959      if (TARGET_CPU_ZARCH)
9960	{
9961	  s390_init_frame_layout ();
9962	  return cfun->machine->base_reg == NULL_RTX;
9963	}
9964
9965      return false;
9966    }
9967
9968  /* Everything else must point into the stack frame.  */
9969  gcc_assert (to == STACK_POINTER_REGNUM
9970	      || to == HARD_FRAME_POINTER_REGNUM);
9971
9972  gcc_assert (from == FRAME_POINTER_REGNUM
9973	      || from == ARG_POINTER_REGNUM
9974	      || from == RETURN_ADDRESS_POINTER_REGNUM);
9975
9976  /* Make sure we actually saved the return address.  */
9977  if (from == RETURN_ADDRESS_POINTER_REGNUM)
9978    if (!crtl->calls_eh_return
9979	&& !cfun->stdarg
9980	&& !cfun_frame_layout.save_return_addr_p)
9981      return false;
9982
9983  return true;
9984}
9985
9986/* Return the offset between registers FROM and TO right after the prologue.  */
9987
9988HOST_WIDE_INT
9989s390_initial_elimination_offset (int from, int to)
9990{
9991  HOST_WIDE_INT offset;
9992
9993  /* ??? Why are we called for non-eliminable pairs?  */
9994  if (!s390_can_eliminate (from, to))
9995    return 0;
9996
9997  switch (from)
9998    {
9999    case FRAME_POINTER_REGNUM:
10000      offset = (get_frame_size()
10001		+ STACK_POINTER_OFFSET
10002		+ crtl->outgoing_args_size);
10003      break;
10004
10005    case ARG_POINTER_REGNUM:
10006      s390_init_frame_layout ();
10007      offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10008      break;
10009
10010    case RETURN_ADDRESS_POINTER_REGNUM:
10011      s390_init_frame_layout ();
10012
10013      if (cfun_frame_layout.first_save_gpr_slot == -1)
10014	{
10015	  /* If it turns out that for stdarg nothing went into the reg
10016	     save area we also do not need the return address
10017	     pointer.  */
10018	  if (cfun->stdarg && !cfun_save_arg_fprs_p)
10019	    return 0;
10020
10021	  gcc_unreachable ();
10022	}
10023
10024      /* In order to make the following work it is not necessary for
10025	 r14 to have a save slot.  It is sufficient if one other GPR
10026	 got one.  Since the GPRs are always stored without gaps we
10027	 are able to calculate where the r14 save slot would
10028	 reside.  */
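      /* For instance, assuming the default layout with r6 as the lowest
         saved GPR slot: gprs_offset is 48, so the r14 slot is located at
         frame_size + 48 + (14 - 6) * 8 = frame_size + 112 relative to the
         new stack pointer.  */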
10029      offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10030		(RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10031		UNITS_PER_LONG);
10032      break;
10033
10034    case BASE_REGNUM:
10035      offset = 0;
10036      break;
10037
10038    default:
10039      gcc_unreachable ();
10040    }
10041
10042  return offset;
10043}
10044
10045/* Emit insn to save fpr REGNUM at offset OFFSET relative
10046   to register BASE.  Return generated insn.  */
10047
10048static rtx
10049save_fpr (rtx base, int offset, int regnum)
10050{
10051  rtx addr;
10052  addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10053
10054  if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10055    set_mem_alias_set (addr, get_varargs_alias_set ());
10056  else
10057    set_mem_alias_set (addr, get_frame_alias_set ());
10058
10059  return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10060}
10061
10062/* Emit insn to restore fpr REGNUM from offset OFFSET relative
10063   to register BASE.  Return generated insn.  */
10064
10065static rtx
10066restore_fpr (rtx base, int offset, int regnum)
10067{
10068  rtx addr;
10069  addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10070  set_mem_alias_set (addr, get_frame_alias_set ());
10071
10072  return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10073}
10074
10075/* Return true if REGNO is a global register, but not one
10076   of the special ones that need to be saved/restored anyway.  */
10077
10078static inline bool
10079global_not_special_regno_p (int regno)
10080{
10081  return (global_regs[regno]
10082	  /* These registers are special and need to be
10083	     restored in any case.  */
10084	  && !(regno == STACK_POINTER_REGNUM
10085	       || regno == RETURN_REGNUM
10086	       || regno == BASE_REGNUM
10087	       || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
10088}
10089
10090/* Generate insn to save registers FIRST to LAST into
10091   the register save area located at offset OFFSET
10092   relative to register BASE.  */
10093
10094static rtx
10095save_gprs (rtx base, int offset, int first, int last)
10096{
10097  rtx addr, insn, note;
10098  int i;
10099
10100  addr = plus_constant (Pmode, base, offset);
10101  addr = gen_rtx_MEM (Pmode, addr);
10102
10103  set_mem_alias_set (addr, get_frame_alias_set ());
10104
10105  /* Special-case single register.  */
10106  if (first == last)
10107    {
10108      if (TARGET_64BIT)
10109        insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10110      else
10111        insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10112
10113      if (!global_not_special_regno_p (first))
10114	RTX_FRAME_RELATED_P (insn) = 1;
10115      return insn;
10116    }
10117
10118
10119  insn = gen_store_multiple (addr,
10120			     gen_rtx_REG (Pmode, first),
10121			     GEN_INT (last - first + 1));
10122
10123  if (first <= 6 && cfun->stdarg)
10124    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10125      {
10126	rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10127
10128	if (first + i <= 6)
10129	  set_mem_alias_set (mem, get_varargs_alias_set ());
10130      }
10131
10132  /* We need to set the FRAME_RELATED flag on all SETs
10133     inside the store-multiple pattern.
10134
10135     However, we must not emit DWARF records for registers 2..5
10136     if they are stored for use by variable arguments ...
10137
10138     ??? Unfortunately, it is not enough to simply not set the
10139     FRAME_RELATED flags for those SETs, because the first SET
10140     of the PARALLEL is always treated as if it had the flag
10141     set, even if it does not.  Therefore we emit a new pattern
10142     without those registers as REG_FRAME_RELATED_EXPR note.  */
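  /* Concretely, a 64-bit stdarg function may save r2..r15 with a single
     stmg, while the REG_FRAME_RELATED_EXPR note built below then only
     describes the slots of r6..r15, so no CFI is emitted for the argument
     registers.  */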
10143
10144  if (first >= 6 && !global_not_special_regno_p (first))
10145    {
10146      rtx pat = PATTERN (insn);
10147
10148      for (i = 0; i < XVECLEN (pat, 0); i++)
10149	if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10150	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10151								     0, i)))))
10152	  RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10153
10154      RTX_FRAME_RELATED_P (insn) = 1;
10155    }
10156  else if (last >= 6)
10157    {
10158      int start;
10159
10160      for (start = first >= 6 ? first : 6; start <= last; start++)
10161	if (!global_not_special_regno_p (start))
10162	  break;
10163
10164      if (start > last)
10165	return insn;
10166
10167      addr = plus_constant (Pmode, base,
10168			    offset + (start - first) * UNITS_PER_LONG);
10169
10170      if (start == last)
10171	{
10172	  if (TARGET_64BIT)
10173	    note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10174			      gen_rtx_REG (Pmode, start));
10175	  else
10176	    note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10177			      gen_rtx_REG (Pmode, start));
10178	  note = PATTERN (note);
10179
10180	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10181	  RTX_FRAME_RELATED_P (insn) = 1;
10182
10183	  return insn;
10184	}
10185
10186      note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10187				 gen_rtx_REG (Pmode, start),
10188				 GEN_INT (last - start + 1));
10189      note = PATTERN (note);
10190
10191      add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10192
10193      for (i = 0; i < XVECLEN (note, 0); i++)
10194	if (GET_CODE (XVECEXP (note, 0, i)) == SET
10195	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10196								     0, i)))))
10197	  RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10198
10199      RTX_FRAME_RELATED_P (insn) = 1;
10200    }
10201
10202  return insn;
10203}
10204
10205/* Generate insn to restore registers FIRST to LAST from
10206   the register save area located at offset OFFSET
10207   relative to register BASE.  */
10208
10209static rtx
10210restore_gprs (rtx base, int offset, int first, int last)
10211{
10212  rtx addr, insn;
10213
10214  addr = plus_constant (Pmode, base, offset);
10215  addr = gen_rtx_MEM (Pmode, addr);
10216  set_mem_alias_set (addr, get_frame_alias_set ());
10217
10218  /* Special-case single register.  */
10219  if (first == last)
10220    {
10221      if (TARGET_64BIT)
10222        insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10223      else
10224        insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10225
10226      RTX_FRAME_RELATED_P (insn) = 1;
10227      return insn;
10228    }
10229
10230  insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10231			    addr,
10232			    GEN_INT (last - first + 1));
10233  RTX_FRAME_RELATED_P (insn) = 1;
10234  return insn;
10235}
10236
10237/* Return insn sequence to load the GOT register.  */
10238
10239static GTY(()) rtx got_symbol;
10240rtx_insn *
10241s390_load_got (void)
10242{
10243  rtx_insn *insns;
10244
10245  /* We cannot use pic_offset_table_rtx here since we use this
10246     function also for non-pic if __tls_get_offset is called and in
10247     that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10248     aren't usable.  */
10249  rtx got_rtx = gen_rtx_REG (Pmode, 12);
10250
10251  if (!got_symbol)
10252    {
10253      got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
10254      SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
10255    }
10256
10257  start_sequence ();
10258
10259  if (TARGET_CPU_ZARCH)
10260    {
10261      emit_move_insn (got_rtx, got_symbol);
10262    }
10263  else
10264    {
10265      rtx offset;
10266
10267      offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
10268			       UNSPEC_LTREL_OFFSET);
10269      offset = gen_rtx_CONST (Pmode, offset);
10270      offset = force_const_mem (Pmode, offset);
10271
10272      emit_move_insn (got_rtx, offset);
10273
10274      offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
10275			       UNSPEC_LTREL_BASE);
10276      offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
10277
10278      emit_move_insn (got_rtx, offset);
10279    }
10280
10281  insns = get_insns ();
10282  end_sequence ();
10283  return insns;
10284}
10285
10286/* This ties together stack memory (MEM with an alias set of frame_alias_set)
10287   and the change to the stack pointer.  */
10288
10289static void
10290s390_emit_stack_tie (void)
10291{
10292  rtx mem = gen_frame_mem (BLKmode,
10293			   gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10294
10295  emit_insn (gen_stack_tie (mem));
10296}
10297
10298/* Copy GPRS into FPR save slots.  */
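/* On z10 and later this amounts to plain register-to-register moves such as
   "ldgr %f<n>,%r<m>" (with "lgdr" on the restore path), so a call-saved GPR
   can live in an otherwise unused call-clobbered FPR for the duration of a
   leaf function; the <n>/<m> numbers are just placeholders.  */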
10299
10300static void
10301s390_save_gprs_to_fprs (void)
10302{
10303  int i;
10304
10305  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10306    return;
10307
10308  for (i = 6; i < 16; i++)
10309    {
10310      if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10311	{
10312	  rtx_insn *insn =
10313	    emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10314			    gen_rtx_REG (DImode, i));
10315	  RTX_FRAME_RELATED_P (insn) = 1;
10316	  /* This prevents dwarf2cfi from interpreting the set.  Doing
10317	     so it might emit def_cfa_register infos setting an FPR as
10318	     new CFA.  */
10319	  add_reg_note (insn, REG_CFA_REGISTER, PATTERN (insn));
10320	}
10321    }
10322}
10323
10324/* Restore GPRs from FPR save slots.  */
10325
10326static void
10327s390_restore_gprs_from_fprs (void)
10328{
10329  int i;
10330
10331  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10332    return;
10333
10334  for (i = 6; i < 16; i++)
10335    {
10336      rtx_insn *insn;
10337
10338      if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
10339	continue;
10340
10341      rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
10342
10343      if (i == STACK_POINTER_REGNUM)
10344	insn = emit_insn (gen_stack_restore_from_fpr (fpr));
10345      else
10346	insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
10347
10348      df_set_regs_ever_live (i, true);
10349      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10350      if (i == STACK_POINTER_REGNUM)
10351	add_reg_note (insn, REG_CFA_DEF_CFA,
10352		      plus_constant (Pmode, stack_pointer_rtx,
10353				     STACK_POINTER_OFFSET));
10354      RTX_FRAME_RELATED_P (insn) = 1;
10355    }
10356}
10357
10358
10359/* A pass run immediately before shrink-wrapping and prologue and epilogue
10360   generation.  */
10361
10362namespace {
10363
10364const pass_data pass_data_s390_early_mach =
10365{
10366  RTL_PASS, /* type */
10367  "early_mach", /* name */
10368  OPTGROUP_NONE, /* optinfo_flags */
10369  TV_MACH_DEP, /* tv_id */
10370  0, /* properties_required */
10371  0, /* properties_provided */
10372  0, /* properties_destroyed */
10373  0, /* todo_flags_start */
10374  ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10375};
10376
10377class pass_s390_early_mach : public rtl_opt_pass
10378{
10379public:
10380  pass_s390_early_mach (gcc::context *ctxt)
10381    : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10382  {}
10383
10384  /* opt_pass methods: */
10385  virtual unsigned int execute (function *);
10386
10387}; // class pass_s390_early_mach
10388
10389unsigned int
10390pass_s390_early_mach::execute (function *fun)
10391{
10392  rtx_insn *insn;
10393
10394  /* Try to get rid of the FPR clobbers.  */
10395  s390_optimize_nonescaping_tx ();
10396
10397  /* Re-compute register info.  */
10398  s390_register_info ();
10399
10400  /* If we're using a base register, ensure that it is always valid for
10401     the first non-prologue instruction.  */
10402  if (fun->machine->base_reg)
10403    emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10404
10405  /* Annotate all constant pool references to let the scheduler know
10406     they implicitly use the base register.  */
10407  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10408    if (INSN_P (insn))
10409      {
10410	annotate_constant_pool_refs (&PATTERN (insn));
10411	df_insn_rescan (insn);
10412      }
10413  return 0;
10414}
10415
10416} // anon namespace
10417
10418/* Expand the prologue into a bunch of separate insns.  */
10419
10420void
10421s390_emit_prologue (void)
10422{
10423  rtx insn, addr;
10424  rtx temp_reg;
10425  int i;
10426  int offset;
10427  int next_fpr = 0;
10428
10429  /* Choose best register to use for temp use within prologue.
10430     See below for why TPF must use the register 1.  */
10431
10432  if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
10433      && !crtl->is_leaf
10434      && !TARGET_TPF_PROFILING)
10435    temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10436  else
10437    temp_reg = gen_rtx_REG (Pmode, 1);
10438
10439  s390_save_gprs_to_fprs ();
10440
10441  /* Save call saved gprs.  */
10442  if (cfun_frame_layout.first_save_gpr != -1)
10443    {
10444      insn = save_gprs (stack_pointer_rtx,
10445			cfun_frame_layout.gprs_offset +
10446			UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
10447					  - cfun_frame_layout.first_save_gpr_slot),
10448			cfun_frame_layout.first_save_gpr,
10449			cfun_frame_layout.last_save_gpr);
10450      emit_insn (insn);
10451    }
10452
10453  /* Dummy insn to mark literal pool slot.  */
10454
10455  if (cfun->machine->base_reg)
10456    emit_insn (gen_main_pool (cfun->machine->base_reg));
10457
10458  offset = cfun_frame_layout.f0_offset;
10459
10460  /* Save f0 and f2.  */
10461  for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
10462    {
10463      if (cfun_fpr_save_p (i))
10464	{
10465	  save_fpr (stack_pointer_rtx, offset, i);
10466	  offset += 8;
10467	}
10468      else if (!TARGET_PACKED_STACK || cfun->stdarg)
10469	offset += 8;
10470    }
10471
10472  /* Save f4 and f6.  */
10473  offset = cfun_frame_layout.f4_offset;
10474  for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
10475    {
10476      if (cfun_fpr_save_p (i))
10477	{
10478	  insn = save_fpr (stack_pointer_rtx, offset, i);
10479	  offset += 8;
10480
10481	  /* If f4 and f6 are call clobbered they are saved due to
10482	     stdarg and therefore are not frame related.  */
10483	  if (!call_really_used_regs[i])
10484	    RTX_FRAME_RELATED_P (insn) = 1;
10485	}
10486      else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
10487	offset += 8;
10488    }
10489
10490  if (TARGET_PACKED_STACK
10491      && cfun_save_high_fprs_p
10492      && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
10493    {
10494      offset = (cfun_frame_layout.f8_offset
10495		+ (cfun_frame_layout.high_fprs - 1) * 8);
10496
10497      for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
10498	if (cfun_fpr_save_p (i))
10499	  {
10500	    insn = save_fpr (stack_pointer_rtx, offset, i);
10501
10502	    RTX_FRAME_RELATED_P (insn) = 1;
10503	    offset -= 8;
10504	  }
10505      if (offset >= cfun_frame_layout.f8_offset)
10506	next_fpr = i;
10507    }
10508
10509  if (!TARGET_PACKED_STACK)
10510    next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
10511
10512  if (flag_stack_usage_info)
10513    current_function_static_stack_size = cfun_frame_layout.frame_size;
10514
10515  /* Decrement stack pointer.  */
10516
10517  if (cfun_frame_layout.frame_size > 0)
10518    {
10519      rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10520      rtx real_frame_off;
10521
10522      if (s390_stack_size)
10523  	{
10524	  HOST_WIDE_INT stack_guard;
10525
10526	  if (s390_stack_guard)
10527	    stack_guard = s390_stack_guard;
10528	  else
10529	    {
10530	      /* If no value for the stack guard is provided, the smallest power
10531		 of 2 not smaller than the current frame size is chosen.  */
10532	      stack_guard = 1;
10533	      while (stack_guard < cfun_frame_layout.frame_size)
10534		stack_guard <<= 1;
10535	    }
10536
10537	  if (cfun_frame_layout.frame_size >= s390_stack_size)
10538	    {
10539	      warning (0, "frame size of function %qs is %wd"
10540		       " bytes exceeding user provided stack limit of "
10541		       "%d bytes.  "
10542		       "An unconditional trap is added.",
10543		       current_function_name(), cfun_frame_layout.frame_size,
10544		       s390_stack_size);
10545	      emit_insn (gen_trap ());
10546	    }
10547	  else
10548	    {
10549	      /* stack_guard has to be smaller than s390_stack_size.
10550		 Otherwise we would emit an AND with zero which would
10551		 not match the test under mask pattern.  */
10552	      if (stack_guard >= s390_stack_size)
10553		{
10554		  warning (0, "frame size of function %qs is %wd"
10555			   " bytes which is more than half the stack size. "
10556			   "The dynamic check would not be reliable. "
10557			   "No check emitted for this function.",
10558			   current_function_name(),
10559			   cfun_frame_layout.frame_size);
10560		}
10561	      else
10562		{
10563		  HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
10564						    & ~(stack_guard - 1));
10565
10566		  rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
10567				       GEN_INT (stack_check_mask));
10568		  if (TARGET_64BIT)
10569		    emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
10570							 t, const0_rtx),
10571					     t, const0_rtx, const0_rtx));
10572		  else
10573		    emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
10574							 t, const0_rtx),
10575					     t, const0_rtx, const0_rtx));
10576		}
10577	    }
10578  	}
10579
10580      if (s390_warn_framesize > 0
10581	  && cfun_frame_layout.frame_size >= s390_warn_framesize)
10582	warning (0, "frame size of %qs is %wd bytes",
10583		 current_function_name (), cfun_frame_layout.frame_size);
10584
10585      if (s390_warn_dynamicstack_p && cfun->calls_alloca)
10586	warning (0, "%qs uses dynamic stack allocation", current_function_name ());
10587
10588      /* Save incoming stack pointer into temp reg.  */
10589      if (TARGET_BACKCHAIN || next_fpr)
10590	insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
10591
10592      /* Subtract frame size from stack pointer.  */
10593
10594      if (DISP_IN_RANGE (INTVAL (frame_off)))
10595	{
10596	  insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10597			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10598					    frame_off));
10599	  insn = emit_insn (insn);
10600	}
10601      else
10602	{
10603	  if (!CONST_OK_FOR_K (INTVAL (frame_off)))
10604	    frame_off = force_const_mem (Pmode, frame_off);
10605
10606          insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
10607	  annotate_constant_pool_refs (&PATTERN (insn));
10608	}
10609
10610      RTX_FRAME_RELATED_P (insn) = 1;
10611      real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10612      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10613		    gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10614				 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10615					       real_frame_off)));
10616
10617      /* Set backchain.  */
10618
10619      if (TARGET_BACKCHAIN)
10620	{
10621	  if (cfun_frame_layout.backchain_offset)
10622	    addr = gen_rtx_MEM (Pmode,
10623				plus_constant (Pmode, stack_pointer_rtx,
10624				  cfun_frame_layout.backchain_offset));
10625	  else
10626	    addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
10627	  set_mem_alias_set (addr, get_frame_alias_set ());
10628	  insn = emit_insn (gen_move_insn (addr, temp_reg));
10629	}
10630
10631      /* If we support non-call exceptions (e.g. for Java),
10632	 we need to make sure the backchain pointer is set up
10633	 before any possibly trapping memory access.  */
10634      if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
10635	{
10636	  addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
10637	  emit_clobber (addr);
10638	}
10639    }
10640
10641  /* Save fprs 8 - 15 (64 bit ABI).  */
10642
10643  if (cfun_save_high_fprs_p && next_fpr)
10644    {
10645      /* If the stack might be accessed through a different register
10646	 we have to make sure that the stack pointer decrement is not
10647	 moved below the use of the stack slots.  */
10648      s390_emit_stack_tie ();
10649
10650      insn = emit_insn (gen_add2_insn (temp_reg,
10651				       GEN_INT (cfun_frame_layout.f8_offset)));
10652
10653      offset = 0;
10654
10655      for (i = FPR8_REGNUM; i <= next_fpr; i++)
10656	if (cfun_fpr_save_p (i))
10657	  {
10658	    rtx addr = plus_constant (Pmode, stack_pointer_rtx,
10659				      cfun_frame_layout.frame_size
10660				      + cfun_frame_layout.f8_offset
10661				      + offset);
10662
10663	    insn = save_fpr (temp_reg, offset, i);
10664	    offset += 8;
10665	    RTX_FRAME_RELATED_P (insn) = 1;
10666	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10667			  gen_rtx_SET (VOIDmode,
10668				       gen_rtx_MEM (DFmode, addr),
10669				       gen_rtx_REG (DFmode, i)));
10670	  }
10671    }
10672
10673  /* Set frame pointer, if needed.  */
10674
10675  if (frame_pointer_needed)
10676    {
10677      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10678      RTX_FRAME_RELATED_P (insn) = 1;
10679    }
10680
10681  /* Set up got pointer, if needed.  */
10682
10683  if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
10684    {
10685      rtx_insn *insns = s390_load_got ();
10686
10687      for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
10688	annotate_constant_pool_refs (&PATTERN (insn));
10689
10690      emit_insn (insns);
10691    }
10692
10693  if (TARGET_TPF_PROFILING)
10694    {
10695      /* Generate a BAS instruction to serve as a function
10696	 entry intercept to facilitate the use of tracing
10697	 algorithms located at the branch target.  */
10698      emit_insn (gen_prologue_tpf ());
10699
10700      /* Emit a blockage here so that all code
10701	 lies between the profiling mechanisms.  */
10702      emit_insn (gen_blockage ());
10703    }
10704}
10705
10706/* Expand the epilogue into a bunch of separate insns.  */
10707
10708void
10709s390_emit_epilogue (bool sibcall)
10710{
10711  rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
10712  int area_bottom, area_top, offset = 0;
10713  int next_offset;
10714  rtvec p;
10715  int i;
10716
10717  if (TARGET_TPF_PROFILING)
10718    {
10719
10720      /* Generate a BAS instruction to serve as a function
10721		 exit intercept to facilitate the use of tracing
10722	 algorithms located at the branch target.  */
10723
10724      /* Emit a blockage here so that all code
10725         lies between the profiling mechanisms.  */
10726      emit_insn (gen_blockage ());
10727
10728      emit_insn (gen_epilogue_tpf ());
10729    }
10730
10731  /* Check whether to use frame or stack pointer for restore.  */
10732
10733  frame_pointer = (frame_pointer_needed
10734		   ? hard_frame_pointer_rtx : stack_pointer_rtx);
10735
10736  s390_frame_area (&area_bottom, &area_top);
10737
10738  /* Check whether we can access the register save area.
10739     If not, increment the frame pointer as required.  */
10740
10741  if (area_top <= area_bottom)
10742    {
10743      /* Nothing to restore.  */
10744    }
10745  else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
10746           && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
10747    {
10748      /* Area is in range.  */
10749      offset = cfun_frame_layout.frame_size;
10750    }
10751  else
10752    {
10753      rtx insn, frame_off, cfa;
10754
10755      offset = area_bottom < 0 ? -area_bottom : 0;
10756      frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
10757
10758      cfa = gen_rtx_SET (VOIDmode, frame_pointer,
10759			 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
10760      if (DISP_IN_RANGE (INTVAL (frame_off)))
10761	{
10762	  insn = gen_rtx_SET (VOIDmode, frame_pointer,
10763			      gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
10764	  insn = emit_insn (insn);
10765	}
10766      else
10767	{
10768	  if (!CONST_OK_FOR_K (INTVAL (frame_off)))
10769	    frame_off = force_const_mem (Pmode, frame_off);
10770
10771	  insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
10772	  annotate_constant_pool_refs (&PATTERN (insn));
10773	}
10774      add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
10775      RTX_FRAME_RELATED_P (insn) = 1;
10776    }
10777
10778  /* Restore call saved fprs.  */
10779
10780  if (TARGET_64BIT)
10781    {
10782      if (cfun_save_high_fprs_p)
10783	{
10784	  next_offset = cfun_frame_layout.f8_offset;
10785	  for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
10786	    {
10787	      if (cfun_fpr_save_p (i))
10788		{
10789		  restore_fpr (frame_pointer,
10790			       offset + next_offset, i);
10791		  cfa_restores
10792		    = alloc_reg_note (REG_CFA_RESTORE,
10793				      gen_rtx_REG (DFmode, i), cfa_restores);
10794		  next_offset += 8;
10795		}
10796	    }
10797	}
10798
10799    }
10800  else
10801    {
10802      next_offset = cfun_frame_layout.f4_offset;
10803      /* f4, f6 */
10804      for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
10805	{
10806	  if (cfun_fpr_save_p (i))
10807	    {
10808	      restore_fpr (frame_pointer,
10809			   offset + next_offset, i);
10810	      cfa_restores
10811		= alloc_reg_note (REG_CFA_RESTORE,
10812				  gen_rtx_REG (DFmode, i), cfa_restores);
10813	      next_offset += 8;
10814	    }
10815	  else if (!TARGET_PACKED_STACK)
10816	    next_offset += 8;
10817	}
10818
10819    }
10820
10821  /* Return register.  */
10822
10823  return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10824
10825  /* Restore call saved gprs.  */
10826
10827  if (cfun_frame_layout.first_restore_gpr != -1)
10828    {
10829      rtx insn, addr;
10830      int i;
10831
10832	      /* Check for global registers and store their current values into
10833		 the stack slots from which they will be reloaded below.  */
10834
10835      for (i = cfun_frame_layout.first_restore_gpr;
10836	   i <= cfun_frame_layout.last_restore_gpr;
10837	   i++)
10838	{
10839	  if (global_not_special_regno_p (i))
10840	    {
10841	      addr = plus_constant (Pmode, frame_pointer,
10842				    offset + cfun_frame_layout.gprs_offset
10843				    + (i - cfun_frame_layout.first_save_gpr_slot)
10844				    * UNITS_PER_LONG);
10845	      addr = gen_rtx_MEM (Pmode, addr);
10846	      set_mem_alias_set (addr, get_frame_alias_set ());
10847	      emit_move_insn (addr, gen_rtx_REG (Pmode, i));
10848	    }
10849	  else
10850	    cfa_restores
10851	      = alloc_reg_note (REG_CFA_RESTORE,
10852				gen_rtx_REG (Pmode, i), cfa_restores);
10853	}
10854
10855      if (! sibcall)
10856	{
10857	  /* Fetch the return address from the stack before the load multiple;
10858	     this helps instruction scheduling.
10859
10860	     Only do this if we already decided that r14 needs to be
10861	     saved to a stack slot. (And not just because r14 happens to
10862	     be in between two GPRs which need saving.)  Otherwise it
10863	     would be difficult to take that decision back in
10864	     s390_optimize_prologue.  */
10865	  if (cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK)
10866	    {
10867	      int return_regnum = find_unused_clobbered_reg();
10868	      if (!return_regnum)
10869		return_regnum = 4;
10870	      return_reg = gen_rtx_REG (Pmode, return_regnum);
10871
10872	      addr = plus_constant (Pmode, frame_pointer,
10873				    offset + cfun_frame_layout.gprs_offset
10874				    + (RETURN_REGNUM
10875				       - cfun_frame_layout.first_save_gpr_slot)
10876				    * UNITS_PER_LONG);
10877	      addr = gen_rtx_MEM (Pmode, addr);
10878	      set_mem_alias_set (addr, get_frame_alias_set ());
10879	      emit_move_insn (return_reg, addr);
10880
10881	      /* Once we did that optimization we have to make sure
10882		 s390_optimize_prologue does not try to remove the
10883		 store of r14 since we will not be able to find the
10884		 load issued here.  */
10885	      cfun_frame_layout.save_return_addr_p = true;
10886	    }
10887	}
10888
10889      insn = restore_gprs (frame_pointer,
10890			   offset + cfun_frame_layout.gprs_offset
10891			   + (cfun_frame_layout.first_restore_gpr
10892			      - cfun_frame_layout.first_save_gpr_slot)
10893			   * UNITS_PER_LONG,
10894			   cfun_frame_layout.first_restore_gpr,
10895			   cfun_frame_layout.last_restore_gpr);
10896      insn = emit_insn (insn);
10897      REG_NOTES (insn) = cfa_restores;
10898      add_reg_note (insn, REG_CFA_DEF_CFA,
10899		    plus_constant (Pmode, stack_pointer_rtx,
10900				   STACK_POINTER_OFFSET));
10901      RTX_FRAME_RELATED_P (insn) = 1;
10902    }
10903
10904  s390_restore_gprs_from_fprs ();
10905
10906  if (! sibcall)
10907    {
10908
10909      /* Return to caller.  */
10910
10911      p = rtvec_alloc (2);
10912
10913      RTVEC_ELT (p, 0) = ret_rtx;
10914      RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
10915      emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
10916    }
10917}
10918
10919/* Implement TARGET_SET_UP_BY_PROLOGUE.  */
10920
10921static void
10922	s390_set_up_by_prologue (hard_reg_set_container *regs)
10923{
10924  if (cfun->machine->base_reg
10925      && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
10926    SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
10927}
10928
10929/* Return true if the function can use simple_return to return outside
10930   of a shrink-wrapped region.  At present shrink-wrapping is supported
10931   in all cases.  */
10932
10933bool
10934s390_can_use_simple_return_insn (void)
10935{
10936  return true;
10937}
10938
10939/* Return true if the epilogue is guaranteed to contain only a return
10940   instruction and if a direct return can therefore be used instead.
10941   One of the main advantages of using direct return instructions
10942   is that we can then use conditional returns.  */
10943
10944bool
10945s390_can_use_return_insn (void)
10946{
10947  int i;
10948
10949  if (!reload_completed)
10950    return false;
10951
10952  if (crtl->profile)
10953    return false;
10954
10955  if (TARGET_TPF_PROFILING)
10956    return false;
10957
10958  for (i = 0; i < 16; i++)
10959    if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
10960      return false;
10961
10962  /* For 31 bit this is not covered by the frame_size check below
10963     since f4, f6 are saved in the register save area without needing
10964     additional stack space.  */
10965  if (!TARGET_64BIT
10966      && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
10967    return false;
10968
10969  if (cfun->machine->base_reg
10970      && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
10971    return false;
10972
10973  return cfun_frame_layout.frame_size == 0;
10974}
10975
10976/* The VX ABI differs for vararg functions.  Therefore we need the
10977   prototype of the callee to be available when passing vector type
10978   values.  */
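/* For example (illustrative): with the vector ABI in effect, a call such as
   "extern void foo (); ... foo (v);" where V has a vector type is rejected
   with the diagnostic below, whereas the same call through a prototyped
   declaration of foo is accepted.  */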
10979static const char *
10980s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
10981{
10982  return ((TARGET_VX_ABI
10983	   && typelist == 0
10984	   && VECTOR_TYPE_P (TREE_TYPE (val))
10985	   && (funcdecl == NULL_TREE
10986	       || (TREE_CODE (funcdecl) == FUNCTION_DECL
10987		   && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
10988	  ? N_("Vector argument passed to unprototyped function")
10989	  : NULL);
10990}
10991
10992
10993/* Return the size in bytes of a function argument of
10994   type TYPE and/or mode MODE.  At least one of TYPE or
10995   MODE must be specified.  */
10996
10997static int
10998s390_function_arg_size (machine_mode mode, const_tree type)
10999{
11000  if (type)
11001    return int_size_in_bytes (type);
11002
11003  /* No type info available for some library calls ...  */
11004  if (mode != BLKmode)
11005    return GET_MODE_SIZE (mode);
11006
11007  /* If we have neither type nor mode, abort.  */
11008  gcc_unreachable ();
11009}
11010
11011/* Return true if a function argument of type TYPE and mode MODE
11012   is to be passed in a vector register, if available.  */
11013
11014bool
11015s390_function_arg_vector (machine_mode mode, const_tree type)
11016{
11017  if (!TARGET_VX_ABI)
11018    return false;
11019
11020  if (s390_function_arg_size (mode, type) > 16)
11021    return false;
11022
11023  /* No type info available for some library calls ...  */
11024  if (!type)
11025    return VECTOR_MODE_P (mode);
11026
11027  /* The ABI says that record types with a single member are treated
11028     just like that member would be.  */
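  /* For example (illustrative): a struct whose only member is a "vector int",
     even when wrapped in further single-member structs, is passed exactly
     like a plain "vector int".  */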
11029  while (TREE_CODE (type) == RECORD_TYPE)
11030    {
11031      tree field, single = NULL_TREE;
11032
11033      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11034	{
11035	  if (TREE_CODE (field) != FIELD_DECL)
11036	    continue;
11037
11038	  if (single == NULL_TREE)
11039	    single = TREE_TYPE (field);
11040	  else
11041	    return false;
11042	}
11043
11044      if (single == NULL_TREE)
11045	return false;
11046      else
11047	{
11048	  /* If the field declaration adds extra bytes due to
11049	     e.g. padding, this is not accepted as a vector type.  */
11050	  if (int_size_in_bytes (single) <= 0
11051	      || int_size_in_bytes (single) != int_size_in_bytes (type))
11052	    return false;
11053	  type = single;
11054	}
11055    }
11056
11057  return VECTOR_TYPE_P (type);
11058}
11059
11060/* Return true if a function argument of type TYPE and mode MODE
11061   is to be passed in a floating-point register, if available.  */
11062
11063static bool
11064s390_function_arg_float (machine_mode mode, const_tree type)
11065{
11066  if (s390_function_arg_size (mode, type) > 8)
11067    return false;
11068
11069  /* Soft-float changes the ABI: no floating-point registers are used.  */
11070  if (TARGET_SOFT_FLOAT)
11071    return false;
11072
11073  /* No type info available for some library calls ...  */
11074  if (!type)
11075    return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
11076
11077  /* The ABI says that record types with a single member are treated
11078     just like that member would be.  */
11079  while (TREE_CODE (type) == RECORD_TYPE)
11080    {
11081      tree field, single = NULL_TREE;
11082
11083      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11084	{
11085	  if (TREE_CODE (field) != FIELD_DECL)
11086	    continue;
11087
11088	  if (single == NULL_TREE)
11089	    single = TREE_TYPE (field);
11090	  else
11091	    return false;
11092	}
11093
11094      if (single == NULL_TREE)
11095	return false;
11096      else
11097	type = single;
11098    }
11099
11100  return TREE_CODE (type) == REAL_TYPE;
11101}
11102
11103/* Return true if a function argument of type TYPE and mode MODE
11104   is to be passed in an integer register, or a pair of integer
11105   registers, if available.  */
11106
11107static bool
11108s390_function_arg_integer (machine_mode mode, const_tree type)
11109{
11110  int size = s390_function_arg_size (mode, type);
11111  if (size > 8)
11112    return false;
11113
11114  /* No type info available for some library calls ...  */
11115  if (!type)
11116    return GET_MODE_CLASS (mode) == MODE_INT
11117	   || (TARGET_SOFT_FLOAT &&  SCALAR_FLOAT_MODE_P (mode));
11118
11119  /* We accept small integral (and similar) types.  */
11120  if (INTEGRAL_TYPE_P (type)
11121      || POINTER_TYPE_P (type)
11122      || TREE_CODE (type) == NULLPTR_TYPE
11123      || TREE_CODE (type) == OFFSET_TYPE
11124      || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
11125    return true;
11126
11127  /* We also accept structs of size 1, 2, 4, 8 that are not
11128     passed in floating-point registers.  */
11129  if (AGGREGATE_TYPE_P (type)
11130      && exact_log2 (size) >= 0
11131      && !s390_function_arg_float (mode, type))
11132    return true;
11133
11134  return false;
11135}
11136
11137/* Return 1 if a function argument of type TYPE and mode MODE
11138   is to be passed by reference.  The ABI specifies that only
11139   structures of size 1, 2, 4, or 8 bytes are passed by value,
11140   all other structures (and complex numbers) are passed by
11141   reference.  */
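/* For example (illustrative): a struct of 6 bytes is passed by reference
   since its size is not a power of two, whereas an 8 byte struct is passed
   by value.  */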
11142
11143static bool
11144s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
11145			machine_mode mode, const_tree type,
11146			bool named ATTRIBUTE_UNUSED)
11147{
11148  int size = s390_function_arg_size (mode, type);
11149
11150  if (s390_function_arg_vector (mode, type))
11151    return false;
11152
11153  if (size > 8)
11154    return true;
11155
11156  if (type)
11157    {
11158      if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
11159        return true;
11160
11161      if (TREE_CODE (type) == COMPLEX_TYPE
11162	  || TREE_CODE (type) == VECTOR_TYPE)
11163	return true;
11164    }
11165
11166  return false;
11167}
11168
11169/* Update the data in CUM to advance over an argument of mode MODE and
11170   data type TYPE.  (TYPE is null for libcalls where that information
11171   may not be available.)  The boolean NAMED specifies whether the
11172   argument is a named argument (as opposed to an unnamed argument
11173   matching an ellipsis).  */
11174
11175static void
11176s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
11177			   const_tree type, bool named)
11178{
11179  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11180
11181  if (s390_function_arg_vector (mode, type))
11182    {
11183      /* We are called for unnamed vector stdarg arguments which are
11184	 passed on the stack.  In this case this hook does not have to
11185	 do anything since stack arguments are tracked by common
11186	 code.  */
11187      if (!named)
11188	return;
11189      cum->vrs += 1;
11190    }
11191  else if (s390_function_arg_float (mode, type))
11192    {
11193      cum->fprs += 1;
11194    }
11195  else if (s390_function_arg_integer (mode, type))
11196    {
11197      int size = s390_function_arg_size (mode, type);
11198      cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
11199    }
11200  else
11201    gcc_unreachable ();
11202}
11203
11204/* Define where to put the arguments to a function.
11205   Value is zero to push the argument on the stack,
11206   or a hard register in which to store the argument.
11207
11208   MODE is the argument's machine mode.
11209   TYPE is the data type of the argument (as a tree).
11210    This is null for libcalls where that information may
11211    not be available.
11212   CUM is a variable of type CUMULATIVE_ARGS which gives info about
11213    the preceding args and about the function being called.
11214   NAMED is nonzero if this argument is a named parameter
11215    (otherwise it is an extra parameter matching an ellipsis).
11216
11217   On S/390, we use general purpose registers 2 through 6 to
11218   pass integer, pointer, and certain structure arguments, and
11219   floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
11220   to pass floating point arguments.  All remaining arguments
11221   are pushed to the stack.  */
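/* For example (a sketch, assuming the 64-bit ABI): for a call
   "foo (int a, double d, long l)", A is passed in %r2, D in %f0 and L in
   %r3; the GPR and FPR argument slots are counted independently.  */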
11222
11223static rtx
11224s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
11225		   const_tree type, bool named)
11226{
11227  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11228
11229  if (!named)
11230    s390_check_type_for_vector_abi (type, true, false);
11231
11232  if (s390_function_arg_vector (mode, type))
11233    {
11234      /* Vector arguments being part of the ellipsis are passed on the
11235	 stack.  */
11236      if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
11237	return NULL_RTX;
11238
11239      return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
11240    }
11241  else if (s390_function_arg_float (mode, type))
11242    {
11243      if (cum->fprs + 1 > FP_ARG_NUM_REG)
11244	return NULL_RTX;
11245      else
11246	return gen_rtx_REG (mode, cum->fprs + 16);
11247    }
11248  else if (s390_function_arg_integer (mode, type))
11249    {
11250      int size = s390_function_arg_size (mode, type);
11251      int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
11252
11253      if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
11254	return NULL_RTX;
11255      else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
11256	return gen_rtx_REG (mode, cum->gprs + 2);
11257      else if (n_gprs == 2)
11258	{
11259	  rtvec p = rtvec_alloc (2);
11260
11261	  RTVEC_ELT (p, 0)
11262	    = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
11263					 const0_rtx);
11264	  RTVEC_ELT (p, 1)
11265	    = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
11266					 GEN_INT (4));
11267
11268	  return gen_rtx_PARALLEL (mode, p);
11269	}
11270    }
11271
11272  /* After the real arguments, expand_call calls us once again
11273     with a void_type_node type.  Whatever we return here is
11274     passed as operand 2 to the call expanders.
11275
11276     We don't need this feature ...  */
11277  else if (type == void_type_node)
11278    return const0_rtx;
11279
11280  gcc_unreachable ();
11281}
11282
11283/* Return true if return values of type TYPE should be returned
11284   in a memory buffer whose address is passed by the caller as
11285   hidden first argument.  */
11286
11287static bool
11288s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
11289{
11290  /* We accept small integral (and similar) types.  */
11291  if (INTEGRAL_TYPE_P (type)
11292      || POINTER_TYPE_P (type)
11293      || TREE_CODE (type) == OFFSET_TYPE
11294      || TREE_CODE (type) == REAL_TYPE)
11295    return int_size_in_bytes (type) > 8;
11296
11297  /* vector types which fit into a VR.  */
11298  if (TARGET_VX_ABI
11299      && VECTOR_TYPE_P (type)
11300      && int_size_in_bytes (type) <= 16)
11301    return false;
11302
11303  /* Aggregates and similar constructs are always returned
11304     in memory.  */
11305  if (AGGREGATE_TYPE_P (type)
11306      || TREE_CODE (type) == COMPLEX_TYPE
11307      || VECTOR_TYPE_P (type))
11308    return true;
11309
11310  /* ??? We get called on all sorts of random stuff from
11311     aggregate_value_p.  We can't abort, but it's not clear
11312     what's safe to return.  Pretend it's a struct I guess.  */
11313  return true;
11314}
11315
11316/* Function arguments and return values are promoted to word size.  */
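/* For example (illustrative): a "short int" argument or return value is
   widened to Pmode -- DImode on 64 bit, SImode on 31 bit -- before being
   passed or returned.  */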
11317
11318static machine_mode
11319s390_promote_function_mode (const_tree type, machine_mode mode,
11320                            int *punsignedp,
11321                            const_tree fntype ATTRIBUTE_UNUSED,
11322                            int for_return ATTRIBUTE_UNUSED)
11323{
11324  if (INTEGRAL_MODE_P (mode)
11325      && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
11326    {
11327      if (type != NULL_TREE && POINTER_TYPE_P (type))
11328	*punsignedp = POINTERS_EXTEND_UNSIGNED;
11329      return Pmode;
11330    }
11331
11332  return mode;
11333}
11334
11335/* Define where to return a (scalar) value of type RET_TYPE.
11336   If RET_TYPE is null, define where to return a (scalar)
11337   value of mode MODE from a libcall.  */
11338
11339static rtx
11340s390_function_and_libcall_value (machine_mode mode,
11341				 const_tree ret_type,
11342				 const_tree fntype_or_decl,
11343				 bool outgoing ATTRIBUTE_UNUSED)
11344{
11345  /* For vector return types it is important to use the RET_TYPE
11346     argument whenever available since the middle-end might have
11347     changed the mode to a scalar mode.  */
11348  bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
11349			    || (!ret_type && VECTOR_MODE_P (mode)));
11350
11351  /* For normal functions perform the promotion as
11352     promote_function_mode would do.  */
11353  if (ret_type)
11354    {
11355      int unsignedp = TYPE_UNSIGNED (ret_type);
11356      mode = promote_function_mode (ret_type, mode, &unsignedp,
11357				    fntype_or_decl, 1);
11358    }
11359
11360  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
11361	      || SCALAR_FLOAT_MODE_P (mode)
11362	      || (TARGET_VX_ABI && vector_ret_type_p));
11363  gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
11364
11365  if (TARGET_VX_ABI && vector_ret_type_p)
11366    return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
11367  else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
11368    return gen_rtx_REG (mode, 16);
11369  else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
11370	   || UNITS_PER_LONG == UNITS_PER_WORD)
11371    return gen_rtx_REG (mode, 2);
11372  else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
11373    {
11374      /* This case is triggered when returning a 64 bit value with
11375	 -m31 -mzarch.  Although the value would fit into a single
11376	 register, it has to be forced into a 32 bit register pair in
11377	 order to match the ABI.  */
11378      rtvec p = rtvec_alloc (2);
11379
11380      RTVEC_ELT (p, 0)
11381	= gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
11382      RTVEC_ELT (p, 1)
11383	= gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
11384
11385      return gen_rtx_PARALLEL (mode, p);
11386    }
11387
11388  gcc_unreachable ();
11389}
11390
11391/* Define where to return a scalar return value of type RET_TYPE.  */
11392
11393static rtx
11394s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
11395		     bool outgoing)
11396{
11397  return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
11398					  fn_decl_or_type, outgoing);
11399}
11400
11401/* Define where to return a scalar libcall return value of mode
11402   MODE.  */
11403
11404static rtx
11405s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
11406{
11407  return s390_function_and_libcall_value (mode, NULL_TREE,
11408					  NULL_TREE, true);
11409}
11410
11411
11412/* Create and return the va_list datatype.
11413
11414   On S/390, va_list is an array type equivalent to
11415
11416      typedef struct __va_list_tag
11417        {
11418            long __gpr;
11419            long __fpr;
11420            void *__overflow_arg_area;
11421            void *__reg_save_area;
11422        } va_list[1];
11423
11424   where __gpr and __fpr hold the number of general purpose
11425   or floating point arguments used up to now, respectively,
11426   __overflow_arg_area points to the stack location of the
11427   next argument passed on the stack, and __reg_save_area
11428   always points to the start of the register area in the
11429   call frame of the current function.  The function prologue
11430   saves all registers used for argument passing into this
11431   area if the function uses variable arguments.  */
11432
11433static tree
11434s390_build_builtin_va_list (void)
11435{
11436  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
11437
11438  record = lang_hooks.types.make_type (RECORD_TYPE);
11439
11440  type_decl =
11441    build_decl (BUILTINS_LOCATION,
11442		TYPE_DECL, get_identifier ("__va_list_tag"), record);
11443
11444  f_gpr = build_decl (BUILTINS_LOCATION,
11445		      FIELD_DECL, get_identifier ("__gpr"),
11446		      long_integer_type_node);
11447  f_fpr = build_decl (BUILTINS_LOCATION,
11448		      FIELD_DECL, get_identifier ("__fpr"),
11449		      long_integer_type_node);
11450  f_ovf = build_decl (BUILTINS_LOCATION,
11451		      FIELD_DECL, get_identifier ("__overflow_arg_area"),
11452		      ptr_type_node);
11453  f_sav = build_decl (BUILTINS_LOCATION,
11454		      FIELD_DECL, get_identifier ("__reg_save_area"),
11455		      ptr_type_node);
11456
11457  va_list_gpr_counter_field = f_gpr;
11458  va_list_fpr_counter_field = f_fpr;
11459
11460  DECL_FIELD_CONTEXT (f_gpr) = record;
11461  DECL_FIELD_CONTEXT (f_fpr) = record;
11462  DECL_FIELD_CONTEXT (f_ovf) = record;
11463  DECL_FIELD_CONTEXT (f_sav) = record;
11464
11465  TYPE_STUB_DECL (record) = type_decl;
11466  TYPE_NAME (record) = type_decl;
11467  TYPE_FIELDS (record) = f_gpr;
11468  DECL_CHAIN (f_gpr) = f_fpr;
11469  DECL_CHAIN (f_fpr) = f_ovf;
11470  DECL_CHAIN (f_ovf) = f_sav;
11471
11472  layout_type (record);
11473
11474  /* The correct type is an array type of one element.  */
11475  return build_array_type (record, build_index_type (size_zero_node));
11476}
11477
11478/* Implement va_start by filling the va_list structure VALIST.
11479   STDARG_P is always true, and ignored.
11480   NEXTARG points to the first anonymous stack argument.
11481
11482   The following global variables are used to initialize
11483   the va_list structure:
11484
11485     crtl->args.info:
11486       holds number of gprs and fprs used for named arguments.
11487     crtl->args.arg_offset_rtx:
11488       holds the offset of the first anonymous stack argument
11489       (relative to the virtual arg pointer).  */
11490
11491static void
11492s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
11493{
11494  HOST_WIDE_INT n_gpr, n_fpr;
11495  int off;
11496  tree f_gpr, f_fpr, f_ovf, f_sav;
11497  tree gpr, fpr, ovf, sav, t;
11498
11499  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11500  f_fpr = DECL_CHAIN (f_gpr);
11501  f_ovf = DECL_CHAIN (f_fpr);
11502  f_sav = DECL_CHAIN (f_ovf);
11503
11504  valist = build_simple_mem_ref (valist);
11505  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11506  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
11507  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
11508  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
11509
11510  /* Count number of gp and fp argument registers used.  */
11511
11512  n_gpr = crtl->args.info.gprs;
11513  n_fpr = crtl->args.info.fprs;
11514
11515  if (cfun->va_list_gpr_size)
11516    {
11517      t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11518		  build_int_cst (NULL_TREE, n_gpr));
11519      TREE_SIDE_EFFECTS (t) = 1;
11520      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11521    }
11522
11523  if (cfun->va_list_fpr_size)
11524    {
11525      t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11526	          build_int_cst (NULL_TREE, n_fpr));
11527      TREE_SIDE_EFFECTS (t) = 1;
11528      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11529    }
11530
11531  /* Find the overflow area.
11532     FIXME: This currently is too pessimistic when the vector ABI is
11533     enabled.  In that case we *always* set up the overflow area
11534     pointer.  */
11535  if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
11536      || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
11537      || TARGET_VX_ABI)
11538    {
11539      t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
11540
11541      off = INTVAL (crtl->args.arg_offset_rtx);
11542      off = off < 0 ? 0 : off;
11543      if (TARGET_DEBUG_ARG)
11544	fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
11545		 (int)n_gpr, (int)n_fpr, off);
11546
11547      t = fold_build_pointer_plus_hwi (t, off);
11548
11549      t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11550      TREE_SIDE_EFFECTS (t) = 1;
11551      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11552    }
11553
11554  /* Find the register save area.  */
11555  if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
11556      || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
11557    {
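      /* The register save area starts RETURN_REGNUM * UNITS_PER_LONG bytes
	 below the save slot of the return register, i.e. at the
	 (hypothetical) save slot of r0.  */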
11558      t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
11559      t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
11560
11561      t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11562      TREE_SIDE_EFFECTS (t) = 1;
11563      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11564    }
11565}
11566
11567/* Implement va_arg by updating the va_list structure
11568   VALIST as required to retrieve an argument of type
11569   TYPE, and returning that argument.
11570
11571   Generates code equivalent to:
11572
11573   if (integral value) {
11574     if (size  <= 4 && args.gpr < 5 ||
11575         size  > 4 && args.gpr < 4 )
11576       ret = args.reg_save_area[args.gpr+8]
11577     else
11578       ret = *args.overflow_arg_area++;
11579   } else if (vector value) {
11580       ret = *args.overflow_arg_area;
11581       args.overflow_arg_area += size / 8;
11582   } else if (float value) {
11583     if (args.fgpr < 2)
11584       ret = args.reg_save_area[args.fpr+64]
11585     else
11586       ret = *args.overflow_arg_area++;
11587   } else if (aggregate value) {
11588     if (args.gpr < 5)
11589       ret = *args.reg_save_area[args.gpr]
11590     else
11591       ret = **args.overflow_arg_area++;
11592   } */
11593
11594static tree
11595s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11596		      gimple_seq *post_p ATTRIBUTE_UNUSED)
11597{
11598  tree f_gpr, f_fpr, f_ovf, f_sav;
11599  tree gpr, fpr, ovf, sav, reg, t, u;
11600  int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
11601  tree lab_false, lab_over;
11602  tree addr = create_tmp_var (ptr_type_node, "addr");
11603  bool left_align_p; /* True if a value < UNITS_PER_LONG is left-aligned
11604			within its stack slot.  */
11605
11606  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11607  f_fpr = DECL_CHAIN (f_gpr);
11608  f_ovf = DECL_CHAIN (f_fpr);
11609  f_sav = DECL_CHAIN (f_ovf);
11610
11611  valist = build_va_arg_indirect_ref (valist);
11612  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11613  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
11614  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
11615
11616  /* The tree for args* cannot be shared between gpr/fpr and ovf since
11617     both appear on a lhs.  */
11618  valist = unshare_expr (valist);
11619  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
11620
11621  size = int_size_in_bytes (type);
11622
11623  s390_check_type_for_vector_abi (type, true, false);
11624
11625  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11626    {
11627      if (TARGET_DEBUG_ARG)
11628	{
11629	  fprintf (stderr, "va_arg: aggregate type");
11630	  debug_tree (type);
11631	}
11632
11633      /* Aggregates are passed by reference.  */
11634      indirect_p = 1;
11635      reg = gpr;
11636      n_reg = 1;
11637
11638      /* Kernel stack layout on 31 bit: it is assumed here that no padding
11639	 will be added by s390_frame_info because for va_args an even number
11640	 of GPRs always has to be saved (r15-r2 = 14 regs).  */
11641      sav_ofs = 2 * UNITS_PER_LONG;
11642      sav_scale = UNITS_PER_LONG;
11643      size = UNITS_PER_LONG;
11644      max_reg = GP_ARG_NUM_REG - n_reg;
11645      left_align_p = false;
11646    }
11647  else if (s390_function_arg_vector (TYPE_MODE (type), type))
11648    {
11649      if (TARGET_DEBUG_ARG)
11650	{
11651	  fprintf (stderr, "va_arg: vector type");
11652	  debug_tree (type);
11653	}
11654
11655      indirect_p = 0;
11656      reg = NULL_TREE;
11657      n_reg = 0;
11658      sav_ofs = 0;
11659      sav_scale = 8;
11660      max_reg = 0;
11661      left_align_p = true;
11662    }
11663  else if (s390_function_arg_float (TYPE_MODE (type), type))
11664    {
11665      if (TARGET_DEBUG_ARG)
11666	{
11667	  fprintf (stderr, "va_arg: float type");
11668	  debug_tree (type);
11669	}
11670
11671      /* FP args go in FP registers, if present.  */
11672      indirect_p = 0;
11673      reg = fpr;
11674      n_reg = 1;
11675      sav_ofs = 16 * UNITS_PER_LONG;
11676      sav_scale = 8;
11677      max_reg = FP_ARG_NUM_REG - n_reg;
11678      left_align_p = false;
11679    }
11680  else
11681    {
11682      if (TARGET_DEBUG_ARG)
11683	{
11684	  fprintf (stderr, "va_arg: other type");
11685	  debug_tree (type);
11686	}
11687
11688      /* Otherwise into GP registers.  */
11689      indirect_p = 0;
11690      reg = gpr;
11691      n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
11692
11693      /* Kernel stack layout on 31 bit: it is assumed here that no padding
11694	 will be added by s390_frame_info because for va_args an even number
11695	 of GPRs always has to be saved (r15-r2 = 14 regs).  */
11696      sav_ofs = 2 * UNITS_PER_LONG;
11697
11698      if (size < UNITS_PER_LONG)
11699	sav_ofs += UNITS_PER_LONG - size;
11700
11701      sav_scale = UNITS_PER_LONG;
11702      max_reg = GP_ARG_NUM_REG - n_reg;
11703      left_align_p = false;
11704    }
11705
11706  /* Pull the value out of the saved registers ...  */
11707
11708  if (reg != NULL_TREE)
11709    {
11710      /*
11711	if (reg > ((typeof (reg))max_reg))
11712          goto lab_false;
11713
11714        addr = sav + sav_ofs + reg * sav_scale;
11715
11716	goto lab_over;
11717
11718        lab_false:
11719      */
11720
11721      lab_false = create_artificial_label (UNKNOWN_LOCATION);
11722      lab_over = create_artificial_label (UNKNOWN_LOCATION);
11723
11724      t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
11725      t = build2 (GT_EXPR, boolean_type_node, reg, t);
11726      u = build1 (GOTO_EXPR, void_type_node, lab_false);
11727      t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
11728      gimplify_and_add (t, pre_p);
11729
11730      t = fold_build_pointer_plus_hwi (sav, sav_ofs);
11731      u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
11732		  fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
11733      t = fold_build_pointer_plus (t, u);
11734
11735      gimplify_assign (addr, t, pre_p);
11736
11737      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
11738
11739      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
11740    }
11741
11742  /* ... Otherwise out of the overflow area.  */
11743
11744  t = ovf;
11745  if (size < UNITS_PER_LONG && !left_align_p)
11746    t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
11747
11748  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
11749
11750  gimplify_assign (addr, t, pre_p);
11751
11752  if (size < UNITS_PER_LONG && left_align_p)
11753    t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
11754  else
11755    t = fold_build_pointer_plus_hwi (t, size);
11756
11757  gimplify_assign (ovf, t, pre_p);
11758
11759  if (reg != NULL_TREE)
11760    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
11761
11762
11763  /* Increment register save count.  */
11764
11765  if (n_reg > 0)
11766    {
11767      u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
11768		  fold_convert (TREE_TYPE (reg), size_int (n_reg)));
11769      gimplify_and_add (u, pre_p);
11770    }
11771
11772  if (indirect_p)
11773    {
11774      t = build_pointer_type_for_mode (build_pointer_type (type),
11775				       ptr_mode, true);
11776      addr = fold_convert (t, addr);
11777      addr = build_va_arg_indirect_ref (addr);
11778    }
11779  else
11780    {
11781      t = build_pointer_type_for_mode (type, ptr_mode, true);
11782      addr = fold_convert (t, addr);
11783    }
11784
11785  return build_va_arg_indirect_ref (addr);
11786}
11787
11788/* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
11789   expanders.
11790   DEST  - Register location where CC will be stored.
11791   TDB   - Pointer to a 256 byte area where to store the transaction
11792           diagnostic block.  NULL if TDB is not needed.
11793   RETRY - Retry count value.  If non-NULL a retry loop for CC2
11794           is emitted.
11795   CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
11796                    of the tbegin instruction pattern.  */
11797
11798void
11799s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
11800{
11801  rtx retry_plus_two = gen_reg_rtx (SImode);
11802  rtx retry_reg = gen_reg_rtx (SImode);
11803  rtx_code_label *retry_label = NULL;
11804
11805  if (retry != NULL_RTX)
11806    {
11807      emit_move_insn (retry_reg, retry);
11808      emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
11809      emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
11810      retry_label = gen_label_rtx ();
11811      emit_label (retry_label);
11812    }
11813
11814  if (clobber_fprs_p)
11815    emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), tdb));
11816  else
11817    emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
11818				     tdb));
11819
11820  emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
11821					gen_rtvec (1, gen_rtx_REG (CCRAWmode,
11822								   CC_REGNUM)),
11823					UNSPEC_CC_TO_INT));
11824  if (retry != NULL_RTX)
11825    {
11826      const int CC0 = 1 << 3;
11827      const int CC1 = 1 << 2;
11828      const int CC3 = 1 << 0;
11829      rtx jump;
11830      rtx count = gen_reg_rtx (SImode);
11831      rtx_code_label *leave_label = gen_label_rtx ();
11832
11833      /* Exit for success and permanent failures.  */
11834      jump = s390_emit_jump (leave_label,
11835			     gen_rtx_EQ (VOIDmode,
11836			       gen_rtx_REG (CCRAWmode, CC_REGNUM),
11837			       gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
11838      LABEL_NUSES (leave_label) = 1;
11839
11840      /* CC2 - transient failure. Perform retry with ppa.  */
11841      emit_move_insn (count, retry_plus_two);
11842      emit_insn (gen_subsi3 (count, count, retry_reg));
11843      emit_insn (gen_tx_assist (count));
11844      jump = emit_jump_insn (gen_doloop_si64 (retry_label,
11845					      retry_reg,
11846					      retry_reg));
11847      JUMP_LABEL (jump) = retry_label;
11848      LABEL_NUSES (retry_label) = 1;
11849      emit_label (leave_label);
11850    }
11851}
11852
11853
11854/* Return the decl for the target specific builtin with the function
11855   code FCODE.  */
11856
11857static tree
11858s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
11859{
11860  if (fcode >= S390_BUILTIN_MAX)
11861    return error_mark_node;
11862
11863  return s390_builtin_decls[fcode];
11864}
11865
11866/* We call mcount before the function prologue.  So a profiled leaf
11867   function should stay a leaf function.  */
11868
11869static bool
11870s390_keep_leaf_when_profiled ()
11871{
11872  return true;
11873}
11874
11875/* Output assembly code for the trampoline template to
11876   stdio stream FILE.
11877
11878   On S/390, we use gpr 1 internally in the trampoline code;
11879   gpr 0 is used to hold the static chain.  */
11880
11881static void
11882s390_asm_trampoline_template (FILE *file)
11883{
11884  rtx op[2];
11885  op[0] = gen_rtx_REG (Pmode, 0);
11886  op[1] = gen_rtx_REG (Pmode, 1);
11887
11888  if (TARGET_64BIT)
11889    {
11890      output_asm_insn ("basr\t%1,0", op);         /* 2 byte */
11891      output_asm_insn ("lmg\t%0,%1,14(%1)", op);  /* 6 byte */
11892      output_asm_insn ("br\t%1", op);             /* 2 byte */
11893      ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
11894    }
11895  else
11896    {
11897      output_asm_insn ("basr\t%1,0", op);         /* 2 byte */
11898      output_asm_insn ("lm\t%0,%1,6(%1)", op);    /* 4 byte */
11899      output_asm_insn ("br\t%1", op);             /* 2 byte */
11900      ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
11901    }
11902}
11903
11904/* Emit RTL insns to initialize the variable parts of a trampoline.
11905   FNADDR is an RTX for the address of the function's pure code.
11906   CXT is an RTX for the static chain value for the function.  */
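/* The initialized trampoline then looks as follows (64 bit, illustrative):

     offset  0:  trampoline template (basr; lmg; br)
     offset 16:  static chain value, loaded into %r0 by the lmg
     offset 24:  target function address, loaded into %r1 by the lmg  */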
11907
11908static void
11909s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
11910{
11911  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11912  rtx mem;
11913
11914  emit_block_move (m_tramp, assemble_trampoline_template (),
11915		   GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
11916
11917  mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
11918  emit_move_insn (mem, cxt);
11919  mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
11920  emit_move_insn (mem, fnaddr);
11921}
11922
11923/* Output assembler code to FILE to increment profiler label # LABELNO
11924   for profiling a function entry.  */
11925
11926void
11927s390_function_profiler (FILE *file, int labelno)
11928{
11929  rtx op[7];
11930
11931  char label[128];
11932  ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
11933
11934  fprintf (file, "# function profiler \n");
11935
11936  op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
11937  op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
11938  op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
11939
11940  op[2] = gen_rtx_REG (Pmode, 1);
11941  op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
11942  SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
11943
11944  op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
11945  if (flag_pic)
11946    {
11947      op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
11948      op[4] = gen_rtx_CONST (Pmode, op[4]);
11949    }
11950
11951  if (TARGET_64BIT)
11952    {
11953      output_asm_insn ("stg\t%0,%1", op);
11954      output_asm_insn ("larl\t%2,%3", op);
11955      output_asm_insn ("brasl\t%0,%4", op);
11956      output_asm_insn ("lg\t%0,%1", op);
11957    }
11958  else if (!flag_pic)
11959    {
11960      op[6] = gen_label_rtx ();
11961
11962      output_asm_insn ("st\t%0,%1", op);
11963      output_asm_insn ("bras\t%2,%l6", op);
11964      output_asm_insn (".long\t%4", op);
11965      output_asm_insn (".long\t%3", op);
11966      targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
11967      output_asm_insn ("l\t%0,0(%2)", op);
11968      output_asm_insn ("l\t%2,4(%2)", op);
11969      output_asm_insn ("basr\t%0,%0", op);
11970      output_asm_insn ("l\t%0,%1", op);
11971    }
11972  else
11973    {
11974      op[5] = gen_label_rtx ();
11975      op[6] = gen_label_rtx ();
11976
11977      output_asm_insn ("st\t%0,%1", op);
11978      output_asm_insn ("bras\t%2,%l6", op);
11979      targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
11980      output_asm_insn (".long\t%4-%l5", op);
11981      output_asm_insn (".long\t%3-%l5", op);
11982      targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
11983      output_asm_insn ("lr\t%0,%2", op);
11984      output_asm_insn ("a\t%0,0(%2)", op);
11985      output_asm_insn ("a\t%2,4(%2)", op);
11986      output_asm_insn ("basr\t%0,%0", op);
11987      output_asm_insn ("l\t%0,%1", op);
11988    }
11989}
11990
11991/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
11992   into its SYMBOL_REF_FLAGS.  */
11993
11994static void
11995s390_encode_section_info (tree decl, rtx rtl, int first)
11996{
11997  default_encode_section_info (decl, rtl, first);
11998
11999  if (TREE_CODE (decl) == VAR_DECL)
12000    {
12001      /* If a variable has a forced alignment to < 2 bytes, mark it
12002	 with SYMBOL_FLAG_ALIGN1 to prevent it from being used as a LARL
12003	 operand.  */
12004      if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16)
12005	SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
12006      if (!DECL_SIZE (decl)
12007	  || !DECL_ALIGN (decl)
12008	  || !tree_fits_shwi_p (DECL_SIZE (decl))
12009	  || (DECL_ALIGN (decl) <= 64
12010	      && DECL_ALIGN (decl) != tree_to_shwi (DECL_SIZE (decl))))
12011	SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
12012    }
12013
12014  /* Literal pool references don't have a decl so they are handled
12015     differently here.  We rely on the information in the MEM_ALIGN
12016     entry to decide upon natural alignment.  */
12017  if (MEM_P (rtl)
12018      && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
12019      && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
12020      && (MEM_ALIGN (rtl) == 0
12021	  || GET_MODE_BITSIZE (GET_MODE (rtl)) == 0
12022	  || MEM_ALIGN (rtl) < GET_MODE_BITSIZE (GET_MODE (rtl))))
12023    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
12024}
12025
12026/* Output thunk to FILE that implements a C++ virtual function call (with
12027   multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
12028   by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
12029   stored at VCALL_OFFSET in the vtable whose address is located at offset 0
12030   relative to the resulting this pointer.  */
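/* For example (a sketch): on a 64-bit target with DELTA == 16 and
   VCALL_OFFSET == 0 the emitted thunk is simply

       la	%r2,16(%r2)
       jg	function

   (using %r3 instead of %r2 if the function returns its value in memory).  */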
12031
12032static void
12033s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
12034		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12035		      tree function)
12036{
12037  rtx op[10];
12038  int nonlocal = 0;
12039
12040  /* Make sure unwind info is emitted for the thunk if needed.  */
12041  final_start_function (emit_barrier (), file, 1);
12042
12043  /* Operand 0 is the target function.  */
12044  op[0] = XEXP (DECL_RTL (function), 0);
12045  if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
12046    {
12047      nonlocal = 1;
12048      op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
12049			      TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
12050      op[0] = gen_rtx_CONST (Pmode, op[0]);
12051    }
12052
12053  /* Operand 1 is the 'this' pointer.  */
12054  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12055    op[1] = gen_rtx_REG (Pmode, 3);
12056  else
12057    op[1] = gen_rtx_REG (Pmode, 2);
12058
12059  /* Operand 2 is the delta.  */
12060  op[2] = GEN_INT (delta);
12061
12062  /* Operand 3 is the vcall_offset.  */
12063  op[3] = GEN_INT (vcall_offset);
12064
12065  /* Operand 4 is the temporary register.  */
12066  op[4] = gen_rtx_REG (Pmode, 1);
12067
12068  /* Operands 5 to 8 can be used as labels.  */
12069  op[5] = NULL_RTX;
12070  op[6] = NULL_RTX;
12071  op[7] = NULL_RTX;
12072  op[8] = NULL_RTX;
12073
12074  /* Operand 9 can be used for temporary register.  */
12075  op[9] = NULL_RTX;
12076
12077  /* Generate code.  */
12078  if (TARGET_64BIT)
12079    {
12080      /* Setup literal pool pointer if required.  */
12081      if ((!DISP_IN_RANGE (delta)
12082	   && !CONST_OK_FOR_K (delta)
12083	   && !CONST_OK_FOR_Os (delta))
12084	  || (!DISP_IN_RANGE (vcall_offset)
12085	      && !CONST_OK_FOR_K (vcall_offset)
12086	      && !CONST_OK_FOR_Os (vcall_offset)))
12087	{
12088	  op[5] = gen_label_rtx ();
12089	  output_asm_insn ("larl\t%4,%5", op);
12090	}
12091
12092      /* Add DELTA to this pointer.  */
12093      if (delta)
12094	{
12095	  if (CONST_OK_FOR_J (delta))
12096	    output_asm_insn ("la\t%1,%2(%1)", op);
12097	  else if (DISP_IN_RANGE (delta))
12098	    output_asm_insn ("lay\t%1,%2(%1)", op);
12099	  else if (CONST_OK_FOR_K (delta))
12100	    output_asm_insn ("aghi\t%1,%2", op);
12101 	  else if (CONST_OK_FOR_Os (delta))
12102 	    output_asm_insn ("agfi\t%1,%2", op);
12103	  else
12104	    {
12105	      op[6] = gen_label_rtx ();
12106	      output_asm_insn ("agf\t%1,%6-%5(%4)", op);
12107	    }
12108	}
12109
12110      /* Perform vcall adjustment.  */
12111      if (vcall_offset)
12112	{
12113	  if (DISP_IN_RANGE (vcall_offset))
12114	    {
12115	      output_asm_insn ("lg\t%4,0(%1)", op);
12116	      output_asm_insn ("ag\t%1,%3(%4)", op);
12117	    }
12118	  else if (CONST_OK_FOR_K (vcall_offset))
12119	    {
12120	      output_asm_insn ("lghi\t%4,%3", op);
12121	      output_asm_insn ("ag\t%4,0(%1)", op);
12122	      output_asm_insn ("ag\t%1,0(%4)", op);
12123	    }
12124 	  else if (CONST_OK_FOR_Os (vcall_offset))
12125 	    {
12126 	      output_asm_insn ("lgfi\t%4,%3", op);
12127 	      output_asm_insn ("ag\t%4,0(%1)", op);
12128 	      output_asm_insn ("ag\t%1,0(%4)", op);
12129 	    }
12130	  else
12131	    {
12132	      op[7] = gen_label_rtx ();
12133	      output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
12134	      output_asm_insn ("ag\t%4,0(%1)", op);
12135	      output_asm_insn ("ag\t%1,0(%4)", op);
12136	    }
12137	}
12138
12139      /* Jump to target.  */
12140      output_asm_insn ("jg\t%0", op);
12141
12142      /* Output literal pool if required.  */
12143      if (op[5])
12144	{
12145	  output_asm_insn (".align\t4", op);
12146	  targetm.asm_out.internal_label (file, "L",
12147					  CODE_LABEL_NUMBER (op[5]));
12148	}
12149      if (op[6])
12150	{
12151	  targetm.asm_out.internal_label (file, "L",
12152					  CODE_LABEL_NUMBER (op[6]));
12153	  output_asm_insn (".long\t%2", op);
12154	}
12155      if (op[7])
12156	{
12157	  targetm.asm_out.internal_label (file, "L",
12158					  CODE_LABEL_NUMBER (op[7]));
12159	  output_asm_insn (".long\t%3", op);
12160	}
12161    }
12162  else
12163    {
12164      /* Setup base pointer if required.  */
12165      if (!vcall_offset
12166	  || (!DISP_IN_RANGE (delta)
12167              && !CONST_OK_FOR_K (delta)
12168	      && !CONST_OK_FOR_Os (delta))
12169	  || (!DISP_IN_RANGE (delta)
12170              && !CONST_OK_FOR_K (vcall_offset)
12171	      && !CONST_OK_FOR_Os (vcall_offset)))
12172	{
12173	  op[5] = gen_label_rtx ();
12174	  output_asm_insn ("basr\t%4,0", op);
12175	  targetm.asm_out.internal_label (file, "L",
12176					  CODE_LABEL_NUMBER (op[5]));
12177	}
12178
12179      /* Add DELTA to this pointer.  */
12180      if (delta)
12181	{
12182	  if (CONST_OK_FOR_J (delta))
12183	    output_asm_insn ("la\t%1,%2(%1)", op);
12184	  else if (DISP_IN_RANGE (delta))
12185	    output_asm_insn ("lay\t%1,%2(%1)", op);
12186	  else if (CONST_OK_FOR_K (delta))
12187	    output_asm_insn ("ahi\t%1,%2", op);
12188	  else if (CONST_OK_FOR_Os (delta))
12189 	    output_asm_insn ("afi\t%1,%2", op);
12190	  else
12191	    {
12192	      op[6] = gen_label_rtx ();
12193	      output_asm_insn ("a\t%1,%6-%5(%4)", op);
12194	    }
12195	}
12196
12197      /* Perform vcall adjustment.  */
12198      if (vcall_offset)
12199        {
12200	  if (CONST_OK_FOR_J (vcall_offset))
12201	    {
12202	      output_asm_insn ("l\t%4,0(%1)", op);
12203	      output_asm_insn ("a\t%1,%3(%4)", op);
12204	    }
12205	  else if (DISP_IN_RANGE (vcall_offset))
12206	    {
12207	      output_asm_insn ("l\t%4,0(%1)", op);
12208	      output_asm_insn ("ay\t%1,%3(%4)", op);
12209	    }
12210	  else if (CONST_OK_FOR_K (vcall_offset))
12211	    {
12212	      output_asm_insn ("lhi\t%4,%3", op);
12213	      output_asm_insn ("a\t%4,0(%1)", op);
12214	      output_asm_insn ("a\t%1,0(%4)", op);
12215	    }
12216	  else if (CONST_OK_FOR_Os (vcall_offset))
12217 	    {
12218 	      output_asm_insn ("iilf\t%4,%3", op);
12219 	      output_asm_insn ("a\t%4,0(%1)", op);
12220 	      output_asm_insn ("a\t%1,0(%4)", op);
12221 	    }
12222	  else
12223	    {
12224	      op[7] = gen_label_rtx ();
12225	      output_asm_insn ("l\t%4,%7-%5(%4)", op);
12226	      output_asm_insn ("a\t%4,0(%1)", op);
12227	      output_asm_insn ("a\t%1,0(%4)", op);
12228	    }
12229
12230	  /* We had to clobber the base pointer register.
12231	     Re-setup the base pointer (with a different base).  */
12232	  op[5] = gen_label_rtx ();
12233	  output_asm_insn ("basr\t%4,0", op);
12234	  targetm.asm_out.internal_label (file, "L",
12235					  CODE_LABEL_NUMBER (op[5]));
12236	}
12237
12238      /* Jump to target.  */
12239      op[8] = gen_label_rtx ();
12240
12241      if (!flag_pic)
12242	output_asm_insn ("l\t%4,%8-%5(%4)", op);
12243      else if (!nonlocal)
12244	output_asm_insn ("a\t%4,%8-%5(%4)", op);
12245      /* We cannot call through .plt, since .plt requires %r12 to be loaded.  */
12246      else if (flag_pic == 1)
12247	{
12248	  output_asm_insn ("a\t%4,%8-%5(%4)", op);
12249	  output_asm_insn ("l\t%4,%0(%4)", op);
12250	}
12251      else if (flag_pic == 2)
12252	{
12253	  op[9] = gen_rtx_REG (Pmode, 0);
12254	  output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
12255	  output_asm_insn ("a\t%4,%8-%5(%4)", op);
12256	  output_asm_insn ("ar\t%4,%9", op);
12257	  output_asm_insn ("l\t%4,0(%4)", op);
12258	}
12259
12260      output_asm_insn ("br\t%4", op);
12261
12262      /* Output literal pool.  */
12263      output_asm_insn (".align\t4", op);
12264
12265      if (nonlocal && flag_pic == 2)
12266	output_asm_insn (".long\t%0", op);
12267      if (nonlocal)
12268	{
12269	  op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
12270	  SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
12271	}
12272
12273      targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
12274      if (!flag_pic)
12275	output_asm_insn (".long\t%0", op);
12276      else
12277	output_asm_insn (".long\t%0-%5", op);
12278
12279      if (op[6])
12280	{
12281	  targetm.asm_out.internal_label (file, "L",
12282					  CODE_LABEL_NUMBER (op[6]));
12283	  output_asm_insn (".long\t%2", op);
12284	}
12285      if (op[7])
12286	{
12287	  targetm.asm_out.internal_label (file, "L",
12288					  CODE_LABEL_NUMBER (op[7]));
12289	  output_asm_insn (".long\t%3", op);
12290	}
12291    }
12292  final_end_function ();
12293}
12294
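/* Return true if MODE is a valid mode for a pointer: SImode always,
   and DImode additionally on 64-bit targets.  */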
12295static bool
12296s390_valid_pointer_mode (machine_mode mode)
12297{
12298  return (mode == SImode || (TARGET_64BIT && mode == DImode));
12299}
12300
12301/* Checks whether the given CALL_EXPR would use a call-saved
12302   register.  This is used to decide whether sibling call
12303   optimization could be performed on the respective function
12304   call.  */
12305
12306static bool
12307s390_call_saved_register_used (tree call_expr)
12308{
12309  CUMULATIVE_ARGS cum_v;
12310  cumulative_args_t cum;
12311  tree parameter;
12312  machine_mode mode;
12313  tree type;
12314  rtx parm_rtx;
12315  int reg, i;
12316
12317  INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
12318  cum = pack_cumulative_args (&cum_v);
12319
12320  for (i = 0; i < call_expr_nargs (call_expr); i++)
12321    {
12322      parameter = CALL_EXPR_ARG (call_expr, i);
12323      gcc_assert (parameter);
12324
12325      /* For an undeclared variable passed as parameter we will get
12326	 an ERROR_MARK node here.  */
12327      if (TREE_CODE (parameter) == ERROR_MARK)
12328	return true;
12329
12330      type = TREE_TYPE (parameter);
12331      gcc_assert (type);
12332
12333      mode = TYPE_MODE (type);
12334      gcc_assert (mode);
12335
12336      /* We assume that in the target function all parameters are
12337	 named.  This only has an impact on vector argument register
12338	 usage, none of which is call-saved.  */
12339      if (pass_by_reference (&cum_v, mode, type, true))
12340 	{
12341 	  mode = Pmode;
12342 	  type = build_pointer_type (type);
12343 	}
12344
12345       parm_rtx = s390_function_arg (cum, mode, type, true);
12346
12347       s390_function_arg_advance (cum, mode, type, true);
12348
12349       if (!parm_rtx)
12350	 continue;
12351
12352       if (REG_P (parm_rtx))
12353  	 {
12354	   for (reg = 0;
12355		reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
12356		reg++)
12357	     if (!call_used_regs[reg + REGNO (parm_rtx)])
12358 	       return true;
12359	 }
12360
12361       if (GET_CODE (parm_rtx) == PARALLEL)
12362	 {
12363	   int i;
12364
12365	   for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
12366	     {
12367	       rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
12368
12369	       gcc_assert (REG_P (r));
12370
12371	       for (reg = 0;
12372		    reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
12373		    reg++)
12374		 if (!call_used_regs[reg + REGNO (r)])
12375		   return true;
12376	     }
12377	 }
12378
12379    }
12380  return false;
12381}
12382
12383/* Return true if the given call expression can be
12384   turned into a sibling call.
12385   DECL holds the declaration of the function to be called whereas
12386   EXP is the call expression itself.  */
12387
12388static bool
12389s390_function_ok_for_sibcall (tree decl, tree exp)
12390{
12391  /* The TPF epilogue uses register 1.  */
12392  if (TARGET_TPF_PROFILING)
12393    return false;
12394
12395  /* The 31 bit PLT code uses register 12 (GOT pointer - call-saved)
12396     which would have to be restored before the sibcall.  */
12397  if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
12398    return false;
12399
12400  /* Register 6 on s390 is available as an argument register but unfortunately
12401     "call-saved".  This makes functions needing this register for arguments
12402     not suitable for sibcalls.  */
12403  return !s390_call_saved_register_used (exp);
12404}
12405
12406/* Return the fixed registers used for condition codes.  */
12407
12408static bool
12409s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
12410{
12411  *p1 = CC_REGNUM;
12412  *p2 = INVALID_REGNUM;
12413
12414  return true;
12415}
12416
12417/* This function is used by the call expanders of the machine description.
12418   It emits the call insn itself together with the necessary operations
12419   to adjust the target address and returns the emitted insn.
12420   ADDR_LOCATION is the target address rtx
12421   TLS_CALL the location of the thread-local symbol
12422   RESULT_REG the register where the result of the call should be stored
12423   RETADDR_REG the register where the return address should be stored
12424               If this parameter is NULL_RTX the call is considered
12425               to be a sibling call.  */
12426
12427rtx_insn *
12428s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
12429		rtx retaddr_reg)
12430{
12431  bool plt_call = false;
12432  rtx_insn *insn;
12433  rtx call;
12434  rtx clobber;
12435  rtvec vec;
12436
12437  /* Direct function calls need special treatment.  */
12438  if (GET_CODE (addr_location) == SYMBOL_REF)
12439    {
12440      /* When calling a global routine in PIC mode, we must
12441         replace the symbol itself with the PLT stub.  */
12442      if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
12443        {
12444	  if (TARGET_64BIT || retaddr_reg != NULL_RTX)
12445	    {
12446	      addr_location = gen_rtx_UNSPEC (Pmode,
12447					      gen_rtvec (1, addr_location),
12448					      UNSPEC_PLT);
12449	      addr_location = gen_rtx_CONST (Pmode, addr_location);
12450	      plt_call = true;
12451	    }
12452	  else
12453	    /* For -fpic code the PLT entries might use r12 which is
12454	       call-saved.  Therefore we cannot do a sibcall when
12455	       calling directly using a symbol ref.  When reaching
12456	       this point we decided (in s390_function_ok_for_sibcall)
12457	       to do a sibcall for a function pointer but one of the
12458	       optimizers was able to get rid of the function pointer
12459	       by propagating the symbol ref into the call.  This
12460	       optimization is illegal for S/390 so we turn the direct
12461	       call into an indirect call again.  */
12462	    addr_location = force_reg (Pmode, addr_location);
12463        }
12464
12465      /* Unless we can use the bras(l) insn, force the
12466         routine address into a register.  */
12467      if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
12468        {
12469	  if (flag_pic)
12470	    addr_location = legitimize_pic_address (addr_location, 0);
12471	  else
12472	    addr_location = force_reg (Pmode, addr_location);
12473	}
12474    }
12475
12476  /* If it is already an indirect call or the code above moved the
12477     SYMBOL_REF to somewhere else, make sure the address can be found in
12478     register 1.  */
12479  if (retaddr_reg == NULL_RTX
12480      && GET_CODE (addr_location) != SYMBOL_REF
12481      && !plt_call)
12482    {
12483      emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
12484      addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
12485    }
12486
12487  addr_location = gen_rtx_MEM (QImode, addr_location);
12488  call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
12489
12490  if (result_reg != NULL_RTX)
12491    call = gen_rtx_SET (VOIDmode, result_reg, call);
12492
12493  if (retaddr_reg != NULL_RTX)
12494    {
12495      clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
12496
12497      if (tls_call != NULL_RTX)
12498	vec = gen_rtvec (3, call, clobber,
12499			 gen_rtx_USE (VOIDmode, tls_call));
12500      else
12501	vec = gen_rtvec (2, call, clobber);
12502
12503      call = gen_rtx_PARALLEL (VOIDmode, vec);
12504    }
12505
12506  insn = emit_call_insn (call);
12507
12508  /* 31-bit PLT stubs and tls calls use the GOT register implicitly.  */
12509  if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
12510    {
12511      /* s390_function_ok_for_sibcall should
12512	 have denied sibcalls in this case.  */
12513      gcc_assert (retaddr_reg != NULL_RTX);
12514      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
12515    }
12516  return insn;
12517}
12518
12519/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */
12520
12521static void
12522s390_conditional_register_usage (void)
12523{
12524  int i;
12525
12526  if (flag_pic)
12527    {
12528      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12529      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12530    }
12531  if (TARGET_CPU_ZARCH)
12532    {
12533      fixed_regs[BASE_REGNUM] = 0;
12534      call_used_regs[BASE_REGNUM] = 0;
12535      fixed_regs[RETURN_REGNUM] = 0;
12536      call_used_regs[RETURN_REGNUM] = 0;
12537    }
12538  if (TARGET_64BIT)
12539    {
12540      for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
12541	call_used_regs[i] = call_really_used_regs[i] = 0;
12542    }
12543  else
12544    {
12545      call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
12546      call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
12547    }
12548
12549  if (TARGET_SOFT_FLOAT)
12550    {
12551      for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
12552	call_used_regs[i] = fixed_regs[i] = 1;
12553    }
12554
12555  /* Disable v16 - v31 for non-vector target.  */
12556  if (!TARGET_VX)
12557    {
12558      for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
12559	fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
12560    }
12561}
12562
12563/* Corresponding function to the eh_return expander.  */
12564
12565static GTY(()) rtx s390_tpf_eh_return_symbol;
12566void
12567s390_emit_tpf_eh_return (rtx target)
12568{
12569  rtx_insn *insn;
12570  rtx reg, orig_ra;
12571
12572  if (!s390_tpf_eh_return_symbol)
12573    s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
12574
12575  reg = gen_rtx_REG (Pmode, 2);
12576  orig_ra = gen_rtx_REG (Pmode, 3);
12577
12578  emit_move_insn (reg, target);
12579  emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
12580  insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
12581                                     gen_rtx_REG (Pmode, RETURN_REGNUM));
12582  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
12583  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
12584
12585  emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
12586}
12587
12588/* Rework the prologue/epilogue to avoid saving/restoring
12589   registers unnecessarily.  */
12590
12591static void
12592s390_optimize_prologue (void)
12593{
12594  rtx_insn *insn, *new_insn, *next_insn;
12595
12596  /* Do a final recompute of the frame-related data.  */
12597  s390_optimize_register_info ();
12598
12599  /* If all special registers are in fact used, there's nothing we
12600     can do, so no point in walking the insn list.  */
12601
12602  if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
12603      && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
12604      && (TARGET_CPU_ZARCH
12605          || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
12606              && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
12607    return;
12608
12609  /* Search for prologue/epilogue insns and replace them.  */
12610
12611  for (insn = get_insns (); insn; insn = next_insn)
12612    {
12613      int first, last, off;
12614      rtx set, base, offset;
12615      rtx pat;
12616
12617      next_insn = NEXT_INSN (insn);
12618
12619      if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
12620	continue;
12621
12622      pat = PATTERN (insn);
12623
12624      /* Remove ldgr/lgdr instructions used for saving and restoring
12625	 GPRs if possible.  */
12626      if (TARGET_Z10)
12627	{
12628	  rtx tmp_pat = pat;
12629
12630	  if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
12631	    tmp_pat = XVECEXP (pat, 0, 0);
12632
12633	  if (GET_CODE (tmp_pat) == SET
12634	      && GET_MODE (SET_SRC (tmp_pat)) == DImode
12635	      && REG_P (SET_SRC (tmp_pat))
12636	      && REG_P (SET_DEST (tmp_pat)))
12637	    {
12638	      int src_regno = REGNO (SET_SRC (tmp_pat));
12639	      int dest_regno = REGNO (SET_DEST (tmp_pat));
12640	      int gpr_regno;
12641	      int fpr_regno;
12642
12643	      if (!((GENERAL_REGNO_P (src_regno)
12644		     && FP_REGNO_P (dest_regno))
12645		    || (FP_REGNO_P (src_regno)
12646			&& GENERAL_REGNO_P (dest_regno))))
12647		continue;
12648
12649	      gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
12650	      fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
12651
12652	      /* GPR must be call-saved, FPR must be call-clobbered.  */
12653	      if (!call_really_used_regs[fpr_regno]
12654		  || call_really_used_regs[gpr_regno])
12655		continue;
12656
12657	      /* It must not happen that what we once saved in an FPR now
12658		 needs a stack slot.  */
12659	      gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
12660
12661	      if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
12662		{
12663		  remove_insn (insn);
12664		  continue;
12665		}
12666	    }
12667	}
12668
12669      if (GET_CODE (pat) == PARALLEL
12670	  && store_multiple_operation (pat, VOIDmode))
12671	{
12672	  set = XVECEXP (pat, 0, 0);
12673	  first = REGNO (SET_SRC (set));
12674	  last = first + XVECLEN (pat, 0) - 1;
12675	  offset = const0_rtx;
12676	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
12677	  off = INTVAL (offset);
12678
12679	  if (GET_CODE (base) != REG || off < 0)
12680	    continue;
12681	  if (cfun_frame_layout.first_save_gpr != -1
12682	      && (cfun_frame_layout.first_save_gpr < first
12683		  || cfun_frame_layout.last_save_gpr > last))
12684	    continue;
12685	  if (REGNO (base) != STACK_POINTER_REGNUM
12686	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12687	    continue;
12688	  if (first > BASE_REGNUM || last < BASE_REGNUM)
12689	    continue;
12690
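	  /* Replace the store multiple with one that covers only the
	     GPR range the frame layout actually requires.  */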
12691	  if (cfun_frame_layout.first_save_gpr != -1)
12692	    {
12693	      rtx s_pat = save_gprs (base,
12694				     off + (cfun_frame_layout.first_save_gpr
12695					    - first) * UNITS_PER_LONG,
12696				     cfun_frame_layout.first_save_gpr,
12697				     cfun_frame_layout.last_save_gpr);
12698	      new_insn = emit_insn_before (s_pat, insn);
12699	      INSN_ADDRESSES_NEW (new_insn, -1);
12700	    }
12701
12702	  remove_insn (insn);
12703	  continue;
12704	}
12705
12706      if (cfun_frame_layout.first_save_gpr == -1
12707	  && GET_CODE (pat) == SET
12708	  && GENERAL_REG_P (SET_SRC (pat))
12709	  && GET_CODE (SET_DEST (pat)) == MEM)
12710	{
12711	  set = pat;
12712	  first = REGNO (SET_SRC (set));
12713	  offset = const0_rtx;
12714	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
12715	  off = INTVAL (offset);
12716
12717	  if (GET_CODE (base) != REG || off < 0)
12718	    continue;
12719	  if (REGNO (base) != STACK_POINTER_REGNUM
12720	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12721	    continue;
12722
12723	  remove_insn (insn);
12724	  continue;
12725	}
12726
12727      if (GET_CODE (pat) == PARALLEL
12728	  && load_multiple_operation (pat, VOIDmode))
12729	{
12730	  set = XVECEXP (pat, 0, 0);
12731	  first = REGNO (SET_DEST (set));
12732	  last = first + XVECLEN (pat, 0) - 1;
12733	  offset = const0_rtx;
12734	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
12735	  off = INTVAL (offset);
12736
12737	  if (GET_CODE (base) != REG || off < 0)
12738	    continue;
12739
12740	  if (cfun_frame_layout.first_restore_gpr != -1
12741	      && (cfun_frame_layout.first_restore_gpr < first
12742		  || cfun_frame_layout.last_restore_gpr > last))
12743	    continue;
12744	  if (REGNO (base) != STACK_POINTER_REGNUM
12745	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12746	    continue;
12747	  if (first > BASE_REGNUM || last < BASE_REGNUM)
12748	    continue;
12749
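	  /* Likewise, replace the load multiple with one restoring only
	     the required GPR range.  */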
12750	  if (cfun_frame_layout.first_restore_gpr != -1)
12751	    {
12752	      rtx rpat = restore_gprs (base,
12753				       off + (cfun_frame_layout.first_restore_gpr
12754					      - first) * UNITS_PER_LONG,
12755				       cfun_frame_layout.first_restore_gpr,
12756				       cfun_frame_layout.last_restore_gpr);
12757
12758	      /* Remove REG_CFA_RESTOREs for registers that we no
12759		 longer need to save.  */
12760	      REG_NOTES (rpat) = REG_NOTES (insn);
12761	      for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
12762		if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
12763		    && ((int) REGNO (XEXP (*ptr, 0))
12764			< cfun_frame_layout.first_restore_gpr))
12765		  *ptr = XEXP (*ptr, 1);
12766		else
12767		  ptr = &XEXP (*ptr, 1);
12768	      new_insn = emit_insn_before (rpat, insn);
12769	      RTX_FRAME_RELATED_P (new_insn) = 1;
12770	      INSN_ADDRESSES_NEW (new_insn, -1);
12771	    }
12772
12773	  remove_insn (insn);
12774	  continue;
12775	}
12776
12777      if (cfun_frame_layout.first_restore_gpr == -1
12778	  && GET_CODE (pat) == SET
12779	  && GENERAL_REG_P (SET_DEST (pat))
12780	  && GET_CODE (SET_SRC (pat)) == MEM)
12781	{
12782	  set = pat;
12783	  first = REGNO (SET_DEST (set));
12784	  offset = const0_rtx;
12785	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
12786	  off = INTVAL (offset);
12787
12788	  if (GET_CODE (base) != REG || off < 0)
12789	    continue;
12790
12791	  if (REGNO (base) != STACK_POINTER_REGNUM
12792	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12793	    continue;
12794
12795	  remove_insn (insn);
12796	  continue;
12797	}
12798    }
12799}
12800
12801/* On z10 and later the dynamic branch prediction must see the
12802   backward jump within a certain window.  If not, it falls back to
12803   the static prediction.  This function rearranges the loop backward
12804   branch in a way which makes the static prediction always correct.
12805   The function returns true if it added an instruction.  */
12806static bool
12807s390_fix_long_loop_prediction (rtx_insn *insn)
12808{
12809  rtx set = single_set (insn);
12810  rtx code_label, label_ref, new_label;
12811  rtx_insn *uncond_jump;
12812  rtx_insn *cur_insn;
12813  rtx tmp;
12814  int distance;
12815
12816  /* This will exclude branch on count and branch on index patterns
12817     since these are correctly statically predicted.  */
12818  if (!set
12819      || SET_DEST (set) != pc_rtx
12820      || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
12821    return false;
12822
12823  /* Skip conditional returns.  */
12824  if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
12825      && XEXP (SET_SRC (set), 2) == pc_rtx)
12826    return false;
12827
12828  label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
12829	       XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
12830
12831  gcc_assert (GET_CODE (label_ref) == LABEL_REF);
12832
12833  code_label = XEXP (label_ref, 0);
12834
12835  if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
12836      || INSN_ADDRESSES (INSN_UID (insn)) == -1
12837      || (INSN_ADDRESSES (INSN_UID (insn))
12838	  - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
12839    return false;
12840
12841  for (distance = 0, cur_insn = PREV_INSN (insn);
12842       distance < PREDICT_DISTANCE - 6;
12843       distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
12844    if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
12845      return false;
12846
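  /* Turn the conditional backward branch into a conditional forward
     branch around a new unconditional backward jump: emit the jump and
     a label after INSN, then invert INSN's condition and redirect it
     to the new label.  */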
12847  new_label = gen_label_rtx ();
12848  uncond_jump = emit_jump_insn_after (
12849		  gen_rtx_SET (VOIDmode, pc_rtx,
12850			       gen_rtx_LABEL_REF (VOIDmode, code_label)),
12851		  insn);
12852  emit_label_after (new_label, uncond_jump);
12853
12854  tmp = XEXP (SET_SRC (set), 1);
12855  XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
12856  XEXP (SET_SRC (set), 2) = tmp;
12857  INSN_CODE (insn) = -1;
12858
12859  XEXP (label_ref, 0) = new_label;
12860  JUMP_LABEL (insn) = new_label;
12861  JUMP_LABEL (uncond_jump) = code_label;
12862
12863  return true;
12864}
12865
12866/* Returns 1 if INSN reads the value of REG for purposes not related
12867   to addressing of memory, and 0 otherwise.  */
12868static int
12869s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
12870{
12871  return reg_referenced_p (reg, PATTERN (insn))
12872    && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
12873}
12874
12875/* Starting from INSN find_cond_jump looks downwards in the insn
12876   stream for a single jump insn which is the last user of the
12877   condition code set in INSN.  */
12878static rtx_insn *
12879find_cond_jump (rtx_insn *insn)
12880{
12881  for (; insn; insn = NEXT_INSN (insn))
12882    {
12883      rtx ite, cc;
12884
12885      if (LABEL_P (insn))
12886	break;
12887
12888      if (!JUMP_P (insn))
12889	{
12890	  if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
12891	    break;
12892	  continue;
12893	}
12894
12895      /* This will be triggered by a return.  */
12896      if (GET_CODE (PATTERN (insn)) != SET)
12897	break;
12898
12899      gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
12900      ite = SET_SRC (PATTERN (insn));
12901
12902      if (GET_CODE (ite) != IF_THEN_ELSE)
12903	break;
12904
12905      cc = XEXP (XEXP (ite, 0), 0);
12906      if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
12907	break;
12908
12909      if (find_reg_note (insn, REG_DEAD, cc))
12910	return insn;
12911      break;
12912    }
12913
12914  return NULL;
12915}
12916
12917/* Swap the condition in COND and the operands in OP0 and OP1 so that
12918   the semantics does not change.  If NULL_RTX is passed as COND the
12919   function tries to find the conditional jump starting with INSN.  */
12920static void
12921s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
12922{
12923  rtx tmp = *op0;
12924
12925  if (cond == NULL_RTX)
12926    {
12927      rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
12928      rtx set = jump ? single_set (jump) : NULL_RTX;
12929
12930      if (set == NULL_RTX)
12931	return;
12932
12933      cond = XEXP (SET_SRC (set), 0);
12934    }
12935
12936  *op0 = *op1;
12937  *op1 = tmp;
12938  PUT_CODE (cond, swap_condition (GET_CODE (cond)));
12939}
12940
12941/* On z10, instructions of the compare-and-branch family have the
12942   property of accessing the register occurring as the second operand with
12943   its bits complemented.  If such a compare is grouped with a second
12944   instruction that accesses the same register non-complemented, and
12945   if that register's value is delivered via a bypass, then the
12946   pipeline recycles, thereby causing significant performance decline.
12947   This function locates such situations and exchanges the two
12948   operands of the compare.  The function returns true whenever it
12949   added an insn.  */
12950static bool
12951s390_z10_optimize_cmp (rtx_insn *insn)
12952{
12953  rtx_insn *prev_insn, *next_insn;
12954  bool insn_added_p = false;
12955  rtx cond, *op0, *op1;
12956
12957  if (GET_CODE (PATTERN (insn)) == PARALLEL)
12958    {
12959      /* Handle compare and branch and branch on count
12960	 instructions.  */
12961      rtx pattern = single_set (insn);
12962
12963      if (!pattern
12964	  || SET_DEST (pattern) != pc_rtx
12965	  || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
12966	return false;
12967
12968      cond = XEXP (SET_SRC (pattern), 0);
12969      op0 = &XEXP (cond, 0);
12970      op1 = &XEXP (cond, 1);
12971    }
12972  else if (GET_CODE (PATTERN (insn)) == SET)
12973    {
12974      rtx src, dest;
12975
12976      /* Handle normal compare instructions.  */
12977      src = SET_SRC (PATTERN (insn));
12978      dest = SET_DEST (PATTERN (insn));
12979
12980      if (!REG_P (dest)
12981	  || !CC_REGNO_P (REGNO (dest))
12982	  || GET_CODE (src) != COMPARE)
12983	return false;
12984
12985      /* s390_swap_cmp will try to find the conditional
12986	 jump when passing NULL_RTX as condition.  */
12987      cond = NULL_RTX;
12988      op0 = &XEXP (src, 0);
12989      op1 = &XEXP (src, 1);
12990    }
12991  else
12992    return false;
12993
12994  if (!REG_P (*op0) || !REG_P (*op1))
12995    return false;
12996
12997  if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
12998    return false;
12999
13000  /* Swap the COMPARE arguments and its mask if there is a
13001     conflicting access in the previous insn.  */
13002  prev_insn = prev_active_insn (insn);
13003  if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13004      && reg_referenced_p (*op1, PATTERN (prev_insn)))
13005    s390_swap_cmp (cond, op0, op1, insn);
13006
13007  /* Check if there is a conflict with the next insn. If there
13008     was no conflict with the previous insn, then swap the
13009     COMPARE arguments and its mask.  If we already swapped
13010     the operands, or if swapping them would cause a conflict
13011     with the previous insn, issue a NOP after the COMPARE in
13012     order to separate the two instructions.  */
13013  next_insn = next_active_insn (insn);
13014  if (next_insn != NULL_RTX && INSN_P (next_insn)
13015      && s390_non_addr_reg_read_p (*op1, next_insn))
13016    {
13017      if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13018	  && s390_non_addr_reg_read_p (*op0, prev_insn))
13019	{
13020	  if (REGNO (*op1) == 0)
13021	    emit_insn_after (gen_nop1 (), insn);
13022	  else
13023	    emit_insn_after (gen_nop (), insn);
13024	  insn_added_p = true;
13025	}
13026      else
13027	s390_swap_cmp (cond, op0, op1, insn);
13028    }
13029  return insn_added_p;
13030}
13031
13032/* Perform machine-dependent processing.  */
13033
13034static void
13035s390_reorg (void)
13036{
13037  bool pool_overflow = false;
13038  int hw_before, hw_after;
13039
13040  /* Make sure all splits have been performed; splits after
13041     machine_dependent_reorg might confuse insn length counts.  */
13042  split_all_insns_noflow ();
13043
13044  /* Install the main literal pool and the associated base
13045     register load insns.
13046
13047     In addition, there are two problematic situations we need
13048     to correct:
13049
13050     - the literal pool might be > 4096 bytes in size, so that
13051       some of its elements cannot be directly accessed
13052
13053     - a branch target might be > 64K away from the branch, so that
13054       it is not possible to use a PC-relative instruction.
13055
13056     To fix those, we split the single literal pool into multiple
13057     pool chunks, reloading the pool base register at various
13058     points throughout the function to ensure it always points to
13059     the pool chunk the following code expects, and / or replace
13060     PC-relative branches by absolute branches.
13061
13062     However, the two problems are interdependent: splitting the
13063     literal pool can move a branch further away from its target,
13064     causing the 64K limit to overflow, and on the other hand,
13065     replacing a PC-relative branch by an absolute branch means
13066     we need to put the branch target address into the literal
13067     pool, possibly causing it to overflow.
13068
13069     So, we loop trying to fix up both problems until we manage
13070     to satisfy both conditions at the same time.  Note that the
13071     loop is guaranteed to terminate as every pass of the loop
13072     strictly decreases the total number of PC-relative branches
13073     in the function.  (This is not completely true as there
13074     might be branch-over-pool insns introduced by chunkify_start.
13075     Those never need to be split however.)  */
13076
13077  for (;;)
13078    {
13079      struct constant_pool *pool = NULL;
13080
13081      /* Collect the literal pool.  */
13082      if (!pool_overflow)
13083	{
13084	  pool = s390_mainpool_start ();
13085	  if (!pool)
13086	    pool_overflow = true;
13087	}
13088
13089      /* If literal pool overflowed, start to chunkify it.  */
13090      if (pool_overflow)
13091        pool = s390_chunkify_start ();
13092
13093      /* Split out-of-range branches.  If this has created new
13094	 literal pool entries, cancel current chunk list and
13095	 recompute it.  zSeries machines have large branch
13096	 instructions, so we never need to split a branch.  */
13097      if (!TARGET_CPU_ZARCH && s390_split_branches ())
13098        {
13099          if (pool_overflow)
13100            s390_chunkify_cancel (pool);
13101	  else
13102            s390_mainpool_cancel (pool);
13103
13104          continue;
13105        }
13106
13107      /* If we made it up to here, both conditions are satisfied.
13108	 Finish up literal pool related changes.  */
13109      if (pool_overflow)
13110	s390_chunkify_finish (pool);
13111      else
13112	s390_mainpool_finish (pool);
13113
13114      /* We're done splitting branches.  */
13115      cfun->machine->split_branches_pending_p = false;
13116      break;
13117    }
13118
13119  /* Generate out-of-pool execute target insns.  */
13120  if (TARGET_CPU_ZARCH)
13121    {
13122      rtx_insn *insn, *target;
13123      rtx label;
13124
13125      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13126	{
13127	  label = s390_execute_label (insn);
13128	  if (!label)
13129	    continue;
13130
13131	  gcc_assert (label != const0_rtx);
13132
13133	  target = emit_label (XEXP (label, 0));
13134	  INSN_ADDRESSES_NEW (target, -1);
13135
13136	  target = emit_insn (s390_execute_target (insn));
13137	  INSN_ADDRESSES_NEW (target, -1);
13138	}
13139    }
13140
13141  /* Try to optimize prologue and epilogue further.  */
13142  s390_optimize_prologue ();
13143
13144  /* Walk over the insns and do some >=z10 specific changes.  */
13145  if (s390_tune == PROCESSOR_2097_Z10
13146      || s390_tune == PROCESSOR_2817_Z196
13147      || s390_tune == PROCESSOR_2827_ZEC12
13148      || s390_tune == PROCESSOR_2964_Z13)
13149    {
13150      rtx_insn *insn;
13151      bool insn_added_p = false;
13152
13153      /* The insn lengths and addresses have to be up to date for the
13154	 following manipulations.  */
13155      shorten_branches (get_insns ());
13156
13157      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13158	{
13159	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13160	    continue;
13161
13162	  if (JUMP_P (insn))
13163	    insn_added_p |= s390_fix_long_loop_prediction (insn);
13164
13165	  if ((GET_CODE (PATTERN (insn)) == PARALLEL
13166	       || GET_CODE (PATTERN (insn)) == SET)
13167	      && s390_tune == PROCESSOR_2097_Z10)
13168	    insn_added_p |= s390_z10_optimize_cmp (insn);
13169	}
13170
13171      /* Adjust branches if we added new instructions.  */
13172      if (insn_added_p)
13173	shorten_branches (get_insns ());
13174    }
13175
13176  s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
13177  if (hw_after > 0)
13178    {
13179      rtx_insn *insn;
13180
13181      /* Insert NOPs for hotpatching.  */
13182      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13183	/* Emit NOPs
13184	    1. inside the area covered by debug information to allow setting
13185	       breakpoints at the NOPs,
13186	    2. before any insn which results in an asm instruction,
13187	    3. before in-function labels to avoid jumping to the NOPs, for
13188	       example as part of a loop,
13189	    4. before any barrier in case the function is completely empty
13190	       (__builtin_unreachable ()) and has neither internal labels nor
13191	       active insns.
13192	*/
13193	if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
13194	  break;
13195      /* Output a series of NOPs before the first active insn.  */
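      /* hw_after counts halfwords still to be covered; use 6-, 4- and
	 2-byte NOPs (3, 2 and 1 halfwords) greedily.  */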
13196      while (insn && hw_after > 0)
13197	{
13198	  if (hw_after >= 3 && TARGET_CPU_ZARCH)
13199	    {
13200	      emit_insn_before (gen_nop_6_byte (), insn);
13201	      hw_after -= 3;
13202	    }
13203	  else if (hw_after >= 2)
13204	    {
13205	      emit_insn_before (gen_nop_4_byte (), insn);
13206	      hw_after -= 2;
13207	    }
13208	  else
13209	    {
13210	      emit_insn_before (gen_nop_2_byte (), insn);
13211	      hw_after -= 1;
13212	    }
13213	}
13214    }
13215}
13216
13217/* Return true if INSN is a fp load insn writing register REGNO.  */
13218static inline bool
13219s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
13220{
13221  rtx set;
13222  enum attr_type flag = s390_safe_attr_type (insn);
13223
13224  if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
13225    return false;
13226
13227  set = single_set (insn);
13228
13229  if (set == NULL_RTX)
13230    return false;
13231
13232  if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
13233    return false;
13234
13235  if (REGNO (SET_DEST (set)) != regno)
13236    return false;
13237
13238  return true;
13239}
13240
13241/* This value describes the distance to be avoided between an
13242   arithmetic fp instruction and an fp load writing the same register.
13243   Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
13244   fine, but the exact value has to be avoided.  Otherwise the FP
13245   pipeline will throw an exception causing a major penalty.  */
13246#define Z10_EARLYLOAD_DISTANCE 7
13247
13248/* Rearrange the ready list in order to avoid the situation described
13249   for Z10_EARLYLOAD_DISTANCE.  A problematic load instruction is
13250   moved to the very end of the ready list.  */
13251static void
13252s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
13253{
13254  unsigned int regno;
13255  int nready = *nready_p;
13256  rtx_insn *tmp;
13257  int i;
13258  rtx_insn *insn;
13259  rtx set;
13260  enum attr_type flag;
13261  int distance;
13262
13263  /* Skip DISTANCE - 1 active insns.  */
13264  for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
13265       distance > 0 && insn != NULL_RTX;
13266       distance--, insn = prev_active_insn (insn))
13267    if (CALL_P (insn) || JUMP_P (insn))
13268      return;
13269
13270  if (insn == NULL_RTX)
13271    return;
13272
13273  set = single_set (insn);
13274
13275  if (set == NULL_RTX || !REG_P (SET_DEST (set))
13276      || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
13277    return;
13278
13279  flag = s390_safe_attr_type (insn);
13280
13281  if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
13282    return;
13283
13284  regno = REGNO (SET_DEST (set));
13285  i = nready - 1;
13286
13287  while (!s390_fpload_toreg (ready[i], regno) && i > 0)
13288    i--;
13289
13290  if (!i)
13291    return;
13292
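  /* Move the conflicting load to slot 0, the very end of the ready
     list, shifting the other entries up by one.  */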
13293  tmp = ready[i];
13294  memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
13295  ready[0] = tmp;
13296}
13297
13298
13299/* The s390_sched_state variable tracks the state of the current or
13300   the last instruction group.
13301
13302   0,1,2 number of instructions scheduled in the current group
13303   3     the last group is complete - normal insns
13304   4     the last group was a cracked/expanded insn */
13305
13306static int s390_sched_state;
13307
13308#define S390_SCHED_STATE_NORMAL  3
13309#define S390_SCHED_STATE_CRACKED 4
13310
13311#define S390_SCHED_ATTR_MASK_CRACKED    0x1
13312#define S390_SCHED_ATTR_MASK_EXPANDED   0x2
13313#define S390_SCHED_ATTR_MASK_ENDGROUP   0x4
13314#define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
13315
13316static unsigned int
13317s390_get_sched_attrmask (rtx_insn *insn)
13318{
13319  unsigned int mask = 0;
13320
13321  switch (s390_tune)
13322    {
13323    case PROCESSOR_2827_ZEC12:
13324      if (get_attr_zEC12_cracked (insn))
13325	mask |= S390_SCHED_ATTR_MASK_CRACKED;
13326      if (get_attr_zEC12_expanded (insn))
13327	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
13328      if (get_attr_zEC12_endgroup (insn))
13329	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
13330      if (get_attr_zEC12_groupalone (insn))
13331	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
13332      break;
13333    case PROCESSOR_2964_Z13:
13334      if (get_attr_z13_cracked (insn))
13335	mask |= S390_SCHED_ATTR_MASK_CRACKED;
13336      if (get_attr_z13_expanded (insn))
13337	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
13338      if (get_attr_z13_endgroup (insn))
13339	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
13340      if (get_attr_z13_groupalone (insn))
13341	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
13342      break;
13343    default:
13344      gcc_unreachable ();
13345    }
13346  return mask;
13347}
13348
13349static unsigned int
13350s390_get_unit_mask (rtx_insn *insn, int *units)
13351{
13352  unsigned int mask = 0;
13353
13354  switch (s390_tune)
13355    {
13356    case PROCESSOR_2964_Z13:
13357      *units = 3;
13358      if (get_attr_z13_unit_lsu (insn))
13359	mask |= 1 << 0;
13360      if (get_attr_z13_unit_fxu (insn))
13361	mask |= 1 << 1;
13362      if (get_attr_z13_unit_vfu (insn))
13363	mask |= 1 << 2;
13364      break;
13365    default:
13366      gcc_unreachable ();
13367    }
13368  return mask;
13369}
13370
13371/* Return the scheduling score for INSN.  The higher the score the
13372   better.  The score is calculated from the OOO scheduling attributes
13373   of INSN and the scheduling state s390_sched_state.  */
13374static int
13375s390_sched_score (rtx_insn *insn)
13376{
13377  unsigned int mask = s390_get_sched_attrmask (insn);
13378  int score = 0;
13379
13380  switch (s390_sched_state)
13381    {
13382    case 0:
13383      /* Try to put insns into the first slot which would otherwise
13384	 break a group.  */
13385      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
13386	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
13387	score += 5;
13388      if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
13389	score += 10;
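      /* No break: state 0 falls through to the state 1 scoring below.  */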
13390    case 1:
13391      /* Prefer not cracked insns while trying to put together a
13392	 group.  */
13393      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
13394	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
13395	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
13396	score += 10;
13397      if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
13398	score += 5;
13399      break;
13400    case 2:
13401      /* Prefer not cracked insns while trying to put together a
13402	 group.  */
13403      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
13404	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
13405	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
13406	score += 10;
13407      /* Prefer endgroup insns in the last slot.  */
13408      if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
13409	score += 10;
13410      break;
13411    case S390_SCHED_STATE_NORMAL:
13412      /* Prefer not cracked insns if the last was not cracked.  */
13413      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
13414	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0)
13415	score += 5;
13416      if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
13417	score += 10;
13418      break;
13419    case S390_SCHED_STATE_CRACKED:
13420      /* Try to keep cracked insns together to prevent them from
13421	 interrupting groups.  */
13422      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
13423	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
13424	score += 5;
13425      break;
13426    }
13427
13428  if (s390_tune == PROCESSOR_2964_Z13)
13429    {
13430      int units, i;
13431      unsigned unit_mask, m = 1;
13432
13433      unit_mask = s390_get_unit_mask (insn, &units);
13434      gcc_assert (units <= MAX_SCHED_UNITS);
13435
13436      /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
13437	 ago the last insn of this unit type got scheduled.  This is
13438	 supposed to help providing a proper instruction mix to the
13439	 CPU.  */
13440      for (i = 0; i < units; i++, m <<= 1)
13441	if (m & unit_mask)
13442	  score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
13443		    MAX_SCHED_MIX_DISTANCE);
13444    }
13445  return score;
13446}
13447
13448/* This function is called via hook TARGET_SCHED_REORDER before
13449   issuing one insn from list READY which contains *NREADYP entries.
13450   For target z10 it reorders load instructions to avoid early load
13451   conflicts in the floating point pipeline.  */
13452static int
13453s390_sched_reorder (FILE *file, int verbose,
13454		    rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
13455{
13456  if (s390_tune == PROCESSOR_2097_Z10)
13457    if (reload_completed && *nreadyp > 1)
13458      s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
13459
13460  if ((s390_tune == PROCESSOR_2827_ZEC12
13461       || s390_tune == PROCESSOR_2964_Z13)
13462      && reload_completed
13463      && *nreadyp > 1)
13464    {
13465      int i;
13466      int last_index = *nreadyp - 1;
13467      int max_index = -1;
13468      int max_score = -1;
13469      rtx_insn *tmp;
13470
13471      /* Just move the insn with the highest score to the top (the
13472	 end) of the list.  A full sort is not needed since a conflict
13473	 in the hazard recognition cannot happen.  So the top insn in
13474	 the ready list will always be taken.  */
13475      for (i = last_index; i >= 0; i--)
13476	{
13477	  int score;
13478
13479	  if (recog_memoized (ready[i]) < 0)
13480	    continue;
13481
13482	  score = s390_sched_score (ready[i]);
13483	  if (score > max_score)
13484	    {
13485	      max_score = score;
13486	      max_index = i;
13487	    }
13488	}
13489
13490      if (max_index != -1)
13491	{
13492	  if (max_index != last_index)
13493	    {
13494	      tmp = ready[max_index];
13495	      ready[max_index] = ready[last_index];
13496	      ready[last_index] = tmp;
13497
13498	      if (verbose > 5)
13499		fprintf (file,
13500			 ";;\t\tBACKEND: move insn %d to the top of list\n",
13501			 INSN_UID (ready[last_index]));
13502	    }
13503	  else if (verbose > 5)
13504	    fprintf (file,
13505		     ";;\t\tBACKEND: best insn %d already on top\n",
13506		     INSN_UID (ready[last_index]));
13507	}
13508
13509      if (verbose > 5)
13510	{
13511	  fprintf (file, "ready list ooo attributes - sched state: %d\n",
13512		   s390_sched_state);
13513
13514	  for (i = last_index; i >= 0; i--)
13515	    {
13516	      unsigned int sched_mask;
13517	      rtx_insn *insn = ready[i];
13518
13519	      if (recog_memoized (insn) < 0)
13520		continue;
13521
13522	      sched_mask = s390_get_sched_attrmask (insn);
13523	      fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
13524		       INSN_UID (insn),
13525		       s390_sched_score (insn));
13526#define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
13527					   ((M) & sched_mask) ? #ATTR : "");
13528	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
13529	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
13530	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
13531	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
13532#undef PRINT_SCHED_ATTR
13533	      if (s390_tune == PROCESSOR_2964_Z13)
13534		{
13535		  unsigned int unit_mask, m = 1;
13536		  int units, j;
13537
13538		  unit_mask  = s390_get_unit_mask (insn, &units);
13539		  fprintf (file, "(units:");
13540		  for (j = 0; j < units; j++, m <<= 1)
13541		    if (m & unit_mask)
13542		      fprintf (file, " u%d", j);
13543		  fprintf (file, ")");
13544		}
13545	      fprintf (file, "\n");
13546	    }
13547	}
13548    }
13549
13550  return s390_issue_rate ();
13551}
13552
13553
13554/* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
13555   the scheduler has issued INSN.  It stores the last issued insn into
13556   last_scheduled_insn in order to make it available for
13557   s390_sched_reorder.  */
13558static int
13559s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
13560{
13561  last_scheduled_insn = insn;
13562
13563  if ((s390_tune == PROCESSOR_2827_ZEC12
13564       || s390_tune == PROCESSOR_2964_Z13)
13565      && reload_completed
13566      && recog_memoized (insn) >= 0)
13567    {
13568      unsigned int mask = s390_get_sched_attrmask (insn);
13569
13570      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
13571	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
13572	s390_sched_state = S390_SCHED_STATE_CRACKED;
13573      else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
13574	       || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
13575	s390_sched_state = S390_SCHED_STATE_NORMAL;
13576      else
13577	{
13578	  /* Only normal insns are left (mask == 0).  */
13579	  switch (s390_sched_state)
13580	    {
13581	    case 0:
13582	    case 1:
13583	    case 2:
13584	    case S390_SCHED_STATE_NORMAL:
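	      /* After a completed group restart counting at 1; otherwise
		 count this insn within the current group.  */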
13585	      if (s390_sched_state == S390_SCHED_STATE_NORMAL)
13586		s390_sched_state = 1;
13587	      else
13588		s390_sched_state++;
13589
13590	      break;
13591	    case S390_SCHED_STATE_CRACKED:
13592	      s390_sched_state = S390_SCHED_STATE_NORMAL;
13593	      break;
13594	    }
13595	}
13596
13597      if (s390_tune == PROCESSOR_2964_Z13)
13598	{
13599	  int units, i;
13600	  unsigned unit_mask, m = 1;
13601
13602	  unit_mask = s390_get_unit_mask (insn, &units);
13603	  gcc_assert (units <= MAX_SCHED_UNITS);
13604
13605	  for (i = 0; i < units; i++, m <<= 1)
13606	    if (m & unit_mask)
13607	      last_scheduled_unit_distance[i] = 0;
13608	    else if (last_scheduled_unit_distance[i] < MAX_SCHED_MIX_DISTANCE)
13609	      last_scheduled_unit_distance[i]++;
13610	}
13611
13612      if (verbose > 5)
13613	{
13614	  unsigned int sched_mask;
13615
13616	  sched_mask = s390_get_sched_attrmask (insn);
13617
13618	  fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
13619#define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
13620	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
13621	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
13622	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
13623	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
13624#undef PRINT_SCHED_ATTR
13625
13626	  if (s390_tune == PROCESSOR_2964_Z13)
13627	    {
13628	      unsigned int unit_mask, m = 1;
13629	      int units, j;
13630
13631	      unit_mask  = s390_get_unit_mask (insn, &units);
13632	      fprintf (file, "(units:");
13633	      for (j = 0; j < units; j++, m <<= 1)
13634		if (m & unit_mask)
13635		  fprintf (file, " %d", j);
13636	      fprintf (file, ")");
13637	    }
13638	  fprintf (file, " sched state: %d\n", s390_sched_state);
13639
13640	  if (s390_tune == PROCESSOR_2964_Z13)
13641	    {
13642	      int units, j;
13643
13644	      s390_get_unit_mask (insn, &units);
13645
13646	      fprintf (file, ";;\t\tBACKEND: units unused for: ");
13647	      for (j = 0; j < units; j++)
13648		fprintf (file, "%d:%d ", j, last_scheduled_unit_distance[j]);
13649	      fprintf (file, "\n");
13650	    }
13651	}
13652    }
13653
13654  if (GET_CODE (PATTERN (insn)) != USE
13655      && GET_CODE (PATTERN (insn)) != CLOBBER)
13656    return more - 1;
13657  else
13658    return more;
13659}
13660
13661static void
13662s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
13663		 int verbose ATTRIBUTE_UNUSED,
13664		 int max_ready ATTRIBUTE_UNUSED)
13665{
13666  last_scheduled_insn = NULL;
13667  memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
13668  s390_sched_state = 0;
13669}
13670
13671/* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
13672   a new unroll factor for struct loop *LOOP when tuning for CPUs with a
13673   built-in stride prefetcher.
13674   The loop body is analyzed for memory accesses by walking the RTL of
13675   every insn in the loop.  Depending on the loop depth and the number of
13676   memory accesses, a new unroll factor <= NUNROLL is returned to improve
13677   the behaviour of the hardware prefetch unit.  */
13678static unsigned
13679s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
13680{
13681  basic_block *bbs;
13682  rtx_insn *insn;
13683  unsigned i;
13684  unsigned mem_count = 0;
13685
13686  if (s390_tune != PROCESSOR_2097_Z10
13687      && s390_tune != PROCESSOR_2817_Z196
13688      && s390_tune != PROCESSOR_2827_ZEC12
13689      && s390_tune != PROCESSOR_2964_Z13)
13690    return nunroll;
13691
13692  /* Count the number of memory references within the loop body.  */
13693  bbs = get_loop_body (loop);
13694  subrtx_iterator::array_type array;
13695  for (i = 0; i < loop->num_nodes; i++)
13696    FOR_BB_INSNS (bbs[i], insn)
13697      if (INSN_P (insn) && INSN_CODE (insn) != -1)
13698	FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13699	  if (MEM_P (*iter))
13700	    mem_count += 1;
13701  free (bbs);
13702
13703  /* Prevent division by zero; nunroll needs no adjustment in this case.  */
13704  if (mem_count == 0)
13705    return nunroll;
13706
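  /* The divisors below cap the number of memory accesses in the
     unrolled loop body at roughly 28, 22 or 16 depending on the loop
     depth; e.g. a depth-1 loop with 4 accesses is unrolled at most
     28 / 4 = 7 times.  */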
13707  switch (loop_depth (loop))
13708    {
13709    case 1:
13710      return MIN (nunroll, 28 / mem_count);
13711    case 2:
13712      return MIN (nunroll, 22 / mem_count);
13713    default:
13714      return MIN (nunroll, 16 / mem_count);
13715    }
13716}
13717
13718static void
13719s390_option_override (void)
13720{
13721  unsigned int i;
13722  cl_deferred_option *opt;
13723  vec<cl_deferred_option> *v =
13724    (vec<cl_deferred_option> *) s390_deferred_options;
13725
13726  if (v)
13727    FOR_EACH_VEC_ELT (*v, i, opt)
13728      {
13729	switch (opt->opt_index)
13730	  {
13731	  case OPT_mhotpatch_:
13732	    {
13733	      int val1;
13734	      int val2;
13735	      char s[256];
13736	      char *t;
13737
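	      /* The argument has the form "n,m", giving the number of
		 halfwords reserved before and after the function label
		 for hotpatching.  */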
13738	      strncpy (s, opt->arg, 256);
13739	      s[255] = 0;
13740	      t = strchr (s, ',');
13741	      if (t != NULL)
13742		{
13743		  *t = 0;
13744		  t++;
13745		  val1 = integral_argument (s);
13746		  val2 = integral_argument (t);
13747		}
13748	      else
13749		{
13750		  val1 = -1;
13751		  val2 = -1;
13752		}
13753	      if (val1 == -1 || val2 == -1)
13754		{
13755		  /* Argument is not a plain number.  */
13756		  error ("arguments to %qs should be non-negative integers",
13757			 "-mhotpatch=n,m");
13758		  break;
13759		}
13760	      else if (val1 > s390_hotpatch_hw_max
13761		       || val2 > s390_hotpatch_hw_max)
13762		{
13763		  error ("argument to %qs is too large (max. %d)",
13764			 "-mhotpatch=n,m", s390_hotpatch_hw_max);
13765		  break;
13766		}
13767	      s390_hotpatch_hw_before_label = val1;
13768	      s390_hotpatch_hw_after_label = val2;
13769	      break;
13770	    }
13771	  default:
13772	    gcc_unreachable ();
13773	  }
13774      }
13775
13776  /* Set up function hooks.  */
13777  init_machine_status = s390_init_machine_status;
13778
13779  /* Architecture mode defaults according to ABI.  */
13780  if (!(target_flags_explicit & MASK_ZARCH))
13781    {
13782      if (TARGET_64BIT)
13783	target_flags |= MASK_ZARCH;
13784      else
13785	target_flags &= ~MASK_ZARCH;
13786    }
13787
13788  /* Set the -march default in case it hasn't been specified on the
13789     command line.  */
13790  if (s390_arch == PROCESSOR_max)
13791    {
13792      s390_arch_string = TARGET_ZARCH ? "z900" : "g5";
13793      s390_arch = TARGET_ZARCH ? PROCESSOR_2064_Z900 : PROCESSOR_9672_G5;
13794      s390_arch_flags = processor_flags_table[(int)s390_arch];
13795    }
13796
13797  /* Determine processor to tune for.  */
13798  if (s390_tune == PROCESSOR_max)
13799    {
13800      s390_tune = s390_arch;
13801      s390_tune_flags = s390_arch_flags;
13802    }
13803
13804  /* Sanity checks.  */
13805  if (TARGET_ZARCH && !TARGET_CPU_ZARCH)
13806    error ("z/Architecture mode not supported on %s", s390_arch_string);
13807  if (TARGET_64BIT && !TARGET_ZARCH)
13808    error ("64-bit ABI not supported in ESA/390 mode");
13809
13810  /* Use hardware DFP if available and not explicitly disabled by
13811     user. E.g. with -m31 -march=z10 -mzarch   */
13812  if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP)
13813    target_flags |= MASK_HARD_DFP;
13814
13815  /* Enable hardware transactions if available and not explicitly
13816     disabled by user.  E.g. with -m31 -march=zEC12 -mzarch */
13817  if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH)
13818    target_flags |= MASK_OPT_HTM;
13819
13820  if (target_flags_explicit & MASK_OPT_VX)
13821    {
13822      if (TARGET_OPT_VX)
13823	{
13824	  if (!TARGET_CPU_VX)
13825	    error ("hardware vector support not available on %s",
13826		   s390_arch_string);
13827	  if (TARGET_SOFT_FLOAT)
13828	    error ("hardware vector support not available with -msoft-float");
13829	}
13830    }
13831  else if (TARGET_CPU_VX)
13832    /* Enable vector support if available and not explicitly disabled
13833       by user.  E.g. with -m31 -march=z13 -mzarch */
13834    target_flags |= MASK_OPT_VX;
13835
13836  if (TARGET_HARD_DFP && !TARGET_DFP)
13837    {
13838      if (target_flags_explicit & MASK_HARD_DFP)
13839	{
13840	  if (!TARGET_CPU_DFP)
13841	    error ("hardware decimal floating point instructions"
13842		   " not available on %s", s390_arch_string);
13843	  if (!TARGET_ZARCH)
13844	    error ("hardware decimal floating point instructions"
13845		   " not available in ESA/390 mode");
13846	}
13847      else
13848	target_flags &= ~MASK_HARD_DFP;
13849    }
13850
13851  if ((target_flags_explicit & MASK_SOFT_FLOAT) && TARGET_SOFT_FLOAT)
13852    {
13853      if ((target_flags_explicit & MASK_HARD_DFP) && TARGET_HARD_DFP)
13854	error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
13855
13856      target_flags &= ~MASK_HARD_DFP;
13857    }
13858
13859  /* Set processor cost function.  */
13860  switch (s390_tune)
13861    {
13862    case PROCESSOR_2084_Z990:
13863      s390_cost = &z990_cost;
13864      break;
13865    case PROCESSOR_2094_Z9_109:
13866      s390_cost = &z9_109_cost;
13867      break;
13868    case PROCESSOR_2097_Z10:
13869      s390_cost = &z10_cost;
13870      break;
13871    case PROCESSOR_2817_Z196:
13872      s390_cost = &z196_cost;
13873      break;
13874    case PROCESSOR_2827_ZEC12:
13875    case PROCESSOR_2964_Z13:
13876      s390_cost = &zEC12_cost;
13877      break;
13878    default:
13879      s390_cost = &z900_cost;
13880    }
13881
13882  if (TARGET_BACKCHAIN && TARGET_PACKED_STACK && TARGET_HARD_FLOAT)
13883    error ("-mbackchain -mpacked-stack -mhard-float are not supported "
13884	   "in combination");
13885
13886  if (s390_stack_size)
13887    {
13888      if (s390_stack_guard >= s390_stack_size)
13889	error ("stack size must be greater than the stack guard value");
13890      else if (s390_stack_size > 1 << 16)
13891	error ("stack size must not be greater than 64k");
13892    }
13893  else if (s390_stack_guard)
13894    error ("-mstack-guard implies use of -mstack-size");
13895
13896#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
13897  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
13898    target_flags |= MASK_LONG_DOUBLE_128;
13899#endif
13900
13901  if (s390_tune == PROCESSOR_2097_Z10
13902      || s390_tune == PROCESSOR_2817_Z196
13903      || s390_tune == PROCESSOR_2827_ZEC12
13904      || s390_tune == PROCESSOR_2964_Z13)
13905    {
13906      maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
13907			     global_options.x_param_values,
13908			     global_options_set.x_param_values);
13909      maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
13910			     global_options.x_param_values,
13911			     global_options_set.x_param_values);
13912      maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
13913			     global_options.x_param_values,
13914			     global_options_set.x_param_values);
13915      maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
13916			     global_options.x_param_values,
13917			     global_options_set.x_param_values);
13918    }
13919
13920  maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
13921			 global_options.x_param_values,
13922			 global_options_set.x_param_values);
13923  /* Values for loop prefetching.  */
13924  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
13925			 global_options.x_param_values,
13926			 global_options_set.x_param_values);
13927  maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
13928			 global_options.x_param_values,
13929			 global_options_set.x_param_values);
13930  /* s390 has more than 2 levels and the size is much larger.  Since
13931     we are always running virtualized, assume that we only get a small
13932     part of the caches above l1.  */
13933  maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
13934			 global_options.x_param_values,
13935			 global_options_set.x_param_values);
13936  maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
13937			 global_options.x_param_values,
13938			 global_options_set.x_param_values);
13939  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
13940			 global_options.x_param_values,
13941			 global_options_set.x_param_values);
13942
13943  /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
13944     requires the arch flags to be evaluated already.  Since prefetching
13945     is beneficial on s390, we enable it if available.  */
13946  if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
13947    flag_prefetch_loop_arrays = 1;
13948
13949  /* Use the alternative scheduling-pressure algorithm by default.  */
13950  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
13951                         global_options.x_param_values,
13952                         global_options_set.x_param_values);
13953
13954  if (TARGET_TPF)
13955    {
13956      /* Don't emit DWARF3/4 unless specifically selected.  The TPF
13957	 debuggers do not yet support DWARF 3/4.  */
13958      if (!global_options_set.x_dwarf_strict)
13959	dwarf_strict = 1;
13960      if (!global_options_set.x_dwarf_version)
13961	dwarf_version = 2;
13962    }
13963
13964  /* Register a target-specific optimization-and-lowering pass
13965     to run immediately before prologue and epilogue generation.
13966
13967     Registering the pass must be done at startup.  It is
13968     convenient to do it here.  */
13969  opt_pass *new_pass = new pass_s390_early_mach (g);
13970  struct register_pass_info insert_pass_s390_early_mach =
13971    {
13972      new_pass,			/* pass */
13973      "pro_and_epilogue",	/* reference_pass_name */
13974      1,			/* ref_pass_instance_number */
13975      PASS_POS_INSERT_BEFORE	/* pos_op */
13976    };
13977  register_pass (&insert_pass_s390_early_mach);
13978}
13979
13980/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
13981
13982static bool
13983s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
13984				     unsigned int align ATTRIBUTE_UNUSED,
13985				     enum by_pieces_operation op ATTRIBUTE_UNUSED,
13986				     bool speed_p ATTRIBUTE_UNUSED)
13987{
13988  return (size == 1 || size == 2
13989	  || size == 4 || (TARGET_ZARCH && size == 8));
13990}
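
/* Illustrative sketch (assumption, not part of this file): accepting
   sizes 1, 2, 4 and, with TARGET_ZARCH, 8 means that a fixed-size copy
   like

     __builtin_memcpy (&dst, &src, 4);

   may be expanded inline "by pieces", while an 8-byte copy is only
   handled this way on z/Architecture (64-bit) targets.  */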
13991
13992/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */
13993
13994static void
13995s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13996{
13997  tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
13998  tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
13999  tree call_efpc = build_call_expr (efpc, 0);
14000  tree fenv_var = create_tmp_var (unsigned_type_node);
14001
14002#define FPC_EXCEPTION_MASK	 HOST_WIDE_INT_UC (0xf8000000)
14003#define FPC_FLAGS_MASK		 HOST_WIDE_INT_UC (0x00f80000)
14004#define FPC_DXC_MASK		 HOST_WIDE_INT_UC (0x0000ff00)
14005#define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
14006#define FPC_FLAGS_SHIFT		 HOST_WIDE_INT_UC (16)
14007#define FPC_DXC_SHIFT		 HOST_WIDE_INT_UC (8)
14008
14009  /* Generates the equivalent of feholdexcept (&fenv_var)
14010
14011     fenv_var = __builtin_s390_efpc ();
14012     __builtin_s390_sfpc (fenv_var & mask) */
14013  tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
14014  tree new_fpc =
14015    build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
14016	    build_int_cst (unsigned_type_node,
14017			   ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
14018			     FPC_EXCEPTION_MASK)));
14019  tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
14020  *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
14021
14022  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
14023
14024     __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
14025  new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
14026		    build_int_cst (unsigned_type_node,
14027				   ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
14028  *clear = build_call_expr (sfpc, 1, new_fpc);
14029
14030  /* Generates the equivalent of feupdateenv (fenv_var)
14031
14032  old_fpc = __builtin_s390_efpc ();
14033  __builtin_s390_sfpc (fenv_var);
14034  __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT);  */
14035
14036  old_fpc = create_tmp_var (unsigned_type_node);
14037  tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
14038			       old_fpc, call_efpc);
14039
14040  set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
14041
14042  tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
14043				  build_int_cst (unsigned_type_node,
14044						 FPC_FLAGS_MASK));
14045  raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
14046			     build_int_cst (unsigned_type_node,
14047					    FPC_FLAGS_SHIFT));
14048  tree atomic_feraiseexcept
14049    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
14050  raise_old_except = build_call_expr (atomic_feraiseexcept,
14051				      1, raise_old_except);
14052
14053  *update = build2 (COMPOUND_EXPR, void_type_node,
14054		    build2 (COMPOUND_EXPR, void_type_node,
14055			    store_old_fpc, set_new_fpc),
14056		    raise_old_except);
14057
14058#undef FPC_EXCEPTION_MASK
14059#undef FPC_FLAGS_MASK
14060#undef FPC_DXC_MASK
14061#undef FPC_EXCEPTION_MASK_SHIFT
14062#undef FPC_FLAGS_SHIFT
14063#undef FPC_DXC_SHIFT
14064}
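
/* Putting the three pieces together (illustrative sketch only, using
   the mask values defined above), the expansion corresponds roughly to

     fenv_var = __builtin_s390_efpc ();                     // *hold
     __builtin_s390_sfpc (fenv_var & ~0xf8f8ff00);
     ... atomic update ...
     __builtin_s390_sfpc (__builtin_s390_efpc ()            // *clear
			  & ~0x00f8ff00);
     old_fpc = __builtin_s390_efpc ();                      // *update
     __builtin_s390_sfpc (fenv_var);
     __atomic_feraiseexcept ((old_fpc & 0x00f80000) >> 16);
 */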
14065
14066/* Return the vector mode to be used for inner mode MODE when doing
14067   vectorization.  */
14068static machine_mode
14069s390_preferred_simd_mode (machine_mode mode)
14070{
14071  if (TARGET_VX)
14072    switch (mode)
14073      {
14074      case DFmode:
14075	return V2DFmode;
14076      case DImode:
14077	return V2DImode;
14078      case SImode:
14079	return V4SImode;
14080      case HImode:
14081	return V8HImode;
14082      case QImode:
14083	return V16QImode;
14084      default:;
14085      }
14086  return word_mode;
14087}
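
/* Mapping implied by the switch above (for illustration): with vector
   support enabled, a loop over "double" elements is vectorized using
   V2DFmode, "long long" uses V2DImode, "int" V4SImode, "short"
   V8HImode and "char" V16QImode; without TARGET_VX the vectorizer
   falls back to word_mode.  */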
14088
14089/* Our hardware does not require vectors to be strictly aligned.  */
14090static bool
14091s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
14092				  const_tree type ATTRIBUTE_UNUSED,
14093				  int misalignment ATTRIBUTE_UNUSED,
14094				  bool is_packed ATTRIBUTE_UNUSED)
14095{
14096  if (TARGET_VX)
14097    return true;
14098
14099  return default_builtin_support_vector_misalignment (mode, type, misalignment,
14100						      is_packed);
14101}
14102
14103/* The vector ABI requires vector types to be aligned on an 8 byte
14104   boundary (our stack alignment).  However, we allow the user to
14105   override this, even though doing so definitely breaks the ABI.  */
14106static HOST_WIDE_INT
14107s390_vector_alignment (const_tree type)
14108{
14109  if (!TARGET_VX_ABI)
14110    return default_vector_alignment (type);
14111
14112  if (TYPE_USER_ALIGN (type))
14113    return TYPE_ALIGN (type);
14114
14115  return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
14116}
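
/* Example (illustration only): under the vector ABI a 16-byte vector
   type such as

     typedef int v4si __attribute__ ((vector_size (16)));

   is given MIN (64, 128) = 64 bits (8 bytes) of alignment, since its
   TYPE_SIZE is 128 bits, unless the user explicitly requested a
   different alignment with __attribute__ ((aligned (...))).  */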
14117
14118/* Implement TARGET_ASM_FILE_END.  */
14119static void
14120s390_asm_file_end (void)
14121{
14122#ifdef HAVE_AS_GNU_ATTRIBUTE
14123  varpool_node *vnode;
14124  cgraph_node *cnode;
14125
14126  FOR_EACH_VARIABLE (vnode)
14127    if (TREE_PUBLIC (vnode->decl))
14128      s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
14129
14130  FOR_EACH_FUNCTION (cnode)
14131    if (TREE_PUBLIC (cnode->decl))
14132      s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
14133
14134
14135  if (s390_vector_abi != 0)
14136    fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
14137	     s390_vector_abi);
14138#endif
14139  file_end_indicate_exec_stack ();
14140}
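
/* For illustration: when the vector ABI is in effect, the code above
   appends a line of the form

     .gnu_attribute 8, N

   to the assembly output, where N is the current value of
   s390_vector_abi, recording the vector ABI variant for consumers of
   the object file.  */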
14141
14142/* Return true if TYPE is a vector bool type.  */
14143static inline bool
14144s390_vector_bool_type_p (const_tree type)
14145{
14146  return TYPE_VECTOR_OPAQUE (type);
14147}
14148
14149/* Return the diagnostic message string if the binary operation OP is
14150   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
14151static const char*
14152s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
14153{
14154  bool bool1_p, bool2_p;
14155  bool plusminus_p;
14156  bool muldiv_p;
14157  bool compare_p;
14158  machine_mode mode1, mode2;
14159
14160  if (!TARGET_ZVECTOR)
14161    return NULL;
14162
14163  if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
14164    return NULL;
14165
14166  bool1_p = s390_vector_bool_type_p (type1);
14167  bool2_p = s390_vector_bool_type_p (type2);
14168
14169  /* Mixing signed and unsigned types is forbidden for all
14170     operators.  */
14171  if (!bool1_p && !bool2_p
14172      && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
14173    return N_("types differ in signedness");
14174
14175  plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
14176  muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
14177	      || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
14178	      || op == ROUND_DIV_EXPR);
14179  compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
14180	       || op == EQ_EXPR || op == NE_EXPR);
14181
14182  if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
14183    return N_("binary operator does not support two vector bool operands");
14184
14185  if (bool1_p != bool2_p && (muldiv_p || compare_p))
14186    return N_("binary operator does not support vector bool operand");
14187
14188  mode1 = TYPE_MODE (type1);
14189  mode2 = TYPE_MODE (type2);
14190
14191  if (bool1_p != bool2_p && plusminus_p
14192      && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
14193	  || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
14194    return N_("binary operator does not support mixing vector "
14195	      "bool with floating point vector operands");
14196
14197  return NULL;
14198}
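
/* Example diagnostics (illustration only): with the vector language
   extension enabled, adding a "vector signed int" to a
   "vector unsigned int" operand triggers the "types differ in
   signedness" message above, and multiplying two "vector bool int"
   operands triggers the "does not support two vector bool operands"
   message.  */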
14199
14200
14201/* Initialize GCC target structure.  */
14202
14203#undef  TARGET_ASM_ALIGNED_HI_OP
14204#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
14205#undef  TARGET_ASM_ALIGNED_DI_OP
14206#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
14207#undef  TARGET_ASM_INTEGER
14208#define TARGET_ASM_INTEGER s390_assemble_integer
14209
14210#undef  TARGET_ASM_OPEN_PAREN
14211#define TARGET_ASM_OPEN_PAREN ""
14212
14213#undef  TARGET_ASM_CLOSE_PAREN
14214#define TARGET_ASM_CLOSE_PAREN ""
14215
14216#undef TARGET_OPTION_OVERRIDE
14217#define TARGET_OPTION_OVERRIDE s390_option_override
14218
14219#undef	TARGET_ENCODE_SECTION_INFO
14220#define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
14221
14222#undef TARGET_SCALAR_MODE_SUPPORTED_P
14223#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
14224
14225#ifdef HAVE_AS_TLS
14226#undef TARGET_HAVE_TLS
14227#define TARGET_HAVE_TLS true
14228#endif
14229#undef TARGET_CANNOT_FORCE_CONST_MEM
14230#define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
14231
14232#undef TARGET_DELEGITIMIZE_ADDRESS
14233#define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
14234
14235#undef TARGET_LEGITIMIZE_ADDRESS
14236#define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
14237
14238#undef TARGET_RETURN_IN_MEMORY
14239#define TARGET_RETURN_IN_MEMORY s390_return_in_memory
14240
14241#undef  TARGET_INIT_BUILTINS
14242#define TARGET_INIT_BUILTINS s390_init_builtins
14243#undef  TARGET_EXPAND_BUILTIN
14244#define TARGET_EXPAND_BUILTIN s390_expand_builtin
14245#undef  TARGET_BUILTIN_DECL
14246#define TARGET_BUILTIN_DECL s390_builtin_decl
14247
14248#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
14249#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
14250
14251#undef TARGET_ASM_OUTPUT_MI_THUNK
14252#define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
14253#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
14254#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
14255
14256#undef  TARGET_SCHED_ADJUST_PRIORITY
14257#define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
14258#undef TARGET_SCHED_ISSUE_RATE
14259#define TARGET_SCHED_ISSUE_RATE s390_issue_rate
14260#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
14261#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
14262
14263#undef TARGET_SCHED_VARIABLE_ISSUE
14264#define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
14265#undef TARGET_SCHED_REORDER
14266#define TARGET_SCHED_REORDER s390_sched_reorder
14267#undef TARGET_SCHED_INIT
14268#define TARGET_SCHED_INIT s390_sched_init
14269
14270#undef TARGET_CANNOT_COPY_INSN_P
14271#define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
14272#undef TARGET_RTX_COSTS
14273#define TARGET_RTX_COSTS s390_rtx_costs
14274#undef TARGET_ADDRESS_COST
14275#define TARGET_ADDRESS_COST s390_address_cost
14276#undef TARGET_REGISTER_MOVE_COST
14277#define TARGET_REGISTER_MOVE_COST s390_register_move_cost
14278#undef TARGET_MEMORY_MOVE_COST
14279#define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
14280
14281#undef TARGET_MACHINE_DEPENDENT_REORG
14282#define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
14283
14284#undef TARGET_VALID_POINTER_MODE
14285#define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
14286
14287#undef TARGET_BUILD_BUILTIN_VA_LIST
14288#define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
14289#undef TARGET_EXPAND_BUILTIN_VA_START
14290#define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
14291#undef TARGET_GIMPLIFY_VA_ARG_EXPR
14292#define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
14293
14294#undef TARGET_PROMOTE_FUNCTION_MODE
14295#define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
14296#undef TARGET_PASS_BY_REFERENCE
14297#define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
14298
14299#undef TARGET_FUNCTION_OK_FOR_SIBCALL
14300#define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
14301#undef TARGET_FUNCTION_ARG
14302#define TARGET_FUNCTION_ARG s390_function_arg
14303#undef TARGET_FUNCTION_ARG_ADVANCE
14304#define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
14305#undef TARGET_FUNCTION_VALUE
14306#define TARGET_FUNCTION_VALUE s390_function_value
14307#undef TARGET_LIBCALL_VALUE
14308#define TARGET_LIBCALL_VALUE s390_libcall_value
14309#undef TARGET_STRICT_ARGUMENT_NAMING
14310#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
14311
14312#undef TARGET_KEEP_LEAF_WHEN_PROFILED
14313#define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
14314
14315#undef TARGET_FIXED_CONDITION_CODE_REGS
14316#define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
14317
14318#undef TARGET_CC_MODES_COMPATIBLE
14319#define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
14320
14321#undef TARGET_INVALID_WITHIN_DOLOOP
14322#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
14323
14324#ifdef HAVE_AS_TLS
14325#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
14326#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
14327#endif
14328
14329#undef TARGET_DWARF_FRAME_REG_MODE
14330#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
14331
14332#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
14333#undef TARGET_MANGLE_TYPE
14334#define TARGET_MANGLE_TYPE s390_mangle_type
14335#endif
14336
14340#undef TARGET_VECTOR_MODE_SUPPORTED_P
14341#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
14342
14343#undef  TARGET_PREFERRED_RELOAD_CLASS
14344#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
14345
14346#undef TARGET_SECONDARY_RELOAD
14347#define TARGET_SECONDARY_RELOAD s390_secondary_reload
14348
14349#undef TARGET_LIBGCC_CMP_RETURN_MODE
14350#define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
14351
14352#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
14353#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
14354
14355#undef TARGET_LEGITIMATE_ADDRESS_P
14356#define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
14357
14358#undef TARGET_LEGITIMATE_CONSTANT_P
14359#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
14360
14361#undef TARGET_LRA_P
14362#define TARGET_LRA_P s390_lra_p
14363
14364#undef TARGET_CAN_ELIMINATE
14365#define TARGET_CAN_ELIMINATE s390_can_eliminate
14366
14367#undef TARGET_CONDITIONAL_REGISTER_USAGE
14368#define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
14369
14370#undef TARGET_LOOP_UNROLL_ADJUST
14371#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
14372
14373#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
14374#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
14375#undef TARGET_TRAMPOLINE_INIT
14376#define TARGET_TRAMPOLINE_INIT s390_trampoline_init
14377
14378#undef TARGET_UNWIND_WORD_MODE
14379#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
14380
14381#undef TARGET_CANONICALIZE_COMPARISON
14382#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
14383
14384#undef TARGET_HARD_REGNO_SCRATCH_OK
14385#define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
14386
14387#undef TARGET_ATTRIBUTE_TABLE
14388#define TARGET_ATTRIBUTE_TABLE s390_attribute_table
14389
14390#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
14391#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
14392
14393#undef TARGET_SET_UP_BY_PROLOGUE
14394#define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
14395
14396#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
14397#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
14398  s390_use_by_pieces_infrastructure_p
14399
14400#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
14401#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
14402
14403#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
14404#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
14405
14406#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
14407#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
14408
14409#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
14410#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
14411
14412#undef TARGET_VECTOR_ALIGNMENT
14413#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
14414
14415#undef TARGET_INVALID_BINARY_OP
14416#define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
14417
14418#undef TARGET_ASM_FILE_END
14419#define TARGET_ASM_FILE_END s390_asm_file_end
14420
14421struct gcc_target targetm = TARGET_INITIALIZER;
14422
14423#include "gt-s390.h"
14424