/* Subroutines used for code generation on IBM S/390 and zSeries
   Copyright (C) 1999-2020 Free Software Foundation, Inc.
   Contributed by Hartmut Penner (hpenner@de.ibm.com) and
		  Ulrich Weigand (uweigand@de.ibm.com) and
		  Andreas Krebbel (Andreas.Krebbel@de.ibm.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "target-globals.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "diagnostic.h"
#include "alias.h"
#include "fold-const.h"
#include "print-tree.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "dojump.h"
#include "explow.h"
#include "stmt.h"
#include "expr.h"
#include "reload.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "debug.h"
#include "langhooks.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "intl.h"
#include "tm-constrs.h"
#include "tree-vrp.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "sched-int.h"

/* This file should be included last.  */
#include "target-def.h"

static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);

/* Remember the last target of s390_set_current_function.  */
static GTY(()) tree s390_previous_fndecl;

/* Define the specific costs for a given cpu.  */

struct processor_costs
{
  /* multiplication */
  const int m;		/* cost of an M instruction.  */
  const int mghi;	/* cost of an MGHI instruction.  */
  const int mh;		/* cost of an MH instruction.  */
  const int mhi;	/* cost of an MHI instruction.  */
  const int ml;		/* cost of an ML instruction.  */
  const int mr;		/* cost of an MR instruction.  */
  const int ms;		/* cost of an MS instruction.  */
  const int msg;	/* cost of an MSG instruction.  */
  const int msgf;	/* cost of an MSGF instruction.  */
  const int msgfr;	/* cost of an MSGFR instruction.  */
  const int msgr;	/* cost of an MSGR instruction.  */
  const int msr;	/* cost of an MSR instruction.  */
  const int mult_df;	/* cost of multiplication in DFmode.  */
  const int mxbr;	/* cost of multiplication in TFmode (MXBR).  */
  /* square root */
  const int sqxbr;	/* cost of square root in TFmode.  */
  const int sqdbr;	/* cost of square root in DFmode.  */
  const int sqebr;	/* cost of square root in SFmode.  */
  /* multiply and add */
  const int madbr;	/* cost of multiply and add in DFmode.  */
  const int maebr;	/* cost of multiply and add in SFmode.  */
  /* division */
  const int dxbr;	/* cost of division in TFmode (DXBR).  */
  const int ddbr;	/* cost of division in DFmode (DDBR).  */
  const int debr;	/* cost of division in SFmode (DEBR).  */
  const int dlgr;	/* cost of a DLGR instruction.  */
  const int dlr;	/* cost of a DLR instruction.  */
  const int dr;		/* cost of a DR instruction.  */
  const int dsgfr;	/* cost of a DSGFR instruction.  */
  const int dsgr;	/* cost of a DSGR instruction.  */
};

#define s390_cost ((const struct processor_costs *)(s390_cost_pointer))

static const
struct processor_costs z900_cost =
{
  COSTS_N_INSNS (5),	/* M */
  COSTS_N_INSNS (10),	/* MGHI */
  COSTS_N_INSNS (5),	/* MH */
  COSTS_N_INSNS (4),	/* MHI */
  COSTS_N_INSNS (5),	/* ML */
  COSTS_N_INSNS (5),	/* MR */
  COSTS_N_INSNS (4),	/* MS */
  COSTS_N_INSNS (15),	/* MSG */
  COSTS_N_INSNS (7),	/* MSGF */
  COSTS_N_INSNS (7),	/* MSGFR */
  COSTS_N_INSNS (10),	/* MSGR */
  COSTS_N_INSNS (4),	/* MSR */
  COSTS_N_INSNS (7),	/* multiplication in DFmode */
  COSTS_N_INSNS (13),	/* MXBR */
  COSTS_N_INSNS (136),	/* SQXBR */
  COSTS_N_INSNS (44),	/* SQDBR */
  COSTS_N_INSNS (35),	/* SQEBR */
  COSTS_N_INSNS (18),	/* MADBR */
  COSTS_N_INSNS (13),	/* MAEBR */
  COSTS_N_INSNS (134),	/* DXBR */
  COSTS_N_INSNS (30),	/* DDBR */
  COSTS_N_INSNS (27),	/* DEBR */
  COSTS_N_INSNS (220),	/* DLGR */
  COSTS_N_INSNS (34),	/* DLR */
  COSTS_N_INSNS (34),	/* DR */
  COSTS_N_INSNS (32),	/* DSGFR */
  COSTS_N_INSNS (32),	/* DSGR */
};

static const
struct processor_costs z990_cost =
{
  COSTS_N_INSNS (4),	/* M */
  COSTS_N_INSNS (2),	/* MGHI */
  COSTS_N_INSNS (2),	/* MH */
  COSTS_N_INSNS (2),	/* MHI */
  COSTS_N_INSNS (4),	/* ML */
  COSTS_N_INSNS (4),	/* MR */
  COSTS_N_INSNS (5),	/* MS */
  COSTS_N_INSNS (6),	/* MSG */
  COSTS_N_INSNS (4),	/* MSGF */
  COSTS_N_INSNS (4),	/* MSGFR */
  COSTS_N_INSNS (4),	/* MSGR */
  COSTS_N_INSNS (4),	/* MSR */
  COSTS_N_INSNS (1),	/* multiplication in DFmode */
  COSTS_N_INSNS (28),	/* MXBR */
  COSTS_N_INSNS (130),	/* SQXBR */
  COSTS_N_INSNS (66),	/* SQDBR */
  COSTS_N_INSNS (38),	/* SQEBR */
  COSTS_N_INSNS (1),	/* MADBR */
  COSTS_N_INSNS (1),	/* MAEBR */
  COSTS_N_INSNS (60),	/* DXBR */
  COSTS_N_INSNS (40),	/* DDBR */
  COSTS_N_INSNS (26),	/* DEBR */
  COSTS_N_INSNS (176),	/* DLGR */
  COSTS_N_INSNS (31),	/* DLR */
  COSTS_N_INSNS (31),	/* DR */
  COSTS_N_INSNS (31),	/* DSGFR */
  COSTS_N_INSNS (31),	/* DSGR */
};

static const
struct processor_costs z9_109_cost =
{
  COSTS_N_INSNS (4),	/* M */
  COSTS_N_INSNS (2),	/* MGHI */
  COSTS_N_INSNS (2),	/* MH */
  COSTS_N_INSNS (2),	/* MHI */
  COSTS_N_INSNS (4),	/* ML */
  COSTS_N_INSNS (4),	/* MR */
  COSTS_N_INSNS (5),	/* MS */
  COSTS_N_INSNS (6),	/* MSG */
  COSTS_N_INSNS (4),	/* MSGF */
  COSTS_N_INSNS (4),	/* MSGFR */
  COSTS_N_INSNS (4),	/* MSGR */
  COSTS_N_INSNS (4),	/* MSR */
  COSTS_N_INSNS (1),	/* multiplication in DFmode */
  COSTS_N_INSNS (28),	/* MXBR */
  COSTS_N_INSNS (130),	/* SQXBR */
  COSTS_N_INSNS (66),	/* SQDBR */
  COSTS_N_INSNS (38),	/* SQEBR */
  COSTS_N_INSNS (1),	/* MADBR */
  COSTS_N_INSNS (1),	/* MAEBR */
  COSTS_N_INSNS (60),	/* DXBR */
  COSTS_N_INSNS (40),	/* DDBR */
  COSTS_N_INSNS (26),	/* DEBR */
  COSTS_N_INSNS (30),	/* DLGR */
  COSTS_N_INSNS (23),	/* DLR */
  COSTS_N_INSNS (23),	/* DR */
  COSTS_N_INSNS (24),	/* DSGFR */
  COSTS_N_INSNS (24),	/* DSGR */
};

static const
struct processor_costs z10_cost =
{
  COSTS_N_INSNS (10),	/* M */
  COSTS_N_INSNS (10),	/* MGHI */
  COSTS_N_INSNS (10),	/* MH */
  COSTS_N_INSNS (10),	/* MHI */
  COSTS_N_INSNS (10),	/* ML */
  COSTS_N_INSNS (10),	/* MR */
  COSTS_N_INSNS (10),	/* MS */
  COSTS_N_INSNS (10),	/* MSG */
  COSTS_N_INSNS (10),	/* MSGF */
  COSTS_N_INSNS (10),	/* MSGFR */
  COSTS_N_INSNS (10),	/* MSGR */
  COSTS_N_INSNS (10),	/* MSR */
  COSTS_N_INSNS (1),	/* multiplication in DFmode */
  COSTS_N_INSNS (50),	/* MXBR */
  COSTS_N_INSNS (120),	/* SQXBR */
  COSTS_N_INSNS (52),	/* SQDBR */
  COSTS_N_INSNS (38),	/* SQEBR */
  COSTS_N_INSNS (1),	/* MADBR */
  COSTS_N_INSNS (1),	/* MAEBR */
  COSTS_N_INSNS (111),	/* DXBR */
  COSTS_N_INSNS (39),	/* DDBR */
  COSTS_N_INSNS (32),	/* DEBR */
  COSTS_N_INSNS (160),	/* DLGR */
  COSTS_N_INSNS (71),	/* DLR */
  COSTS_N_INSNS (71),	/* DR */
  COSTS_N_INSNS (71),	/* DSGFR */
  COSTS_N_INSNS (71),	/* DSGR */
};

static const
struct processor_costs z196_cost =
{
  COSTS_N_INSNS (7),	/* M */
  COSTS_N_INSNS (5),	/* MGHI */
  COSTS_N_INSNS (5),	/* MH */
  COSTS_N_INSNS (5),	/* MHI */
  COSTS_N_INSNS (7),	/* ML */
  COSTS_N_INSNS (7),	/* MR */
  COSTS_N_INSNS (6),	/* MS */
  COSTS_N_INSNS (8),	/* MSG */
  COSTS_N_INSNS (6),	/* MSGF */
  COSTS_N_INSNS (6),	/* MSGFR */
  COSTS_N_INSNS (8),	/* MSGR */
  COSTS_N_INSNS (6),	/* MSR */
  COSTS_N_INSNS (1),	/* multiplication in DFmode */
  COSTS_N_INSNS (40),	/* MXBR B+40 */
  COSTS_N_INSNS (100),	/* SQXBR B+100 */
  COSTS_N_INSNS (42),	/* SQDBR B+42 */
  COSTS_N_INSNS (28),	/* SQEBR B+28 */
  COSTS_N_INSNS (1),	/* MADBR B */
  COSTS_N_INSNS (1),	/* MAEBR B */
  COSTS_N_INSNS (101),	/* DXBR B+101 */
  COSTS_N_INSNS (29),	/* DDBR */
  COSTS_N_INSNS (22),	/* DEBR */
  COSTS_N_INSNS (160),	/* DLGR cracked */
  COSTS_N_INSNS (160),	/* DLR cracked */
  COSTS_N_INSNS (160),	/* DR expanded */
  COSTS_N_INSNS (160),	/* DSGFR cracked */
  COSTS_N_INSNS (160),	/* DSGR cracked */
};

static const
struct processor_costs zEC12_cost =
{
  COSTS_N_INSNS (7),	/* M */
  COSTS_N_INSNS (5),	/* MGHI */
  COSTS_N_INSNS (5),	/* MH */
  COSTS_N_INSNS (5),	/* MHI */
  COSTS_N_INSNS (7),	/* ML */
  COSTS_N_INSNS (7),	/* MR */
  COSTS_N_INSNS (6),	/* MS */
  COSTS_N_INSNS (8),	/* MSG */
  COSTS_N_INSNS (6),	/* MSGF */
  COSTS_N_INSNS (6),	/* MSGFR */
  COSTS_N_INSNS (8),	/* MSGR */
  COSTS_N_INSNS (6),	/* MSR */
  COSTS_N_INSNS (1),	/* multiplication in DFmode */
  COSTS_N_INSNS (40),	/* MXBR B+40 */
  COSTS_N_INSNS (100),	/* SQXBR B+100 */
  COSTS_N_INSNS (42),	/* SQDBR B+42 */
  COSTS_N_INSNS (28),	/* SQEBR B+28 */
  COSTS_N_INSNS (1),	/* MADBR B */
  COSTS_N_INSNS (1),	/* MAEBR B */
  COSTS_N_INSNS (131),	/* DXBR B+131 */
  COSTS_N_INSNS (29),	/* DDBR */
  COSTS_N_INSNS (22),	/* DEBR */
  COSTS_N_INSNS (160),	/* DLGR cracked */
  COSTS_N_INSNS (160),	/* DLR cracked */
  COSTS_N_INSNS (160),	/* DR expanded */
  COSTS_N_INSNS (160),	/* DSGFR cracked */
  COSTS_N_INSNS (160),	/* DSGR cracked */
};

const struct s390_processor processor_table[] =
{
  { "z900",   "z900",   PROCESSOR_2064_Z900,   &z900_cost,   5  },
  { "z990",   "z990",   PROCESSOR_2084_Z990,   &z990_cost,   6  },
  { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost, 7  },
  { "z9-ec",  "z9-ec",  PROCESSOR_2094_Z9_EC,  &z9_109_cost, 7  },
  { "z10",    "z10",    PROCESSOR_2097_Z10,    &z10_cost,    8  },
  { "z196",   "z196",   PROCESSOR_2817_Z196,   &z196_cost,   9  },
  { "zEC12",  "zEC12",  PROCESSOR_2827_ZEC12,  &zEC12_cost,  10 },
  { "z13",    "z13",    PROCESSOR_2964_Z13,    &zEC12_cost,  11 },
  { "z14",    "arch12", PROCESSOR_3906_Z14,    &zEC12_cost,  12 },
  { "z15",    "arch13", PROCESSOR_8561_Z15,    &zEC12_cost,  13 },
  { "native", "",       PROCESSOR_NATIVE,      NULL,         0  }
};

extern int reload_completed;

/* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
static rtx_insn *last_scheduled_insn;
#define NUM_SIDES 2

#define MAX_SCHED_UNITS 4
static int last_scheduled_unit_distance[MAX_SCHED_UNITS][NUM_SIDES];

/* Estimate of number of cycles a long-running insn occupies an
   execution unit.  */
static int fxd_longrunning[NUM_SIDES];
static int fpd_longrunning[NUM_SIDES];

/* The maximum score added for an instruction whose unit hasn't been
   in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
   give instruction mix scheduling more priority over instruction
   grouping.  */
#define MAX_SCHED_MIX_SCORE 2

/* The maximum distance up to which individual scores will be
   calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
   Increase this with the OOO window size of the machine.  */
#define MAX_SCHED_MIX_DISTANCE 70

/* Structure used to hold the components of an S/390 memory
   address.  A legitimate address on S/390 is of the general
   form
	base + index + displacement
   where any of the components is optional.

   base and index are registers of the class ADDR_REGS,
   displacement is an unsigned 12-bit immediate constant.  */

/* The max number of insns of backend generated memset/memcpy/memcmp
   loops.  This value is used in the unroll adjust hook to detect such
   loops.  Current max is 9 coming from the memcmp loop.  */
#define BLOCK_MEM_OPS_LOOP_INSNS 9

struct s390_address
{
  rtx base;
  rtx indx;
  rtx disp;
  bool pointer;
  bool literal_pool;
};
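
/* For illustration (using the standard s390 assembler operand syntax
   D(X,B)): the operand 160(%r1,%r15) decomposes into base = %r15,
   indx = %r1 and disp = 160, while a simple stack slot like 96(%r15)
   has no index register.  Since the displacement field is an
   unsigned 12-bit value (0..4095), larger offsets have to be loaded
   into a register first.  */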

/* A few accessor macros for struct cfun->machine->s390_frame_layout.  */

#define cfun_frame_layout (cfun->machine->frame_layout)
#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
#define cfun_save_arg_fprs_p (!!(TARGET_64BIT				\
				 ? cfun_frame_layout.fpr_bitmap & 0x0f	\
				 : cfun_frame_layout.fpr_bitmap & 0x03))
#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
  cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
  (1 << (REGNO - FPR0_REGNUM)))
#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
  (1 << (REGNO - FPR0_REGNUM))))
#define cfun_gpr_save_slot(REGNO) \
  cfun->machine->frame_layout.gpr_save_slots[REGNO]

/* Number of GPRs and FPRs used for argument passing.  */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
#define VEC_ARG_NUM_REG 8

/* A couple of shortcuts.  */
#define CONST_OK_FOR_J(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
#define CONST_OK_FOR_K(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
#define CONST_OK_FOR_Os(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
#define CONST_OK_FOR_Op(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
#define CONST_OK_FOR_On(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")

#define REGNO_PAIR_OK(REGNO, MODE)                               \
  (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))

/* That's the read ahead of the dynamic branch prediction unit in
   bytes on a z10 (or higher) CPU.  */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)

/* Masks per jump target register indicating which thunks need to be
   generated.  */
static GTY(()) int indirect_branch_prez10thunk_mask = 0;
static GTY(()) int indirect_branch_z10thunk_mask = 0;

#define INDIRECT_BRANCH_NUM_OPTIONS 4

enum s390_indirect_branch_option
  {
    s390_opt_indirect_branch_jump = 0,
    s390_opt_indirect_branch_call,
    s390_opt_function_return_reg,
    s390_opt_function_return_mem
  };

static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
  { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] = \
  { ".s390_indirect_jump", ".s390_indirect_call",
    ".s390_return_reg", ".s390_return_mem" };

bool
s390_return_addr_from_memory ()
{
  return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
}

/* Indicate which ABI has been used for passing vector args.
   0 - no vector type arguments have been passed where the ABI is relevant
   1 - the old ABI has been used
   2 - a vector type argument has been passed either in a vector register
       or on the stack by value  */
static int s390_vector_abi = 0;

/* Set the vector ABI marker if TYPE is subject to the vector ABI
   switch.  The vector ABI affects only vector data types.  There are
   two aspects of the vector ABI relevant here:

   1. vectors >= 16 bytes have an alignment of 8 bytes with the new
      ABI and natural alignment with the old.

   2. vectors <= 16 bytes are passed in VRs or by value on the stack
      with the new ABI but by reference on the stack with the old.

   If ARG_P is true TYPE is used for a function argument or return
   value.  The ABI marker then is set for all vector data types.  If
   ARG_P is false only type 1 vectors are being checked.  */

static void
s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
{
  static hash_set<const_tree> visited_types_hash;

  if (s390_vector_abi)
    return;

  if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
    return;

  if (visited_types_hash.contains (type))
    return;

  visited_types_hash.add (type);

  if (VECTOR_TYPE_P (type))
    {
      int type_size = int_size_in_bytes (type);

      /* Outside arguments only the alignment is changing and this
	 only happens for vector types >= 16 bytes.  */
      if (!arg_p && type_size < 16)
	return;

      /* In arguments vector types > 16 are passed as before (GCC
	 never enforced the bigger alignment for arguments which was
	 required by the old vector ABI).  However, it might still be
	 ABI relevant due to the changed alignment if it is a struct
	 member.  */
      if (arg_p && type_size > 16 && !in_struct_p)
	return;

      s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
    }
  else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
    {
      /* ARRAY_TYPE: Since with neither of the ABIs we have more than
	 natural alignment there will never be ABI dependent padding
	 in an array type.  That's why we do not set in_struct_p to
	 true here.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
    }
  else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree arg_chain;

      /* Check the return type.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);

      for (arg_chain = TYPE_ARG_TYPES (type);
	   arg_chain;
	   arg_chain = TREE_CHAIN (arg_chain))
	s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
    }
  else if (RECORD_OR_UNION_TYPE_P (type))
    {
      tree field;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
	}
    }
}
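
/* Illustrative example of the rules above: with

     typedef int v2si __attribute__ ((vector_size (8)));

   a plain v2si variable does not set the marker (vectors below 16
   bytes keep their layout outside of arguments), but using v2si as a
   function argument or return value does, since small vectors are
   passed in VRs/by value with the new ABI and by reference with the
   old one.  */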

/* System z builtins.  */

#include "s390-builtins.h"

const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int
bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
#include "s390-builtins.def"
    0
  };

const unsigned int
opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
#include "s390-builtins.def"
    0
  };

tree s390_builtin_types[BT_MAX];
tree s390_builtin_fn_types[BT_FN_MAX];
tree s390_builtin_decls[S390_BUILTIN_MAX +
			S390_OVERLOADED_BUILTIN_MAX +
			S390_OVERLOADED_BUILTIN_VAR_MAX];

static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
#define OB_DEF(...)
#define OB_DEF_VAR(...)

#include "s390-builtins.def"
  CODE_FOR_nothing
};
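
/* How the tables above are filled, sketched on a hypothetical entry
   (the macro shape matches s390-builtins.def; the concrete arguments
   are made up):

     B_DEF (s390_example, example_pattern, 0, B_VX, O_U4, BT_FN_INT_INT)

   expands to "B_VX," in bflags_builtin, to "O_U4," in opflags_builtin
   and to "CODE_FOR_example_pattern," in code_for_builtin, so all
   three tables stay indexed by the same S390_BUILTIN_* enum value.  */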

static void
s390_init_builtins (void)
{
  /* These definitions are being used in s390-builtins.def.  */
  tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
				       NULL, NULL);
  tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
  tree c_uint64_type_node;

  /* The uint64_type_node from tree.c is not compatible with the C99
     uint64_t data type.  What we want is c_uint64_type_node from
     c-common.c.  But since backend code is not supposed to interface
     with the frontend we recreate it here.  */
  if (TARGET_64BIT)
    c_uint64_type_node = long_unsigned_type_node;
  else
    c_uint64_type_node = long_long_unsigned_type_node;

#undef DEF_TYPE
#define DEF_TYPE(INDEX, NODE, CONST_P)			\
  if (s390_builtin_types[INDEX] == NULL)		\
    s390_builtin_types[INDEX] = (!CONST_P) ?		\
      (NODE) : build_type_variant ((NODE), 1, 0);

#undef DEF_POINTER_TYPE
#define DEF_POINTER_TYPE(INDEX, INDEX_BASE)			\
  if (s390_builtin_types[INDEX] == NULL)			\
    s390_builtin_types[INDEX] =					\
      build_pointer_type (s390_builtin_types[INDEX_BASE]);

#undef DEF_DISTINCT_TYPE
#define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE)			\
  if (s390_builtin_types[INDEX] == NULL)			\
    s390_builtin_types[INDEX] =					\
      build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);

#undef DEF_VECTOR_TYPE
#define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)		\
  if (s390_builtin_types[INDEX] == NULL)			\
    s390_builtin_types[INDEX] =					\
      build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_OPAQUE_VECTOR_TYPE
#define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)	\
  if (s390_builtin_types[INDEX] == NULL)			\
    s390_builtin_types[INDEX] =					\
      build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_FN_TYPE
#define DEF_FN_TYPE(INDEX, args...)			\
  if (s390_builtin_fn_types[INDEX] == NULL)		\
    s390_builtin_fn_types[INDEX] =			\
      build_function_type_list (args, NULL_TREE);
#undef DEF_OV_TYPE
#define DEF_OV_TYPE(...)
#include "s390-builtin-types.def"

#undef B_DEF
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE)	\
  if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL)		\
    s390_builtin_decls[S390_BUILTIN_##NAME] =			\
      add_builtin_function ("__builtin_" #NAME,			\
			    s390_builtin_fn_types[FNTYPE],	\
			    S390_BUILTIN_##NAME,		\
			    BUILT_IN_MD,			\
			    NULL,				\
			    ATTRS);
#undef OB_DEF
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE)	\
  if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
      == NULL)								\
    s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
      add_builtin_function ("__builtin_" #NAME,				\
			    s390_builtin_fn_types[FNTYPE],		\
			    S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
			    BUILT_IN_MD,				\
			    NULL,					\
			    0);
#undef OB_DEF_VAR
#define OB_DEF_VAR(...)
#include "s390-builtins.def"

}

/* Return true if ARG is appropriate as argument number ARGNUM of
   builtin DECL.  The operand flags from s390-builtins.def have to be
   passed as OP_FLAGS.  */
bool
s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
{
  if (O_UIMM_P (op_flags))
    {
      int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_U1];

      if (!tree_fits_uhwi_p (arg)
	  || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
	{
	  error ("constant argument %d for builtin %qF is out of range "
		 "(0..%wu)", argnum, decl,
		 (HOST_WIDE_INT_1U << bitwidth) - 1);
	  return false;
	}
    }

  if (O_SIMM_P (op_flags))
    {
      int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_S2];

      if (!tree_fits_shwi_p (arg)
	  || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
	  || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
	{
	  error ("constant argument %d for builtin %qF is out of range "
		 "(%wd..%wd)", argnum, decl,
		 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
		 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
	  return false;
	}
    }
  return true;
}
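
/* Example of the check above (ranges derived from the bitwidth
   tables; the builtin name is made up): an operand flagged O_U4
   accepts the unsigned range 0..15, one flagged O_S8 the signed
   range -128..127.  So a call like

     __builtin_s390_example (v, 16);

   with an O_U4 selector operand would be rejected at expansion time
   with the "out of range" diagnostic.  */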

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
#define MAX_ARGS 6

  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
  enum insn_code icode;
  rtx op[MAX_ARGS], pat;
  int arity;
  bool nonvoid;
  tree arg;
  call_expr_arg_iterator iter;
  unsigned int all_op_flags = opflags_for_builtin (fcode);
  machine_mode last_vec_mode = VOIDmode;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
	       (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
	       bflags_for_builtin (fcode));
    }

  if (S390_USE_TARGET_ATTRIBUTE)
    {
      unsigned int bflags;

      bflags = bflags_for_builtin (fcode);
      if ((bflags & B_HTM) && !TARGET_HTM)
	{
	  error ("builtin %qF is not supported without %<-mhtm%> "
		 "(default with %<-march=zEC12%> and higher)", fndecl);
	  return const0_rtx;
	}
      if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
	{
	  error ("builtin %qF requires %<-mvx%> "
		 "(default with %<-march=z13%> and higher)", fndecl);
	  return const0_rtx;
	}

      if ((bflags & B_VXE) && !TARGET_VXE)
	{
	  error ("builtin %qF requires z14 or higher", fndecl);
	  return const0_rtx;
	}

      if ((bflags & B_VXE2) && !TARGET_VXE2)
	{
	  error ("builtin %qF requires z15 or higher", fndecl);
	  return const0_rtx;
	}
    }
  if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
      && fcode < S390_ALL_BUILTIN_MAX)
    {
      gcc_unreachable ();
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
    {
      icode = code_for_builtin[fcode];
      /* Set a flag in the machine specific cfun part in order to support
	 saving/restoring of FPRs.  */
      if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
	cfun->machine->tbegin_p = true;
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
    {
      error ("unresolved overloaded builtin");
      return const0_rtx;
    }
  else
    internal_error ("bad builtin fcode");

  if (icode == 0)
    internal_error ("bad builtin icode");

  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;

  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);

      /* There are builtins (e.g. vec_promote) with no vector
	 arguments but an element selector.  So we have to also look
	 at the vector return type when emitting the modulo
	 operation.  */
      if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
	last_vec_mode = insn_data[icode].operand[0].mode;
    }

  arity = 0;
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      rtx tmp_rtx;
      const struct insn_operand_data *insn_op;
      unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);

      all_op_flags = all_op_flags >> O_SHIFT;

      if (arg == error_mark_node)
	return NULL_RTX;
      if (arity >= MAX_ARGS)
	return NULL_RTX;

      if (O_IMM_P (op_flags)
	  && TREE_CODE (arg) != INTEGER_CST)
	{
	  error ("constant value required for builtin %qF argument %d",
		 fndecl, arity + 1);
	  return const0_rtx;
	}

      if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
	return const0_rtx;

      insn_op = &insn_data[icode].operand[arity + nonvoid];
      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);

      /* expand_expr truncates constants to the target mode only if it
	 is "convenient".  However, our checks below rely on this
	 being done.  */
      if (CONST_INT_P (op[arity])
	  && SCALAR_INT_MODE_P (insn_op->mode)
	  && GET_MODE (op[arity]) != insn_op->mode)
	op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
						 insn_op->mode));

      /* Wrap the expanded RTX for pointer types into a MEM expr with
	 the proper mode.  This allows us to use e.g. (match_operand
	 "memory_operand"..) in the insn patterns instead of (mem
	 (match_operand "address_operand)).  This is helpful for
	 patterns not just accepting MEMs.  */
      if (POINTER_TYPE_P (TREE_TYPE (arg))
	  && insn_op->predicate != address_operand)
	op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);

      /* Expand the modulo operation required on element selectors.  */
      if (op_flags == O_ELEM)
	{
	  gcc_assert (last_vec_mode != VOIDmode);
	  op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
					     op[arity],
					     GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
					     NULL_RTX, 1, OPTAB_DIRECT);
	}

      /* Record the vector mode used for an element selector.  This assumes:
	 1. There is no builtin with two different vector modes and an
	    element selector.
	 2. The element selector comes after the vector type it is
	    referring to.
	 This is currently true for all the builtins but FIXME: we
	 should better check for that.  */
      if (VECTOR_MODE_P (insn_op->mode))
	last_vec_mode = insn_op->mode;

      if (insn_op->predicate (op[arity], insn_op->mode))
	{
	  arity++;
	  continue;
	}

      /* A memory operand is rejected by the memory_operand predicate.
	 Try making the address legal by copying it into a register.  */
      if (MEM_P (op[arity])
	  && insn_op->predicate == memory_operand
	  && (GET_MODE (XEXP (op[arity], 0)) == Pmode
	      || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
	{
	  op[arity] = replace_equiv_address (op[arity],
					     copy_to_mode_reg (Pmode,
							       XEXP (op[arity], 0)));
	}
      /* Some of the builtins require different modes/types than the
	 pattern in order to implement a specific API.  Instead of
	 adding many expanders which do the mode change we do it here.
	 E.g. s390_vec_add_u128, which is required to take vector
	 unsigned char arguments, is mapped to addti3.  */
      else if (insn_op->mode != VOIDmode
	       && GET_MODE (op[arity]) != VOIDmode
	       && GET_MODE (op[arity]) != insn_op->mode
	       && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
						   GET_MODE (op[arity]), 0))
		   != NULL_RTX))
	{
	  op[arity] = tmp_rtx;
	}

      /* The predicate rejects the operand although the mode is fine.
	 Copy the operand into a register.  */
      if (!insn_op->predicate (op[arity], insn_op->mode)
	  && (GET_MODE (op[arity]) == insn_op->mode
	      || GET_MODE (op[arity]) == VOIDmode
	      || (insn_op->predicate == address_operand
		  && GET_MODE (op[arity]) == Pmode)))
	{
	  /* An address_operand usually has VOIDmode in the expander
	     so we cannot use this.  */
	  machine_mode target_mode =
	    (insn_op->predicate == address_operand
	     ? (machine_mode) Pmode : insn_op->mode);
	  op[arity] = copy_to_mode_reg (target_mode, op[arity]);
	}

      if (!insn_op->predicate (op[arity], insn_op->mode))
	{
	  error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
	  return const0_rtx;
	}
      arity++;
    }

  switch (arity)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0]);
      else
	pat = GEN_FCN (icode) (op[0]);
      break;
    case 2:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1]);
      else
	pat = GEN_FCN (icode) (op[0], op[1]);
      break;
    case 3:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 4:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    case 5:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;
    case 6:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;
    default:
      gcc_unreachable ();
    }
  if (!pat)
    return NULL_RTX;
  emit_insn (pat);

  if (nonvoid)
    return target;
  else
    return const0_rtx;
}


static const int s390_hotpatch_hw_max = 1000000;
static int s390_hotpatch_hw_before_label = 0;
static int s390_hotpatch_hw_after_label = 0;
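
/* Usage sketch for the attribute handled below (the function name is
   made up; the two arguments give the number of halfwords of padding
   emitted before and after the function label to make it
   hotpatchable):

     void foo (void) __attribute__ ((hotpatch (1, 2)));

   Both values must be non-negative integer constants not exceeding
   s390_hotpatch_hw_max, which is what the handler verifies.  */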

/* Check whether the hotpatch attribute is applied to a function and, if it has
   an argument, the argument is valid.  */

static tree
s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree expr;
  tree expr2;
  int err;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  if (args != NULL && TREE_CHAIN (args) != NULL)
    {
      expr = TREE_VALUE (args);
      expr2 = TREE_VALUE (TREE_CHAIN (args));
    }
  if (args == NULL || TREE_CHAIN (args) == NULL)
    err = 1;
  else if (TREE_CODE (expr) != INTEGER_CST
	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
	   || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
    err = 1;
  else if (TREE_CODE (expr2) != INTEGER_CST
	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
	   || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
    err = 1;
  else
    err = 0;
  if (err)
    {
      error ("requested %qE attribute is not a comma separated pair of"
	     " non-negative integer constants or too large (max. %d)", name,
	     s390_hotpatch_hw_max);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Expand the s390_vector_bool type attribute.  */

static tree
s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;

  while (POINTER_TYPE_P (type)
	 || TREE_CODE (type) == FUNCTION_TYPE
	 || TREE_CODE (type) == METHOD_TYPE
	 || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);
  switch (mode)
    {
    case E_DImode: case E_V2DImode:
      result = s390_builtin_types[BT_BV2DI];
      break;
    case E_SImode: case E_V4SImode:
      result = s390_builtin_types[BT_BV4SI];
      break;
    case E_HImode: case E_V8HImode:
      result = s390_builtin_types[BT_BV8HI];
      break;
    case E_QImode: case E_V16QImode:
      result = s390_builtin_types[BT_BV16QI];
      break;
    default:
      break;
    }

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
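
/* Illustration of the type transformation above (typedef names made
   up): given

     typedef unsigned int uv4si __attribute__ ((vector_size (16)));
     typedef uv4si bv4si __attribute__ ((s390_vector_bool));

   the handler sees a type with mode V4SImode and rewrites it to the
   vector boolean type BT_BV4SI, which is how the "vector bool" types
   of the z vector language extension are represented.  */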

/* Check syntax of function decl attributes having a string type value.  */

static tree
s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED,
			      bool *no_add_attrs)
{
  tree cst;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  cst = TREE_VALUE (args);

  if (TREE_CODE (cst) != STRING_CST)
    {
      warning (OPT_Wattributes,
	       "%qE attribute requires a string constant argument",
	       name);
      *no_add_attrs = true;
    }

  if (is_attribute_p ("indirect_branch", name)
      || is_attribute_p ("indirect_branch_call", name)
      || is_attribute_p ("function_return", name)
      || is_attribute_p ("function_return_reg", name)
      || is_attribute_p ("function_return_mem", name))
    {
      if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
	  && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
	  && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
	{
	  warning (OPT_Wattributes,
		   "argument to %qE attribute is not "
		   "(keep|thunk|thunk-extern)", name);
	  *no_add_attrs = true;
	}
    }

  if (is_attribute_p ("indirect_branch_jump", name)
      && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
    {
      warning (OPT_Wattributes,
	       "argument to %qE attribute is not "
	       "(keep|thunk|thunk-inline|thunk-extern)", name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static const struct attribute_spec s390_attribute_table[] = {
  { "hotpatch", 2, 2, true, false, false, false,
    s390_handle_hotpatch_attribute, NULL },
  { "s390_vector_bool", 0, 0, false, true, false, true,
    s390_handle_vectorbool_attribute, NULL },
  { "indirect_branch", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "indirect_branch_jump", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "indirect_branch_call", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return_reg", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return_mem", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },

  /* End element.  */
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
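
/* Usage sketch for the string attributes above (function name made
   up):

     void foo (void) __attribute__ ((indirect_branch ("thunk")));

   Valid values are "keep", "thunk" and "thunk-extern"; only
   indirect_branch_jump additionally accepts "thunk-inline", matching
   the checks in s390_handle_string_attribute.  */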

/* Return the alignment for LABEL.  We default to the -falign-labels
   value except for the literal pool base label.  */
int
s390_label_align (rtx_insn *label)
{
  rtx_insn *prev_insn = prev_active_insn (label);
  rtx set, src;

  if (prev_insn == NULL_RTX)
    goto old;

  set = single_set (prev_insn);

  if (set == NULL_RTX)
    goto old;

  src = SET_SRC (set);

  /* Don't align literal pool base labels.  */
  if (GET_CODE (src) == UNSPEC
      && XINT (src, 1) == UNSPEC_MAIN_BASE)
    return 0;

 old:
  return align_labels.levels[0].log;
}

static GTY(()) rtx got_symbol;

/* Return the GOT table symbol.  The symbol will be created when the
   function is invoked for the first time.  */

static rtx
s390_got_symbol (void)
{
  if (!got_symbol)
    {
      got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
      SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
    }

  return got_symbol;
}

static scalar_int_mode
s390_libgcc_cmp_return_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static scalar_int_mode
s390_libgcc_shift_count_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static scalar_int_mode
s390_unwind_word_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

/* Return true if the back end supports mode MODE.  */
static bool
s390_scalar_mode_supported_p (scalar_mode mode)
{
  /* In contrast to the default implementation reject TImode constants on 31bit
     TARGET_ZARCH for ABI compliance.  */
  if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();

  return default_scalar_mode_supported_p (mode);
}

/* Return true if the back end supports vector mode MODE.  */
static bool
s390_vector_mode_supported_p (machine_mode mode)
{
  machine_mode inner;

  if (!VECTOR_MODE_P (mode)
      || !TARGET_VX
      || GET_MODE_SIZE (mode) > 16)
    return false;

  inner = GET_MODE_INNER (mode);

  switch (inner)
    {
    case E_QImode:
    case E_HImode:
    case E_SImode:
    case E_DImode:
    case E_TImode:
    case E_SFmode:
    case E_DFmode:
    case E_TFmode:
      return true;
    default:
      return false;
    }
}

/* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */

void
s390_set_has_landing_pad_p (bool value)
{
  cfun->machine->has_landing_pad_p = value;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  if (m1 == m2)
    return m1;

  switch (m1)
    {
    case E_CCZmode:
      if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
	  || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
	return m2;
      return VOIDmode;

    case E_CCSmode:
    case E_CCUmode:
    case E_CCTmode:
    case E_CCSRmode:
    case E_CCURmode:
    case E_CCZ1mode:
      if (m2 == CCZmode)
	return m1;

      return VOIDmode;

    default:
      return VOIDmode;
    }
  return VOIDmode;
}
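
/* Examples of the compatibility rules above: CCZmode only tells
   whether the result was zero, so a consumer asking for CCZmode can
   also use a CCUmode producer, and s390_cc_modes_compatible (CCZmode,
   CCUmode) returns CCUmode.  Two unrelated modes such as CCSmode and
   CCUmode yield VOIDmode.  */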

/* Return true if SET either doesn't set the CC register, or else
   the source and destination have matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.  */

static bool
s390_match_ccmode_set (rtx set, machine_mode req_mode)
{
  machine_mode set_mode;

  gcc_assert (GET_CODE (set) == SET);

  /* These modes are supposed to be used only in CC consumer
     patterns.  */
  gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
	      && req_mode != CCVFALLmode && req_mode != CCVFANYmode);

  if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
    return 1;

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case E_CCZ1mode:
    case E_CCSmode:
    case E_CCSRmode:
    case E_CCSFPSmode:
    case E_CCUmode:
    case E_CCURmode:
    case E_CCOmode:
    case E_CCLmode:
    case E_CCL1mode:
    case E_CCL2mode:
    case E_CCL3mode:
    case E_CCT1mode:
    case E_CCT2mode:
    case E_CCT3mode:
    case E_CCVEQmode:
    case E_CCVIHmode:
    case E_CCVIHUmode:
    case E_CCVFHmode:
    case E_CCVFHEmode:
      if (req_mode != set_mode)
	return 0;
      break;

    case E_CCZmode:
      if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
	  && req_mode != CCSRmode && req_mode != CCURmode
	  && req_mode != CCZ1mode)
	return 0;
      break;

    case E_CCAPmode:
    case E_CCANmode:
      if (req_mode != CCAmode)
	return 0;
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Return true if every SET in INSN that sets the CC register
   has source and destination with matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.
   If REQ_MODE is VOIDmode, always return false.  */

bool
s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
{
  int i;

  /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
  if (req_mode == VOIDmode)
    return false;

  if (GET_CODE (PATTERN (insn)) == SET)
    return s390_match_ccmode_set (PATTERN (insn), req_mode);

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
	rtx set = XVECEXP (PATTERN (insn), 0, i);
	if (GET_CODE (set) == SET)
	  if (!s390_match_ccmode_set (set, req_mode))
	    return false;
      }

  return true;
}

/* If a test-under-mask instruction can be used to implement
   (compare (and ... OP1) OP2), return the CC mode required
   to do that.  Otherwise, return VOIDmode.
   MIXED is true if the instruction can distinguish between
   CC1 and CC2 for mixed selected bits (TMxx), it is false
   if the instruction cannot (TM).  */

machine_mode
s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
{
  int bit0, bit1;

  /* ??? Fixme: should work on CONST_WIDE_INT as well.  */
  if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
    return VOIDmode;

  /* Selected bits all zero: CC0.
     e.g.: int a; if ((a & (16 + 128)) == 0) */
  if (INTVAL (op2) == 0)
    return CCTmode;

  /* Selected bits all one: CC3.
     e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
  if (INTVAL (op2) == INTVAL (op1))
    return CCT3mode;

  /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
     int a;
     if ((a & (16 + 128)) == 16)  -> CCT1
     if ((a & (16 + 128)) == 128) -> CCT2  */
  if (mixed)
    {
      bit1 = exact_log2 (INTVAL (op2));
      bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
      if (bit0 != -1 && bit1 != -1)
	return bit0 > bit1 ? CCT1mode : CCT2mode;
    }

  return VOIDmode;
}

/* Given a comparison code OP (EQ, NE, etc.) and the operands
   OP0 and OP1 of a COMPARE, return the mode to be used for the
   comparison.  */

machine_mode
s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
{
  switch (code)
    {
    case EQ:
    case NE:
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCAPmode;
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
	  && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
	return CCAPmode;
      if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
	   || GET_CODE (op1) == NEG)
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCLmode;

      if (GET_CODE (op0) == AND)
	{
	  /* Check whether we can potentially do it via TM.  */
	  machine_mode ccmode;
	  ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
	  if (ccmode != VOIDmode)
	    {
	      /* Relax CCTmode to CCZmode to allow fall-back to AND
		 if that turns out to be beneficial.  */
	      return ccmode == CCTmode ? CCZmode : ccmode;
	    }
	}

      if (register_operand (op0, HImode)
	  && GET_CODE (op1) == CONST_INT
	  && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
	return CCT3mode;
      if (register_operand (op0, QImode)
	  && GET_CODE (op1) == CONST_INT
	  && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
	return CCT3mode;

      return CCZmode;

    case LE:
    case LT:
    case GE:
    case GT:
      /* The only overflow condition of NEG and ABS happens when
	 INT_MIN is used as parameter, which stays negative.  So
	 we have an overflow from a positive value to a negative.
	 Using CCAP mode the resulting cc can be used for comparisons.  */
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCAPmode;

      /* If constants are involved in an add instruction it is possible to use
	 the resulting cc for comparisons with zero.  Knowing the sign of the
	 constant the overflow behavior gets predictable.  e.g.:
	   int a, b; if ((b = a + c) > 0)
	 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
	  && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
	      || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
		  /* Avoid INT32_MIN on 32 bit.  */
		  && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
	{
	  if (INTVAL (XEXP (op0, 1)) < 0)
	    return CCANmode;
	  else
	    return CCAPmode;
	}

      /* Fall through.  */
    case LTGT:
      if (HONOR_NANS (op0) || HONOR_NANS (op1))
	return CCSFPSmode;

      /* Fall through.  */
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	  && GET_CODE (op1) != CONST_INT)
	return CCSRmode;
      return CCSmode;

    case LTU:
    case GEU:
      if (GET_CODE (op0) == PLUS
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCL1mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	  && GET_CODE (op1) != CONST_INT)
	return CCURmode;
      return CCUmode;

    case LEU:
    case GTU:
      if (GET_CODE (op0) == MINUS
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCL2mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	  && GET_CODE (op1) != CONST_INT)
	return CCURmode;
      return CCUmode;

    default:
      gcc_unreachable ();
    }
}

/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
   that we can implement more efficiently.  */

static void
s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
{
  if (op0_preserve_value)
    return;

  /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == ZERO_EXTRACT
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && GET_CODE (XEXP (*op0, 2)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
      HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
      HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));

      if (len > 0 && len < modesize
	  && pos >= 0 && pos + len <= modesize
	  && modesize <= HOST_BITS_PER_WIDE_INT)
	{
	  unsigned HOST_WIDE_INT block;
	  block = (HOST_WIDE_INT_1U << len) - 1;
	  block <<= modesize - pos - len;

	  *op0 = gen_rtx_AND (GET_MODE (inner), inner,
			      gen_int_mode (block, GET_MODE (inner)));
	}
    }

  /* Narrow AND of memory against immediate to enable TM.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == AND
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      rtx mask = XEXP (*op0, 1);

      /* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
      if (GET_CODE (inner) == SUBREG
	  && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
	  && (GET_MODE_SIZE (GET_MODE (inner))
	      >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
	  && ((INTVAL (mask)
	       & GET_MODE_MASK (GET_MODE (inner))
	       & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
	      == 0))
	inner = SUBREG_REG (inner);

      /* Do not change volatile MEMs.  */
      if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
	{
	  int part = s390_single_part (XEXP (*op0, 1),
				       GET_MODE (inner), QImode, 0);
	  if (part >= 0)
	    {
	      mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
	      inner = adjust_address_nv (inner, QImode, part);
	      *op0 = gen_rtx_AND (QImode, inner, mask);
	    }
	}
    }

  /* Narrow comparisons against 0xffff to HImode if possible.  */
  if ((*code == EQ || *code == NE)
      && GET_CODE (*op1) == CONST_INT
      && INTVAL (*op1) == 0xffff
      && SCALAR_INT_MODE_P (GET_MODE (*op0))
      && (nonzero_bits (*op0, GET_MODE (*op0))
	  & ~HOST_WIDE_INT_UC (0xffff)) == 0)
    {
      *op0 = gen_lowpart (HImode, *op0);
      *op1 = constm1_rtx;
    }

  /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && *op1 == const0_rtx)
    {
      enum rtx_code new_code = UNKNOWN;
      switch (*code)
	{
	case EQ: new_code = EQ;  break;
	case NE: new_code = NE;  break;
	case LT: new_code = GTU; break;
	case GT: new_code = LTU; break;
	case LE: new_code = GEU; break;
	case GE: new_code = LEU; break;
	default: break;
	}

      if (new_code != UNKNOWN)
	{
	  *op0 = XVECEXP (*op0, 0, 0);
	  *code = new_code;
	}
    }

  /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_CC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && CONST_INT_P (*op1))
    {
      enum rtx_code new_code = UNKNOWN;
      switch (GET_MODE (XVECEXP (*op0, 0, 0)))
	{
	case E_CCZmode:
	case E_CCRAWmode:
	  switch (*code)
	    {
	    case EQ: new_code = EQ; break;
	    case NE: new_code = NE; break;
	    default: break;
	    }
	  break;
	default: break;
	}

      if (new_code != UNKNOWN)
	{
	  /* For CCRAWmode put the required cc mask into the second
	     operand.  */
	  if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
	      && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
	    *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
	  *op0 = XVECEXP (*op0, 0, 0);
	  *code = new_code;
	}
    }

  /* Simplify cascaded EQ, NE with const0_rtx.  */
  if ((*code == NE || *code == EQ)
      && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
      && GET_MODE (*op0) == SImode
      && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
      && REG_P (XEXP (*op0, 0))
      && XEXP (*op0, 1) == const0_rtx
      && *op1 == const0_rtx)
    {
      if ((*code == EQ && GET_CODE (*op0) == NE)
	  || (*code == NE && GET_CODE (*op0) == EQ))
	*code = EQ;
      else
	*code = NE;
      *op0 = XEXP (*op0, 0);
    }

  /* Prefer register over memory as first operand.  */
  if (MEM_P (*op0) && REG_P (*op1))
    {
      rtx tem = *op0; *op0 = *op1; *op1 = tem;
      *code = (int)swap_condition ((enum rtx_code)*code);
    }

  /* A comparison result is compared against zero.  Replace it with
     the (perhaps inverted) original comparison.
     This probably should be done by simplify_relational_operation.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && COMPARISON_P (*op0)
      && CC_REG_P (XEXP (*op0, 0)))
    {
      enum rtx_code new_code;

      if (*code == EQ)
	new_code = reversed_comparison_code_parts (GET_CODE (*op0),
						   XEXP (*op0, 0),
						   XEXP (*op0, 1), NULL);
      else
	new_code = GET_CODE (*op0);

      if (new_code != UNKNOWN)
	{
	  *code = new_code;
	  *op1 = XEXP (*op0, 1);
	  *op0 = XEXP (*op0, 0);
	}
    }

  /* ~a==b -> ~(a^b)==0   ~a!=b -> ~(a^b)!=0  */
  if (TARGET_Z15
      && (*code == EQ || *code == NE)
      && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
      && GET_CODE (*op0) == NOT)
    {
      machine_mode mode = GET_MODE (*op0);
      *op0 = gen_rtx_XOR (mode, XEXP (*op0, 0), *op1);
      *op0 = gen_rtx_NOT (mode, *op0);
      *op1 = const0_rtx;
    }

  /* a&b == -1 -> ~a|~b == 0   a|b == -1 -> ~a&~b == 0  */
  if (TARGET_Z15
      && (*code == EQ || *code == NE)
      && (GET_CODE (*op0) == AND || GET_CODE (*op0) == IOR)
      && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
      && CONST_INT_P (*op1)
      && *op1 == constm1_rtx)
    {
      machine_mode mode = GET_MODE (*op0);
      rtx op00 = gen_rtx_NOT (mode, XEXP (*op0, 0));
      rtx op01 = gen_rtx_NOT (mode, XEXP (*op0, 1));

      if (GET_CODE (*op0) == AND)
	*op0 = gen_rtx_IOR (mode, op00, op01);
      else
	*op0 = gen_rtx_AND (mode, op00, op01);

      *op1 = const0_rtx;
    }
}


/* Emit a compare instruction suitable to implement the comparison
   OP0 CODE OP1.  Return the correct condition RTL to be placed in
   the IF_THEN_ELSE of the conditional branch testing the result.  */

rtx
s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = s390_select_ccmode (code, op0, op1);
  rtx cc;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    {
      /* Do not output a redundant compare instruction if a
	 compare_and_swap pattern already computed the result and the
	 machine modes are compatible.  */
      gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
		  == GET_MODE (op0));
      cc = op0;
    }
  else
    {
      cc = gen_rtx_REG (mode, CC_REGNUM);
      emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
    }

  return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
}
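
/* Typical use of s390_emit_compare (a sketch with made-up operands):
   the returned condition is meant to be fed into a conditional
   branch or conditional move, e.g.

     rtx cond = s390_emit_compare (NE, op0, op1);
     s390_emit_jump (skip_label, cond);

   which emits the CC-setting compare followed by a branch on the
   requested condition (s390_emit_jump is defined below).  */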
*/ 1890 1891static rtx 1892s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem, 1893 rtx cmp, rtx new_rtx, machine_mode ccmode) 1894{ 1895 rtx cc; 1896 1897 mem = s390_legitimize_cs_operand (mem); 1898 cc = gen_rtx_REG (ccmode, CC_REGNUM); 1899 switch (GET_MODE (mem)) 1900 { 1901 case E_SImode: 1902 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, 1903 new_rtx, cc)); 1904 break; 1905 case E_DImode: 1906 emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp, 1907 new_rtx, cc)); 1908 break; 1909 case E_TImode: 1910 emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp, 1911 new_rtx, cc)); 1912 break; 1913 case E_QImode: 1914 case E_HImode: 1915 default: 1916 gcc_unreachable (); 1917 } 1918 return s390_emit_compare (code, cc, const0_rtx); 1919} 1920 1921/* Emit a jump instruction to TARGET and return it. If COND is 1922 NULL_RTX, emit an unconditional jump, else a conditional jump under 1923 condition COND. */ 1924 1925rtx_insn * 1926s390_emit_jump (rtx target, rtx cond) 1927{ 1928 rtx insn; 1929 1930 target = gen_rtx_LABEL_REF (VOIDmode, target); 1931 if (cond) 1932 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx); 1933 1934 insn = gen_rtx_SET (pc_rtx, target); 1935 return emit_jump_insn (insn); 1936} 1937 1938/* Return branch condition mask to implement a branch 1939 specified by CODE. Return -1 for invalid comparisons. */ 1940 1941int 1942s390_branch_condition_mask (rtx code) 1943{ 1944 const int CC0 = 1 << 3; 1945 const int CC1 = 1 << 2; 1946 const int CC2 = 1 << 1; 1947 const int CC3 = 1 << 0; 1948 1949 gcc_assert (GET_CODE (XEXP (code, 0)) == REG); 1950 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM); 1951 gcc_assert (XEXP (code, 1) == const0_rtx 1952 || (GET_MODE (XEXP (code, 0)) == CCRAWmode 1953 && CONST_INT_P (XEXP (code, 1)))); 1954 1955 1956 switch (GET_MODE (XEXP (code, 0))) 1957 { 1958 case E_CCZmode: 1959 case E_CCZ1mode: 1960 switch (GET_CODE (code)) 1961 { 1962 case EQ: return CC0; 1963 case NE: return CC1 | CC2 | CC3; 1964 default: return -1; 1965 } 1966 break; 1967 1968 case E_CCT1mode: 1969 switch (GET_CODE (code)) 1970 { 1971 case EQ: return CC1; 1972 case NE: return CC0 | CC2 | CC3; 1973 default: return -1; 1974 } 1975 break; 1976 1977 case E_CCT2mode: 1978 switch (GET_CODE (code)) 1979 { 1980 case EQ: return CC2; 1981 case NE: return CC0 | CC1 | CC3; 1982 default: return -1; 1983 } 1984 break; 1985 1986 case E_CCT3mode: 1987 switch (GET_CODE (code)) 1988 { 1989 case EQ: return CC3; 1990 case NE: return CC0 | CC1 | CC2; 1991 default: return -1; 1992 } 1993 break; 1994 1995 case E_CCLmode: 1996 switch (GET_CODE (code)) 1997 { 1998 case EQ: return CC0 | CC2; 1999 case NE: return CC1 | CC3; 2000 default: return -1; 2001 } 2002 break; 2003 2004 case E_CCL1mode: 2005 switch (GET_CODE (code)) 2006 { 2007 case LTU: return CC2 | CC3; /* carry */ 2008 case GEU: return CC0 | CC1; /* no carry */ 2009 default: return -1; 2010 } 2011 break; 2012 2013 case E_CCL2mode: 2014 switch (GET_CODE (code)) 2015 { 2016 case GTU: return CC0 | CC1; /* borrow */ 2017 case LEU: return CC2 | CC3; /* no borrow */ 2018 default: return -1; 2019 } 2020 break; 2021 2022 case E_CCL3mode: 2023 switch (GET_CODE (code)) 2024 { 2025 case EQ: return CC0 | CC2; 2026 case NE: return CC1 | CC3; 2027 case LTU: return CC1; 2028 case GTU: return CC3; 2029 case LEU: return CC1 | CC2; 2030 case GEU: return CC2 | CC3; 2031 default: return -1; 2032 } 2033 2034 case E_CCUmode: 2035 switch (GET_CODE (code)) 2036 { 2037 case EQ: return CC0; 2038 case NE: 
return CC1 | CC2 | CC3; 2039 case LTU: return CC1; 2040 case GTU: return CC2; 2041 case LEU: return CC0 | CC1; 2042 case GEU: return CC0 | CC2; 2043 default: return -1; 2044 } 2045 break; 2046 2047 case E_CCURmode: 2048 switch (GET_CODE (code)) 2049 { 2050 case EQ: return CC0; 2051 case NE: return CC2 | CC1 | CC3; 2052 case LTU: return CC2; 2053 case GTU: return CC1; 2054 case LEU: return CC0 | CC2; 2055 case GEU: return CC0 | CC1; 2056 default: return -1; 2057 } 2058 break; 2059 2060 case E_CCAPmode: 2061 switch (GET_CODE (code)) 2062 { 2063 case EQ: return CC0; 2064 case NE: return CC1 | CC2 | CC3; 2065 case LT: return CC1 | CC3; 2066 case GT: return CC2; 2067 case LE: return CC0 | CC1 | CC3; 2068 case GE: return CC0 | CC2; 2069 default: return -1; 2070 } 2071 break; 2072 2073 case E_CCANmode: 2074 switch (GET_CODE (code)) 2075 { 2076 case EQ: return CC0; 2077 case NE: return CC1 | CC2 | CC3; 2078 case LT: return CC1; 2079 case GT: return CC2 | CC3; 2080 case LE: return CC0 | CC1; 2081 case GE: return CC0 | CC2 | CC3; 2082 default: return -1; 2083 } 2084 break; 2085 2086 case E_CCOmode: 2087 switch (GET_CODE (code)) 2088 { 2089 case EQ: return CC0 | CC1 | CC2; 2090 case NE: return CC3; 2091 default: return -1; 2092 } 2093 break; 2094 2095 case E_CCSmode: 2096 case E_CCSFPSmode: 2097 switch (GET_CODE (code)) 2098 { 2099 case EQ: return CC0; 2100 case NE: return CC1 | CC2 | CC3; 2101 case LT: return CC1; 2102 case GT: return CC2; 2103 case LE: return CC0 | CC1; 2104 case GE: return CC0 | CC2; 2105 case UNORDERED: return CC3; 2106 case ORDERED: return CC0 | CC1 | CC2; 2107 case UNEQ: return CC0 | CC3; 2108 case UNLT: return CC1 | CC3; 2109 case UNGT: return CC2 | CC3; 2110 case UNLE: return CC0 | CC1 | CC3; 2111 case UNGE: return CC0 | CC2 | CC3; 2112 case LTGT: return CC1 | CC2; 2113 default: return -1; 2114 } 2115 break; 2116 2117 case E_CCSRmode: 2118 switch (GET_CODE (code)) 2119 { 2120 case EQ: return CC0; 2121 case NE: return CC2 | CC1 | CC3; 2122 case LT: return CC2; 2123 case GT: return CC1; 2124 case LE: return CC0 | CC2; 2125 case GE: return CC0 | CC1; 2126 case UNORDERED: return CC3; 2127 case ORDERED: return CC0 | CC2 | CC1; 2128 case UNEQ: return CC0 | CC3; 2129 case UNLT: return CC2 | CC3; 2130 case UNGT: return CC1 | CC3; 2131 case UNLE: return CC0 | CC2 | CC3; 2132 case UNGE: return CC0 | CC1 | CC3; 2133 case LTGT: return CC2 | CC1; 2134 default: return -1; 2135 } 2136 break; 2137 2138 /* Vector comparison modes. */ 2139 /* CC2 will never be set. It however is part of the negated 2140 masks. */ 2141 case E_CCVIALLmode: 2142 switch (GET_CODE (code)) 2143 { 2144 case EQ: 2145 case GTU: 2146 case GT: 2147 case GE: return CC0; 2148 /* The inverted modes are in fact *any* modes. */ 2149 case NE: 2150 case LEU: 2151 case LE: 2152 case LT: return CC3 | CC1 | CC2; 2153 default: return -1; 2154 } 2155 2156 case E_CCVIANYmode: 2157 switch (GET_CODE (code)) 2158 { 2159 case EQ: 2160 case GTU: 2161 case GT: 2162 case GE: return CC0 | CC1; 2163 /* The inverted modes are in fact *all* modes. */ 2164 case NE: 2165 case LEU: 2166 case LE: 2167 case LT: return CC3 | CC2; 2168 default: return -1; 2169 } 2170 case E_CCVFALLmode: 2171 switch (GET_CODE (code)) 2172 { 2173 case EQ: 2174 case GT: 2175 case GE: return CC0; 2176 /* The inverted modes are in fact *any* modes. 
*/ 2177 case NE: 2178 case UNLE: 2179 case UNLT: return CC3 | CC1 | CC2; 2180 default: return -1; 2181 } 2182 2183 case E_CCVFANYmode: 2184 switch (GET_CODE (code)) 2185 { 2186 case EQ: 2187 case GT: 2188 case GE: return CC0 | CC1; 2189 /* The inverted modes are in fact *all* modes. */ 2190 case NE: 2191 case UNLE: 2192 case UNLT: return CC3 | CC2; 2193 default: return -1; 2194 } 2195 2196 case E_CCRAWmode: 2197 switch (GET_CODE (code)) 2198 { 2199 case EQ: 2200 return INTVAL (XEXP (code, 1)); 2201 case NE: 2202 return (INTVAL (XEXP (code, 1))) ^ 0xf; 2203 default: 2204 gcc_unreachable (); 2205 } 2206 2207 default: 2208 return -1; 2209 } 2210} 2211 2212 2213/* Return branch condition mask to implement a compare and branch 2214 specified by CODE. Return -1 for invalid comparisons. */ 2215 2216int 2217s390_compare_and_branch_condition_mask (rtx code) 2218{ 2219 const int CC0 = 1 << 3; 2220 const int CC1 = 1 << 2; 2221 const int CC2 = 1 << 1; 2222 2223 switch (GET_CODE (code)) 2224 { 2225 case EQ: 2226 return CC0; 2227 case NE: 2228 return CC1 | CC2; 2229 case LT: 2230 case LTU: 2231 return CC1; 2232 case GT: 2233 case GTU: 2234 return CC2; 2235 case LE: 2236 case LEU: 2237 return CC0 | CC1; 2238 case GE: 2239 case GEU: 2240 return CC0 | CC2; 2241 default: 2242 gcc_unreachable (); 2243 } 2244 return -1; 2245} 2246 2247/* If INV is false, return assembler mnemonic string to implement 2248 a branch specified by CODE. If INV is true, return mnemonic 2249 for the corresponding inverted branch. */ 2250 2251static const char * 2252s390_branch_condition_mnemonic (rtx code, int inv) 2253{ 2254 int mask; 2255 2256 static const char *const mnemonic[16] = 2257 { 2258 NULL, "o", "h", "nle", 2259 "l", "nhe", "lh", "ne", 2260 "e", "nlh", "he", "nl", 2261 "le", "nh", "no", NULL 2262 }; 2263 2264 if (GET_CODE (XEXP (code, 0)) == REG 2265 && REGNO (XEXP (code, 0)) == CC_REGNUM 2266 && (XEXP (code, 1) == const0_rtx 2267 || (GET_MODE (XEXP (code, 0)) == CCRAWmode 2268 && CONST_INT_P (XEXP (code, 1))))) 2269 mask = s390_branch_condition_mask (code); 2270 else 2271 mask = s390_compare_and_branch_condition_mask (code); 2272 2273 gcc_assert (mask >= 0); 2274 2275 if (inv) 2276 mask ^= 15; 2277 2278 gcc_assert (mask >= 1 && mask <= 14); 2279 2280 return mnemonic[mask]; 2281} 2282 2283/* Return the part of op which has a value different from def. 2284 The size of the part is determined by mode. 2285 Use this function only if you already know that op really 2286 contains such a part. */ 2287 2288unsigned HOST_WIDE_INT 2289s390_extract_part (rtx op, machine_mode mode, int def) 2290{ 2291 unsigned HOST_WIDE_INT value = 0; 2292 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode); 2293 int part_bits = GET_MODE_BITSIZE (mode); 2294 unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1; 2295 int i; 2296 2297 for (i = 0; i < max_parts; i++) 2298 { 2299 if (i == 0) 2300 value = UINTVAL (op); 2301 else 2302 value >>= part_bits; 2303 2304 if ((value & part_mask) != (def & part_mask)) 2305 return value & part_mask; 2306 } 2307 2308 gcc_unreachable (); 2309} 2310 2311/* If OP is an integer constant of mode MODE with exactly one 2312 part of mode PART_MODE unequal to DEF, return the number of that 2313 part. Otherwise, return -1. 
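
   As a worked example (an editor's illustration, not part of the
   original sources): for OP = 0x0000ffff00000000 with MODE = DImode,
   PART_MODE = HImode and DEF = 0, exactly one of the four 16-bit
   parts is non-zero.  The loop below finds it at index 2 counting
   from the least significant part and returns 4 - 1 - 2 = 1, i.e.
   the part number counting from the most significant part.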
*/

int
s390_single_part (rtx op,
                  machine_mode mode,
                  machine_mode part_mode,
                  int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
  unsigned HOST_WIDE_INT part_mask
    = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
  int i, part = -1;

  if (GET_CODE (op) != CONST_INT)
    return -1;

  for (i = 0; i < n_parts; i++)
    {
      if (i == 0)
        value = UINTVAL (op);
      else
        value >>= GET_MODE_BITSIZE (part_mode);

      if ((value & part_mask) != (def & part_mask))
        {
          if (part != -1)
            return -1;
          else
            part = i;
        }
    }
  return part == -1 ? -1 : n_parts - 1 - part;
}

/* Return true if IN contains a contiguous bitfield in the lower SIZE
   bits and no other bits are set in (the lower SIZE bits of) IN.

   PSTART and PEND can be used to obtain the start and end
   position (inclusive) of the bitfield relative to 64
   bits.  *PSTART / *PEND gives the position of the first/last bit
   of the bitfield counting from the highest order bit starting
   with zero.  */

bool
s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
                                  int *pstart, int *pend)
{
  int start;
  int end = -1;
  int lowbit = HOST_BITS_PER_WIDE_INT - 1;
  int highbit = HOST_BITS_PER_WIDE_INT - size;
  unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;

  gcc_assert (!!pstart == !!pend);
  for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
    if (end == -1)
      {
        /* Look for the rightmost bit of a contiguous range of ones.  */
        if (bitmask & in)
          /* Found it.  */
          end = start;
      }
    else
      {
        /* Look for the first zero bit after the range of ones.  */
        if (! (bitmask & in))
          /* Found it.  */
          break;
      }
  /* We're one past the last one-bit.  */
  start++;

  if (end == -1)
    /* No one bits found.  */
    return false;

  if (start > highbit)
    {
      unsigned HOST_WIDE_INT mask;

      /* Calculate a mask for all bits beyond the contiguous bits.  */
      mask = ((~HOST_WIDE_INT_0U >> highbit)
              & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
      if (mask & in)
        /* There are more bits set beyond the first range of one bits.  */
        return false;
    }

  if (pstart)
    {
      *pstart = start;
      *pend = end;
    }

  return true;
}

/* Same as s390_contiguous_bitmask_nowrap_p but also returns true
   if ~IN contains a contiguous bitfield.  In that case, *END is <
   *START.

   If WRAP_P is true, a bitmask that wraps around is also tested.
   When a wraparound occurs *START is greater than *END (if the
   pointers are non-null), and the uppermost (64 - SIZE) bits are
   thus part of the range.  If WRAP_P is false, no wraparound is
   tested.  */

bool
s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
                           int size, int *start, int *end)
{
  int bs = HOST_BITS_PER_WIDE_INT;
  bool b;

  gcc_assert (!!start == !!end);
  if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
    /* This cannot be expressed as a contiguous bitmask.  Exit early because
       the second call of s390_contiguous_bitmask_nowrap_p would accept this as
       a valid bitmask.  */
    return false;
  b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
  if (b)
    return true;
  if (!wrap_p)
    return false;
  b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
  if (b && start)
    {
      int s = *start;
      int e = *end;

      gcc_assert (s >= 1);
      *start = ((e + 1) & (bs - 1));
      *end = ((s - 1 + bs) & (bs - 1));
    }

  return b;
}

/* Return true if OP contains the same contiguous bitfield in *all*
   its elements.  START and END can be used to obtain the start and
   end position of the bitfield.

   START/END give the position of the first/last bit of the bitfield
   counting from the lowest order bit starting with zero.  In order to
   use these values for S/390 instructions this has to be converted to
   "bits big endian" style.  */

bool
s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
{
  unsigned HOST_WIDE_INT mask;
  int size;
  rtx elt;
  bool b;

  gcc_assert (!!start == !!end);
  if (!const_vec_duplicate_p (op, &elt)
      || !CONST_INT_P (elt))
    return false;

  size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));

  /* We cannot deal with V1TI/V1TF.  This would require a vgmq.  */
  if (size > 64)
    return false;

  mask = UINTVAL (elt);

  b = s390_contiguous_bitmask_p (mask, true, size, start, end);
  if (b)
    {
      if (start)
        {
          *start -= (HOST_BITS_PER_WIDE_INT - size);
          *end -= (HOST_BITS_PER_WIDE_INT - size);
        }
      return true;
    }
  else
    return false;
}

/* Return true if OP consists only of byte chunks being either 0 or
   0xff.  If MASK is != NULL a byte mask is generated which is
   appropriate for the vector generate byte mask instruction.  */

bool
s390_bytemask_vector_p (rtx op, unsigned *mask)
{
  int i;
  unsigned tmp_mask = 0;
  int nunit, unit_size;

  if (!VECTOR_MODE_P (GET_MODE (op))
      || GET_CODE (op) != CONST_VECTOR
      || !CONST_INT_P (XVECEXP (op, 0, 0)))
    return false;

  nunit = GET_MODE_NUNITS (GET_MODE (op));
  unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));

  for (i = 0; i < nunit; i++)
    {
      unsigned HOST_WIDE_INT c;
      int j;

      if (!CONST_INT_P (XVECEXP (op, 0, i)))
        return false;

      c = UINTVAL (XVECEXP (op, 0, i));
      for (j = 0; j < unit_size; j++)
        {
          if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
            return false;
          tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
          c = c >> BITS_PER_UNIT;
        }
    }

  if (mask != NULL)
    *mask = tmp_mask;

  return true;
}

/* Check whether a rotate of ROTL followed by an AND of CONTIG is
   equivalent to a shift followed by the AND.  In particular, CONTIG
   should not overlap the (rotated) bit 0/bit 63 gap.  Negative values
   for ROTL indicate a rotate to the right.  */

bool
s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
{
  int start, end;
  bool ok;

  ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
  gcc_assert (ok);

  if (rotl >= 0)
    return (64 - end >= rotl);
  else
    {
      /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
         DImode.  */
      rotl = -rotl + (64 - bitsize);
      return (start >= rotl);
    }
}

/* Check whether we can (and want to) split a double-word
   move in mode MODE from SRC to DST into two single-word
   moves, moving the subword FIRST_SUBWORD first.
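
   For illustration (an editor's sketch, not from the original
   sources): splitting a DImode move from (mem:DI (reg:SI 4)) into
   the register pair starting at (reg:DI 4) with FIRST_SUBWORD = 0
   would overwrite reg 4 before it is used to address the second
   memory word, so the check below rejects that order and the
   caller has to move subword 1 first.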
*/ 2568 2569bool 2570s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword) 2571{ 2572 /* Floating point and vector registers cannot be split. */ 2573 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst)) 2574 return false; 2575 2576 /* Non-offsettable memory references cannot be split. */ 2577 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src)) 2578 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst))) 2579 return false; 2580 2581 /* Moving the first subword must not clobber a register 2582 needed to move the second subword. */ 2583 if (register_operand (dst, mode)) 2584 { 2585 rtx subreg = operand_subword (dst, first_subword, 0, mode); 2586 if (reg_overlap_mentioned_p (subreg, src)) 2587 return false; 2588 } 2589 2590 return true; 2591} 2592 2593/* Return true if it can be proven that [MEM1, MEM1 + SIZE] 2594 and [MEM2, MEM2 + SIZE] do overlap and false 2595 otherwise. */ 2596 2597bool 2598s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size) 2599{ 2600 rtx addr1, addr2, addr_delta; 2601 HOST_WIDE_INT delta; 2602 2603 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM) 2604 return true; 2605 2606 if (size == 0) 2607 return false; 2608 2609 addr1 = XEXP (mem1, 0); 2610 addr2 = XEXP (mem2, 0); 2611 2612 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1); 2613 2614 /* This overlapping check is used by peepholes merging memory block operations. 2615 Overlapping operations would otherwise be recognized by the S/390 hardware 2616 and would fall back to a slower implementation. Allowing overlapping 2617 operations would lead to slow code but not to wrong code. Therefore we are 2618 somewhat optimistic if we cannot prove that the memory blocks are 2619 overlapping. 2620 That's why we return false here although this may accept operations on 2621 overlapping memory areas. */ 2622 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT) 2623 return false; 2624 2625 delta = INTVAL (addr_delta); 2626 2627 if (delta == 0 2628 || (delta > 0 && delta < size) 2629 || (delta < 0 && -delta < size)) 2630 return true; 2631 2632 return false; 2633} 2634 2635/* Check whether the address of memory reference MEM2 equals exactly 2636 the address of memory reference MEM1 plus DELTA. Return true if 2637 we can prove this to be the case, false otherwise. */ 2638 2639bool 2640s390_offset_p (rtx mem1, rtx mem2, rtx delta) 2641{ 2642 rtx addr1, addr2, addr_delta; 2643 2644 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM) 2645 return false; 2646 2647 addr1 = XEXP (mem1, 0); 2648 addr2 = XEXP (mem2, 0); 2649 2650 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1); 2651 if (!addr_delta || !rtx_equal_p (addr_delta, delta)) 2652 return false; 2653 2654 return true; 2655} 2656 2657/* Expand logical operator CODE in mode MODE with operands OPERANDS. */ 2658 2659void 2660s390_expand_logical_operator (enum rtx_code code, machine_mode mode, 2661 rtx *operands) 2662{ 2663 machine_mode wmode = mode; 2664 rtx dst = operands[0]; 2665 rtx src1 = operands[1]; 2666 rtx src2 = operands[2]; 2667 rtx op, clob, tem; 2668 2669 /* If we cannot handle the operation directly, use a temp register. */ 2670 if (!s390_logical_operator_ok_p (operands)) 2671 dst = gen_reg_rtx (mode); 2672 2673 /* QImode and HImode patterns make sense only if we have a destination 2674 in memory. Otherwise perform the operation in SImode. 
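   (An editor's illustration: an (xor:HI (reg) (reg)) with a register
   destination has no direct machine pattern, since the narrow
   logical instructions such as XI and XC operate only on memory, so
   the code below performs the operation on SImode subregs instead.)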
*/ 2675 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM) 2676 wmode = SImode; 2677 2678 /* Widen operands if required. */ 2679 if (mode != wmode) 2680 { 2681 if (GET_CODE (dst) == SUBREG 2682 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0) 2683 dst = tem; 2684 else if (REG_P (dst)) 2685 dst = gen_rtx_SUBREG (wmode, dst, 0); 2686 else 2687 dst = gen_reg_rtx (wmode); 2688 2689 if (GET_CODE (src1) == SUBREG 2690 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0) 2691 src1 = tem; 2692 else if (GET_MODE (src1) != VOIDmode) 2693 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0); 2694 2695 if (GET_CODE (src2) == SUBREG 2696 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0) 2697 src2 = tem; 2698 else if (GET_MODE (src2) != VOIDmode) 2699 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0); 2700 } 2701 2702 /* Emit the instruction. */ 2703 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2)); 2704 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM)); 2705 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); 2706 2707 /* Fix up the destination if needed. */ 2708 if (dst != operands[0]) 2709 emit_move_insn (operands[0], gen_lowpart (mode, dst)); 2710} 2711 2712/* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */ 2713 2714bool 2715s390_logical_operator_ok_p (rtx *operands) 2716{ 2717 /* If the destination operand is in memory, it needs to coincide 2718 with one of the source operands. After reload, it has to be 2719 the first source operand. */ 2720 if (GET_CODE (operands[0]) == MEM) 2721 return rtx_equal_p (operands[0], operands[1]) 2722 || (!reload_completed && rtx_equal_p (operands[0], operands[2])); 2723 2724 return true; 2725} 2726 2727/* Narrow logical operation CODE of memory operand MEMOP with immediate 2728 operand IMMOP to switch from SS to SI type instructions. */ 2729 2730void 2731s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop) 2732{ 2733 int def = code == AND ? -1 : 0; 2734 HOST_WIDE_INT mask; 2735 int part; 2736 2737 gcc_assert (GET_CODE (*memop) == MEM); 2738 gcc_assert (!MEM_VOLATILE_P (*memop)); 2739 2740 mask = s390_extract_part (*immop, QImode, def); 2741 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def); 2742 gcc_assert (part >= 0); 2743 2744 *memop = adjust_address (*memop, QImode, part); 2745 *immop = gen_int_mode (mask, QImode); 2746} 2747 2748 2749/* How to allocate a 'struct machine_function'. */ 2750 2751static struct machine_function * 2752s390_init_machine_status (void) 2753{ 2754 return ggc_cleared_alloc<machine_function> (); 2755} 2756 2757/* Map for smallest class containing reg regno. 
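
   (Editor's note for orientation, not from the original sources:
   entries 0-15 cover the general purpose registers, 16-31 the
   floating point registers, and the trailing VEC_REGS entries the
   upper vector registers; entry 33 is the condition code register,
   matching CC_REGNUM.)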
*/

const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,   /*  0 */
  ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,      /*  4 */
  ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,      /*  8 */
  ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,      /* 12 */
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,              /* 16 */
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,              /* 20 */
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,              /* 24 */
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,              /* 28 */
  ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS,        /* 32 */
  ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS,    /* 36 */
  VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,          /* 40 */
  VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,          /* 44 */
  VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,          /* 48 */
  VEC_REGS, VEC_REGS                               /* 52 */
};

/* Return attribute type of insn.  */

static enum attr_type
s390_safe_attr_type (rtx_insn *insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
  else
    return TYPE_NONE;
}

/* Return attribute relative_long of insn.  */

static bool
s390_safe_relative_long_p (rtx_insn *insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_relative_long (insn) == RELATIVE_LONG_YES;
  else
    return false;
}

/* Return true if DISP is a valid short displacement.  */

static bool
s390_short_displacement (rtx disp)
{
  /* No displacement is OK.  */
  if (!disp)
    return true;

  /* Without the long displacement facility we don't need to
     distinguish between long and short displacement.  */
  if (!TARGET_LONG_DISPLACEMENT)
    return true;

  /* Integer displacement in range.  */
  if (GET_CODE (disp) == CONST_INT)
    return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;

  /* GOT offset is not OK, the GOT can be large.  */
  if (GET_CODE (disp) == CONST
      && GET_CODE (XEXP (disp, 0)) == UNSPEC
      && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
          || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
    return false;

  /* All other symbolic constants are literal pool references,
     which are OK as the literal pool must be small.  */
  if (GET_CODE (disp) == CONST)
    return true;

  return false;
}

/* Attempt to split `ref', which should be UNSPEC_LTREF, into
   (base + `disp').  If successful, also determine the following
   characteristics of `ref': `is_ptr' - whether it can be an
   LA argument, `is_base_ptr' - whether the resulting base is a well-known
   base register (stack/frame pointer, etc), `is_pool_ptr' - whether it is
   considered a literal pool pointer for purposes of avoiding two different
   literal pool pointers per insn during or after reload (`B' constraint).
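
   A sketch of the transformation (an editor's addition): a literal
   pool reference such as
     (unspec [(symbol_ref) (reg)] UNSPEC_LTREF)
   is rewritten so that *REF becomes the base register and *DISP the
   wrapped (unspec [(symbol_ref)] UNSPEC_LTREL_OFFSET) displacement,
   provided no displacement was present before.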
*/ 2838static bool 2839s390_decompose_constant_pool_ref (rtx *ref, rtx *disp, bool *is_ptr, 2840 bool *is_base_ptr, bool *is_pool_ptr) 2841{ 2842 if (!*ref) 2843 return true; 2844 2845 if (GET_CODE (*ref) == UNSPEC) 2846 switch (XINT (*ref, 1)) 2847 { 2848 case UNSPEC_LTREF: 2849 if (!*disp) 2850 *disp = gen_rtx_UNSPEC (Pmode, 2851 gen_rtvec (1, XVECEXP (*ref, 0, 0)), 2852 UNSPEC_LTREL_OFFSET); 2853 else 2854 return false; 2855 2856 *ref = XVECEXP (*ref, 0, 1); 2857 break; 2858 2859 default: 2860 return false; 2861 } 2862 2863 if (!REG_P (*ref) || GET_MODE (*ref) != Pmode) 2864 return false; 2865 2866 if (REGNO (*ref) == STACK_POINTER_REGNUM 2867 || REGNO (*ref) == FRAME_POINTER_REGNUM 2868 || ((reload_completed || reload_in_progress) 2869 && frame_pointer_needed 2870 && REGNO (*ref) == HARD_FRAME_POINTER_REGNUM) 2871 || REGNO (*ref) == ARG_POINTER_REGNUM 2872 || (flag_pic 2873 && REGNO (*ref) == PIC_OFFSET_TABLE_REGNUM)) 2874 *is_ptr = *is_base_ptr = true; 2875 2876 if ((reload_completed || reload_in_progress) 2877 && *ref == cfun->machine->base_reg) 2878 *is_ptr = *is_base_ptr = *is_pool_ptr = true; 2879 2880 return true; 2881} 2882 2883/* Decompose a RTL expression ADDR for a memory address into 2884 its components, returned in OUT. 2885 2886 Returns false if ADDR is not a valid memory address, true 2887 otherwise. If OUT is NULL, don't return the components, 2888 but check for validity only. 2889 2890 Note: Only addresses in canonical form are recognized. 2891 LEGITIMIZE_ADDRESS should convert non-canonical forms to the 2892 canonical form so that they will be recognized. */ 2893 2894static int 2895s390_decompose_address (rtx addr, struct s390_address *out) 2896{ 2897 HOST_WIDE_INT offset = 0; 2898 rtx base = NULL_RTX; 2899 rtx indx = NULL_RTX; 2900 rtx disp = NULL_RTX; 2901 rtx orig_disp; 2902 bool pointer = false; 2903 bool base_ptr = false; 2904 bool indx_ptr = false; 2905 bool literal_pool = false; 2906 2907 /* We may need to substitute the literal pool base register into the address 2908 below. However, at this point we do not know which register is going to 2909 be used as base, so we substitute the arg pointer register. This is going 2910 to be treated as holding a pointer below -- it shouldn't be used for any 2911 other purpose. */ 2912 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM); 2913 2914 /* Decompose address into base + index + displacement. */ 2915 2916 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC) 2917 base = addr; 2918 2919 else if (GET_CODE (addr) == PLUS) 2920 { 2921 rtx op0 = XEXP (addr, 0); 2922 rtx op1 = XEXP (addr, 1); 2923 enum rtx_code code0 = GET_CODE (op0); 2924 enum rtx_code code1 = GET_CODE (op1); 2925 2926 if (code0 == REG || code0 == UNSPEC) 2927 { 2928 if (code1 == REG || code1 == UNSPEC) 2929 { 2930 indx = op0; /* index + base */ 2931 base = op1; 2932 } 2933 2934 else 2935 { 2936 base = op0; /* base + displacement */ 2937 disp = op1; 2938 } 2939 } 2940 2941 else if (code0 == PLUS) 2942 { 2943 indx = XEXP (op0, 0); /* index + base + disp */ 2944 base = XEXP (op0, 1); 2945 disp = op1; 2946 } 2947 2948 else 2949 { 2950 return false; 2951 } 2952 } 2953 2954 else 2955 disp = addr; /* displacement */ 2956 2957 /* Extract integer part of displacement. 
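     (An editor's illustration: a DISP of
     (const (plus (symbol_ref X) (const_int 8))) is split below into
     OFFSET = 8 and the symbolic part (symbol_ref X), while a plain
     (const_int 8) simply becomes OFFSET = 8.)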
*/ 2958 orig_disp = disp; 2959 if (disp) 2960 { 2961 if (GET_CODE (disp) == CONST_INT) 2962 { 2963 offset = INTVAL (disp); 2964 disp = NULL_RTX; 2965 } 2966 else if (GET_CODE (disp) == CONST 2967 && GET_CODE (XEXP (disp, 0)) == PLUS 2968 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT) 2969 { 2970 offset = INTVAL (XEXP (XEXP (disp, 0), 1)); 2971 disp = XEXP (XEXP (disp, 0), 0); 2972 } 2973 } 2974 2975 /* Strip off CONST here to avoid special case tests later. */ 2976 if (disp && GET_CODE (disp) == CONST) 2977 disp = XEXP (disp, 0); 2978 2979 /* We can convert literal pool addresses to 2980 displacements by basing them off the base register. */ 2981 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp)) 2982 { 2983 if (base || indx) 2984 return false; 2985 2986 base = fake_pool_base, literal_pool = true; 2987 2988 /* Mark up the displacement. */ 2989 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp), 2990 UNSPEC_LTREL_OFFSET); 2991 } 2992 2993 /* Validate base register. */ 2994 if (!s390_decompose_constant_pool_ref (&base, &disp, &pointer, &base_ptr, 2995 &literal_pool)) 2996 return false; 2997 2998 /* Validate index register. */ 2999 if (!s390_decompose_constant_pool_ref (&indx, &disp, &pointer, &indx_ptr, 3000 &literal_pool)) 3001 return false; 3002 3003 /* Prefer to use pointer as base, not index. */ 3004 if (base && indx && !base_ptr 3005 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx)))) 3006 { 3007 rtx tmp = base; 3008 base = indx; 3009 indx = tmp; 3010 } 3011 3012 /* Validate displacement. */ 3013 if (!disp) 3014 { 3015 /* If virtual registers are involved, the displacement will change later 3016 anyway as the virtual registers get eliminated. This could make a 3017 valid displacement invalid, but it is more likely to make an invalid 3018 displacement valid, because we sometimes access the register save area 3019 via negative offsets to one of those registers. 3020 Thus we don't check the displacement for validity here. If after 3021 elimination the displacement turns out to be invalid after all, 3022 this is fixed up by reload in any case. */ 3023 /* LRA maintains always displacements up to date and we need to 3024 know the displacement is right during all LRA not only at the 3025 final elimination. */ 3026 if (lra_in_progress 3027 || (base != arg_pointer_rtx 3028 && indx != arg_pointer_rtx 3029 && base != return_address_pointer_rtx 3030 && indx != return_address_pointer_rtx 3031 && base != frame_pointer_rtx 3032 && indx != frame_pointer_rtx 3033 && base != virtual_stack_vars_rtx 3034 && indx != virtual_stack_vars_rtx)) 3035 if (!DISP_IN_RANGE (offset)) 3036 return false; 3037 } 3038 else 3039 { 3040 /* All the special cases are pointers. */ 3041 pointer = true; 3042 3043 /* In the small-PIC case, the linker converts @GOT 3044 and @GOTNTPOFF offsets to possible displacements. */ 3045 if (GET_CODE (disp) == UNSPEC 3046 && (XINT (disp, 1) == UNSPEC_GOT 3047 || XINT (disp, 1) == UNSPEC_GOTNTPOFF) 3048 && flag_pic == 1) 3049 { 3050 ; 3051 } 3052 3053 /* Accept pool label offsets. */ 3054 else if (GET_CODE (disp) == UNSPEC 3055 && XINT (disp, 1) == UNSPEC_POOL_OFFSET) 3056 ; 3057 3058 /* Accept literal pool references. */ 3059 else if (GET_CODE (disp) == UNSPEC 3060 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET) 3061 { 3062 /* In case CSE pulled a non literal pool reference out of 3063 the pool we have to reject the address. This is 3064 especially important when loading the GOT pointer on non 3065 zarch CPUs. 
In this case the literal pool contains an lt 3066 relative offset to the _GLOBAL_OFFSET_TABLE_ label which 3067 will most likely exceed the displacement. */ 3068 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF 3069 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0))) 3070 return false; 3071 3072 orig_disp = gen_rtx_CONST (Pmode, disp); 3073 if (offset) 3074 { 3075 /* If we have an offset, make sure it does not 3076 exceed the size of the constant pool entry. 3077 Otherwise we might generate an out-of-range 3078 displacement for the base register form. */ 3079 rtx sym = XVECEXP (disp, 0, 0); 3080 if (offset >= GET_MODE_SIZE (get_pool_mode (sym))) 3081 return false; 3082 3083 orig_disp = plus_constant (Pmode, orig_disp, offset); 3084 } 3085 } 3086 3087 else 3088 return false; 3089 } 3090 3091 if (!base && !indx) 3092 pointer = true; 3093 3094 if (out) 3095 { 3096 out->base = base; 3097 out->indx = indx; 3098 out->disp = orig_disp; 3099 out->pointer = pointer; 3100 out->literal_pool = literal_pool; 3101 } 3102 3103 return true; 3104} 3105 3106/* Decompose a RTL expression OP for an address style operand into its 3107 components, and return the base register in BASE and the offset in 3108 OFFSET. While OP looks like an address it is never supposed to be 3109 used as such. 3110 3111 Return true if OP is a valid address operand, false if not. */ 3112 3113bool 3114s390_decompose_addrstyle_without_index (rtx op, rtx *base, 3115 HOST_WIDE_INT *offset) 3116{ 3117 rtx off = NULL_RTX; 3118 3119 /* We can have an integer constant, an address register, 3120 or a sum of the two. */ 3121 if (CONST_SCALAR_INT_P (op)) 3122 { 3123 off = op; 3124 op = NULL_RTX; 3125 } 3126 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1))) 3127 { 3128 off = XEXP (op, 1); 3129 op = XEXP (op, 0); 3130 } 3131 while (op && GET_CODE (op) == SUBREG) 3132 op = SUBREG_REG (op); 3133 3134 if (op && GET_CODE (op) != REG) 3135 return false; 3136 3137 if (offset) 3138 { 3139 if (off == NULL_RTX) 3140 *offset = 0; 3141 else if (CONST_INT_P (off)) 3142 *offset = INTVAL (off); 3143 else if (CONST_WIDE_INT_P (off)) 3144 /* The offset will anyway be cut down to 12 bits so take just 3145 the lowest order chunk of the wide int. */ 3146 *offset = CONST_WIDE_INT_ELT (off, 0); 3147 else 3148 gcc_unreachable (); 3149 } 3150 if (base) 3151 *base = op; 3152 3153 return true; 3154} 3155 3156/* Check that OP is a valid shift count operand. 3157 It should be of the following structure: 3158 (subreg (and (plus (reg imm_op)) 2^k-1) 7) 3159 where subreg, and and plus are optional. 3160 3161 If IMPLICIT_MASK is > 0 and OP contains and 3162 (AND ... immediate) 3163 it is checked whether IMPLICIT_MASK and the immediate match. 3164 Otherwise, no checking is performed. 3165 */ 3166bool 3167s390_valid_shift_count (rtx op, HOST_WIDE_INT implicit_mask) 3168{ 3169 /* Strip subreg. */ 3170 while (GET_CODE (op) == SUBREG && subreg_lowpart_p (op)) 3171 op = XEXP (op, 0); 3172 3173 /* Check for an and with proper constant. 
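     (An editor's illustration: with IMPLICIT_MASK = 63, as for
     64-bit shifts, an operand like
     (and (plus (reg) (const_int 1)) (const_int 63)) passes because
     63 & 63 == 63, whereas a mask of 31 would be rejected since the
     AND could actually change the shift count.)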
*/
  if (GET_CODE (op) == AND)
    {
      rtx op1 = XEXP (op, 0);
      rtx imm = XEXP (op, 1);

      if (GET_CODE (op1) == SUBREG && subreg_lowpart_p (op1))
        op1 = XEXP (op1, 0);

      if (!(register_operand (op1, GET_MODE (op1)) || GET_CODE (op1) == PLUS))
        return false;

      if (!immediate_operand (imm, GET_MODE (imm)))
        return false;

      HOST_WIDE_INT val = INTVAL (imm);
      if (implicit_mask > 0
          && (val & implicit_mask) != implicit_mask)
        return false;

      op = op1;
    }

  /* Check the rest.  */
  return s390_decompose_addrstyle_without_index (op, NULL, NULL);
}

/* Return true if CODE is a valid address without index.  */

bool
s390_legitimate_address_without_index_p (rtx op)
{
  struct s390_address addr;

  if (!s390_decompose_address (XEXP (op, 0), &addr))
    return false;
  if (addr.indx)
    return false;

  return true;
}


/* Return TRUE if ADDR is an operand valid for a load/store relative
   instruction.  Be aware that the alignment of the operand needs to
   be checked separately.
   Valid addresses are single references or a sum of a reference and a
   constant integer.  Return these parts in SYMREF and ADDEND.  You can
   pass NULL in REF and/or ADDEND if you are not interested in these
   values.  */

static bool
s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
{
  HOST_WIDE_INT tmpaddend = 0;

  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
        return false;

      tmpaddend = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (GET_CODE (addr) == SYMBOL_REF
      || (GET_CODE (addr) == UNSPEC
          && (XINT (addr, 1) == UNSPEC_GOTENT
              || XINT (addr, 1) == UNSPEC_PLT)))
    {
      if (symref)
        *symref = addr;
      if (addend)
        *addend = tmpaddend;

      return true;
    }
  return false;
}

/* Return true if the address in OP is valid for constraint letter C
   if wrapped in a MEM rtx.  Set LIT_POOL_OK to true if literal
   pool MEMs should be accepted.  Only the Q, R, S, T constraint
   letters are allowed for C.  */

static int
s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
{
  rtx symref;
  struct s390_address addr;
  bool decomposed = false;

  if (!address_operand (op, GET_MODE (op)))
    return 0;

  /* This check makes sure that no symbolic address (except literal
     pool references) are accepted by the R or T constraints.  */
  if (s390_loadrelative_operand_p (op, &symref, NULL)
      && (!lit_pool_ok
          || !SYMBOL_REF_P (symref)
          || !CONSTANT_POOL_ADDRESS_P (symref)))
    return 0;

  /* Ensure literal pool references are only accepted if LIT_POOL_OK.  */
  if (!lit_pool_ok)
    {
      if (!s390_decompose_address (op, &addr))
        return 0;
      if (addr.literal_pool)
        return 0;
      decomposed = true;
    }

  /* With reload, we sometimes get intermediate address forms that are
     actually invalid as-is, but we need to accept them in the most
     generic cases below ('R' or 'T'), since reload will in fact fix
     them up.  LRA behaves differently here; we never see such forms,
     but on the other hand, we need to strictly reject every invalid
     address form.  After both reload and LRA invalid address forms
     must be rejected, because nothing will fix them up later.  Perform
     this check right up front.  */
  if (lra_in_progress || reload_completed)
    {
      if (!decomposed && !s390_decompose_address (op, &addr))
        return 0;
      decomposed = true;
    }

  switch (c)
    {
    case 'Q': /* no index short displacement */
      if (!decomposed && !s390_decompose_address (op, &addr))
        return 0;
      if (addr.indx)
        return 0;
      if (!s390_short_displacement (addr.disp))
        return 0;
      break;

    case 'R': /* with index short displacement */
      if (TARGET_LONG_DISPLACEMENT)
        {
          if (!decomposed && !s390_decompose_address (op, &addr))
            return 0;
          if (!s390_short_displacement (addr.disp))
            return 0;
        }
      /* Any invalid address here will be fixed up by reload,
         so accept it for the most generic constraint.  */
      break;

    case 'S': /* no index long displacement */
      if (!decomposed && !s390_decompose_address (op, &addr))
        return 0;
      if (addr.indx)
        return 0;
      break;

    case 'T': /* with index long displacement */
      /* Any invalid address here will be fixed up by reload,
         so accept it for the most generic constraint.  */
      break;

    default:
      return 0;
    }
  return 1;
}


/* Evaluates constraint strings described by the regular expression
   ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
   the constraint given in STR, and 0 otherwise.  */

int
s390_mem_constraint (const char *str, rtx op)
{
  char c = str[0];

  switch (c)
    {
    case 'A':
      /* Check for offsettable variants of memory constraints.  */
      if (!MEM_P (op) || MEM_VOLATILE_P (op))
        return 0;
      if ((reload_completed || reload_in_progress)
          ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
        return 0;
      return s390_check_qrst_address (str[1], XEXP (op, 0), true);
    case 'B':
      /* Check for non-literal-pool variants of memory constraints.  */
      if (!MEM_P (op))
        return 0;
      return s390_check_qrst_address (str[1], XEXP (op, 0), false);
    case 'Q':
    case 'R':
    case 'S':
    case 'T':
      if (GET_CODE (op) != MEM)
        return 0;
      return s390_check_qrst_address (c, XEXP (op, 0), true);
    case 'Y':
      /* Simply check for the basic form of a shift count.  Reload will
         take care of making sure we have a proper base register.  */
      if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
        return 0;
      break;
    case 'Z':
      return s390_check_qrst_address (str[1], op, true);
    default:
      return 0;
    }
  return 1;
}


/* Evaluates constraint strings starting with letter O.  Input
   parameter C is the second letter following the "O" in the constraint
   string.  Returns 1 if VALUE meets the respective constraint and 0
   otherwise.  */

int
s390_O_constraint_str (const char c, HOST_WIDE_INT value)
{
  if (!TARGET_EXTIMM)
    return 0;

  switch (c)
    {
    case 's':
      return trunc_int_for_mode (value, SImode) == value;

    case 'p':
      return value == 0
        || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;

    case 'n':
      return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;

    default:
      gcc_unreachable ();
    }
}


/* Evaluates constraint strings starting with letter N.
Parameter STR 3422 contains the letters following letter "N" in the constraint string. 3423 Returns true if VALUE matches the constraint. */ 3424 3425int 3426s390_N_constraint_str (const char *str, HOST_WIDE_INT value) 3427{ 3428 machine_mode mode, part_mode; 3429 int def; 3430 int part, part_goal; 3431 3432 3433 if (str[0] == 'x') 3434 part_goal = -1; 3435 else 3436 part_goal = str[0] - '0'; 3437 3438 switch (str[1]) 3439 { 3440 case 'Q': 3441 part_mode = QImode; 3442 break; 3443 case 'H': 3444 part_mode = HImode; 3445 break; 3446 case 'S': 3447 part_mode = SImode; 3448 break; 3449 default: 3450 return 0; 3451 } 3452 3453 switch (str[2]) 3454 { 3455 case 'H': 3456 mode = HImode; 3457 break; 3458 case 'S': 3459 mode = SImode; 3460 break; 3461 case 'D': 3462 mode = DImode; 3463 break; 3464 default: 3465 return 0; 3466 } 3467 3468 switch (str[3]) 3469 { 3470 case '0': 3471 def = 0; 3472 break; 3473 case 'F': 3474 def = -1; 3475 break; 3476 default: 3477 return 0; 3478 } 3479 3480 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode)) 3481 return 0; 3482 3483 part = s390_single_part (GEN_INT (value), mode, part_mode, def); 3484 if (part < 0) 3485 return 0; 3486 if (part_goal != -1 && part_goal != part) 3487 return 0; 3488 3489 return 1; 3490} 3491 3492 3493/* Returns true if the input parameter VALUE is a float zero. */ 3494 3495int 3496s390_float_const_zero_p (rtx value) 3497{ 3498 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT 3499 && value == CONST0_RTX (GET_MODE (value))); 3500} 3501 3502/* Implement TARGET_REGISTER_MOVE_COST. */ 3503 3504static int 3505s390_register_move_cost (machine_mode mode, 3506 reg_class_t from, reg_class_t to) 3507{ 3508 /* On s390, copy between fprs and gprs is expensive. */ 3509 3510 /* It becomes somewhat faster having ldgr/lgdr. */ 3511 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8) 3512 { 3513 /* ldgr is single cycle. */ 3514 if (reg_classes_intersect_p (from, GENERAL_REGS) 3515 && reg_classes_intersect_p (to, FP_REGS)) 3516 return 1; 3517 /* lgdr needs 3 cycles. */ 3518 if (reg_classes_intersect_p (to, GENERAL_REGS) 3519 && reg_classes_intersect_p (from, FP_REGS)) 3520 return 3; 3521 } 3522 3523 /* Otherwise copying is done via memory. */ 3524 if ((reg_classes_intersect_p (from, GENERAL_REGS) 3525 && reg_classes_intersect_p (to, FP_REGS)) 3526 || (reg_classes_intersect_p (from, FP_REGS) 3527 && reg_classes_intersect_p (to, GENERAL_REGS))) 3528 return 10; 3529 3530 /* We usually do not want to copy via CC. */ 3531 if (reg_classes_intersect_p (from, CC_REGS) 3532 || reg_classes_intersect_p (to, CC_REGS)) 3533 return 5; 3534 3535 return 1; 3536} 3537 3538/* Implement TARGET_MEMORY_MOVE_COST. */ 3539 3540static int 3541s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, 3542 reg_class_t rclass ATTRIBUTE_UNUSED, 3543 bool in ATTRIBUTE_UNUSED) 3544{ 3545 return 2; 3546} 3547 3548/* Compute a (partial) cost for rtx X. Return true if the complete 3549 cost has been computed, and false if subexpressions should be 3550 scanned. In either case, *TOTAL contains the cost result. The 3551 initial value of *TOTAL is the default value computed by 3552 rtx_cost. It may be left unmodified. OUTER_CODE contains the 3553 code of the superexpression of x. 
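
   (Editor's note: most simple arithmetic cases below return
   COSTS_N_INSNS (1), while multiplication, division and square root
   take their costs from the CPU-specific table selected via
   s390_cost.)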
*/ 3554 3555static bool 3556s390_rtx_costs (rtx x, machine_mode mode, int outer_code, 3557 int opno ATTRIBUTE_UNUSED, 3558 int *total, bool speed ATTRIBUTE_UNUSED) 3559{ 3560 int code = GET_CODE (x); 3561 switch (code) 3562 { 3563 case CONST: 3564 case CONST_INT: 3565 case LABEL_REF: 3566 case SYMBOL_REF: 3567 case CONST_DOUBLE: 3568 case CONST_WIDE_INT: 3569 case MEM: 3570 *total = 0; 3571 return true; 3572 3573 case SET: 3574 { 3575 /* Without this a conditional move instruction would be 3576 accounted as 3 * COSTS_N_INSNS (set, if_then_else, 3577 comparison operator). That's a bit pessimistic. */ 3578 3579 if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE) 3580 return false; 3581 3582 rtx cond = XEXP (SET_SRC (x), 0); 3583 3584 if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1))) 3585 return false; 3586 3587 /* It is going to be a load/store on condition. Make it 3588 slightly more expensive than a normal load. */ 3589 *total = COSTS_N_INSNS (1) + 1; 3590 3591 rtx dst = SET_DEST (x); 3592 rtx then = XEXP (SET_SRC (x), 1); 3593 rtx els = XEXP (SET_SRC (x), 2); 3594 3595 /* It is a real IF-THEN-ELSE. An additional move will be 3596 needed to implement that. */ 3597 if (!TARGET_Z15 3598 && reload_completed 3599 && !rtx_equal_p (dst, then) 3600 && !rtx_equal_p (dst, els)) 3601 *total += COSTS_N_INSNS (1) / 2; 3602 3603 /* A minor penalty for constants we cannot directly handle. */ 3604 if ((CONST_INT_P (then) || CONST_INT_P (els)) 3605 && (!TARGET_Z13 || MEM_P (dst) 3606 || (CONST_INT_P (then) && !satisfies_constraint_K (then)) 3607 || (CONST_INT_P (els) && !satisfies_constraint_K (els)))) 3608 *total += COSTS_N_INSNS (1) / 2; 3609 3610 /* A store on condition can only handle register src operands. */ 3611 if (MEM_P (dst) && (!REG_P (then) || !REG_P (els))) 3612 *total += COSTS_N_INSNS (1) / 2; 3613 3614 return true; 3615 } 3616 case IOR: 3617 3618 /* nnrk, nngrk */ 3619 if (TARGET_Z15 3620 && (mode == SImode || mode == DImode) 3621 && GET_CODE (XEXP (x, 0)) == NOT 3622 && GET_CODE (XEXP (x, 1)) == NOT) 3623 { 3624 *total = COSTS_N_INSNS (1); 3625 if (!REG_P (XEXP (XEXP (x, 0), 0))) 3626 *total += 1; 3627 if (!REG_P (XEXP (XEXP (x, 1), 0))) 3628 *total += 1; 3629 return true; 3630 } 3631 3632 /* risbg */ 3633 if (GET_CODE (XEXP (x, 0)) == AND 3634 && GET_CODE (XEXP (x, 1)) == ASHIFT 3635 && REG_P (XEXP (XEXP (x, 0), 0)) 3636 && REG_P (XEXP (XEXP (x, 1), 0)) 3637 && CONST_INT_P (XEXP (XEXP (x, 0), 1)) 3638 && CONST_INT_P (XEXP (XEXP (x, 1), 1)) 3639 && (UINTVAL (XEXP (XEXP (x, 0), 1)) == 3640 (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1)) 3641 { 3642 *total = COSTS_N_INSNS (2); 3643 return true; 3644 } 3645 3646 /* ~AND on a 128 bit mode. This can be done using a vector 3647 instruction. 
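   (Editor's note: presumably via the vector NAND instruction provided
   by the vector enhancements facility; the costing below only checks
   that such a 128-bit mode fits into a vector register.)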
*/ 3648 if (TARGET_VXE 3649 && GET_CODE (XEXP (x, 0)) == NOT 3650 && GET_CODE (XEXP (x, 1)) == NOT 3651 && REG_P (XEXP (XEXP (x, 0), 0)) 3652 && REG_P (XEXP (XEXP (x, 1), 0)) 3653 && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16 3654 && s390_hard_regno_mode_ok (VR0_REGNUM, 3655 GET_MODE (XEXP (XEXP (x, 0), 0)))) 3656 { 3657 *total = COSTS_N_INSNS (1); 3658 return true; 3659 } 3660 3661 *total = COSTS_N_INSNS (1); 3662 return false; 3663 3664 case AND: 3665 /* nork, nogrk */ 3666 if (TARGET_Z15 3667 && (mode == SImode || mode == DImode) 3668 && GET_CODE (XEXP (x, 0)) == NOT 3669 && GET_CODE (XEXP (x, 1)) == NOT) 3670 { 3671 *total = COSTS_N_INSNS (1); 3672 if (!REG_P (XEXP (XEXP (x, 0), 0))) 3673 *total += 1; 3674 if (!REG_P (XEXP (XEXP (x, 1), 0))) 3675 *total += 1; 3676 return true; 3677 } 3678 /* fallthrough */ 3679 case ASHIFT: 3680 case ASHIFTRT: 3681 case LSHIFTRT: 3682 case ROTATE: 3683 case ROTATERT: 3684 case XOR: 3685 case NEG: 3686 case NOT: 3687 case PLUS: 3688 case MINUS: 3689 *total = COSTS_N_INSNS (1); 3690 return false; 3691 3692 case MULT: 3693 switch (mode) 3694 { 3695 case E_SImode: 3696 { 3697 rtx left = XEXP (x, 0); 3698 rtx right = XEXP (x, 1); 3699 if (GET_CODE (right) == CONST_INT 3700 && CONST_OK_FOR_K (INTVAL (right))) 3701 *total = s390_cost->mhi; 3702 else if (GET_CODE (left) == SIGN_EXTEND) 3703 *total = s390_cost->mh; 3704 else 3705 *total = s390_cost->ms; /* msr, ms, msy */ 3706 break; 3707 } 3708 case E_DImode: 3709 { 3710 rtx left = XEXP (x, 0); 3711 rtx right = XEXP (x, 1); 3712 if (TARGET_ZARCH) 3713 { 3714 if (GET_CODE (right) == CONST_INT 3715 && CONST_OK_FOR_K (INTVAL (right))) 3716 *total = s390_cost->mghi; 3717 else if (GET_CODE (left) == SIGN_EXTEND) 3718 *total = s390_cost->msgf; 3719 else 3720 *total = s390_cost->msg; /* msgr, msg */ 3721 } 3722 else /* TARGET_31BIT */ 3723 { 3724 if (GET_CODE (left) == SIGN_EXTEND 3725 && GET_CODE (right) == SIGN_EXTEND) 3726 /* mulsidi case: mr, m */ 3727 *total = s390_cost->m; 3728 else if (GET_CODE (left) == ZERO_EXTEND 3729 && GET_CODE (right) == ZERO_EXTEND) 3730 /* umulsidi case: ml, mlr */ 3731 *total = s390_cost->ml; 3732 else 3733 /* Complex calculation is required. */ 3734 *total = COSTS_N_INSNS (40); 3735 } 3736 break; 3737 } 3738 case E_SFmode: 3739 case E_DFmode: 3740 *total = s390_cost->mult_df; 3741 break; 3742 case E_TFmode: 3743 *total = s390_cost->mxbr; 3744 break; 3745 default: 3746 return false; 3747 } 3748 return false; 3749 3750 case FMA: 3751 switch (mode) 3752 { 3753 case E_DFmode: 3754 *total = s390_cost->madbr; 3755 break; 3756 case E_SFmode: 3757 *total = s390_cost->maebr; 3758 break; 3759 default: 3760 return false; 3761 } 3762 /* Negate in the third argument is free: FMSUB. 
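   (An editor's illustration: (fma a b (neg c)) computes a * b - c;
   the code below therefore only sums the costs of the three operands
   and charges nothing extra for the NEG.)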
*/ 3763 if (GET_CODE (XEXP (x, 2)) == NEG) 3764 { 3765 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed) 3766 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed) 3767 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed)); 3768 return true; 3769 } 3770 return false; 3771 3772 case UDIV: 3773 case UMOD: 3774 if (mode == TImode) /* 128 bit division */ 3775 *total = s390_cost->dlgr; 3776 else if (mode == DImode) 3777 { 3778 rtx right = XEXP (x, 1); 3779 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */ 3780 *total = s390_cost->dlr; 3781 else /* 64 by 64 bit division */ 3782 *total = s390_cost->dlgr; 3783 } 3784 else if (mode == SImode) /* 32 bit division */ 3785 *total = s390_cost->dlr; 3786 return false; 3787 3788 case DIV: 3789 case MOD: 3790 if (mode == DImode) 3791 { 3792 rtx right = XEXP (x, 1); 3793 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */ 3794 if (TARGET_ZARCH) 3795 *total = s390_cost->dsgfr; 3796 else 3797 *total = s390_cost->dr; 3798 else /* 64 by 64 bit division */ 3799 *total = s390_cost->dsgr; 3800 } 3801 else if (mode == SImode) /* 32 bit division */ 3802 *total = s390_cost->dlr; 3803 else if (mode == SFmode) 3804 { 3805 *total = s390_cost->debr; 3806 } 3807 else if (mode == DFmode) 3808 { 3809 *total = s390_cost->ddbr; 3810 } 3811 else if (mode == TFmode) 3812 { 3813 *total = s390_cost->dxbr; 3814 } 3815 return false; 3816 3817 case SQRT: 3818 if (mode == SFmode) 3819 *total = s390_cost->sqebr; 3820 else if (mode == DFmode) 3821 *total = s390_cost->sqdbr; 3822 else /* TFmode */ 3823 *total = s390_cost->sqxbr; 3824 return false; 3825 3826 case SIGN_EXTEND: 3827 case ZERO_EXTEND: 3828 if (outer_code == MULT || outer_code == DIV || outer_code == MOD 3829 || outer_code == PLUS || outer_code == MINUS 3830 || outer_code == COMPARE) 3831 *total = 0; 3832 return false; 3833 3834 case COMPARE: 3835 *total = COSTS_N_INSNS (1); 3836 3837 /* nxrk, nxgrk ~(a^b)==0 */ 3838 if (TARGET_Z15 3839 && GET_CODE (XEXP (x, 0)) == NOT 3840 && XEXP (x, 1) == const0_rtx 3841 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR 3842 && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode) 3843 && mode == CCZmode) 3844 { 3845 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0))) 3846 *total += 1; 3847 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 1))) 3848 *total += 1; 3849 return true; 3850 } 3851 3852 /* nnrk, nngrk, nork, nogrk */ 3853 if (TARGET_Z15 3854 && (GET_CODE (XEXP (x, 0)) == AND || GET_CODE (XEXP (x, 0)) == IOR) 3855 && XEXP (x, 1) == const0_rtx 3856 && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode) 3857 && GET_CODE (XEXP (XEXP (x, 0), 0)) == NOT 3858 && GET_CODE (XEXP (XEXP (x, 0), 1)) == NOT 3859 && mode == CCZmode) 3860 { 3861 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0))) 3862 *total += 1; 3863 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 1), 0))) 3864 *total += 1; 3865 return true; 3866 } 3867 3868 if (GET_CODE (XEXP (x, 0)) == AND 3869 && GET_CODE (XEXP (x, 1)) == CONST_INT 3870 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT) 3871 { 3872 rtx op0 = XEXP (XEXP (x, 0), 0); 3873 rtx op1 = XEXP (XEXP (x, 0), 1); 3874 rtx op2 = XEXP (x, 1); 3875 3876 if (memory_operand (op0, GET_MODE (op0)) 3877 && s390_tm_ccmode (op1, op2, 0) != VOIDmode) 3878 return true; 3879 if (register_operand (op0, GET_MODE (op0)) 3880 && s390_tm_ccmode (op1, op2, 1) != VOIDmode) 3881 return true; 3882 } 3883 return false; 3884 3885 default: 3886 return false; 3887 } 3888} 3889 3890/* Return the cost of an address rtx ADDR. 
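
   (Editor's note: addresses that cannot be decomposed get a
   prohibitive cost of 1000; an address with an index register costs
   one unit more than a plain base + displacement form, slightly
   biasing address generation towards the latter.)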
*/ 3891 3892static int 3893s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED, 3894 addr_space_t as ATTRIBUTE_UNUSED, 3895 bool speed ATTRIBUTE_UNUSED) 3896{ 3897 struct s390_address ad; 3898 if (!s390_decompose_address (addr, &ad)) 3899 return 1000; 3900 3901 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1); 3902} 3903 3904/* Implement targetm.vectorize.builtin_vectorization_cost. */ 3905static int 3906s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, 3907 tree vectype, 3908 int misalign ATTRIBUTE_UNUSED) 3909{ 3910 switch (type_of_cost) 3911 { 3912 case scalar_stmt: 3913 case scalar_load: 3914 case scalar_store: 3915 case vector_stmt: 3916 case vector_load: 3917 case vector_store: 3918 case vector_gather_load: 3919 case vector_scatter_store: 3920 case vec_to_scalar: 3921 case scalar_to_vec: 3922 case cond_branch_not_taken: 3923 case vec_perm: 3924 case vec_promote_demote: 3925 case unaligned_load: 3926 case unaligned_store: 3927 return 1; 3928 3929 case cond_branch_taken: 3930 return 3; 3931 3932 case vec_construct: 3933 return TYPE_VECTOR_SUBPARTS (vectype) - 1; 3934 3935 default: 3936 gcc_unreachable (); 3937 } 3938} 3939 3940/* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode, 3941 otherwise return 0. */ 3942 3943int 3944tls_symbolic_operand (rtx op) 3945{ 3946 if (GET_CODE (op) != SYMBOL_REF) 3947 return 0; 3948 return SYMBOL_REF_TLS_MODEL (op); 3949} 3950 3951/* Split DImode access register reference REG (on 64-bit) into its constituent 3952 low and high parts, and store them into LO and HI. Note that gen_lowpart/ 3953 gen_highpart cannot be used as they assume all registers are word-sized, 3954 while our access registers have only half that size. */ 3955 3956void 3957s390_split_access_reg (rtx reg, rtx *lo, rtx *hi) 3958{ 3959 gcc_assert (TARGET_64BIT); 3960 gcc_assert (ACCESS_REG_P (reg)); 3961 gcc_assert (GET_MODE (reg) == DImode); 3962 gcc_assert (!(REGNO (reg) & 1)); 3963 3964 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1); 3965 *hi = gen_rtx_REG (SImode, REGNO (reg)); 3966} 3967 3968/* Return true if OP contains a symbol reference */ 3969 3970bool 3971symbolic_reference_mentioned_p (rtx op) 3972{ 3973 const char *fmt; 3974 int i; 3975 3976 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) 3977 return 1; 3978 3979 fmt = GET_RTX_FORMAT (GET_CODE (op)); 3980 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) 3981 { 3982 if (fmt[i] == 'E') 3983 { 3984 int j; 3985 3986 for (j = XVECLEN (op, i) - 1; j >= 0; j--) 3987 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) 3988 return 1; 3989 } 3990 3991 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) 3992 return 1; 3993 } 3994 3995 return 0; 3996} 3997 3998/* Return true if OP contains a reference to a thread-local symbol. 
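
   (Editor's note: analogous to symbolic_reference_mentioned_p above,
   the walk below recurses through the rtx format string; a SYMBOL_REF
   leaf reports whether its SYMBOL_REF_TLS_MODEL is non-zero.)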
*/ 3999 4000bool 4001tls_symbolic_reference_mentioned_p (rtx op) 4002{ 4003 const char *fmt; 4004 int i; 4005 4006 if (GET_CODE (op) == SYMBOL_REF) 4007 return tls_symbolic_operand (op); 4008 4009 fmt = GET_RTX_FORMAT (GET_CODE (op)); 4010 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) 4011 { 4012 if (fmt[i] == 'E') 4013 { 4014 int j; 4015 4016 for (j = XVECLEN (op, i) - 1; j >= 0; j--) 4017 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j))) 4018 return true; 4019 } 4020 4021 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i))) 4022 return true; 4023 } 4024 4025 return false; 4026} 4027 4028 4029/* Return true if OP is a legitimate general operand when 4030 generating PIC code. It is given that flag_pic is on 4031 and that OP satisfies CONSTANT_P. */ 4032 4033int 4034legitimate_pic_operand_p (rtx op) 4035{ 4036 /* Accept all non-symbolic constants. */ 4037 if (!SYMBOLIC_CONST (op)) 4038 return 1; 4039 4040 /* Accept addresses that can be expressed relative to (pc). */ 4041 if (larl_operand (op, VOIDmode)) 4042 return 1; 4043 4044 /* Reject everything else; must be handled 4045 via emit_symbolic_move. */ 4046 return 0; 4047} 4048 4049/* Returns true if the constant value OP is a legitimate general operand. 4050 It is given that OP satisfies CONSTANT_P. */ 4051 4052static bool 4053s390_legitimate_constant_p (machine_mode mode, rtx op) 4054{ 4055 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR) 4056 { 4057 if (GET_MODE_SIZE (mode) != 16) 4058 return 0; 4059 4060 if (!satisfies_constraint_j00 (op) 4061 && !satisfies_constraint_jm1 (op) 4062 && !satisfies_constraint_jKK (op) 4063 && !satisfies_constraint_jxx (op) 4064 && !satisfies_constraint_jyy (op)) 4065 return 0; 4066 } 4067 4068 /* Accept all non-symbolic constants. */ 4069 if (!SYMBOLIC_CONST (op)) 4070 return 1; 4071 4072 /* Accept immediate LARL operands. */ 4073 if (larl_operand (op, mode)) 4074 return 1; 4075 4076 /* Thread-local symbols are never legal constants. This is 4077 so that emit_call knows that computing such addresses 4078 might require a function call. */ 4079 if (TLS_SYMBOLIC_CONST (op)) 4080 return 0; 4081 4082 /* In the PIC case, symbolic constants must *not* be 4083 forced into the literal pool. We accept them here, 4084 so that they will be handled by emit_symbolic_move. */ 4085 if (flag_pic) 4086 return 1; 4087 4088 /* All remaining non-PIC symbolic constants are 4089 forced into the literal pool. */ 4090 return 0; 4091} 4092 4093/* Determine if it's legal to put X into the constant pool. This 4094 is not possible if X contains the address of a symbol that is 4095 not constant (TLS) or not known at final link time (PIC). */ 4096 4097static bool 4098s390_cannot_force_const_mem (machine_mode mode, rtx x) 4099{ 4100 switch (GET_CODE (x)) 4101 { 4102 case CONST_INT: 4103 case CONST_DOUBLE: 4104 case CONST_WIDE_INT: 4105 case CONST_VECTOR: 4106 /* Accept all non-symbolic constants. */ 4107 return false; 4108 4109 case LABEL_REF: 4110 /* Labels are OK iff we are non-PIC. */ 4111 return flag_pic != 0; 4112 4113 case SYMBOL_REF: 4114 /* 'Naked' TLS symbol references are never OK, 4115 non-TLS symbols are OK iff we are non-PIC. 
*/ 4116 if (tls_symbolic_operand (x)) 4117 return true; 4118 else 4119 return flag_pic != 0; 4120 4121 case CONST: 4122 return s390_cannot_force_const_mem (mode, XEXP (x, 0)); 4123 case PLUS: 4124 case MINUS: 4125 return s390_cannot_force_const_mem (mode, XEXP (x, 0)) 4126 || s390_cannot_force_const_mem (mode, XEXP (x, 1)); 4127 4128 case UNSPEC: 4129 switch (XINT (x, 1)) 4130 { 4131 /* Only lt-relative or GOT-relative UNSPECs are OK. */ 4132 case UNSPEC_LTREL_OFFSET: 4133 case UNSPEC_GOT: 4134 case UNSPEC_GOTOFF: 4135 case UNSPEC_PLTOFF: 4136 case UNSPEC_TLSGD: 4137 case UNSPEC_TLSLDM: 4138 case UNSPEC_NTPOFF: 4139 case UNSPEC_DTPOFF: 4140 case UNSPEC_GOTNTPOFF: 4141 case UNSPEC_INDNTPOFF: 4142 return false; 4143 4144 /* Execute template placeholders must not be forced into the pool 4145 by generic code; the back end places them there itself, relying on the literal pool sharing the code section. */ 4146 case UNSPEC_INSN: 4147 default: 4148 return true; 4149 } 4150 break; 4151 4152 default: 4153 gcc_unreachable (); 4154 } 4155} 4156 4157/* Returns true if the constant value OP is a legitimate general 4158 operand during and after reload. The difference from 4159 legitimate_constant_p is that this function will not accept 4160 a constant that would need to be forced to the literal pool 4161 before it can be used as operand. 4162 This function accepts all constants which can be loaded directly 4163 into a GPR. */ 4164 4165bool 4166legitimate_reload_constant_p (rtx op) 4167{ 4168 /* Accept la(y) operands. */ 4169 if (GET_CODE (op) == CONST_INT 4170 && DISP_IN_RANGE (INTVAL (op))) 4171 return true; 4172 4173 /* Accept l(g)hi/l(g)fi operands. */ 4174 if (GET_CODE (op) == CONST_INT 4175 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op)))) 4176 return true; 4177 4178 /* Accept lliXX operands. */ 4179 if (TARGET_ZARCH 4180 && GET_CODE (op) == CONST_INT 4181 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op) 4182 && s390_single_part (op, word_mode, HImode, 0) >= 0) 4183 return true; 4184 4185 if (TARGET_EXTIMM 4186 && GET_CODE (op) == CONST_INT 4187 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op) 4188 && s390_single_part (op, word_mode, SImode, 0) >= 0) 4189 return true; 4190 4191 /* Accept larl operands. */ 4192 if (larl_operand (op, VOIDmode)) 4193 return true; 4194 4195 /* Accept floating-point zero operands that fit into a single GPR. */ 4196 if (GET_CODE (op) == CONST_DOUBLE 4197 && s390_float_const_zero_p (op) 4198 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD) 4199 return true; 4200 4201 /* Accept double-word operands that can be split. */ 4202 if (GET_CODE (op) == CONST_WIDE_INT 4203 || (GET_CODE (op) == CONST_INT 4204 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op))) 4205 { 4206 machine_mode dword_mode = word_mode == SImode ? DImode : TImode; 4207 rtx hi = operand_subword (op, 0, 0, dword_mode); 4208 rtx lo = operand_subword (op, 1, 0, dword_mode); 4209 return legitimate_reload_constant_p (hi) 4210 && legitimate_reload_constant_p (lo); 4211 } 4212 4213 /* Everything else cannot be handled without reload. */ 4214 return false; 4215} 4216 4217/* Returns true if the constant value OP is a legitimate fp operand 4218 during and after reload. 4219 This function accepts all constants which can be loaded directly 4220 into an FPR. */ 4221 4222static bool 4223legitimate_reload_fp_constant_p (rtx op) 4224{ 4225 /* Accept floating-point zero operands if the load zero instruction 4226 can be used. Prior to z196 the load fp zero instruction caused a 4227 performance penalty if the result is used as a BFP number.
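(The load zero instructions in question are presumably lzer, lzdr and lzxr for SF-, DF- and TFmode, respectively.)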
*/ 4228 if (TARGET_Z196 4229 && GET_CODE (op) == CONST_DOUBLE 4230 && s390_float_const_zero_p (op)) 4231 return true; 4232 4233 return false; 4234} 4235 4236/* Returns true if the constant value OP is a legitimate vector operand 4237 during and after reload. 4238 This function accepts all constants which can be loaded directly 4239 into a VR. */ 4240 4241static bool 4242legitimate_reload_vector_constant_p (rtx op) 4243{ 4244 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16 4245 && (satisfies_constraint_j00 (op) 4246 || satisfies_constraint_jm1 (op) 4247 || satisfies_constraint_jKK (op) 4248 || satisfies_constraint_jxx (op) 4249 || satisfies_constraint_jyy (op))) 4250 return true; 4251 4252 return false; 4253} 4254 4255/* Given an rtx OP being reloaded into a reg required to be in class RCLASS, 4256 return the class of reg to actually use. */ 4257 4258static reg_class_t 4259s390_preferred_reload_class (rtx op, reg_class_t rclass) 4260{ 4261 switch (GET_CODE (op)) 4262 { 4263 /* Constants we cannot reload into general registers 4264 must be forced into the literal pool. */ 4265 case CONST_VECTOR: 4266 case CONST_DOUBLE: 4267 case CONST_INT: 4268 case CONST_WIDE_INT: 4269 if (reg_class_subset_p (GENERAL_REGS, rclass) 4270 && legitimate_reload_constant_p (op)) 4271 return GENERAL_REGS; 4272 else if (reg_class_subset_p (ADDR_REGS, rclass) 4273 && legitimate_reload_constant_p (op)) 4274 return ADDR_REGS; 4275 else if (reg_class_subset_p (FP_REGS, rclass) 4276 && legitimate_reload_fp_constant_p (op)) 4277 return FP_REGS; 4278 else if (reg_class_subset_p (VEC_REGS, rclass) 4279 && legitimate_reload_vector_constant_p (op)) 4280 return VEC_REGS; 4281 4282 return NO_REGS; 4283 4284 /* If a symbolic constant or a PLUS is reloaded, 4285 it is most likely being used as an address, so 4286 prefer ADDR_REGS. If RCLASS is not a superset 4287 of ADDR_REGS, e.g. FP_REGS, reject this reload. */ 4288 case CONST: 4289 /* Symrefs cannot be pushed into the literal pool with -fPIC 4290 so we *MUST NOT* return NO_REGS for these cases 4291 (s390_cannot_force_const_mem will return true). 4292 4293 On the other hand we MUST return NO_REGS for symrefs with 4294 invalid addend which might have been pushed to the literal 4295 pool (no -fPIC). Usually we would expect them to be 4296 handled via secondary reload but this does not happen if 4297 they are used as literal pool slot replacement in reload 4298 inheritance (see emit_input_reload_insns). */ 4299 if (GET_CODE (XEXP (op, 0)) == PLUS 4300 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF 4301 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT) 4302 { 4303 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass)) 4304 return ADDR_REGS; 4305 else 4306 return NO_REGS; 4307 } 4308 /* fallthrough */ 4309 case LABEL_REF: 4310 case SYMBOL_REF: 4311 if (!legitimate_reload_constant_p (op)) 4312 return NO_REGS; 4313 /* fallthrough */ 4314 case PLUS: 4315 /* load address will be used. */ 4316 if (reg_class_subset_p (ADDR_REGS, rclass)) 4317 return ADDR_REGS; 4318 else 4319 return NO_REGS; 4320 4321 default: 4322 break; 4323 } 4324 4325 return rclass; 4326} 4327 4328/* Return true if ADDR is SYMBOL_REF + addend with addend being a 4329 multiple of ALIGNMENT and the SYMBOL_REF being naturally 4330 aligned. */ 4331 4332bool 4333s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment) 4334{ 4335 HOST_WIDE_INT addend; 4336 rtx symref; 4337 4338 /* The "required alignment" might be 0 (e.g. for certain structs 4339 accessed via BLKmode).
Early abort in this case, as well as when 4340 an alignment > 8 is required. */ 4341 if (alignment < 2 || alignment > 8) 4342 return false; 4343 4344 if (!s390_loadrelative_operand_p (addr, &symref, &addend)) 4345 return false; 4346 4347 if (addend & (alignment - 1)) 4348 return false; 4349 4350 if (GET_CODE (symref) == SYMBOL_REF) 4351 { 4352 /* s390_encode_section_info is not called for anchors, since they don't 4353 have corresponding VAR_DECLs. Therefore, we cannot rely on 4354 SYMBOL_FLAG_NOTALIGN{2,4,8}_P returning useful information. */ 4355 if (SYMBOL_REF_ANCHOR_P (symref)) 4356 { 4357 HOST_WIDE_INT block_offset = SYMBOL_REF_BLOCK_OFFSET (symref); 4358 unsigned int block_alignment = (SYMBOL_REF_BLOCK (symref)->alignment 4359 / BITS_PER_UNIT); 4360 4361 gcc_assert (block_offset >= 0); 4362 return ((block_offset & (alignment - 1)) == 0 4363 && block_alignment >= alignment); 4364 } 4365 4366 /* We have load-relative instructions for 2-byte, 4-byte, and 4367 8-byte alignment so allow only these. */ 4368 switch (alignment) 4369 { 4370 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref); 4371 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref); 4372 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref); 4373 default: return false; 4374 } 4375 } 4376 4377 if (GET_CODE (symref) == UNSPEC 4378 && alignment <= UNITS_PER_LONG) 4379 return true; 4380 4381 return false; 4382} 4383 4384/* ADDR is moved into REG using larl. If ADDR isn't a valid larl 4385 operand, SCRATCH is used to load the even part of the address; 4386 one is then added to form the final value. */ 4387 4388void 4389s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch) 4390{ 4391 HOST_WIDE_INT addend; 4392 rtx symref; 4393 4394 if (!s390_loadrelative_operand_p (addr, &symref, &addend)) 4395 gcc_unreachable (); 4396 4397 if (!(addend & 1)) 4398 /* Easy case. The addend is even so larl will do fine. */ 4399 emit_move_insn (reg, addr); 4400 else 4401 { 4402 /* We can leave the scratch register untouched if the target 4403 register is a valid base register. */ 4404 if (REGNO (reg) < FIRST_PSEUDO_REGISTER 4405 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS) 4406 scratch = reg; 4407 4408 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER); 4409 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS); 4410 4411 if (addend != 1) 4412 emit_move_insn (scratch, 4413 gen_rtx_CONST (Pmode, 4414 gen_rtx_PLUS (Pmode, symref, 4415 GEN_INT (addend - 1)))); 4416 else 4417 emit_move_insn (scratch, symref); 4418 4419 /* Increment the address using la in order to avoid clobbering cc. */ 4420 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx)); 4421 } 4422} 4423 4424/* Generate what is necessary to move between REG and MEM using 4425 SCRATCH. The direction is given by TOMEM. */ 4426 4427void 4428s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem) 4429{ 4430 /* Reload might have pulled a constant out of the literal pool. 4431 Force it back in. */ 4432 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE 4433 || GET_CODE (mem) == CONST_WIDE_INT 4434 || GET_CODE (mem) == CONST_VECTOR 4435 || GET_CODE (mem) == CONST) 4436 mem = force_const_mem (GET_MODE (reg), mem); 4437 4438 gcc_assert (MEM_P (mem)); 4439 4440 /* For a load from memory we can leave the scratch register 4441 untouched if the target register is a valid base register.
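In that case the address can be built in the target register itself (see the scratch = reg assignment below) and the separate scratch register stays untouched.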
*/ 4442 if (!tomem 4443 && REGNO (reg) < FIRST_PSEUDO_REGISTER 4444 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS 4445 && GET_MODE (reg) == GET_MODE (scratch)) 4446 scratch = reg; 4447 4448 /* Load address into scratch register. Since we can't have a 4449 secondary reload for a secondary reload we have to cover the case 4450 where larl would need a secondary reload here as well. */ 4451 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch); 4452 4453 /* Now we can use a standard load/store to do the move. */ 4454 if (tomem) 4455 emit_move_insn (replace_equiv_address (mem, scratch), reg); 4456 else 4457 emit_move_insn (reg, replace_equiv_address (mem, scratch)); 4458} 4459 4460/* Inform reload about cases where moving X with a mode MODE to a register in 4461 RCLASS requires an extra scratch or immediate register. Return the class 4462 needed for the immediate register. */ 4463 4464static reg_class_t 4465s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, 4466 machine_mode mode, secondary_reload_info *sri) 4467{ 4468 enum reg_class rclass = (enum reg_class) rclass_i; 4469 4470 /* Intermediate register needed. */ 4471 if (reg_classes_intersect_p (CC_REGS, rclass)) 4472 return GENERAL_REGS; 4473 4474 if (TARGET_VX) 4475 { 4476 /* The vst/vl vector move instructions allow only for short 4477 displacements. */ 4478 if (MEM_P (x) 4479 && GET_CODE (XEXP (x, 0)) == PLUS 4480 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT 4481 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1))) 4482 && reg_class_subset_p (rclass, VEC_REGS) 4483 && (!reg_class_subset_p (rclass, FP_REGS) 4484 || (GET_MODE_SIZE (mode) > 8 4485 && s390_class_max_nregs (FP_REGS, mode) == 1))) 4486 { 4487 if (in_p) 4488 sri->icode = (TARGET_64BIT ? 4489 CODE_FOR_reloaddi_la_in : 4490 CODE_FOR_reloadsi_la_in); 4491 else 4492 sri->icode = (TARGET_64BIT ? 4493 CODE_FOR_reloaddi_la_out : 4494 CODE_FOR_reloadsi_la_out); 4495 } 4496 } 4497 4498 if (TARGET_Z10) 4499 { 4500 HOST_WIDE_INT offset; 4501 rtx symref; 4502 4503 /* On z10 several optimizer steps may generate larl operands with 4504 an odd addend. */ 4505 if (in_p 4506 && s390_loadrelative_operand_p (x, &symref, &offset) 4507 && mode == Pmode 4508 && !SYMBOL_FLAG_NOTALIGN2_P (symref) 4509 && (offset & 1) == 1) 4510 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10 4511 : CODE_FOR_reloadsi_larl_odd_addend_z10); 4512 4513 /* Handle all the (mem (symref)) accesses we cannot use the z10 4514 instructions for. */ 4515 if (MEM_P (x) 4516 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL) 4517 && (mode == QImode 4518 || !reg_class_subset_p (rclass, GENERAL_REGS) 4519 || GET_MODE_SIZE (mode) > UNITS_PER_WORD 4520 || !s390_check_symref_alignment (XEXP (x, 0), 4521 GET_MODE_SIZE (mode)))) 4522 { 4523#define __SECONDARY_RELOAD_CASE(M,m) \ 4524 case E_##M##mode: \ 4525 if (TARGET_64BIT) \ 4526 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \ 4527 CODE_FOR_reload##m##di_tomem_z10; \ 4528 else \ 4529 sri->icode = in_p ? 
CODE_FOR_reload##m##si_toreg_z10 : \ 4530 CODE_FOR_reload##m##si_tomem_z10; \ 4531 break; 4532 4533 switch (GET_MODE (x)) 4534 { 4535 __SECONDARY_RELOAD_CASE (QI, qi); 4536 __SECONDARY_RELOAD_CASE (HI, hi); 4537 __SECONDARY_RELOAD_CASE (SI, si); 4538 __SECONDARY_RELOAD_CASE (DI, di); 4539 __SECONDARY_RELOAD_CASE (TI, ti); 4540 __SECONDARY_RELOAD_CASE (SF, sf); 4541 __SECONDARY_RELOAD_CASE (DF, df); 4542 __SECONDARY_RELOAD_CASE (TF, tf); 4543 __SECONDARY_RELOAD_CASE (SD, sd); 4544 __SECONDARY_RELOAD_CASE (DD, dd); 4545 __SECONDARY_RELOAD_CASE (TD, td); 4546 __SECONDARY_RELOAD_CASE (V1QI, v1qi); 4547 __SECONDARY_RELOAD_CASE (V2QI, v2qi); 4548 __SECONDARY_RELOAD_CASE (V4QI, v4qi); 4549 __SECONDARY_RELOAD_CASE (V8QI, v8qi); 4550 __SECONDARY_RELOAD_CASE (V16QI, v16qi); 4551 __SECONDARY_RELOAD_CASE (V1HI, v1hi); 4552 __SECONDARY_RELOAD_CASE (V2HI, v2hi); 4553 __SECONDARY_RELOAD_CASE (V4HI, v4hi); 4554 __SECONDARY_RELOAD_CASE (V8HI, v8hi); 4555 __SECONDARY_RELOAD_CASE (V1SI, v1si); 4556 __SECONDARY_RELOAD_CASE (V2SI, v2si); 4557 __SECONDARY_RELOAD_CASE (V4SI, v4si); 4558 __SECONDARY_RELOAD_CASE (V1DI, v1di); 4559 __SECONDARY_RELOAD_CASE (V2DI, v2di); 4560 __SECONDARY_RELOAD_CASE (V1TI, v1ti); 4561 __SECONDARY_RELOAD_CASE (V1SF, v1sf); 4562 __SECONDARY_RELOAD_CASE (V2SF, v2sf); 4563 __SECONDARY_RELOAD_CASE (V4SF, v4sf); 4564 __SECONDARY_RELOAD_CASE (V1DF, v1df); 4565 __SECONDARY_RELOAD_CASE (V2DF, v2df); 4566 __SECONDARY_RELOAD_CASE (V1TF, v1tf); 4567 default: 4568 gcc_unreachable (); 4569 } 4570#undef __SECONDARY_RELOAD_CASE 4571 } 4572 } 4573 4574 /* We need a scratch register when loading a PLUS expression which 4575 is not a legitimate operand of the LOAD ADDRESS instruction. */ 4576 /* LRA can deal with transformation of plus op very well -- so we 4577 don't need to prompt LRA in this case. */ 4578 if (! lra_in_progress && in_p && s390_plus_operand (x, mode)) 4579 sri->icode = (TARGET_64BIT ? 4580 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus); 4581 4582 /* When performing a multiword move from or to memory we have to make 4583 sure the second chunk in memory is addressable without causing a 4584 displacement overflow. If that is the case we calculate the address in 4585 a scratch register. */ 4586 if (MEM_P (x) 4587 && GET_CODE (XEXP (x, 0)) == PLUS 4588 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT 4589 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1)) 4590 + GET_MODE_SIZE (mode) - 1)) 4591 { 4592 /* For GENERAL_REGS a displacement overflow is no problem if occurring 4593 in an s_operand address since we may fall back to lm/stm. So we only 4594 have to care about overflows in the b+i+d case. */ 4595 if ((reg_classes_intersect_p (GENERAL_REGS, rclass) 4596 && s390_class_max_nregs (GENERAL_REGS, mode) > 1 4597 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS) 4598 /* For FP_REGS no lm/stm is available so this check is triggered 4599 for displacement overflows in b+i+d and b+d like addresses. */ 4600 || (reg_classes_intersect_p (FP_REGS, rclass) 4601 && s390_class_max_nregs (FP_REGS, mode) > 1)) 4602 { 4603 if (in_p) 4604 sri->icode = (TARGET_64BIT ? 4605 CODE_FOR_reloaddi_la_in : 4606 CODE_FOR_reloadsi_la_in); 4607 else 4608 sri->icode = (TARGET_64BIT ? 4609 CODE_FOR_reloaddi_la_out : 4610 CODE_FOR_reloadsi_la_out); 4611 } 4612 } 4613 4614 /* A scratch address register is needed when a symbolic constant is 4615 copied to r0 compiling with -fPIC. In other cases the target 4616 register might be used as temporary (see legitimize_pic_address).
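(r0 is part of GENERAL_REGS but cannot act as a base or index register, so the address computation emitted by legitimize_pic_address could not take place in the target register itself.)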
*/ 4617 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS) 4618 sri->icode = (TARGET_64BIT ? 4619 CODE_FOR_reloaddi_PIC_addr : 4620 CODE_FOR_reloadsi_PIC_addr); 4621 4622 /* Either scratch or no register needed. */ 4623 return NO_REGS; 4624} 4625 4626/* Implement TARGET_SECONDARY_MEMORY_NEEDED. 4627 4628 We need secondary memory to move data between GPRs and FPRs. 4629 4630 - With DFP the ldgr/lgdr instructions are available. Due to the 4631 different alignment we cannot use them for SFmode. In 31-bit mode 4632 a 64-bit value in a GPR would be a register pair, so there we 4633 still need to go via memory. 4634 4635 - With z13 we can do the SF/SImode moves with vlgvf. Due to the 4636 overlap of FPRs and VRs we still disallow TF/TD modes in 4637 full VRs, so these moves keep going via memory on z13 4638 as well. 4639 4640 FIXME: Should we try splitting it into two vlgvg's/vlvg's instead? */ 4641 4642static bool 4643s390_secondary_memory_needed (machine_mode mode, 4644 reg_class_t class1, reg_class_t class2) 4645{ 4646 return (((reg_classes_intersect_p (class1, VEC_REGS) 4647 && reg_classes_intersect_p (class2, GENERAL_REGS)) 4648 || (reg_classes_intersect_p (class1, GENERAL_REGS) 4649 && reg_classes_intersect_p (class2, VEC_REGS))) 4650 && (TARGET_TPF || !TARGET_DFP || !TARGET_64BIT 4651 || GET_MODE_SIZE (mode) != 8) 4652 && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode) 4653 && GET_MODE_SIZE (mode) > 8))); 4654} 4655 4656/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. 4657 4658 get_secondary_mem widens its argument to BITS_PER_WORD which loses on 4659 64-bit because the movsi and movsf patterns don't handle r/f moves. */ 4660 4661static machine_mode 4662s390_secondary_memory_needed_mode (machine_mode mode) 4663{ 4664 if (GET_MODE_BITSIZE (mode) < 32) 4665 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require (); 4666 return mode; 4667} 4668 4669/* Generate code to load SRC, which is a PLUS that is not a 4670 legitimate operand for the LA instruction, into TARGET. 4671 SCRATCH may be used as scratch register. */ 4672 4673void 4674s390_expand_plus_operand (rtx target, rtx src, 4675 rtx scratch) 4676{ 4677 rtx sum1, sum2; 4678 struct s390_address ad; 4679 4680 /* src must be a PLUS; get its two operands. */ 4681 gcc_assert (GET_CODE (src) == PLUS); 4682 gcc_assert (GET_MODE (src) == Pmode); 4683 4684 /* Check if any of the two operands is already scheduled 4685 for replacement by reload. This can happen e.g. when 4686 float registers occur in an address. */ 4687 sum1 = find_replacement (&XEXP (src, 0)); 4688 sum2 = find_replacement (&XEXP (src, 1)); 4689 src = gen_rtx_PLUS (Pmode, sum1, sum2); 4690 4691 /* If the address is already strictly valid, there's nothing to do. */ 4692 if (!s390_decompose_address (src, &ad) 4693 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base))) 4694 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))) 4695 { 4696 /* Otherwise, one of the operands cannot be an address register; 4697 we reload its value into the scratch register. */ 4698 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15) 4699 { 4700 emit_move_insn (scratch, sum1); 4701 sum1 = scratch; 4702 } 4703 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15) 4704 { 4705 emit_move_insn (scratch, sum2); 4706 sum2 = scratch; 4707 } 4708 4709 /* According to the way these invalid addresses are generated 4710 in reload.c, it should never happen (at least on s390) that 4711 *neither* of the PLUS components, after find_replacements 4712 was applied, is an address register.
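Were it to happen, both components would have been moved into the single SCRATCH register, the second move clobbering the first; the check below guards against that.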
*/ 4713 if (sum1 == scratch && sum2 == scratch) 4714 { 4715 debug_rtx (src); 4716 gcc_unreachable (); 4717 } 4718 4719 src = gen_rtx_PLUS (Pmode, sum1, sum2); 4720 } 4721 4722 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS 4723 is only ever performed on addresses, so we can mark the 4724 sum as legitimate for LA in any case. */ 4725 s390_load_address (target, src); 4726} 4727 4728 4729/* Return true if ADDR is a valid memory address. 4730 STRICT specifies whether strict register checking applies. */ 4731 4732static bool 4733s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict) 4734{ 4735 struct s390_address ad; 4736 4737 if (TARGET_Z10 4738 && larl_operand (addr, VOIDmode) 4739 && (mode == VOIDmode 4740 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode)))) 4741 return true; 4742 4743 if (!s390_decompose_address (addr, &ad)) 4744 return false; 4745 4746 /* The vector memory instructions only support short displacements. 4747 Reject invalid displacements early to prevent plenty of lay 4748 instructions to be generated later which then cannot be merged 4749 properly. */ 4750 if (TARGET_VX 4751 && VECTOR_MODE_P (mode) 4752 && ad.disp != NULL_RTX 4753 && CONST_INT_P (ad.disp) 4754 && !SHORT_DISP_IN_RANGE (INTVAL (ad.disp))) 4755 return false; 4756 4757 if (strict) 4758 { 4759 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base))) 4760 return false; 4761 4762 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))) 4763 return false; 4764 } 4765 else 4766 { 4767 if (ad.base 4768 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER 4769 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS)) 4770 return false; 4771 4772 if (ad.indx 4773 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER 4774 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS)) 4775 return false; 4776 } 4777 return true; 4778} 4779 4780/* Return true if OP is a valid operand for the LA instruction. 4781 In 31-bit, we need to prove that the result is used as an 4782 address, as LA performs only a 31-bit addition. */ 4783 4784bool 4785legitimate_la_operand_p (rtx op) 4786{ 4787 struct s390_address addr; 4788 if (!s390_decompose_address (op, &addr)) 4789 return false; 4790 4791 return (TARGET_64BIT || addr.pointer); 4792} 4793 4794/* Return true if it is valid *and* preferable to use LA to 4795 compute the sum of OP1 and OP2. */ 4796 4797bool 4798preferred_la_operand_p (rtx op1, rtx op2) 4799{ 4800 struct s390_address addr; 4801 4802 if (op2 != const0_rtx) 4803 op1 = gen_rtx_PLUS (Pmode, op1, op2); 4804 4805 if (!s390_decompose_address (op1, &addr)) 4806 return false; 4807 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base))) 4808 return false; 4809 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx))) 4810 return false; 4811 4812 /* Avoid LA instructions with index (and base) register on z196 or 4813 later; it is preferable to use regular add instructions when 4814 possible. Starting with zEC12 the la with index register is 4815 "uncracked" again but still slower than a regular add. */ 4816 if (addr.indx && s390_tune >= PROCESSOR_2817_Z196) 4817 return false; 4818 4819 if (!TARGET_64BIT && !addr.pointer) 4820 return false; 4821 4822 if (addr.pointer) 4823 return true; 4824 4825 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base)) 4826 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx))) 4827 return true; 4828 4829 return false; 4830} 4831 4832/* Emit a forced load-address operation to load SRC into DST. 
4833 This will use the LOAD ADDRESS instruction even in situations 4834 where legitimate_la_operand_p (SRC) returns false. */ 4835 4836void 4837s390_load_address (rtx dst, rtx src) 4838{ 4839 if (TARGET_64BIT) 4840 emit_move_insn (dst, src); 4841 else 4842 emit_insn (gen_force_la_31 (dst, src)); 4843} 4844 4845/* Return true if it is OK to use SYMBOL_REF in a relative address. */ 4846 4847bool 4848s390_rel_address_ok_p (rtx symbol_ref) 4849{ 4850 tree decl; 4851 4852 if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref)) 4853 return true; 4854 4855 decl = SYMBOL_REF_DECL (symbol_ref); 4856 4857 if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref)) 4858 return (s390_pic_data_is_text_relative 4859 || (decl 4860 && TREE_CODE (decl) == FUNCTION_DECL)); 4861 4862 return false; 4863} 4864 4865/* Return a legitimate reference for ORIG (an address) using the 4866 register REG. If REG is 0, a new pseudo is generated. 4867 4868 There are two types of references that must be handled: 4869 4870 1. Global data references must load the address from the GOT, via 4871 the PIC reg. An insn is emitted to do this load, and the reg is 4872 returned. 4873 4874 2. Static data references, constant pool addresses, and code labels 4875 compute the address as an offset from the GOT, whose base is in 4876 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to 4877 differentiate them from global data objects. The returned 4878 address is the PIC reg + an unspec constant. 4879 4880 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC 4881 reg also appears in the address. */ 4882 4883rtx 4884legitimize_pic_address (rtx orig, rtx reg) 4885{ 4886 rtx addr = orig; 4887 rtx addend = const0_rtx; 4888 rtx new_rtx = orig; 4889 4890 gcc_assert (!TLS_SYMBOLIC_CONST (addr)); 4891 4892 if (GET_CODE (addr) == CONST) 4893 addr = XEXP (addr, 0); 4894 4895 if (GET_CODE (addr) == PLUS) 4896 { 4897 addend = XEXP (addr, 1); 4898 addr = XEXP (addr, 0); 4899 } 4900 4901 if ((GET_CODE (addr) == LABEL_REF 4902 || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr)) 4903 || (GET_CODE (addr) == UNSPEC && 4904 (XINT (addr, 1) == UNSPEC_GOTENT 4905 || XINT (addr, 1) == UNSPEC_PLT))) 4906 && GET_CODE (addend) == CONST_INT) 4907 { 4908 /* This can be locally addressed. */ 4909 4910 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */ 4911 rtx const_addr = (GET_CODE (addr) == UNSPEC ? 4912 gen_rtx_CONST (Pmode, addr) : addr); 4913 4914 if (larl_operand (const_addr, VOIDmode) 4915 && INTVAL (addend) < HOST_WIDE_INT_1 << 31 4916 && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31)) 4917 { 4918 if (INTVAL (addend) & 1) 4919 { 4920 /* LARL can't handle odd offsets, so emit a pair of LARL 4921 and LA. */ 4922 rtx temp = reg ? reg : gen_reg_rtx (Pmode); 4923 4924 if (!DISP_IN_RANGE (INTVAL (addend))) 4925 { 4926 HOST_WIDE_INT even = INTVAL (addend) - 1; 4927 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even)); 4928 addr = gen_rtx_CONST (Pmode, addr); 4929 addend = const1_rtx; 4930 } 4931 4932 emit_move_insn (temp, addr); 4933 new_rtx = gen_rtx_PLUS (Pmode, temp, addend); 4934 4935 if (reg != 0) 4936 { 4937 s390_load_address (reg, new_rtx); 4938 new_rtx = reg; 4939 } 4940 } 4941 else 4942 { 4943 /* If the offset is even, we can just use LARL. This 4944 will happen automatically. */ 4945 } 4946 } 4947 else 4948 { 4949 /* No larl; access local symbols relative to the GOT. */ 4950 4951 rtx temp = reg ?
reg : gen_reg_rtx (Pmode); 4952 4953 if (reload_in_progress || reload_completed) 4954 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); 4955 4956 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); 4957 if (addend != const0_rtx) 4958 addr = gen_rtx_PLUS (Pmode, addr, addend); 4959 addr = gen_rtx_CONST (Pmode, addr); 4960 addr = force_const_mem (Pmode, addr); 4961 emit_move_insn (temp, addr); 4962 4963 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp); 4964 if (reg != 0) 4965 { 4966 s390_load_address (reg, new_rtx); 4967 new_rtx = reg; 4968 } 4969 } 4970 } 4971 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx) 4972 { 4973 /* A non-local symbol reference without addend. 4974 4975 The symbol ref is wrapped into an UNSPEC to make sure the 4976 proper operand modifier (@GOT or @GOTENT) will be emitted. 4977 This will tell the linker to put the symbol into the GOT. 4978 4979 Additionally the code dereferencing the GOT slot is emitted here. 4980 4981 An addend to the symref needs to be added afterwards. 4982 legitimize_pic_address calls itself recursively to handle 4983 that case. So no need to do it here. */ 4984 4985 if (reg == 0) 4986 reg = gen_reg_rtx (Pmode); 4987 4988 if (TARGET_Z10) 4989 { 4990 /* Use load relative if possible. 4991 lgrl <target>, sym@GOTENT */ 4992 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT); 4993 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 4994 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx); 4995 4996 emit_move_insn (reg, new_rtx); 4997 new_rtx = reg; 4998 } 4999 else if (flag_pic == 1) 5000 { 5001 /* Assume GOT offset is a valid displacement operand (< 4k 5002 or < 512k with z990). This is handled the same way in 5003 both 31- and 64-bit code (@GOT). 5004 lg <target>, sym@GOT(r12) */ 5005 5006 if (reload_in_progress || reload_completed) 5007 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); 5008 5009 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); 5010 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 5011 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); 5012 new_rtx = gen_const_mem (Pmode, new_rtx); 5013 emit_move_insn (reg, new_rtx); 5014 new_rtx = reg; 5015 } 5016 else 5017 { 5018 /* If the GOT offset might be >= 4k, we determine the position 5019 of the GOT entry via a PC-relative LARL (@GOTENT). 5020 larl temp, sym@GOTENT 5021 lg <target>, 0(temp) */ 5022 5023 rtx temp = reg ? reg : gen_reg_rtx (Pmode); 5024 5025 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER 5026 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS); 5027 5028 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT); 5029 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 5030 emit_move_insn (temp, new_rtx); 5031 new_rtx = gen_const_mem (Pmode, temp); 5032 emit_move_insn (reg, new_rtx); 5033 5034 new_rtx = reg; 5035 } 5036 } 5037 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT) 5038 { 5039 gcc_assert (XVECLEN (addr, 0) == 1); 5040 switch (XINT (addr, 1)) 5041 { 5042 /* These UNSPECs address symbols (or PLT slots) relative to the 5043 GOT (not GOT slots!). In general this will exceed the 5044 displacement range so these values belong in the literal 5045 pool. */ 5046 case UNSPEC_GOTOFF: 5047 case UNSPEC_PLTOFF: 5048 new_rtx = force_const_mem (Pmode, orig); 5049 break; 5050 5051 /* For -fPIC the GOT size might exceed the displacement 5052 range so make sure the value is in the literal pool.
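With -fpic (flag_pic == 1) the @GOT offset is assumed to fit into the instruction's displacement field (cf. the flag_pic == 1 case above), so the constant can be used directly.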
*/ 5053 case UNSPEC_GOT: 5054 if (flag_pic == 2) 5055 new_rtx = force_const_mem (Pmode, orig); 5056 break; 5057 5058 /* For @GOTENT larl is used. This is handled like local 5059 symbol refs. */ 5060 case UNSPEC_GOTENT: 5061 gcc_unreachable (); 5062 break; 5063 5064 /* For @PLT larl is used. This is handled like local 5065 symbol refs. */ 5066 case UNSPEC_PLT: 5067 gcc_unreachable (); 5068 break; 5069 5070 /* Everything else cannot happen. */ 5071 default: 5072 gcc_unreachable (); 5073 } 5074 } 5075 else if (addend != const0_rtx) 5076 { 5077 /* Otherwise, compute the sum. */ 5078 5079 rtx base = legitimize_pic_address (addr, reg); 5080 new_rtx = legitimize_pic_address (addend, 5081 base == reg ? NULL_RTX : reg); 5082 if (GET_CODE (new_rtx) == CONST_INT) 5083 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx)); 5084 else 5085 { 5086 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1))) 5087 { 5088 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0)); 5089 new_rtx = XEXP (new_rtx, 1); 5090 } 5091 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx); 5092 } 5093 5094 if (GET_CODE (new_rtx) == CONST) 5095 new_rtx = XEXP (new_rtx, 0); 5096 new_rtx = force_operand (new_rtx, 0); 5097 } 5098 5099 return new_rtx; 5100} 5101 5102/* Load the thread pointer into a register. */ 5103 5104rtx 5105s390_get_thread_pointer (void) 5106{ 5107 rtx tp = gen_reg_rtx (Pmode); 5108 5109 emit_insn (gen_get_thread_pointer (Pmode, tp)); 5110 5111 mark_reg_pointer (tp, BITS_PER_WORD); 5112 5113 return tp; 5114} 5115 5116/* Emit a TLS call insn. The call target is the SYMBOL_REF stored 5117 in s390_tls_symbol, which always refers to __tls_get_offset. 5118 The returned offset is written to RESULT_REG and a USE rtx is 5119 generated for TLS_CALL. */ 5120 5121static GTY(()) rtx s390_tls_symbol; 5122 5123static void 5124s390_emit_tls_call_insn (rtx result_reg, rtx tls_call) 5125{ 5126 rtx insn; 5127 5128 if (!flag_pic) 5129 emit_insn (s390_load_got ()); 5130 5131 if (!s390_tls_symbol) 5132 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset"); 5133 5134 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg, 5135 gen_rtx_REG (Pmode, RETURN_REGNUM)); 5136 5137 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg); 5138 RTL_CONST_CALL_P (insn) = 1; 5139} 5140 5141/* ADDR contains a thread-local SYMBOL_REF. Generate code to compute 5142 this (thread-local) address. REG may be used as temporary.
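All four ELF TLS models are handled below: global-dynamic and local-dynamic obtain the offset via a call to __tls_get_offset, initial-exec loads it from the symbol's GOT entry, and local-exec materializes the NTPOFF constant via the literal pool. In each case the thread pointer is added at the end.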
*/ 5143 5144static rtx 5145legitimize_tls_address (rtx addr, rtx reg) 5146{ 5147 rtx new_rtx, tls_call, temp, base, r2; 5148 rtx_insn *insn; 5149 5150 if (GET_CODE (addr) == SYMBOL_REF) 5151 switch (tls_symbolic_operand (addr)) 5152 { 5153 case TLS_MODEL_GLOBAL_DYNAMIC: 5154 start_sequence (); 5155 r2 = gen_rtx_REG (Pmode, 2); 5156 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD); 5157 new_rtx = gen_rtx_CONST (Pmode, tls_call); 5158 new_rtx = force_const_mem (Pmode, new_rtx); 5159 emit_move_insn (r2, new_rtx); 5160 s390_emit_tls_call_insn (r2, tls_call); 5161 insn = get_insns (); 5162 end_sequence (); 5163 5164 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF); 5165 temp = gen_reg_rtx (Pmode); 5166 emit_libcall_block (insn, temp, r2, new_rtx); 5167 5168 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp); 5169 if (reg != 0) 5170 { 5171 s390_load_address (reg, new_rtx); 5172 new_rtx = reg; 5173 } 5174 break; 5175 5176 case TLS_MODEL_LOCAL_DYNAMIC: 5177 start_sequence (); 5178 r2 = gen_rtx_REG (Pmode, 2); 5179 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM); 5180 new_rtx = gen_rtx_CONST (Pmode, tls_call); 5181 new_rtx = force_const_mem (Pmode, new_rtx); 5182 emit_move_insn (r2, new_rtx); 5183 s390_emit_tls_call_insn (r2, tls_call); 5184 insn = get_insns (); 5185 end_sequence (); 5186 5187 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF); 5188 temp = gen_reg_rtx (Pmode); 5189 emit_libcall_block (insn, temp, r2, new_rtx); 5190 5191 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp); 5192 base = gen_reg_rtx (Pmode); 5193 s390_load_address (base, new_rtx); 5194 5195 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF); 5196 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 5197 new_rtx = force_const_mem (Pmode, new_rtx); 5198 temp = gen_reg_rtx (Pmode); 5199 emit_move_insn (temp, new_rtx); 5200 5201 new_rtx = gen_rtx_PLUS (Pmode, base, temp); 5202 if (reg != 0) 5203 { 5204 s390_load_address (reg, new_rtx); 5205 new_rtx = reg; 5206 } 5207 break; 5208 5209 case TLS_MODEL_INITIAL_EXEC: 5210 if (flag_pic == 1) 5211 { 5212 /* Assume GOT offset < 4k. This is handled the same way 5213 in both 31- and 64-bit code. */ 5214 5215 if (reload_in_progress || reload_completed) 5216 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); 5217 5218 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF); 5219 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 5220 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); 5221 new_rtx = gen_const_mem (Pmode, new_rtx); 5222 temp = gen_reg_rtx (Pmode); 5223 emit_move_insn (temp, new_rtx); 5224 } 5225 else 5226 { 5227 /* If the GOT offset might be >= 4k, we determine the position 5228 of the GOT entry via a PC-relative LARL. 
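I.e. a sequence along the lines of larl temp, sym@indntpoff followed by a load from 0(temp) (lg in 64-bit code, l in 31-bit code).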
*/ 5229 5230 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF); 5231 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 5232 temp = gen_reg_rtx (Pmode); 5233 emit_move_insn (temp, new_rtx); 5234 5235 new_rtx = gen_const_mem (Pmode, temp); 5236 temp = gen_reg_rtx (Pmode); 5237 emit_move_insn (temp, new_rtx); 5238 } 5239 5240 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp); 5241 if (reg != 0) 5242 { 5243 s390_load_address (reg, new_rtx); 5244 new_rtx = reg; 5245 } 5246 break; 5247 5248 case TLS_MODEL_LOCAL_EXEC: 5249 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF); 5250 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 5251 new_rtx = force_const_mem (Pmode, new_rtx); 5252 temp = gen_reg_rtx (Pmode); 5253 emit_move_insn (temp, new_rtx); 5254 5255 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp); 5256 if (reg != 0) 5257 { 5258 s390_load_address (reg, new_rtx); 5259 new_rtx = reg; 5260 } 5261 break; 5262 5263 default: 5264 gcc_unreachable (); 5265 } 5266 5267 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC) 5268 { 5269 switch (XINT (XEXP (addr, 0), 1)) 5270 { 5271 case UNSPEC_INDNTPOFF: 5272 new_rtx = addr; 5273 break; 5274 5275 default: 5276 gcc_unreachable (); 5277 } 5278 } 5279 5280 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS 5281 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT) 5282 { 5283 new_rtx = XEXP (XEXP (addr, 0), 0); 5284 if (GET_CODE (new_rtx) != SYMBOL_REF) 5285 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 5286 5287 new_rtx = legitimize_tls_address (new_rtx, reg); 5288 new_rtx = plus_constant (Pmode, new_rtx, 5289 INTVAL (XEXP (XEXP (addr, 0), 1))); 5290 new_rtx = force_operand (new_rtx, 0); 5291 } 5292 5293 else 5294 gcc_unreachable (); /* for now ... */ 5295 5296 return new_rtx; 5297} 5298 5299/* Emit insns making the address in operands[1] valid for a standard 5300 move to operands[0]. operands[1] is replaced by an address which 5301 should be used instead of the former RTX to emit the move 5302 pattern. */ 5303 5304void 5305emit_symbolic_move (rtx *operands) 5306{ 5307 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); 5308 5309 if (GET_CODE (operands[0]) == MEM) 5310 operands[1] = force_reg (Pmode, operands[1]); 5311 else if (TLS_SYMBOLIC_CONST (operands[1])) 5312 operands[1] = legitimize_tls_address (operands[1], temp); 5313 else if (flag_pic) 5314 operands[1] = legitimize_pic_address (operands[1], temp); 5315} 5316 5317/* Try machine-dependent ways of modifying an illegitimate address X 5318 to be legitimate. If we find one, return the new, valid address. 5319 5320 OLDX is the address as it was before break_out_memory_refs was called. 5321 In some cases it is useful to look at this to decide what needs to be done. 5322 5323 MODE is the mode of the operand pointed to by X. 5324 5325 When -fpic is used, special handling is needed for symbolic references. 5326 See comments by legitimize_pic_address for details. 
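For example, when long displacements are unavailable, an address like (plus (reg) (const_int 0x12345)) is split below into a multiple of 4K and a remainder: 0x12000 goes into a pseudo (and can therefore be CSE'd), while only the low part 0x345 remains as displacement.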
*/ 5327 5328static rtx 5329s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, 5330 machine_mode mode ATTRIBUTE_UNUSED) 5331{ 5332 rtx constant_term = const0_rtx; 5333 5334 if (TLS_SYMBOLIC_CONST (x)) 5335 { 5336 x = legitimize_tls_address (x, 0); 5337 5338 if (s390_legitimate_address_p (mode, x, FALSE)) 5339 return x; 5340 } 5341 else if (GET_CODE (x) == PLUS 5342 && (TLS_SYMBOLIC_CONST (XEXP (x, 0)) 5343 || TLS_SYMBOLIC_CONST (XEXP (x, 1)))) 5344 { 5345 return x; 5346 } 5347 else if (flag_pic) 5348 { 5349 if (SYMBOLIC_CONST (x) 5350 || (GET_CODE (x) == PLUS 5351 && (SYMBOLIC_CONST (XEXP (x, 0)) 5352 || SYMBOLIC_CONST (XEXP (x, 1))))) 5353 x = legitimize_pic_address (x, 0); 5354 5355 if (s390_legitimate_address_p (mode, x, FALSE)) 5356 return x; 5357 } 5358 5359 x = eliminate_constant_term (x, &constant_term); 5360 5361 /* Optimize loading of large displacements by splitting them 5362 into the multiple of 4K and the rest; this allows the 5363 former to be CSE'd if possible. 5364 5365 Don't do this if the displacement is added to a register 5366 pointing into the stack frame, as the offsets will 5367 change later anyway. */ 5368 5369 if (GET_CODE (constant_term) == CONST_INT 5370 && !TARGET_LONG_DISPLACEMENT 5371 && !DISP_IN_RANGE (INTVAL (constant_term)) 5372 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x)))) 5373 { 5374 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff; 5375 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower; 5376 5377 rtx temp = gen_reg_rtx (Pmode); 5378 rtx val = force_operand (GEN_INT (upper), temp); 5379 if (val != temp) 5380 emit_move_insn (temp, val); 5381 5382 x = gen_rtx_PLUS (Pmode, x, temp); 5383 constant_term = GEN_INT (lower); 5384 } 5385 5386 if (GET_CODE (x) == PLUS) 5387 { 5388 if (GET_CODE (XEXP (x, 0)) == REG) 5389 { 5390 rtx temp = gen_reg_rtx (Pmode); 5391 rtx val = force_operand (XEXP (x, 1), temp); 5392 if (val != temp) 5393 emit_move_insn (temp, val); 5394 5395 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp); 5396 } 5397 5398 else if (GET_CODE (XEXP (x, 1)) == REG) 5399 { 5400 rtx temp = gen_reg_rtx (Pmode); 5401 rtx val = force_operand (XEXP (x, 0), temp); 5402 if (val != temp) 5403 emit_move_insn (temp, val); 5404 5405 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1)); 5406 } 5407 } 5408 5409 if (constant_term != const0_rtx) 5410 x = gen_rtx_PLUS (Pmode, x, constant_term); 5411 5412 return x; 5413} 5414 5415/* Try a machine-dependent way of reloading an illegitimate address AD 5416 operand. If we find one, push the reload and return the new address. 5417 5418 MODE is the mode of the enclosing MEM. OPNUM is the operand number 5419 and TYPE is the reload type of the current reload. 
*/ 5420 5421rtx 5422legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED, 5423 int opnum, int type) 5424{ 5425 if (!optimize || TARGET_LONG_DISPLACEMENT) 5426 return NULL_RTX; 5427 5428 if (GET_CODE (ad) == PLUS) 5429 { 5430 rtx tem = simplify_binary_operation (PLUS, Pmode, 5431 XEXP (ad, 0), XEXP (ad, 1)); 5432 if (tem) 5433 ad = tem; 5434 } 5435 5436 if (GET_CODE (ad) == PLUS 5437 && GET_CODE (XEXP (ad, 0)) == REG 5438 && GET_CODE (XEXP (ad, 1)) == CONST_INT 5439 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1)))) 5440 { 5441 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff; 5442 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower; 5443 rtx cst, tem, new_rtx; 5444 5445 cst = GEN_INT (upper); 5446 if (!legitimate_reload_constant_p (cst)) 5447 cst = force_const_mem (Pmode, cst); 5448 5449 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst); 5450 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower)); 5451 5452 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0, 5453 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, 5454 opnum, (enum reload_type) type); 5455 return new_rtx; 5456 } 5457 5458 return NULL_RTX; 5459} 5460 5461/* Emit code to move LEN bytes from DST to SRC. */ 5462 5463bool 5464s390_expand_cpymem (rtx dst, rtx src, rtx len) 5465{ 5466 /* When tuning for z10 or higher we rely on the Glibc functions to 5467 do the right thing. Only for constant lengths below 64k we will 5468 generate inline code. */ 5469 if (s390_tune >= PROCESSOR_2097_Z10 5470 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16))) 5471 return false; 5472 5473 /* Expand memcpy for constant length operands without a loop if it 5474 is shorter that way. 5475 5476 With a constant length argument a 5477 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */ 5478 if (GET_CODE (len) == CONST_INT 5479 && INTVAL (len) >= 0 5480 && INTVAL (len) <= 256 * 6 5481 && (!TARGET_MVCLE || INTVAL (len) <= 256)) 5482 { 5483 HOST_WIDE_INT o, l; 5484 5485 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256) 5486 { 5487 rtx newdst = adjust_address (dst, BLKmode, o); 5488 rtx newsrc = adjust_address (src, BLKmode, o); 5489 emit_insn (gen_cpymem_short (newdst, newsrc, 5490 GEN_INT (l > 256 ? 
255 : l - 1))); 5491 } 5492 } 5493 5494 else if (TARGET_MVCLE) 5495 { 5496 emit_insn (gen_cpymem_long (dst, src, convert_to_mode (Pmode, len, 1))); 5497 } 5498 5499 else 5500 { 5501 rtx dst_addr, src_addr, count, blocks, temp; 5502 rtx_code_label *loop_start_label = gen_label_rtx (); 5503 rtx_code_label *loop_end_label = gen_label_rtx (); 5504 rtx_code_label *end_label = gen_label_rtx (); 5505 machine_mode mode; 5506 5507 mode = GET_MODE (len); 5508 if (mode == VOIDmode) 5509 mode = Pmode; 5510 5511 dst_addr = gen_reg_rtx (Pmode); 5512 src_addr = gen_reg_rtx (Pmode); 5513 count = gen_reg_rtx (mode); 5514 blocks = gen_reg_rtx (mode); 5515 5516 convert_move (count, len, 1); 5517 emit_cmp_and_jump_insns (count, const0_rtx, 5518 EQ, NULL_RTX, mode, 1, end_label); 5519 5520 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX)); 5521 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX)); 5522 dst = change_address (dst, VOIDmode, dst_addr); 5523 src = change_address (src, VOIDmode, src_addr); 5524 5525 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1, 5526 OPTAB_DIRECT); 5527 if (temp != count) 5528 emit_move_insn (count, temp); 5529 5530 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1, 5531 OPTAB_DIRECT); 5532 if (temp != blocks) 5533 emit_move_insn (blocks, temp); 5534 5535 emit_cmp_and_jump_insns (blocks, const0_rtx, 5536 EQ, NULL_RTX, mode, 1, loop_end_label); 5537 5538 emit_label (loop_start_label); 5539 5540 if (TARGET_Z10 5541 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768)) 5542 { 5543 rtx prefetch; 5544 5545 /* Issue a read prefetch for the +3 cache line. */ 5546 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)), 5547 const0_rtx, const0_rtx); 5548 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true; 5549 emit_insn (prefetch); 5550 5551 /* Issue a write prefetch for the +3 cache line. */ 5552 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)), 5553 const1_rtx, const0_rtx); 5554 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true; 5555 emit_insn (prefetch); 5556 } 5557 5558 emit_insn (gen_cpymem_short (dst, src, GEN_INT (255))); 5559 s390_load_address (dst_addr, 5560 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256))); 5561 s390_load_address (src_addr, 5562 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256))); 5563 5564 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1, 5565 OPTAB_DIRECT); 5566 if (temp != blocks) 5567 emit_move_insn (blocks, temp); 5568 5569 emit_cmp_and_jump_insns (blocks, const0_rtx, 5570 EQ, NULL_RTX, mode, 1, loop_end_label); 5571 5572 emit_jump (loop_start_label); 5573 emit_label (loop_end_label); 5574 5575 emit_insn (gen_cpymem_short (dst, src, 5576 convert_to_mode (Pmode, count, 1))); 5577 emit_label (end_label); 5578 } 5579 return true; 5580} 5581 5582/* Emit code to set LEN bytes at DST to VAL. 5583 Make use of clrmem if VAL is zero. */ 5584 5585void 5586s390_expand_setmem (rtx dst, rtx len, rtx val) 5587{ 5588 if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0) 5589 return; 5590 5591 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode); 5592 5593 /* Expand setmem/clrmem for a constant length operand without a 5594 loop if it will be shorter that way. 
5595 clrmem loop (with PFD) is 30 bytes -> 5 * xc 5596 clrmem loop (without PFD) is 24 bytes -> 4 * xc 5597 setmem loop (with PFD) is 38 bytes -> ~4 * (mvi/stc + mvc) 5598 setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */ 5599 if (GET_CODE (len) == CONST_INT 5600 && ((val == const0_rtx 5601 && (INTVAL (len) <= 256 * 4 5602 || (INTVAL (len) <= 256 * 5 && TARGET_SETMEM_PFD(val,len)))) 5603 || (val != const0_rtx && INTVAL (len) <= 257 * 4)) 5604 && (!TARGET_MVCLE || INTVAL (len) <= 256)) 5605 { 5606 HOST_WIDE_INT o, l; 5607 5608 if (val == const0_rtx) 5609 /* clrmem: emit 256 byte blockwise XCs. */ 5610 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256) 5611 { 5612 rtx newdst = adjust_address (dst, BLKmode, o); 5613 emit_insn (gen_clrmem_short (newdst, 5614 GEN_INT (l > 256 ? 255 : l - 1))); 5615 } 5616 else 5617 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by 5618 setting first byte to val and using a 256 byte mvc with one 5619 byte overlap to propagate the byte. */ 5620 for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257) 5621 { 5622 rtx newdst = adjust_address (dst, BLKmode, o); 5623 emit_move_insn (adjust_address (dst, QImode, o), val); 5624 if (l > 1) 5625 { 5626 rtx newdstp1 = adjust_address (dst, BLKmode, o + 1); 5627 emit_insn (gen_cpymem_short (newdstp1, newdst, 5628 GEN_INT (l > 257 ? 255 : l - 2))); 5629 } 5630 } 5631 } 5632 5633 else if (TARGET_MVCLE) 5634 { 5635 val = force_not_mem (convert_modes (Pmode, QImode, val, 1)); 5636 if (TARGET_64BIT) 5637 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1), 5638 val)); 5639 else 5640 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1), 5641 val)); 5642 } 5643 5644 else 5645 { 5646 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX; 5647 rtx_code_label *loop_start_label = gen_label_rtx (); 5648 rtx_code_label *onebyte_end_label = gen_label_rtx (); 5649 rtx_code_label *zerobyte_end_label = gen_label_rtx (); 5650 rtx_code_label *restbyte_end_label = gen_label_rtx (); 5651 machine_mode mode; 5652 5653 mode = GET_MODE (len); 5654 if (mode == VOIDmode) 5655 mode = Pmode; 5656 5657 dst_addr = gen_reg_rtx (Pmode); 5658 count = gen_reg_rtx (mode); 5659 blocks = gen_reg_rtx (mode); 5660 5661 convert_move (count, len, 1); 5662 emit_cmp_and_jump_insns (count, const0_rtx, 5663 EQ, NULL_RTX, mode, 1, zerobyte_end_label, 5664 profile_probability::very_unlikely ()); 5665 5666 /* We need to make a copy of the target address since memset is 5667 supposed to return it unmodified. We have to make it here 5668 already since the new reg is used at onebyte_end_label. */ 5669 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX)); 5670 dst = change_address (dst, VOIDmode, dst_addr); 5671 5672 if (val != const0_rtx) 5673 { 5674 /* When using the overlapping mvc the original target 5675 address is only accessed as single byte entity (even by 5676 the mvc reading this value). */ 5677 set_mem_size (dst, 1); 5678 dstp1 = adjust_address (dst, VOIDmode, 1); 5679 emit_cmp_and_jump_insns (count, 5680 const1_rtx, EQ, NULL_RTX, mode, 1, 5681 onebyte_end_label, 5682 profile_probability::very_unlikely ()); 5683 } 5684 5685 /* There is one unconditional (mvi+mvc)/xc after the loop 5686 dealing with the rest of the bytes, subtracting two (mvi+mvc) 5687 or one (xc) here leaves this number of bytes to be handled by 5688 it. */ 5689 temp = expand_binop (mode, add_optab, count, 5690 val == const0_rtx ? 
constm1_rtx : GEN_INT (-2), 5691 count, 1, OPTAB_DIRECT); 5692 if (temp != count) 5693 emit_move_insn (count, temp); 5694 5695 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1, 5696 OPTAB_DIRECT); 5697 if (temp != blocks) 5698 emit_move_insn (blocks, temp); 5699 5700 emit_cmp_and_jump_insns (blocks, const0_rtx, 5701 EQ, NULL_RTX, mode, 1, restbyte_end_label); 5702 5703 emit_jump (loop_start_label); 5704 5705 if (val != const0_rtx) 5706 { 5707 /* The 1 byte != 0 special case. Not handled efficiently 5708 since we require two jumps for that. However, this 5709 should be very rare. */ 5710 emit_label (onebyte_end_label); 5711 emit_move_insn (adjust_address (dst, QImode, 0), val); 5712 emit_jump (zerobyte_end_label); 5713 } 5714 5715 emit_label (loop_start_label); 5716 5717 if (TARGET_SETMEM_PFD (val, len)) 5718 { 5719 /* Issue a write prefetch. */ 5720 rtx distance = GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE); 5721 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, distance), 5722 const1_rtx, const0_rtx); 5723 emit_insn (prefetch); 5724 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true; 5725 } 5726 5727 if (val == const0_rtx) 5728 emit_insn (gen_clrmem_short (dst, GEN_INT (255))); 5729 else 5730 { 5731 /* Set the first byte in the block to the value and use an 5732 overlapping mvc for the block. */ 5733 emit_move_insn (adjust_address (dst, QImode, 0), val); 5734 emit_insn (gen_cpymem_short (dstp1, dst, GEN_INT (254))); 5735 } 5736 s390_load_address (dst_addr, 5737 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256))); 5738 5739 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1, 5740 OPTAB_DIRECT); 5741 if (temp != blocks) 5742 emit_move_insn (blocks, temp); 5743 5744 emit_cmp_and_jump_insns (blocks, const0_rtx, 5745 NE, NULL_RTX, mode, 1, loop_start_label); 5746 5747 emit_label (restbyte_end_label); 5748 5749 if (val == const0_rtx) 5750 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1))); 5751 else 5752 { 5753 /* Set the first byte in the block to the value and use an 5754 overlapping mvc for the block. */ 5755 emit_move_insn (adjust_address (dst, QImode, 0), val); 5756 /* execute only uses the lowest 8 bits of count, which is 5757 exactly what we need here. */ 5758 emit_insn (gen_cpymem_short (dstp1, dst, 5759 convert_to_mode (Pmode, count, 1))); 5760 } 5761 5762 emit_label (zerobyte_end_label); 5763 } 5764} 5765 5766/* Emit code to compare LEN bytes at OP0 with those at OP1, 5767 and return the result in TARGET. */ 5768 5769bool 5770s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len) 5771{ 5772 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM); 5773 rtx tmp; 5774 5775 /* When tuning for z10 or higher we rely on the Glibc functions to 5776 do the right thing. Only for constant lengths below 64k will we 5777 generate inline code. */ 5778 if (s390_tune >= PROCESSOR_2097_Z10 5779 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16))) 5780 return false; 5781 5782 /* As the result of CMPINT is inverted compared to what we need, 5783 we have to swap the operands.
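(memcmp has to return a negative value when the first operand is the smaller one; the sign produced by CMPINT is the other way around, so swapping OP0 and OP1 up front yields the expected result.)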
*/ 5784 tmp = op0; op0 = op1; op1 = tmp; 5785 5786 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256) 5787 { 5788 if (INTVAL (len) > 0) 5789 { 5790 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1))); 5791 emit_insn (gen_cmpint (target, ccreg)); 5792 } 5793 else 5794 emit_move_insn (target, const0_rtx); 5795 } 5796 else if (TARGET_MVCLE) 5797 { 5798 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1))); 5799 emit_insn (gen_cmpint (target, ccreg)); 5800 } 5801 else 5802 { 5803 rtx addr0, addr1, count, blocks, temp; 5804 rtx_code_label *loop_start_label = gen_label_rtx (); 5805 rtx_code_label *loop_end_label = gen_label_rtx (); 5806 rtx_code_label *end_label = gen_label_rtx (); 5807 machine_mode mode; 5808 5809 mode = GET_MODE (len); 5810 if (mode == VOIDmode) 5811 mode = Pmode; 5812 5813 addr0 = gen_reg_rtx (Pmode); 5814 addr1 = gen_reg_rtx (Pmode); 5815 count = gen_reg_rtx (mode); 5816 blocks = gen_reg_rtx (mode); 5817 5818 convert_move (count, len, 1); 5819 emit_cmp_and_jump_insns (count, const0_rtx, 5820 EQ, NULL_RTX, mode, 1, end_label); 5821 5822 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX)); 5823 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX)); 5824 op0 = change_address (op0, VOIDmode, addr0); 5825 op1 = change_address (op1, VOIDmode, addr1); 5826 5827 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1, 5828 OPTAB_DIRECT); 5829 if (temp != count) 5830 emit_move_insn (count, temp); 5831 5832 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1, 5833 OPTAB_DIRECT); 5834 if (temp != blocks) 5835 emit_move_insn (blocks, temp); 5836 5837 emit_cmp_and_jump_insns (blocks, const0_rtx, 5838 EQ, NULL_RTX, mode, 1, loop_end_label); 5839 5840 emit_label (loop_start_label); 5841 5842 if (TARGET_Z10 5843 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512)) 5844 { 5845 rtx prefetch; 5846 5847 /* Issue a read prefetch for the +2 cache line of operand 1. */ 5848 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)), 5849 const0_rtx, const0_rtx); 5850 emit_insn (prefetch); 5851 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true; 5852 5853 /* Issue a read prefetch for the +2 cache line of operand 2. 
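The 512 byte offset used for both prefetches corresponds to two 256 byte cache lines ahead of the current compare position.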
*/ 5854 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)), 5855 const0_rtx, const0_rtx); 5856 emit_insn (prefetch); 5857 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true; 5858 } 5859 5860 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255))); 5861 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx); 5862 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp, 5863 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx); 5864 temp = gen_rtx_SET (pc_rtx, temp); 5865 emit_jump_insn (temp); 5866 5867 s390_load_address (addr0, 5868 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256))); 5869 s390_load_address (addr1, 5870 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256))); 5871 5872 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1, 5873 OPTAB_DIRECT); 5874 if (temp != blocks) 5875 emit_move_insn (blocks, temp); 5876 5877 emit_cmp_and_jump_insns (blocks, const0_rtx, 5878 EQ, NULL_RTX, mode, 1, loop_end_label); 5879 5880 emit_jump (loop_start_label); 5881 emit_label (loop_end_label); 5882 5883 emit_insn (gen_cmpmem_short (op0, op1, 5884 convert_to_mode (Pmode, count, 1))); 5885 emit_label (end_label); 5886 5887 emit_insn (gen_cmpint (target, ccreg)); 5888 } 5889 return true; 5890 } 5891 5892 /* Emit a conditional jump to LABEL for condition code mask MASK using 5893 comparison operator COMPARISON. Return the emitted jump insn. */ 5894 5895 static rtx_insn * 5896 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label) 5897 { 5898 rtx temp; 5899 5900 gcc_assert (comparison == EQ || comparison == NE); 5901 gcc_assert (mask > 0 && mask < 15); 5902 5903 temp = gen_rtx_fmt_ee (comparison, VOIDmode, 5904 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask)); 5905 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp, 5906 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx); 5907 temp = gen_rtx_SET (pc_rtx, temp); 5908 return emit_jump_insn (temp); 5909 } 5910 5911 /* Emit the instructions to implement strlen of STRING and store the 5912 result in TARGET. The string has the known ALIGNMENT. This 5913 version uses vector instructions and is therefore not appropriate 5914 for targets prior to z13. */ 5915 5916 void 5917 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment) 5918 { 5919 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode); 5920 rtx str_reg = gen_reg_rtx (V16QImode); 5921 rtx str_addr_base_reg = gen_reg_rtx (Pmode); 5922 rtx str_idx_reg = gen_reg_rtx (Pmode); 5923 rtx result_reg = gen_reg_rtx (V16QImode); 5924 rtx is_aligned_label = gen_label_rtx (); 5925 rtx into_loop_label = NULL_RTX; 5926 rtx loop_start_label = gen_label_rtx (); 5927 rtx temp; 5928 rtx len = gen_reg_rtx (QImode); 5929 rtx cond; 5930 5931 s390_load_address (str_addr_base_reg, XEXP (string, 0)); 5932 emit_move_insn (str_idx_reg, const0_rtx); 5933 5934 if (INTVAL (alignment) < 16) 5935 { 5936 /* Check whether the address happens to be aligned properly so 5937 we can jump directly to the aligned loop.
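If not, the first load is done via vll below, which loads only up to the next 16-byte boundary and thus cannot touch an unmapped page beyond the string.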
*/ 5938 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode, 5939 str_addr_base_reg, GEN_INT (15)), 5940 const0_rtx, EQ, NULL_RTX, 5941 Pmode, 1, is_aligned_label); 5942 5943 temp = gen_reg_rtx (Pmode); 5944 temp = expand_binop (Pmode, and_optab, str_addr_base_reg, 5945 GEN_INT (15), temp, 1, OPTAB_DIRECT); 5946 gcc_assert (REG_P (temp)); 5947 highest_index_to_load_reg = 5948 expand_binop (Pmode, sub_optab, GEN_INT (15), temp, 5949 highest_index_to_load_reg, 1, OPTAB_DIRECT); 5950 gcc_assert (REG_P (highest_index_to_load_reg)); 5951 emit_insn (gen_vllv16qi (str_reg, 5952 convert_to_mode (SImode, highest_index_to_load_reg, 1), 5953 gen_rtx_MEM (BLKmode, str_addr_base_reg))); 5954 5955 into_loop_label = gen_label_rtx (); 5956 s390_emit_jump (into_loop_label, NULL_RTX); 5957 emit_barrier (); 5958 } 5959 5960 emit_label (is_aligned_label); 5961 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1; 5962 5963 /* Reaching this point we are only performing 16-byte aligned 5964 loads. */ 5965 emit_move_insn (highest_index_to_load_reg, GEN_INT (15)); 5966 5967 emit_label (loop_start_label); 5968 LABEL_NUSES (loop_start_label) = 1; 5969 5970 /* Load 16 bytes of the string into VR. */ 5971 emit_move_insn (str_reg, 5972 gen_rtx_MEM (V16QImode, 5973 gen_rtx_PLUS (Pmode, str_idx_reg, 5974 str_addr_base_reg))); 5975 if (into_loop_label != NULL_RTX) 5976 { 5977 emit_label (into_loop_label); 5978 LABEL_NUSES (into_loop_label) = 1; 5979 } 5980 5981 /* Increment string index by 16 bytes. */ 5982 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16), 5983 str_idx_reg, 1, OPTAB_DIRECT); 5984 5985 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg, 5986 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS))); 5987 5988 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label), 5989 REG_BR_PROB, 5990 profile_probability::very_likely ().to_reg_br_prob_note ()); 5991 emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7))); 5992 5993 /* If the string pointer wasn't aligned we have loaded less than 16 5994 bytes and the remaining bytes got filled with zeros (by vll). 5995 Now we have to check whether the resulting index lies within the 5996 bytes actually part of the string. */ 5997 5998 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1), 5999 highest_index_to_load_reg); 6000 s390_load_address (highest_index_to_load_reg, 6001 gen_rtx_PLUS (Pmode, highest_index_to_load_reg, 6002 const1_rtx)); 6003 if (TARGET_64BIT) 6004 emit_insn (gen_movdicc (str_idx_reg, cond, 6005 highest_index_to_load_reg, str_idx_reg)); 6006 else 6007 emit_insn (gen_movsicc (str_idx_reg, cond, 6008 highest_index_to_load_reg, str_idx_reg)); 6009 6010 add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond), 6011 profile_probability::very_unlikely ()); 6012 6013 expand_binop (Pmode, add_optab, str_idx_reg, 6014 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT); 6015 /* FIXME: len is already zero extended - so avoid the llgcr emitted 6016 here.
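(convert_to_mode cannot see this and emits the zero extension unconditionally.)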
*/ 6017 temp = expand_binop (Pmode, add_optab, str_idx_reg, 6018 convert_to_mode (Pmode, len, 1), 6019 target, 1, OPTAB_DIRECT); 6020 if (temp != target) 6021 emit_move_insn (target, temp); 6022 } 6023 6024 void 6025 s390_expand_vec_movstr (rtx result, rtx dst, rtx src) 6026 { 6027 rtx temp = gen_reg_rtx (Pmode); 6028 rtx src_addr = XEXP (src, 0); 6029 rtx dst_addr = XEXP (dst, 0); 6030 rtx src_addr_reg = gen_reg_rtx (Pmode); 6031 rtx dst_addr_reg = gen_reg_rtx (Pmode); 6032 rtx offset = gen_reg_rtx (Pmode); 6033 rtx vsrc = gen_reg_rtx (V16QImode); 6034 rtx vpos = gen_reg_rtx (V16QImode); 6035 rtx loadlen = gen_reg_rtx (SImode); 6036 rtx gpos_qi = gen_reg_rtx (QImode); 6037 rtx gpos = gen_reg_rtx (SImode); 6038 rtx done_label = gen_label_rtx (); 6039 rtx loop_label = gen_label_rtx (); 6040 rtx exit_label = gen_label_rtx (); 6041 rtx full_label = gen_label_rtx (); 6042 6043 /* Perform a quick check for the string ending within the first up 6044 to 16 bytes and exit early if successful. */ 6045 6046 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6))); 6047 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6))); 6048 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc)); 6049 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7))); 6050 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0)); 6051 /* gpos is the byte index if a zero was found and 16 otherwise. 6052 So if it is lower than the number of loaded bytes we have a hit. */ 6053 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1, 6054 full_label); 6055 emit_insn (gen_vstlv16qi (vsrc, gpos, dst)); 6056 6057 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result, 6058 1, OPTAB_DIRECT); 6059 emit_jump (exit_label); 6060 emit_barrier (); 6061 6062 emit_label (full_label); 6063 LABEL_NUSES (full_label) = 1; 6064 6065 /* Calculate `offset' so that src + offset points to the last byte 6066 before 16 byte alignment. */ 6067 6068 /* temp = src_addr & 0xf */ 6069 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp, 6070 1, OPTAB_DIRECT); 6071 6072 /* offset = 0xf - temp */ 6073 emit_move_insn (offset, GEN_INT (15)); 6074 force_expand_binop (Pmode, sub_optab, offset, temp, offset, 6075 1, OPTAB_DIRECT); 6076 6077 /* Store `offset' bytes in the destination string. The quick check 6078 has loaded at least `offset' bytes into vsrc. */ 6079 6080 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst)); 6081 6082 /* Advance to the next byte to be loaded. */ 6083 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset, 6084 1, OPTAB_DIRECT); 6085 6086 /* Make sure the addresses are single regs which can be used as a 6087 base. */ 6088 emit_move_insn (src_addr_reg, src_addr); 6089 emit_move_insn (dst_addr_reg, dst_addr); 6090 6091 /* MAIN LOOP */ 6092 6093 emit_label (loop_label); 6094 LABEL_NUSES (loop_label) = 1; 6095 6096 emit_move_insn (vsrc, 6097 gen_rtx_MEM (V16QImode, 6098 gen_rtx_PLUS (Pmode, src_addr_reg, offset))); 6099 6100 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc, 6101 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS))); 6102 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label), 6103 REG_BR_PROB, profile_probability::very_unlikely () 6104 .to_reg_br_prob_note ()); 6105 6106 emit_move_insn (gen_rtx_MEM (V16QImode, 6107 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)), 6108 vsrc); 6109 /* offset += 16 */ 6110 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16), 6111 offset, 1, OPTAB_DIRECT); 6112 6113 emit_jump (loop_label); 6114 emit_barrier (); 6115 6116 /* REGULAR EXIT */ 6117 6118 /* We are done.
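vfenes has found the string terminator within the current block.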
Add the offset of the zero character to the dst_addr 6119 pointer to get the result. */ 6120 6121 emit_label (done_label); 6122 LABEL_NUSES (done_label) = 1; 6123 6124 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg, 6125 1, OPTAB_DIRECT); 6126 6127 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7))); 6128 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0)); 6129 6130 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg))); 6131 6132 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result, 6133 1, OPTAB_DIRECT); 6134 6135 /* EARLY EXIT */ 6136 6137 emit_label (exit_label); 6138 LABEL_NUSES (exit_label) = 1; 6139} 6140 6141 6142/* Expand conditional increment or decrement using alc/slb instructions. 6143 Should generate code setting DST to either SRC or SRC + INCREMENT, 6144 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1. 6145 Returns true if successful, false otherwise. 6146 6147 That makes it possible to implement some if-constructs without jumps e.g.: 6148 (borrow = CC0 | CC1 and carry = CC2 | CC3) 6149 unsigned int a, b, c; 6150 if (a < b) c++; -> CCU b > a -> CC2; c += carry; 6151 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow; 6152 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry; 6153 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow; 6154 6155 Checks for EQ and NE with a nonzero value need an additional xor e.g.: 6156 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry; 6157 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow; 6158 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry; 6159 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */ 6160 6161bool 6162s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1, 6163 rtx dst, rtx src, rtx increment) 6164{ 6165 machine_mode cmp_mode; 6166 machine_mode cc_mode; 6167 rtx op_res; 6168 rtx insn; 6169 rtvec p; 6170 int ret; 6171 6172 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode) 6173 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode)) 6174 cmp_mode = SImode; 6175 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode) 6176 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode)) 6177 cmp_mode = DImode; 6178 else 6179 return false; 6180 6181 /* Try ADD LOGICAL WITH CARRY. */ 6182 if (increment == const1_rtx) 6183 { 6184 /* Determine CC mode to use. */ 6185 if (cmp_code == EQ || cmp_code == NE) 6186 { 6187 if (cmp_op1 != const0_rtx) 6188 { 6189 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1, 6190 NULL_RTX, 0, OPTAB_WIDEN); 6191 cmp_op1 = const0_rtx; 6192 } 6193 6194 cmp_code = cmp_code == EQ ? LEU : GTU; 6195 } 6196 6197 if (cmp_code == LTU || cmp_code == LEU) 6198 { 6199 rtx tem = cmp_op0; 6200 cmp_op0 = cmp_op1; 6201 cmp_op1 = tem; 6202 cmp_code = swap_condition (cmp_code); 6203 } 6204 6205 switch (cmp_code) 6206 { 6207 case GTU: 6208 cc_mode = CCUmode; 6209 break; 6210 6211 case GEU: 6212 cc_mode = CCL3mode; 6213 break; 6214 6215 default: 6216 return false; 6217 } 6218 6219 /* Emit comparison instruction pattern. */ 6220 if (!register_operand (cmp_op0, cmp_mode)) 6221 cmp_op0 = force_reg (cmp_mode, cmp_op0); 6222 6223 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM), 6224 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1)); 6225 /* We use insn_invalid_p here to add clobbers if required. 
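Recognizing the insn also attaches whatever clobbers the matched pattern needs.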
*/ 6226 ret = insn_invalid_p (emit_insn (insn), false); 6227 gcc_assert (!ret); 6228 6229 /* Emit ALC instruction pattern. */ 6230 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst), 6231 gen_rtx_REG (cc_mode, CC_REGNUM), 6232 const0_rtx); 6233 6234 if (src != const0_rtx) 6235 { 6236 if (!register_operand (src, GET_MODE (dst))) 6237 src = force_reg (GET_MODE (dst), src); 6238 6239 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src); 6240 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx); 6241 } 6242 6243 p = rtvec_alloc (2); 6244 RTVEC_ELT (p, 0) = 6245 gen_rtx_SET (dst, op_res); 6246 RTVEC_ELT (p, 1) = 6247 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM)); 6248 emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); 6249 6250 return true; 6251 } 6252 6253 /* Try SUBTRACT LOGICAL WITH BORROW. */ 6254 if (increment == constm1_rtx) 6255 { 6256 /* Determine CC mode to use. */ 6257 if (cmp_code == EQ || cmp_code == NE) 6258 { 6259 if (cmp_op1 != const0_rtx) 6260 { 6261 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1, 6262 NULL_RTX, 0, OPTAB_WIDEN); 6263 cmp_op1 = const0_rtx; 6264 } 6265 6266 cmp_code = cmp_code == EQ ? LEU : GTU; 6267 } 6268 6269 if (cmp_code == GTU || cmp_code == GEU) 6270 { 6271 rtx tem = cmp_op0; 6272 cmp_op0 = cmp_op1; 6273 cmp_op1 = tem; 6274 cmp_code = swap_condition (cmp_code); 6275 } 6276 6277 switch (cmp_code) 6278 { 6279 case LEU: 6280 cc_mode = CCUmode; 6281 break; 6282 6283 case LTU: 6284 cc_mode = CCL3mode; 6285 break; 6286 6287 default: 6288 return false; 6289 } 6290 6291 /* Emit comparison instruction pattern. */ 6292 if (!register_operand (cmp_op0, cmp_mode)) 6293 cmp_op0 = force_reg (cmp_mode, cmp_op0); 6294 6295 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM), 6296 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1)); 6297 /* We use insn_invalid_p here to add clobbers if required. */ 6298 ret = insn_invalid_p (emit_insn (insn), false); 6299 gcc_assert (!ret); 6300 6301 /* Emit SLB instruction pattern. */ 6302 if (!register_operand (src, GET_MODE (dst))) 6303 src = force_reg (GET_MODE (dst), src); 6304 6305 op_res = gen_rtx_MINUS (GET_MODE (dst), 6306 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx), 6307 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst), 6308 gen_rtx_REG (cc_mode, CC_REGNUM), 6309 const0_rtx)); 6310 p = rtvec_alloc (2); 6311 RTVEC_ELT (p, 0) = 6312 gen_rtx_SET (dst, op_res); 6313 RTVEC_ELT (p, 1) = 6314 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM)); 6315 emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); 6316 6317 return true; 6318 } 6319 6320 return false; 6321} 6322 6323/* Expand code for the insv template. Return true if successful. */ 6324 6325bool 6326s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src) 6327{ 6328 int bitsize = INTVAL (op1); 6329 int bitpos = INTVAL (op2); 6330 machine_mode mode = GET_MODE (dest); 6331 machine_mode smode; 6332 int smode_bsize, mode_bsize; 6333 rtx op, clobber; 6334 6335 if (bitsize + bitpos > GET_MODE_BITSIZE (mode)) 6336 return false; 6337 6338 /* Generate INSERT IMMEDIATE (IILL et al). */ 6339 /* (set (ze (reg)) (const_int)). 
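The constant is split into 16-bit chunks, or 32-bit chunks where the extended-immediate facility allows it, each inserted with a separate insn.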
*/ 6340 if (TARGET_ZARCH 6341 && register_operand (dest, word_mode) 6342 && (bitpos % 16) == 0 6343 && (bitsize % 16) == 0 6344 && const_int_operand (src, VOIDmode)) 6345 { 6346 HOST_WIDE_INT val = INTVAL (src); 6347 int regpos = bitpos + bitsize; 6348 6349 while (regpos > bitpos) 6350 { 6351 machine_mode putmode; 6352 int putsize; 6353 6354 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32)) 6355 putmode = SImode; 6356 else 6357 putmode = HImode; 6358 6359 putsize = GET_MODE_BITSIZE (putmode); 6360 regpos -= putsize; 6361 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, 6362 GEN_INT (putsize), 6363 GEN_INT (regpos)), 6364 gen_int_mode (val, putmode)); 6365 val >>= putsize; 6366 } 6367 gcc_assert (regpos == bitpos); 6368 return true; 6369 } 6370 6371 smode = smallest_int_mode_for_size (bitsize); 6372 smode_bsize = GET_MODE_BITSIZE (smode); 6373 mode_bsize = GET_MODE_BITSIZE (mode); 6374 6375 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */ 6376 if (bitpos == 0 6377 && (bitsize % BITS_PER_UNIT) == 0 6378 && MEM_P (dest) 6379 && (register_operand (src, word_mode) 6380 || const_int_operand (src, VOIDmode))) 6381 { 6382 /* Emit standard pattern if possible. */ 6383 if (smode_bsize == bitsize) 6384 { 6385 emit_move_insn (adjust_address (dest, smode, 0), 6386 gen_lowpart (smode, src)); 6387 return true; 6388 } 6389 6390 /* (set (ze (mem)) (const_int)). */ 6391 else if (const_int_operand (src, VOIDmode)) 6392 { 6393 int size = bitsize / BITS_PER_UNIT; 6394 rtx src_mem = adjust_address (force_const_mem (word_mode, src), 6395 BLKmode, 6396 UNITS_PER_WORD - size); 6397 6398 dest = adjust_address (dest, BLKmode, 0); 6399 set_mem_size (dest, size); 6400 s390_expand_cpymem (dest, src_mem, GEN_INT (size)); 6401 return true; 6402 } 6403 6404 /* (set (ze (mem)) (reg)). */ 6405 else if (register_operand (src, word_mode)) 6406 { 6407 if (bitsize <= 32) 6408 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1, 6409 const0_rtx), src); 6410 else 6411 { 6412 /* Emit st,stcmh sequence. */ 6413 int stcmh_width = bitsize - 32; 6414 int size = stcmh_width / BITS_PER_UNIT; 6415 6416 emit_move_insn (adjust_address (dest, SImode, size), 6417 gen_lowpart (SImode, src)); 6418 set_mem_size (dest, size); 6419 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, 6420 GEN_INT (stcmh_width), 6421 const0_rtx), 6422 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32))); 6423 } 6424 return true; 6425 } 6426 } 6427 6428 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */ 6429 if ((bitpos % BITS_PER_UNIT) == 0 6430 && (bitsize % BITS_PER_UNIT) == 0 6431 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32) 6432 && MEM_P (src) 6433 && (mode == DImode || mode == SImode) 6434 && register_operand (dest, mode)) 6435 { 6436 /* Emit a strict_low_part pattern if possible. */ 6437 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize) 6438 { 6439 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest)); 6440 op = gen_rtx_SET (op, gen_lowpart (smode, src)); 6441 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM)); 6442 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber))); 6443 return true; 6444 } 6445 6446 /* ??? There are more powerful versions of ICM that are not 6447 completely represented in the md file. */ 6448 } 6449 6450 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). 
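The (set (zero_extract ...) ...) form emitted below is matched by the corresponding risbg patterns in the md file.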
*/ 6451 if (TARGET_Z10 && (mode == DImode || mode == SImode)) 6452 { 6453 machine_mode mode_s = GET_MODE (src); 6454 6455 if (CONSTANT_P (src)) 6456 { 6457 /* For constant zero values the representation with AND 6458 appears to be folded in more situations than the (set 6459 (zero_extract) ...). 6460 We only do this when the start and end of the bitfield 6461 remain in the same SImode chunk. That way nihf or nilf 6462 can be used. 6463 The AND patterns might still generate a risbg for this. */ 6464 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32) 6465 return false; 6466 else 6467 src = force_reg (mode, src); 6468 } 6469 else if (mode_s != mode) 6470 { 6471 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize); 6472 src = force_reg (mode_s, src); 6473 src = gen_lowpart (mode, src); 6474 } 6475 6476 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2); 6477 op = gen_rtx_SET (op, src); 6478 6479 if (!TARGET_ZEC12) 6480 { 6481 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM)); 6482 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)); 6483 } 6484 emit_insn (op); 6485 6486 return true; 6487 } 6488 6489 return false; 6490 } 6491 6492 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a 6493 register that holds VAL of mode MODE shifted by COUNT bits. */ 6494 6495 static inline rtx 6496 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count) 6497 { 6498 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)), 6499 NULL_RTX, 1, OPTAB_DIRECT); 6500 return expand_simple_binop (SImode, ASHIFT, val, count, 6501 NULL_RTX, 1, OPTAB_DIRECT); 6502 } 6503 6504 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store 6505 the result in TARGET. */ 6506 6507 void 6508 s390_expand_vec_compare (rtx target, enum rtx_code cond, 6509 rtx cmp_op1, rtx cmp_op2) 6510 { 6511 machine_mode mode = GET_MODE (target); 6512 bool neg_p = false, swap_p = false; 6513 rtx tmp; 6514 6515 if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT) 6516 { 6517 switch (cond) 6518 { 6519 /* NE: a != b -> !(a == b) */ 6520 case NE: cond = EQ; neg_p = true; break; 6521 case UNGT: 6522 emit_insn (gen_vec_cmpungt (target, cmp_op1, cmp_op2)); 6523 return; 6524 case UNGE: 6525 emit_insn (gen_vec_cmpunge (target, cmp_op1, cmp_op2)); 6526 return; 6527 case LE: cond = GE; swap_p = true; break; 6528 /* UNLE: (a u<= b) -> (b u>= a). */ 6529 case UNLE: 6530 emit_insn (gen_vec_cmpunge (target, cmp_op2, cmp_op1)); 6531 return; 6532 /* LT: a < b -> b > a */ 6533 case LT: cond = GT; swap_p = true; break; 6534 /* UNLT: (a u< b) -> (b u> a).
*/ 6535 case UNLT: 6536 emit_insn (gen_vec_cmpungt (target, cmp_op2, cmp_op1)); 6537 return; 6538 case UNEQ: 6539 emit_insn (gen_vec_cmpuneq (target, cmp_op1, cmp_op2)); 6540 return; 6541 case LTGT: 6542 emit_insn (gen_vec_cmpltgt (target, cmp_op1, cmp_op2)); 6543 return; 6544 case ORDERED: 6545 emit_insn (gen_vec_cmpordered (target, cmp_op1, cmp_op2)); 6546 return; 6547 case UNORDERED: 6548 emit_insn (gen_vec_cmpunordered (target, cmp_op1, cmp_op2)); 6549 return; 6550 default: break; 6551 } 6552 } 6553 else 6554 { 6555 switch (cond) 6556 { 6557 /* NE: a != b -> !(a == b) */ 6558 case NE: cond = EQ; neg_p = true; break; 6559 /* GE: a >= b -> !(b > a) */ 6560 case GE: cond = GT; neg_p = true; swap_p = true; break; 6561 /* GEU: a >= b -> !(b > a) */ 6562 case GEU: cond = GTU; neg_p = true; swap_p = true; break; 6563 /* LE: a <= b -> !(a > b) */ 6564 case LE: cond = GT; neg_p = true; break; 6565 /* LEU: a <= b -> !(a > b) */ 6566 case LEU: cond = GTU; neg_p = true; break; 6567 /* LT: a < b -> b > a */ 6568 case LT: cond = GT; swap_p = true; break; 6569 /* LTU: a < b -> b > a */ 6570 case LTU: cond = GTU; swap_p = true; break; 6571 default: break; 6572 } 6573 } 6574 6575 if (swap_p) 6576 { 6577 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp; 6578 } 6579 6580 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond, 6581 mode, 6582 cmp_op1, cmp_op2))); 6583 if (neg_p) 6584 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target))); 6585} 6586 6587/* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into 6588 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the 6589 elements in CMP1 and CMP2 fulfill the comparison. 6590 This function is only used to emit patterns for the vx builtins and 6591 therefore only handles comparison codes required by the 6592 builtins. */ 6593void 6594s390_expand_vec_compare_cc (rtx target, enum rtx_code code, 6595 rtx cmp1, rtx cmp2, bool all_p) 6596{ 6597 machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode; 6598 rtx tmp_reg = gen_reg_rtx (SImode); 6599 bool swap_p = false; 6600 6601 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT) 6602 { 6603 switch (code) 6604 { 6605 case EQ: 6606 case NE: 6607 cc_producer_mode = CCVEQmode; 6608 break; 6609 case GE: 6610 case LT: 6611 code = swap_condition (code); 6612 swap_p = true; 6613 /* fallthrough */ 6614 case GT: 6615 case LE: 6616 cc_producer_mode = CCVIHmode; 6617 break; 6618 case GEU: 6619 case LTU: 6620 code = swap_condition (code); 6621 swap_p = true; 6622 /* fallthrough */ 6623 case GTU: 6624 case LEU: 6625 cc_producer_mode = CCVIHUmode; 6626 break; 6627 default: 6628 gcc_unreachable (); 6629 } 6630 6631 scratch_mode = GET_MODE (cmp1); 6632 /* These codes represent inverted CC interpretations. Inverting 6633 an ALL CC mode results in an ANY CC mode and the other way 6634 around. Invert the all_p flag here to compensate for 6635 that. */ 6636 if (code == NE || code == LE || code == LEU) 6637 all_p = !all_p; 6638 6639 cc_consumer_mode = all_p ? 
CCVIALLmode : CCVIANYmode; 6640 } 6641 else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT) 6642 { 6643 bool inv_p = false; 6644 6645 switch (code) 6646 { 6647 case EQ: cc_producer_mode = CCVEQmode; break; 6648 case NE: cc_producer_mode = CCVEQmode; inv_p = true; break; 6649 case GT: cc_producer_mode = CCVFHmode; break; 6650 case GE: cc_producer_mode = CCVFHEmode; break; 6651 case UNLE: cc_producer_mode = CCVFHmode; inv_p = true; break; 6652 case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break; 6653 case LT: cc_producer_mode = CCVFHmode; code = GT; swap_p = true; break; 6654 case LE: cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break; 6655 default: gcc_unreachable (); 6656 } 6657 scratch_mode = related_int_vector_mode (GET_MODE (cmp1)).require (); 6658 6659 if (inv_p) 6660 all_p = !all_p; 6661 6662 cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode; 6663 } 6664 else 6665 gcc_unreachable (); 6666 6667 if (swap_p) 6668 { 6669 rtx tmp = cmp2; 6670 cmp2 = cmp1; 6671 cmp1 = tmp; 6672 } 6673 6674 emit_insn (gen_rtx_PARALLEL (VOIDmode, 6675 gen_rtvec (2, gen_rtx_SET ( 6676 gen_rtx_REG (cc_producer_mode, CC_REGNUM), 6677 gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)), 6678 gen_rtx_CLOBBER (VOIDmode, 6679 gen_rtx_SCRATCH (scratch_mode))))); 6680 emit_move_insn (target, const0_rtx); 6681 emit_move_insn (tmp_reg, const1_rtx); 6682 6683 emit_move_insn (target, 6684 gen_rtx_IF_THEN_ELSE (SImode, 6685 gen_rtx_fmt_ee (code, VOIDmode, 6686 gen_rtx_REG (cc_consumer_mode, CC_REGNUM), 6687 const0_rtx), 6688 tmp_reg, target)); 6689 } 6690 6691 /* Invert the comparison CODE applied to a CC mode. This is only safe 6692 if we know whether the result was created by a floating point 6693 compare or not. For the CCV modes this is encoded as part of the 6694 mode. */ 6695 enum rtx_code 6696 s390_reverse_condition (machine_mode mode, enum rtx_code code) 6697 { 6698 /* Reversal of FP compares needs care -- an ordered compare 6699 becomes an unordered compare and vice versa. */ 6700 if (mode == CCVFALLmode || mode == CCVFANYmode || mode == CCSFPSmode) 6701 return reverse_condition_maybe_unordered (code); 6702 else if (mode == CCVIALLmode || mode == CCVIANYmode) 6703 return reverse_condition (code); 6704 else 6705 gcc_unreachable (); 6706 } 6707 6708 /* Generate a vector comparison expression loading either elements of 6709 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1 6710 and CMP_OP2. */ 6711 6712 void 6713 s390_expand_vcond (rtx target, rtx then, rtx els, 6714 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2) 6715 { 6716 rtx tmp; 6717 machine_mode result_mode; 6718 rtx result_target; 6719 6720 machine_mode target_mode = GET_MODE (target); 6721 machine_mode cmp_mode = GET_MODE (cmp_op1); 6722 rtx op = (cond == LT) ? els : then; 6723 6724 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31 6725 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise 6726 for short and byte (x >> 15 and x >> 7 respectively). */ 6727 if ((cond == LT || cond == GE) 6728 && target_mode == cmp_mode 6729 && cmp_op2 == CONST0_RTX (cmp_mode) 6730 && op == CONST0_RTX (target_mode) 6731 && s390_vector_mode_supported_p (target_mode) 6732 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT) 6733 { 6734 rtx negop = (cond == LT) ? then : els; 6735 6736 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1; 6737 6738 /* if x < 0 ? 1 : 0 or if x >= 0 ?
0 : 1 */ 6739 if (negop == CONST1_RTX (target_mode)) 6740 { 6741 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1, 6742 GEN_INT (shift), target, 6743 1, OPTAB_DIRECT); 6744 if (res != target) 6745 emit_move_insn (target, res); 6746 return; 6747 } 6748 6749 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */ 6750 else if (all_ones_operand (negop, target_mode)) 6751 { 6752 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1, 6753 GEN_INT (shift), target, 6754 0, OPTAB_DIRECT); 6755 if (res != target) 6756 emit_move_insn (target, res); 6757 return; 6758 } 6759 } 6760 6761 /* We always use an integral type vector to hold the comparison 6762 result. */ 6763 result_mode = related_int_vector_mode (cmp_mode).require (); 6764 result_target = gen_reg_rtx (result_mode); 6765 6766 /* We allow vector immediates as comparison operands that 6767 can be handled by the optimization above but not by the 6768 following code. Hence, force them into registers here. */ 6769 if (!REG_P (cmp_op1)) 6770 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1); 6771 6772 if (!REG_P (cmp_op2)) 6773 cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2); 6774 6775 s390_expand_vec_compare (result_target, cond, 6776 cmp_op1, cmp_op2); 6777 6778 /* If the results are supposed to be either -1 or 0 we are done 6779 since this is what our compare instructions generate anyway. */ 6780 if (all_ones_operand (then, GET_MODE (then)) 6781 && const0_operand (els, GET_MODE (els))) 6782 { 6783 emit_move_insn (target, gen_rtx_SUBREG (target_mode, 6784 result_target, 0)); 6785 return; 6786 } 6787 6788 /* Otherwise we will do a vsel afterwards. */ 6789 /* This gets triggered e.g. 6790 with gcc.c-torture/compile/pr53410-1.c */ 6791 if (!REG_P (then)) 6792 then = force_reg (target_mode, then); 6793 6794 if (!REG_P (els)) 6795 els = force_reg (target_mode, els); 6796 6797 tmp = gen_rtx_fmt_ee (EQ, VOIDmode, 6798 result_target, 6799 CONST0_RTX (result_mode)); 6800 6801 /* We compared the result against zero above so we have to swap then 6802 and els here. */ 6803 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then); 6804 6805 gcc_assert (target_mode == GET_MODE (then)); 6806 emit_insn (gen_rtx_SET (target, tmp)); 6807} 6808 6809/* Emit the RTX necessary to initialize the vector TARGET with values 6810 in VALS. */ 6811void 6812s390_expand_vec_init (rtx target, rtx vals) 6813{ 6814 machine_mode mode = GET_MODE (target); 6815 machine_mode inner_mode = GET_MODE_INNER (mode); 6816 int n_elts = GET_MODE_NUNITS (mode); 6817 bool all_same = true, all_regs = true, all_const_int = true; 6818 rtx x; 6819 int i; 6820 6821 for (i = 0; i < n_elts; ++i) 6822 { 6823 x = XVECEXP (vals, 0, i); 6824 6825 if (!CONST_INT_P (x)) 6826 all_const_int = false; 6827 6828 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) 6829 all_same = false; 6830 6831 if (!REG_P (x)) 6832 all_regs = false; 6833 } 6834 6835 /* Use vector gen mask or vector gen byte mask if possible. */ 6836 if (all_same && all_const_int 6837 && (XVECEXP (vals, 0, 0) == const0_rtx 6838 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0), 6839 NULL, NULL) 6840 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL))) 6841 { 6842 emit_insn (gen_rtx_SET (target, 6843 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)))); 6844 return; 6845 } 6846 6847 /* Use vector replicate instructions. vlrep/vrepi/vrep */ 6848 if (all_same) 6849 { 6850 rtx elem = XVECEXP (vals, 0, 0); 6851 6852 /* vec_splats accepts general_operand as source. 
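In particular a memory operand can be replicated directly (vlrep); anything else is forced into a register first.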
*/ 6853 if (!general_operand (elem, GET_MODE (elem))) 6854 elem = force_reg (inner_mode, elem); 6855 6856 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem))); 6857 return; 6858 } 6859 6860 if (all_regs 6861 && REG_P (target) 6862 && n_elts == 2 6863 && GET_MODE_SIZE (inner_mode) == 8) 6864 { 6865 /* Use vector load pair. */ 6866 emit_insn (gen_rtx_SET (target, 6867 gen_rtx_VEC_CONCAT (mode, 6868 XVECEXP (vals, 0, 0), 6869 XVECEXP (vals, 0, 1)))); 6870 return; 6871 } 6872 6873 /* Use vector load logical element and zero. */ 6874 if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode)) 6875 { 6876 bool found = true; 6877 6878 x = XVECEXP (vals, 0, 0); 6879 if (memory_operand (x, inner_mode)) 6880 { 6881 for (i = 1; i < n_elts; ++i) 6882 found = found && XVECEXP (vals, 0, i) == const0_rtx; 6883 6884 if (found) 6885 { 6886 machine_mode half_mode = (inner_mode == SFmode 6887 ? V2SFmode : V2SImode); 6888 emit_insn (gen_rtx_SET (target, 6889 gen_rtx_VEC_CONCAT (mode, 6890 gen_rtx_VEC_CONCAT (half_mode, 6891 x, 6892 const0_rtx), 6893 gen_rtx_VEC_CONCAT (half_mode, 6894 const0_rtx, 6895 const0_rtx)))); 6896 return; 6897 } 6898 } 6899 } 6900 6901 /* We are about to set the vector elements one by one. Zero out the 6902 full register first in order to help the data flow framework to 6903 detect it as a full VR set. */ 6904 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode))); 6905 6906 /* Unfortunately the vec_init expander is not allowed to fail. So 6907 we have to implement the fallback ourselves. */ 6908 for (i = 0; i < n_elts; i++) 6909 { 6910 rtx elem = XVECEXP (vals, 0, i); 6911 if (!general_operand (elem, GET_MODE (elem))) 6912 elem = force_reg (inner_mode, elem); 6913 6914 emit_insn (gen_rtx_SET (target, 6915 gen_rtx_UNSPEC (mode, 6916 gen_rtvec (3, elem, 6917 GEN_INT (i), target), 6918 UNSPEC_VEC_SET))); 6919 } 6920 } 6921 6922 /* Structure to hold the initial parameters for a compare_and_swap operation 6923 in HImode and QImode. */ 6924 6925 struct alignment_context 6926 { 6927 rtx memsi; /* SI aligned memory location. */ 6928 rtx shift; /* Bit offset with regard to lsb. */ 6929 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */ 6930 rtx modemaski; /* ~modemask */ 6931 bool aligned; /* True if memory is aligned, false otherwise. */ 6932 }; 6933 6934 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize 6935 structure AC for transparent simplifying, if the memory alignment is known 6936 to be at least 32 bits. MEM is the memory location for the actual operation 6937 and MODE its mode. */ 6938 6939 static void 6940 init_alignment_context (struct alignment_context *ac, rtx mem, 6941 machine_mode mode) 6942 { 6943 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode)); 6944 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode)); 6945 6946 if (ac->aligned) 6947 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */ 6948 else 6949 { 6950 /* Alignment is unknown. */ 6951 rtx byteoffset, addr, align; 6952 6953 /* Force the address into a register. */ 6954 addr = force_reg (Pmode, XEXP (mem, 0)); 6955 6956 /* Align it to SImode. */ 6957 align = expand_simple_binop (Pmode, AND, addr, 6958 GEN_INT (-GET_MODE_SIZE (SImode)), 6959 NULL_RTX, 1, OPTAB_DIRECT); 6960 /* Generate MEM. */ 6961 ac->memsi = gen_rtx_MEM (SImode, align); 6962 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem); 6963 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER); 6964 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode)); 6965 6966 /* Calculate shiftcount.
*/ 6967 byteoffset = expand_simple_binop (Pmode, AND, addr, 6968 GEN_INT (GET_MODE_SIZE (SImode) - 1), 6969 NULL_RTX, 1, OPTAB_DIRECT); 6970 /* As we already have some offset, evaluate the remaining distance. */ 6971 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset, 6972 NULL_RTX, 1, OPTAB_DIRECT); 6973 } 6974 6975 /* Shift is the byte count, but we need the bitcount. */ 6976 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3), 6977 NULL_RTX, 1, OPTAB_DIRECT); 6978 6979 /* Calculate masks. */ 6980 ac->modemask = expand_simple_binop (SImode, ASHIFT, 6981 GEN_INT (GET_MODE_MASK (mode)), 6982 ac->shift, NULL_RTX, 1, OPTAB_DIRECT); 6983 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask, 6984 NULL_RTX, 1); 6985} 6986 6987/* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible, 6988 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and 6989 perform the merge in SEQ2. */ 6990 6991static rtx 6992s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2, 6993 machine_mode mode, rtx val, rtx ins) 6994{ 6995 rtx tmp; 6996 6997 if (ac->aligned) 6998 { 6999 start_sequence (); 7000 tmp = copy_to_mode_reg (SImode, val); 7001 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)), 7002 const0_rtx, ins)) 7003 { 7004 *seq1 = NULL; 7005 *seq2 = get_insns (); 7006 end_sequence (); 7007 return tmp; 7008 } 7009 end_sequence (); 7010 } 7011 7012 /* Failed to use insv. Generate a two part shift and mask. */ 7013 start_sequence (); 7014 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift); 7015 *seq1 = get_insns (); 7016 end_sequence (); 7017 7018 start_sequence (); 7019 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT); 7020 *seq2 = get_insns (); 7021 end_sequence (); 7022 7023 return tmp; 7024} 7025 7026/* Expand an atomic compare and swap operation for HImode and QImode. MEM is 7027 the memory location, CMP the old value to compare MEM with and NEW_RTX the 7028 value to set if CMP == MEM. */ 7029 7030static void 7031s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem, 7032 rtx cmp, rtx new_rtx, bool is_weak) 7033{ 7034 struct alignment_context ac; 7035 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3; 7036 rtx res = gen_reg_rtx (SImode); 7037 rtx_code_label *csloop = NULL, *csend = NULL; 7038 7039 gcc_assert (MEM_P (mem)); 7040 7041 init_alignment_context (&ac, mem, mode); 7042 7043 /* Load full word. Subsequent loads are performed by CS. */ 7044 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski, 7045 NULL_RTX, 1, OPTAB_DIRECT); 7046 7047 /* Prepare insertions of cmp and new_rtx into the loaded value. When 7048 possible, we try to use insv to make this happen efficiently. If 7049 that fails we'll generate code both inside and outside the loop. */ 7050 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp); 7051 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx); 7052 7053 if (seq0) 7054 emit_insn (seq0); 7055 if (seq1) 7056 emit_insn (seq1); 7057 7058 /* Start CS loop. */ 7059 if (!is_weak) 7060 { 7061 /* Begin assuming success. 
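btarget is cleared again on the failure path after the loop.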
*/ 7062 emit_move_insn (btarget, const1_rtx); 7063 7064 csloop = gen_label_rtx (); 7065 csend = gen_label_rtx (); 7066 emit_label (csloop); 7067 } 7068 7069 /* val = "<mem>00..0<mem>" 7070 * cmp = "00..0<cmp>00..0" 7071 * new = "00..0<new>00..0" 7072 */ 7073 7074 emit_insn (seq2); 7075 emit_insn (seq3); 7076 7077 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode); 7078 if (is_weak) 7079 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1))); 7080 else 7081 { 7082 rtx tmp; 7083 7084 /* Jump to end if we're done (likely?). */ 7085 s390_emit_jump (csend, cc); 7086 7087 /* Check for changes outside mode, and loop internal if so. 7088 Arrange the moves so that the compare is adjacent to the 7089 branch so that we can generate CRJ. */ 7090 tmp = copy_to_reg (val); 7091 force_expand_binop (SImode, and_optab, res, ac.modemaski, val, 7092 1, OPTAB_DIRECT); 7093 cc = s390_emit_compare (NE, val, tmp); 7094 s390_emit_jump (csloop, cc); 7095 7096 /* Failed. */ 7097 emit_move_insn (btarget, const0_rtx); 7098 emit_label (csend); 7099 } 7100 7101 /* Return the correct part of the bitfield. */ 7102 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift, 7103 NULL_RTX, 1, OPTAB_DIRECT), 1); 7104 } 7105 7106 /* Variant of s390_expand_cs for SI, DI and TI modes. */ 7107 static void 7108 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem, 7109 rtx cmp, rtx new_rtx, bool is_weak) 7110 { 7111 rtx output = vtarget; 7112 rtx_code_label *skip_cs_label = NULL; 7113 bool do_const_opt = false; 7114 7115 if (!register_operand (output, mode)) 7116 output = gen_reg_rtx (mode); 7117 7118 /* If IS_WEAK is true and the INPUT value is a constant, compare the memory 7119 with the constant first and skip the compare_and_swap because it's very 7120 expensive and likely to fail anyway. 7121 Note 1: This is done only for IS_WEAK. C11 allows optimizations that may 7122 cause spurious failures in that case. 7123 Note 2: It may be useful to do this also for non-constant INPUT. 7124 Note 3: Currently only targets with "load on condition" are supported 7125 (z196 and newer). */ 7126 7127 if (TARGET_Z196 7128 && (mode == SImode || mode == DImode)) 7129 do_const_opt = (is_weak && CONST_INT_P (cmp)); 7130 7131 if (do_const_opt) 7132 { 7133 rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM); 7134 7135 skip_cs_label = gen_label_rtx (); 7136 emit_move_insn (btarget, const0_rtx); 7137 if (CONST_INT_P (cmp) && INTVAL (cmp) == 0) 7138 { 7139 rtvec lt = rtvec_alloc (2); 7140 7141 /* Load-and-test + conditional jump. */ 7142 RTVEC_ELT (lt, 0) 7143 = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp)); 7144 RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem); 7145 emit_insn (gen_rtx_PARALLEL (VOIDmode, lt)); 7146 } 7147 else 7148 { 7149 emit_move_insn (output, mem); 7150 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp))); 7151 } 7152 s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx)); 7153 add_reg_br_prob_note (get_last_insn (), 7154 profile_probability::very_unlikely ()); 7155 /* If the jump is not taken, OUTPUT is the expected value. */ 7156 cmp = output; 7157 /* Reload newval to a register manually, *after* the compare and jump 7158 above. Otherwise Reload might place it before the jump. */ 7159 } 7160 else 7161 cmp = force_reg (mode, cmp); 7162 new_rtx = force_reg (mode, new_rtx); 7163 s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx, 7164 (do_const_opt) ?
CCZmode : CCZ1mode); 7165 if (skip_cs_label != NULL) 7166 emit_label (skip_cs_label); 7167 7168 /* We deliberately accept non-register operands in the predicate 7169 to ensure the write back to the output operand happens *before* 7170 the store-flags code below. This makes it easier for combine 7171 to merge the store-flags code with a potential test-and-branch 7172 pattern following (immediately!) afterwards. */ 7173 if (output != vtarget) 7174 emit_move_insn (vtarget, output); 7175 7176 if (do_const_opt) 7177 { 7178 rtx cc, cond, ite; 7179 7180 /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but 7181 btarget has already been initialized with 0 above. */ 7182 cc = gen_rtx_REG (CCZmode, CC_REGNUM); 7183 cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx); 7184 ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget); 7185 emit_insn (gen_rtx_SET (btarget, ite)); 7186 } 7187 else 7188 { 7189 rtx cc, cond; 7190 7191 cc = gen_rtx_REG (CCZ1mode, CC_REGNUM); 7192 cond = gen_rtx_EQ (SImode, cc, const0_rtx); 7193 emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx)); 7194 } 7195} 7196 7197/* Expand an atomic compare and swap operation. MEM is the memory location, 7198 CMP the old value to compare MEM with and NEW_RTX the value to set if 7199 CMP == MEM. */ 7200 7201void 7202s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem, 7203 rtx cmp, rtx new_rtx, bool is_weak) 7204{ 7205 switch (mode) 7206 { 7207 case E_TImode: 7208 case E_DImode: 7209 case E_SImode: 7210 s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak); 7211 break; 7212 case E_HImode: 7213 case E_QImode: 7214 s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak); 7215 break; 7216 default: 7217 gcc_unreachable (); 7218 } 7219} 7220 7221/* Expand an atomic_exchange operation simulated with a compare-and-swap loop. 7222 The memory location MEM is set to INPUT. OUTPUT is set to the previous value 7223 of MEM. */ 7224 7225void 7226s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input) 7227{ 7228 machine_mode mode = GET_MODE (mem); 7229 rtx_code_label *csloop; 7230 7231 if (TARGET_Z196 7232 && (mode == DImode || mode == SImode) 7233 && CONST_INT_P (input) && INTVAL (input) == 0) 7234 { 7235 emit_move_insn (output, const0_rtx); 7236 if (mode == DImode) 7237 emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input)); 7238 else 7239 emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input)); 7240 return; 7241 } 7242 7243 input = force_reg (mode, input); 7244 emit_move_insn (output, mem); 7245 csloop = gen_label_rtx (); 7246 emit_label (csloop); 7247 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output, 7248 input, CCZ1mode)); 7249} 7250 7251/* Expand an atomic operation CODE of mode MODE. MEM is the memory location 7252 and VAL the value to play with. If AFTER is true then store the value 7253 MEM holds after the operation, if AFTER is false then store the value MEM 7254 holds before the operation. If TARGET is zero then discard that value, else 7255 store it to TARGET. 
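The operation itself is implemented as a compare-and-swap loop on the aligned SImode word containing the value.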
*/ 7256 7257void 7258s390_expand_atomic (machine_mode mode, enum rtx_code code, 7259 rtx target, rtx mem, rtx val, bool after) 7260{ 7261 struct alignment_context ac; 7262 rtx cmp; 7263 rtx new_rtx = gen_reg_rtx (SImode); 7264 rtx orig = gen_reg_rtx (SImode); 7265 rtx_code_label *csloop = gen_label_rtx (); 7266 7267 gcc_assert (!target || register_operand (target, VOIDmode)); 7268 gcc_assert (MEM_P (mem)); 7269 7270 init_alignment_context (&ac, mem, mode); 7271 7272 /* Shift val to the correct bit positions. 7273 Preserve "icm", but prevent "ex icm". */ 7274 if (!(ac.aligned && code == SET && MEM_P (val))) 7275 val = s390_expand_mask_and_shift (val, mode, ac.shift); 7276 7277 /* Further preparation insns. */ 7278 if (code == PLUS || code == MINUS) 7279 emit_move_insn (orig, val); 7280 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */ 7281 val = expand_simple_binop (SImode, XOR, val, ac.modemaski, 7282 NULL_RTX, 1, OPTAB_DIRECT); 7283 7284 /* Load full word. Subsequent loads are performed by CS. */ 7285 cmp = force_reg (SImode, ac.memsi); 7286 7287 /* Start CS loop. */ 7288 emit_label (csloop); 7289 emit_move_insn (new_rtx, cmp); 7290 7291 /* Patch new with val at correct position. */ 7292 switch (code) 7293 { 7294 case PLUS: 7295 case MINUS: 7296 val = expand_simple_binop (SImode, code, new_rtx, orig, 7297 NULL_RTX, 1, OPTAB_DIRECT); 7298 val = expand_simple_binop (SImode, AND, val, ac.modemask, 7299 NULL_RTX, 1, OPTAB_DIRECT); 7300 /* FALLTHRU */ 7301 case SET: 7302 if (ac.aligned && MEM_P (val)) 7303 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0, 7304 0, 0, SImode, val, false); 7305 else 7306 { 7307 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski, 7308 NULL_RTX, 1, OPTAB_DIRECT); 7309 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val, 7310 NULL_RTX, 1, OPTAB_DIRECT); 7311 } 7312 break; 7313 case AND: 7314 case IOR: 7315 case XOR: 7316 new_rtx = expand_simple_binop (SImode, code, new_rtx, val, 7317 NULL_RTX, 1, OPTAB_DIRECT); 7318 break; 7319 case MULT: /* NAND */ 7320 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val, 7321 NULL_RTX, 1, OPTAB_DIRECT); 7322 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask, 7323 NULL_RTX, 1, OPTAB_DIRECT); 7324 break; 7325 default: 7326 gcc_unreachable (); 7327 } 7328 7329 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp, 7330 ac.memsi, cmp, new_rtx, 7331 CCZ1mode)); 7332 7333 /* Return the correct part of the bitfield. */ 7334 if (target) 7335 convert_move (target, expand_simple_binop (SImode, LSHIFTRT, 7336 after ? new_rtx : cmp, ac.shift, 7337 NULL_RTX, 1, OPTAB_DIRECT), 1); 7338} 7339 7340/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. 7341 We need to emit DTP-relative relocations. */ 7342 7343static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; 7344 7345static void 7346s390_output_dwarf_dtprel (FILE *file, int size, rtx x) 7347{ 7348 switch (size) 7349 { 7350 case 4: 7351 fputs ("\t.long\t", file); 7352 break; 7353 case 8: 7354 fputs ("\t.quad\t", file); 7355 break; 7356 default: 7357 gcc_unreachable (); 7358 } 7359 output_addr_const (file, x); 7360 fputs ("@DTPOFF", file); 7361} 7362 7363/* Return the proper mode for REGNO being represented in the dwarf 7364 unwind table. */ 7365machine_mode 7366s390_dwarf_frame_reg_mode (int regno) 7367{ 7368 machine_mode save_mode = default_dwarf_frame_reg_mode (regno); 7369 7370 /* Make sure not to return DImode for any GPR with -m31 -mzarch. 
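In that configuration the GPRs are 64 bits wide, but Pmode is SImode, so this caps the save mode at 32 bits.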
*/ 7371 if (GENERAL_REGNO_P (regno)) 7372 save_mode = Pmode; 7373 7374 /* The rightmost 64 bits of vector registers are call-clobbered. */ 7375 if (GET_MODE_SIZE (save_mode) > 8) 7376 save_mode = DImode; 7377 7378 return save_mode; 7379} 7380 7381#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING 7382/* Implement TARGET_MANGLE_TYPE. */ 7383 7384static const char * 7385s390_mangle_type (const_tree type) 7386{ 7387 type = TYPE_MAIN_VARIANT (type); 7388 7389 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE 7390 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) 7391 return NULL; 7392 7393 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc"; 7394 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools"; 7395 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli"; 7396 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll"; 7397 7398 if (TYPE_MAIN_VARIANT (type) == long_double_type_node 7399 && TARGET_LONG_DOUBLE_128) 7400 return "g"; 7401 7402 /* For all other types, use normal C++ mangling. */ 7403 return NULL; 7404} 7405#endif 7406 7407/* In the name of slightly smaller debug output, and to cater to 7408 general assembler lossage, recognize various UNSPEC sequences 7409 and turn them back into a direct symbol reference. */ 7410 7411static rtx 7412s390_delegitimize_address (rtx orig_x) 7413{ 7414 rtx x, y; 7415 7416 orig_x = delegitimize_mem_from_attrs (orig_x); 7417 x = orig_x; 7418 7419 /* Extract the symbol ref from: 7420 (plus:SI (reg:SI 12 %r12) 7421 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))] 7422 UNSPEC_GOTOFF/PLTOFF))) 7423 and 7424 (plus:SI (reg:SI 12 %r12) 7425 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))] 7426 UNSPEC_GOTOFF/PLTOFF) 7427 (const_int 4 [0x4])))) */ 7428 if (GET_CODE (x) == PLUS 7429 && REG_P (XEXP (x, 0)) 7430 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM 7431 && GET_CODE (XEXP (x, 1)) == CONST) 7432 { 7433 HOST_WIDE_INT offset = 0; 7434 7435 /* The const operand. */ 7436 y = XEXP (XEXP (x, 1), 0); 7437 7438 if (GET_CODE (y) == PLUS 7439 && GET_CODE (XEXP (y, 1)) == CONST_INT) 7440 { 7441 offset = INTVAL (XEXP (y, 1)); 7442 y = XEXP (y, 0); 7443 } 7444 7445 if (GET_CODE (y) == UNSPEC 7446 && (XINT (y, 1) == UNSPEC_GOTOFF 7447 || XINT (y, 1) == UNSPEC_PLTOFF)) 7448 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset); 7449 } 7450 7451 if (GET_CODE (x) != MEM) 7452 return orig_x; 7453 7454 x = XEXP (x, 0); 7455 if (GET_CODE (x) == PLUS 7456 && GET_CODE (XEXP (x, 1)) == CONST 7457 && GET_CODE (XEXP (x, 0)) == REG 7458 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM) 7459 { 7460 y = XEXP (XEXP (x, 1), 0); 7461 if (GET_CODE (y) == UNSPEC 7462 && XINT (y, 1) == UNSPEC_GOT) 7463 y = XVECEXP (y, 0, 0); 7464 else 7465 return orig_x; 7466 } 7467 else if (GET_CODE (x) == CONST) 7468 { 7469 /* Extract the symbol ref from: 7470 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))] 7471 UNSPEC_PLT/GOTENT))) */ 7472 7473 y = XEXP (x, 0); 7474 if (GET_CODE (y) == UNSPEC 7475 && (XINT (y, 1) == UNSPEC_GOTENT 7476 || XINT (y, 1) == UNSPEC_PLT)) 7477 y = XVECEXP (y, 0, 0); 7478 else 7479 return orig_x; 7480 } 7481 else 7482 return orig_x; 7483 7484 if (GET_MODE (orig_x) != Pmode) 7485 { 7486 if (GET_MODE (orig_x) == BLKmode) 7487 return orig_x; 7488 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode); 7489 if (y == NULL_RTX) 7490 return orig_x; 7491 } 7492 return y; 7493} 7494 7495/* Output operand OP to stdio stream FILE. 
7496 OP is an address (register + offset) which is not used to address data; 7497 instead the rightmost bits are interpreted as the value. */ 7498 7499 static void 7500 print_addrstyle_operand (FILE *file, rtx op) 7501 { 7502 HOST_WIDE_INT offset; 7503 rtx base; 7504 7505 /* Extract base register and offset. */ 7506 if (!s390_decompose_addrstyle_without_index (op, &base, &offset)) 7507 gcc_unreachable (); 7508 7509 /* Sanity check. */ 7510 if (base) 7511 { 7512 gcc_assert (GET_CODE (base) == REG); 7513 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER); 7514 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS); 7515 } 7516 7517 /* Offsets are restricted to twelve bits. */ 7518 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1)); 7519 if (base) 7520 fprintf (file, "(%s)", reg_names[REGNO (base)]); 7521 } 7522 7523 /* Print the shift count operand OP to FILE. 7524 OP is an address-style operand in a form which 7525 s390_valid_shift_count permits. Subregs and no-op 7526 and-masking of the operand are stripped. */ 7527 7528 static void 7529 print_shift_count_operand (FILE *file, rtx op) 7530 { 7531 /* No checking of the and mask required here. */ 7532 if (!s390_valid_shift_count (op, 0)) 7533 gcc_unreachable (); 7534 7535 while (op && GET_CODE (op) == SUBREG) 7536 op = SUBREG_REG (op); 7537 7538 if (GET_CODE (op) == AND) 7539 op = XEXP (op, 0); 7540 7541 print_addrstyle_operand (file, op); 7542 } 7543 7544 /* Assigns the number of NOP halfwords to be emitted before and after the 7545 function label to *HW_BEFORE and *HW_AFTER. Neither pointer may be NULL. 7546 If hotpatching is disabled for the function, the values are set to zero. 7547 */ 7548 7549 static void 7550 s390_function_num_hotpatch_hw (tree decl, 7551 int *hw_before, 7552 int *hw_after) 7553 { 7554 tree attr; 7555 7556 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl)); 7557 7558 /* Handle the arguments of the hotpatch attribute. The values 7559 specified via attribute might override the cmdline argument 7560 values. */ 7561 if (attr) 7562 { 7563 tree args = TREE_VALUE (attr); 7564 7565 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args)); 7566 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args))); 7567 } 7568 else 7569 { 7570 /* Use the values specified by the cmdline arguments. */ 7571 *hw_before = s390_hotpatch_hw_before_label; 7572 *hw_after = s390_hotpatch_hw_after_label; 7573 } 7574 } 7575 7576 /* Write the current .machine and .machinemode specification to the assembler 7577 file. */ 7578 7579 #ifdef HAVE_AS_MACHINE_MACHINEMODE 7580 static void 7581 s390_asm_output_machine_for_arch (FILE *asm_out_file) 7582 { 7583 fprintf (asm_out_file, "\t.machinemode %s\n", 7584 (TARGET_ZARCH) ? "zarch" : "esa"); 7585 fprintf (asm_out_file, "\t.machine \"%s", 7586 processor_table[s390_arch].binutils_name); 7587 if (S390_USE_ARCHITECTURE_MODIFIERS) 7588 { 7589 int cpu_flags; 7590 7591 cpu_flags = processor_flags_table[(int) s390_arch]; 7592 if (TARGET_HTM && !(cpu_flags & PF_TX)) 7593 fprintf (asm_out_file, "+htm"); 7594 else if (!TARGET_HTM && (cpu_flags & PF_TX)) 7595 fprintf (asm_out_file, "+nohtm"); 7596 if (TARGET_VX && !(cpu_flags & PF_VX)) 7597 fprintf (asm_out_file, "+vx"); 7598 else if (!TARGET_VX && (cpu_flags & PF_VX)) 7599 fprintf (asm_out_file, "+novx"); 7600 } 7601 fprintf (asm_out_file, "\"\n"); 7602 } 7603 7604 /* Write an extra function header before the very start of the function.
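This pushes the assembler's .machine/.machinemode state for functions with a target attribute or pragma; the matching pops are emitted by s390_asm_declare_function_size.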
*/ 7605 7606 void 7607 s390_asm_output_function_prefix (FILE *asm_out_file, 7608 const char *fnname ATTRIBUTE_UNUSED) 7609 { 7610 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL) 7611 return; 7612 /* Since only the function specific options are saved but not the indications 7613 which options are set, it's too much work here to figure out which options 7614 have actually changed. Thus, generate .machine and .machinemode whenever a 7615 function has the target attribute or pragma. */ 7616 fprintf (asm_out_file, "\t.machinemode push\n"); 7617 fprintf (asm_out_file, "\t.machine push\n"); 7618 s390_asm_output_machine_for_arch (asm_out_file); 7619 } 7620 7621 /* Write an extra function footer after the very end of the function. */ 7622 7623 void 7624 s390_asm_declare_function_size (FILE *asm_out_file, 7625 const char *fnname, tree decl) 7626 { 7627 if (!flag_inhibit_size_directive) 7628 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname); 7629 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL) 7630 return; 7631 fprintf (asm_out_file, "\t.machine pop\n"); 7632 fprintf (asm_out_file, "\t.machinemode pop\n"); 7633 } 7634 #endif 7635 7636 /* Write the extra assembler code needed to declare a function properly. */ 7637 7638 void 7639 s390_asm_output_function_label (FILE *asm_out_file, const char *fname, 7640 tree decl) 7641 { 7642 int hw_before, hw_after; 7643 7644 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after); 7645 if (hw_before > 0) 7646 { 7647 unsigned int function_alignment; 7648 int i; 7649 7650 /* Add a trampoline code area before the function label and initialize it 7651 with two-byte nop instructions. This area can be overwritten with code 7652 that jumps to a patched version of the function. */ 7653 asm_fprintf (asm_out_file, "\tnopr\t%%r0" 7654 "\t# pre-label NOPs for hotpatch (%d halfwords)\n", 7655 hw_before); 7656 for (i = 1; i < hw_before; i++) 7657 fputs ("\tnopr\t%r0\n", asm_out_file); 7658 7659 /* Note: The function label must be aligned so that (a) the bytes of the 7660 following nop do not cross a cacheline boundary, and (b) a jump address 7661 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be 7662 stored directly before the label without crossing a cacheline 7663 boundary. All this is necessary to make sure the trampoline code can 7664 be changed atomically. 7665 This alignment is done automatically using the FUNCTION_BOUNDARY, but 7666 if there are NOPs before the function label, the alignment is placed 7667 before them. So it is necessary to duplicate the alignment after the 7668 NOPs. */ 7669 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT); 7670 if (!
DECL_USER_ALIGN (decl)) 7671 function_alignment 7672 = MAX (function_alignment, 7673 (unsigned int) align_functions.levels[0].get_value ()); 7674 fputs ("\t# alignment for hotpatch\n", asm_out_file); 7675 ASM_OUTPUT_ALIGN (asm_out_file, align_functions.levels[0].log); 7676 } 7677 7678 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG) 7679 { 7680 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch); 7681 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune); 7682 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard); 7683 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size); 7684 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost); 7685 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname, 7686 s390_warn_framesize); 7687 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN); 7688 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP); 7689 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT); 7690 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM); 7691 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX); 7692 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname, 7693 TARGET_PACKED_STACK); 7694 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC); 7695 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE); 7696 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR); 7697 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname, 7698 s390_warn_dynamicstack_p); 7699 } 7700 ASM_OUTPUT_LABEL (asm_out_file, fname); 7701 if (hw_after > 0) 7702 asm_fprintf (asm_out_file, 7703 "\t# post-label NOPs for hotpatch (%d halfwords)\n", 7704 hw_after); 7705} 7706 7707/* Output machine-dependent UNSPECs occurring in address constant X 7708 in assembler syntax to stdio stream FILE. Returns true if the 7709 constant X could be recognized, false otherwise. 
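   For instance, (unspec [(symbol_ref ("foo"))] UNSPEC_GOTENT) comes out
   as "foo@GOTENT" and (unspec [(symbol_ref ("bar"))] UNSPEC_PLT) as
   "bar@PLT"; the symbol names here are illustrative only.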
*/ 7710 7711 static bool 7712 s390_output_addr_const_extra (FILE *file, rtx x) 7713 { 7714 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1) 7715 switch (XINT (x, 1)) 7716 { 7717 case UNSPEC_GOTENT: 7718 output_addr_const (file, XVECEXP (x, 0, 0)); 7719 fprintf (file, "@GOTENT"); 7720 return true; 7721 case UNSPEC_GOT: 7722 output_addr_const (file, XVECEXP (x, 0, 0)); 7723 fprintf (file, "@GOT"); 7724 return true; 7725 case UNSPEC_GOTOFF: 7726 output_addr_const (file, XVECEXP (x, 0, 0)); 7727 fprintf (file, "@GOTOFF"); 7728 return true; 7729 case UNSPEC_PLT: 7730 output_addr_const (file, XVECEXP (x, 0, 0)); 7731 fprintf (file, "@PLT"); 7732 return true; 7733 case UNSPEC_PLTOFF: 7734 output_addr_const (file, XVECEXP (x, 0, 0)); 7735 fprintf (file, "@PLTOFF"); 7736 return true; 7737 case UNSPEC_TLSGD: 7738 output_addr_const (file, XVECEXP (x, 0, 0)); 7739 fprintf (file, "@TLSGD"); 7740 return true; 7741 case UNSPEC_TLSLDM: 7742 assemble_name (file, get_some_local_dynamic_name ()); 7743 fprintf (file, "@TLSLDM"); 7744 return true; 7745 case UNSPEC_DTPOFF: 7746 output_addr_const (file, XVECEXP (x, 0, 0)); 7747 fprintf (file, "@DTPOFF"); 7748 return true; 7749 case UNSPEC_NTPOFF: 7750 output_addr_const (file, XVECEXP (x, 0, 0)); 7751 fprintf (file, "@NTPOFF"); 7752 return true; 7753 case UNSPEC_GOTNTPOFF: 7754 output_addr_const (file, XVECEXP (x, 0, 0)); 7755 fprintf (file, "@GOTNTPOFF"); 7756 return true; 7757 case UNSPEC_INDNTPOFF: 7758 output_addr_const (file, XVECEXP (x, 0, 0)); 7759 fprintf (file, "@INDNTPOFF"); 7760 return true; 7761 } 7762 7763 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2) 7764 switch (XINT (x, 1)) 7765 { 7766 case UNSPEC_POOL_OFFSET: 7767 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1)); 7768 output_addr_const (file, x); 7769 return true; 7770 } 7771 return false; 7772 } 7773 7774 /* Output address operand ADDR in assembler syntax to 7775 stdio stream FILE. */ 7776 7777 void 7778 print_operand_address (FILE *file, rtx addr) 7779 { 7780 struct s390_address ad; 7781 memset (&ad, 0, sizeof (s390_address)); 7782 7783 if (s390_loadrelative_operand_p (addr, NULL, NULL)) 7784 { 7785 if (!TARGET_Z10) 7786 { 7787 output_operand_lossage ("symbolic memory references are " 7788 "only supported on z10 or later"); 7789 return; 7790 } 7791 output_addr_const (file, addr); 7792 return; 7793 } 7794 7795 if (!s390_decompose_address (addr, &ad) 7796 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base))) 7797 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))) 7798 output_operand_lossage ("cannot decompose address"); 7799 7800 if (ad.disp) 7801 output_addr_const (file, ad.disp); 7802 else 7803 fprintf (file, "0"); 7804 7805 if (ad.base && ad.indx) 7806 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)], 7807 reg_names[REGNO (ad.base)]); 7808 else if (ad.base) 7809 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]); 7810 } 7811 7812 /* Output operand X in assembler syntax to stdio stream FILE. 7813 CODE specifies the format flag. The following format flags 7814 are recognized: 7815 7816 'A': On z14 or higher: If operand is a mem print the alignment 7817 hint usable with vl/vst prefixed by a comma. 7818 'C': print opcode suffix for branch condition. 7819 'D': print opcode suffix for inverse branch condition. 7820 'E': print opcode suffix for branch on index instruction. 7821 'G': print the size of the operand in bytes. 7822 'J': print tls_load/tls_gdcall/tls_ldcall suffix. 7823 'M': print the second word of a TImode operand.
'N': print the second word of a DImode operand. 7825 'O': print only the displacement of a memory reference or address. 7826 'R': print only the base register of a memory reference or address. 7827 'S': print S-type memory reference (base+displacement). 7828 'Y': print address style operand without index (e.g. shift count or setmem 7829 operand). 7830 7831 'b': print integer X as if it's an unsigned byte. 7832 'c': print integer X as if it's a signed byte. 7833 'e': "end" contiguous bitmask X in either DImode or vector inner mode. 7834 'f': "end" contiguous bitmask X in SImode. 7835 'h': print integer X as if it's a signed halfword. 7836 'i': print the first nonzero HImode part of X. 7837 'j': print the first HImode part unequal to -1 of X. 7838 'k': print the first nonzero SImode part of X. 7839 'm': print the first SImode part unequal to -1 of X. 7840 'o': print integer X as if it's an unsigned 32-bit word. 7841 's': "start" of contiguous bitmask X in either DImode or vector inner mode. 7842 't': CONST_INT: "start" of contiguous bitmask X in SImode. 7843 CONST_VECTOR: Generate a bitmask for vgbm instruction. 7844 'x': print integer X as if it's an unsigned halfword. 7845 'v': print register number as vector register (v1 instead of f1). 7846*/ 7847 7848 void 7849 print_operand (FILE *file, rtx x, int code) 7850 { 7851 HOST_WIDE_INT ival; 7852 7853 switch (code) 7854 { 7855 case 'A': 7856 if (TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS && MEM_P (x)) 7857 { 7858 if (MEM_ALIGN (x) >= 128) 7859 fprintf (file, ",4"); 7860 else if (MEM_ALIGN (x) == 64) 7861 fprintf (file, ",3"); 7862 } 7863 return; 7864 case 'C': 7865 fprintf (file, s390_branch_condition_mnemonic (x, FALSE)); 7866 return; 7867 7868 case 'D': 7869 fprintf (file, s390_branch_condition_mnemonic (x, TRUE)); 7870 return; 7871 7872 case 'E': 7873 if (GET_CODE (x) == LE) 7874 fprintf (file, "l"); 7875 else if (GET_CODE (x) == GT) 7876 fprintf (file, "h"); 7877 else 7878 output_operand_lossage ("invalid comparison operator " 7879 "for 'E' output modifier"); 7880 return; 7881 7882 case 'J': 7883 if (GET_CODE (x) == SYMBOL_REF) 7884 { 7885 fprintf (file, "%s", ":tls_load:"); 7886 output_addr_const (file, x); 7887 } 7888 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD) 7889 { 7890 fprintf (file, "%s", ":tls_gdcall:"); 7891 output_addr_const (file, XVECEXP (x, 0, 0)); 7892 } 7893 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM) 7894 { 7895 fprintf (file, "%s", ":tls_ldcall:"); 7896 const char *name = get_some_local_dynamic_name (); 7897 gcc_assert (name); 7898 assemble_name (file, name); 7899 } 7900 else 7901 output_operand_lossage ("invalid reference for 'J' output modifier"); 7902 return; 7903 7904 case 'G': 7905 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x))); 7906 return; 7907 7908 case 'O': 7909 { 7910 struct s390_address ad; 7911 int ret; 7912 7913 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad); 7914 7915 if (!ret 7916 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base))) 7917 || ad.indx) 7918 { 7919 output_operand_lossage ("invalid address for 'O' output modifier"); 7920 return; 7921 } 7922 7923 if (ad.disp) 7924 output_addr_const (file, ad.disp); 7925 else 7926 fprintf (file, "0"); 7927 } 7928 return; 7929 7930 case 'R': 7931 { 7932 struct s390_address ad; 7933 int ret; 7934 7935 ret = s390_decompose_address (MEM_P (x) ?
XEXP (x, 0) : x, &ad); 7936 7937 if (!ret 7938 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base))) 7939 || ad.indx) 7940 { 7941 output_operand_lossage ("invalid address for 'R' output modifier"); 7942 return; 7943 } 7944 7945 if (ad.base) 7946 fprintf (file, "%s", reg_names[REGNO (ad.base)]); 7947 else 7948 fprintf (file, "0"); 7949 } 7950 return; 7951 7952 case 'S': 7953 { 7954 struct s390_address ad; 7955 int ret; 7956 7957 if (!MEM_P (x)) 7958 { 7959 output_operand_lossage ("memory reference expected for " 7960 "'S' output modifier"); 7961 return; 7962 } 7963 ret = s390_decompose_address (XEXP (x, 0), &ad); 7964 7965 if (!ret 7966 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base))) 7967 || ad.indx) 7968 { 7969 output_operand_lossage ("invalid address for 'S' output modifier"); 7970 return; 7971 } 7972 7973 if (ad.disp) 7974 output_addr_const (file, ad.disp); 7975 else 7976 fprintf (file, "0"); 7977 7978 if (ad.base) 7979 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]); 7980 } 7981 return; 7982 7983 case 'N': 7984 if (GET_CODE (x) == REG) 7985 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1); 7986 else if (GET_CODE (x) == MEM) 7987 x = change_address (x, VOIDmode, 7988 plus_constant (Pmode, XEXP (x, 0), 4)); 7989 else 7990 output_operand_lossage ("register or memory expression expected " 7991 "for 'N' output modifier"); 7992 break; 7993 7994 case 'M': 7995 if (GET_CODE (x) == REG) 7996 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1); 7997 else if (GET_CODE (x) == MEM) 7998 x = change_address (x, VOIDmode, 7999 plus_constant (Pmode, XEXP (x, 0), 8)); 8000 else 8001 output_operand_lossage ("register or memory expression expected " 8002 "for 'M' output modifier"); 8003 break; 8004 8005 case 'Y': 8006 print_shift_count_operand (file, x); 8007 return; 8008 } 8009 8010 switch (GET_CODE (x)) 8011 { 8012 case REG: 8013 /* Print FP regs as fx instead of vx when they are accessed 8014 through non-vector mode. */ 8015 if (code == 'v' 8016 || VECTOR_NOFP_REG_P (x) 8017 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x))) 8018 || (VECTOR_REG_P (x) 8019 && (GET_MODE_SIZE (GET_MODE (x)) / 8020 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8)) 8021 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2); 8022 else 8023 fprintf (file, "%s", reg_names[REGNO (x)]); 8024 break; 8025 8026 case MEM: 8027 output_address (GET_MODE (x), XEXP (x, 0)); 8028 break; 8029 8030 case CONST: 8031 case CODE_LABEL: 8032 case LABEL_REF: 8033 case SYMBOL_REF: 8034 output_addr_const (file, x); 8035 break; 8036 8037 case CONST_INT: 8038 ival = INTVAL (x); 8039 switch (code) 8040 { 8041 case 0: 8042 break; 8043 case 'b': 8044 ival &= 0xff; 8045 break; 8046 case 'c': 8047 ival = ((ival & 0xff) ^ 0x80) - 0x80; 8048 break; 8049 case 'x': 8050 ival &= 0xffff; 8051 break; 8052 case 'h': 8053 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000; 8054 break; 8055 case 'i': 8056 ival = s390_extract_part (x, HImode, 0); 8057 break; 8058 case 'j': 8059 ival = s390_extract_part (x, HImode, -1); 8060 break; 8061 case 'k': 8062 ival = s390_extract_part (x, SImode, 0); 8063 break; 8064 case 'm': 8065 ival = s390_extract_part (x, SImode, -1); 8066 break; 8067 case 'o': 8068 ival &= 0xffffffff; 8069 break; 8070 case 'e': case 'f': 8071 case 's': case 't': 8072 { 8073 int start, end; 8074 int len; 8075 bool ok; 8076 8077 len = (code == 's' || code == 'e' ? 
64 : 32); 8078 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end); 8079 gcc_assert (ok); 8080 if (code == 's' || code == 't') 8081 ival = start; 8082 else 8083 ival = end; 8084 } 8085 break; 8086 default: 8087 output_operand_lossage ("invalid constant for output modifier '%c'", code); 8088 } 8089 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival); 8090 break; 8091 8092 case CONST_WIDE_INT: 8093 if (code == 'b') 8094 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 8095 CONST_WIDE_INT_ELT (x, 0) & 0xff); 8096 else if (code == 'x') 8097 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 8098 CONST_WIDE_INT_ELT (x, 0) & 0xffff); 8099 else if (code == 'h') 8100 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 8101 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000); 8102 else 8103 { 8104 if (code == 0) 8105 output_operand_lossage ("invalid constant - try using " 8106 "an output modifier"); 8107 else 8108 output_operand_lossage ("invalid constant for output modifier '%c'", 8109 code); 8110 } 8111 break; 8112 case CONST_VECTOR: 8113 switch (code) 8114 { 8115 case 'h': 8116 gcc_assert (const_vec_duplicate_p (x)); 8117 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 8118 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000); 8119 break; 8120 case 'e': 8121 case 's': 8122 { 8123 int start, end; 8124 bool ok; 8125 8126 ok = s390_contiguous_bitmask_vector_p (x, &start, &end); 8127 gcc_assert (ok); 8128 ival = (code == 's') ? start : end; 8129 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival); 8130 } 8131 break; 8132 case 't': 8133 { 8134 unsigned mask; 8135 bool ok = s390_bytemask_vector_p (x, &mask); 8136 gcc_assert (ok); 8137 fprintf (file, "%u", mask); 8138 } 8139 break; 8140 8141 default: 8142 output_operand_lossage ("invalid constant vector for output " 8143 "modifier '%c'", code); 8144 } 8145 break; 8146 8147 default: 8148 if (code == 0) 8149 output_operand_lossage ("invalid expression - try using " 8150 "an output modifier"); 8151 else 8152 output_operand_lossage ("invalid expression for output " 8153 "modifier '%c'", code); 8154 break; 8155 } 8156 } 8157 8158 /* Target hook for assembling integer objects. We need to define it 8159 here to work around a bug in some versions of GAS, which couldn't 8160 handle values smaller than INT_MIN when printed in decimal. */ 8161 8162 static bool 8163 s390_assemble_integer (rtx x, unsigned int size, int aligned_p) 8164 { 8165 if (size == 8 && aligned_p 8166 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN) 8167 { 8168 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n", 8169 INTVAL (x)); 8170 return true; 8171 } 8172 return default_assemble_integer (x, size, aligned_p); 8173 } 8174 8175 /* Returns true if register REGNO is used for forming 8176 a memory address in expression X.
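   As a hypothetical example, in
     (set (reg:SI 3) (mem:SI (plus:SI (reg:SI 2) (const_int 4))))
   register 2 is used to form the memory address while register 3 is
   only the destination, so this returns true for regno 2 and false
   for regno 3.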
*/ 8177 8178 static bool 8179 reg_used_in_mem_p (int regno, rtx x) 8180 { 8181 enum rtx_code code = GET_CODE (x); 8182 int i, j; 8183 const char *fmt; 8184 8185 if (code == MEM) 8186 { 8187 if (refers_to_regno_p (regno, XEXP (x, 0))) 8188 return true; 8189 } 8190 else if (code == SET 8191 && GET_CODE (SET_DEST (x)) == PC) 8192 { 8193 if (refers_to_regno_p (regno, SET_SRC (x))) 8194 return true; 8195 } 8196 8197 fmt = GET_RTX_FORMAT (code); 8198 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) 8199 { 8200 if (fmt[i] == 'e' 8201 && reg_used_in_mem_p (regno, XEXP (x, i))) 8202 return true; 8203 8204 else if (fmt[i] == 'E') 8205 for (j = 0; j < XVECLEN (x, i); j++) 8206 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j))) 8207 return true; 8208 } 8209 return false; 8210 } 8211 8212 /* Returns true if expression DEP_RTX sets an address register 8213 used by instruction INSN to address memory. */ 8214 8215 static bool 8216 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn) 8217 { 8218 rtx target, pat; 8219 8220 if (NONJUMP_INSN_P (dep_rtx)) 8221 dep_rtx = PATTERN (dep_rtx); 8222 8223 if (GET_CODE (dep_rtx) == SET) 8224 { 8225 target = SET_DEST (dep_rtx); 8226 if (GET_CODE (target) == STRICT_LOW_PART) 8227 target = XEXP (target, 0); 8228 while (GET_CODE (target) == SUBREG) 8229 target = SUBREG_REG (target); 8230 8231 if (GET_CODE (target) == REG) 8232 { 8233 int regno = REGNO (target); 8234 8235 if (s390_safe_attr_type (insn) == TYPE_LA) 8236 { 8237 pat = PATTERN (insn); 8238 if (GET_CODE (pat) == PARALLEL) 8239 { 8240 gcc_assert (XVECLEN (pat, 0) == 2); 8241 pat = XVECEXP (pat, 0, 0); 8242 } 8243 gcc_assert (GET_CODE (pat) == SET); 8244 return refers_to_regno_p (regno, SET_SRC (pat)); 8245 } 8246 else if (get_attr_atype (insn) == ATYPE_AGEN) 8247 return reg_used_in_mem_p (regno, PATTERN (insn)); 8248 } 8249 } 8250 return false; 8251 } 8252 8253 /* Return 1 if dep_insn sets a register used by insn in the agen unit. */ 8254 8255 int 8256 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn) 8257 { 8258 rtx dep_rtx = PATTERN (dep_insn); 8259 int i; 8260 8261 if (GET_CODE (dep_rtx) == SET 8262 && addr_generation_dependency_p (dep_rtx, insn)) 8263 return 1; 8264 else if (GET_CODE (dep_rtx) == PARALLEL) 8265 { 8266 for (i = 0; i < XVECLEN (dep_rtx, 0); i++) 8267 { 8268 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn)) 8269 return 1; 8270 } 8271 } 8272 return 0; 8273 } 8274 8275 8276 /* A C statement (sans semicolon) to update the integer scheduling priority 8277 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier, 8278 reduce the priority to execute INSN later. Do not define this macro if 8279 you do not need to adjust the scheduling priorities of insns. 8280 8281 A STD instruction should be scheduled earlier, 8282 in order to use the bypass. */ 8283 static int 8284 s390_adjust_priority (rtx_insn *insn, int priority) 8285 { 8286 if (! INSN_P (insn)) 8287 return priority; 8288 8289 if (s390_tune <= PROCESSOR_2064_Z900) 8290 return priority; 8291 8292 switch (s390_safe_attr_type (insn)) 8293 { 8294 case TYPE_FSTOREDF: 8295 case TYPE_FSTORESF: 8296 priority = priority << 3; 8297 break; 8298 case TYPE_STORE: 8299 case TYPE_STM: 8300 priority = priority << 1; 8301 break; 8302 default: 8303 break; 8304 } 8305 return priority; 8306 } 8307 8308 8309 /* The number of instructions that can be issued per cycle.
*/ 8310 8311static int 8312s390_issue_rate (void) 8313{ 8314 switch (s390_tune) 8315 { 8316 case PROCESSOR_2084_Z990: 8317 case PROCESSOR_2094_Z9_109: 8318 case PROCESSOR_2094_Z9_EC: 8319 case PROCESSOR_2817_Z196: 8320 return 3; 8321 case PROCESSOR_2097_Z10: 8322 return 2; 8323 case PROCESSOR_2064_Z900: 8324 /* Starting with EC12 we use the sched_reorder hook to take care 8325 of instruction dispatch constraints. The algorithm only 8326 picks the best instruction and assumes only a single 8327 instruction gets issued per cycle. */ 8328 case PROCESSOR_2827_ZEC12: 8329 case PROCESSOR_2964_Z13: 8330 case PROCESSOR_3906_Z14: 8331 default: 8332 return 1; 8333 } 8334} 8335 8336static int 8337s390_first_cycle_multipass_dfa_lookahead (void) 8338{ 8339 return 4; 8340} 8341 8342static void 8343annotate_constant_pool_refs_1 (rtx *x) 8344{ 8345 int i, j; 8346 const char *fmt; 8347 8348 gcc_assert (GET_CODE (*x) != SYMBOL_REF 8349 || !CONSTANT_POOL_ADDRESS_P (*x)); 8350 8351 /* Literal pool references can only occur inside a MEM ... */ 8352 if (GET_CODE (*x) == MEM) 8353 { 8354 rtx memref = XEXP (*x, 0); 8355 8356 if (GET_CODE (memref) == SYMBOL_REF 8357 && CONSTANT_POOL_ADDRESS_P (memref)) 8358 { 8359 rtx base = cfun->machine->base_reg; 8360 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base), 8361 UNSPEC_LTREF); 8362 8363 *x = replace_equiv_address (*x, addr); 8364 return; 8365 } 8366 8367 if (GET_CODE (memref) == CONST 8368 && GET_CODE (XEXP (memref, 0)) == PLUS 8369 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT 8370 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF 8371 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0))) 8372 { 8373 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1)); 8374 rtx sym = XEXP (XEXP (memref, 0), 0); 8375 rtx base = cfun->machine->base_reg; 8376 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base), 8377 UNSPEC_LTREF); 8378 8379 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off)); 8380 return; 8381 } 8382 } 8383 8384 /* ... or a load-address type pattern. */ 8385 if (GET_CODE (*x) == SET) 8386 { 8387 rtx addrref = SET_SRC (*x); 8388 8389 if (GET_CODE (addrref) == SYMBOL_REF 8390 && CONSTANT_POOL_ADDRESS_P (addrref)) 8391 { 8392 rtx base = cfun->machine->base_reg; 8393 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base), 8394 UNSPEC_LTREF); 8395 8396 SET_SRC (*x) = addr; 8397 return; 8398 } 8399 8400 if (GET_CODE (addrref) == CONST 8401 && GET_CODE (XEXP (addrref, 0)) == PLUS 8402 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT 8403 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF 8404 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0))) 8405 { 8406 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1)); 8407 rtx sym = XEXP (XEXP (addrref, 0), 0); 8408 rtx base = cfun->machine->base_reg; 8409 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base), 8410 UNSPEC_LTREF); 8411 8412 SET_SRC (*x) = plus_constant (Pmode, addr, off); 8413 return; 8414 } 8415 } 8416 8417 fmt = GET_RTX_FORMAT (GET_CODE (*x)); 8418 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--) 8419 { 8420 if (fmt[i] == 'e') 8421 { 8422 annotate_constant_pool_refs_1 (&XEXP (*x, i)); 8423 } 8424 else if (fmt[i] == 'E') 8425 { 8426 for (j = 0; j < XVECLEN (*x, i); j++) 8427 annotate_constant_pool_refs_1 (&XVECEXP (*x, i, j)); 8428 } 8429 } 8430} 8431 8432/* Annotate every literal pool reference in INSN by an UNSPEC_LTREF expression. 8433 Fix up MEMs as required. 
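   As a sketch of the rewrite, an address like (symbol_ref ".LC0")
   inside a MEM becomes
     (unspec [(symbol_ref ".LC0") (reg base)] UNSPEC_LTREF)
   recording which base register the access relies on (".LC0" is an
   illustrative pool label).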
8434 Skip insns which support relative addressing, because they do not use a base 8435 register. */ 8436 8437static void 8438annotate_constant_pool_refs (rtx_insn *insn) 8439{ 8440 if (s390_safe_relative_long_p (insn)) 8441 return; 8442 annotate_constant_pool_refs_1 (&PATTERN (insn)); 8443} 8444 8445static void 8446find_constant_pool_ref_1 (rtx x, rtx *ref) 8447{ 8448 int i, j; 8449 const char *fmt; 8450 8451 /* Likewise POOL_ENTRY insns. */ 8452 if (GET_CODE (x) == UNSPEC_VOLATILE 8453 && XINT (x, 1) == UNSPECV_POOL_ENTRY) 8454 return; 8455 8456 gcc_assert (GET_CODE (x) != SYMBOL_REF 8457 || !CONSTANT_POOL_ADDRESS_P (x)); 8458 8459 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF) 8460 { 8461 rtx sym = XVECEXP (x, 0, 0); 8462 gcc_assert (GET_CODE (sym) == SYMBOL_REF 8463 && CONSTANT_POOL_ADDRESS_P (sym)); 8464 8465 if (*ref == NULL_RTX) 8466 *ref = sym; 8467 else 8468 gcc_assert (*ref == sym); 8469 8470 return; 8471 } 8472 8473 fmt = GET_RTX_FORMAT (GET_CODE (x)); 8474 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 8475 { 8476 if (fmt[i] == 'e') 8477 { 8478 find_constant_pool_ref_1 (XEXP (x, i), ref); 8479 } 8480 else if (fmt[i] == 'E') 8481 { 8482 for (j = 0; j < XVECLEN (x, i); j++) 8483 find_constant_pool_ref_1 (XVECEXP (x, i, j), ref); 8484 } 8485 } 8486} 8487 8488/* Find an annotated literal pool symbol referenced in INSN, 8489 and store it at REF. Will abort if INSN contains references to 8490 more than one such pool symbol; multiple references to the same 8491 symbol are allowed, however. 8492 8493 The rtx pointed to by REF must be initialized to NULL_RTX 8494 by the caller before calling this routine. 8495 8496 Skip insns which support relative addressing, because they do not use a base 8497 register. */ 8498 8499static void 8500find_constant_pool_ref (rtx_insn *insn, rtx *ref) 8501{ 8502 if (s390_safe_relative_long_p (insn)) 8503 return; 8504 find_constant_pool_ref_1 (PATTERN (insn), ref); 8505} 8506 8507static void 8508replace_constant_pool_ref_1 (rtx *x, rtx ref, rtx offset) 8509{ 8510 int i, j; 8511 const char *fmt; 8512 8513 gcc_assert (*x != ref); 8514 8515 if (GET_CODE (*x) == UNSPEC 8516 && XINT (*x, 1) == UNSPEC_LTREF 8517 && XVECEXP (*x, 0, 0) == ref) 8518 { 8519 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset); 8520 return; 8521 } 8522 8523 if (GET_CODE (*x) == PLUS 8524 && GET_CODE (XEXP (*x, 1)) == CONST_INT 8525 && GET_CODE (XEXP (*x, 0)) == UNSPEC 8526 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF 8527 && XVECEXP (XEXP (*x, 0), 0, 0) == ref) 8528 { 8529 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset); 8530 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1))); 8531 return; 8532 } 8533 8534 fmt = GET_RTX_FORMAT (GET_CODE (*x)); 8535 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--) 8536 { 8537 if (fmt[i] == 'e') 8538 { 8539 replace_constant_pool_ref_1 (&XEXP (*x, i), ref, offset); 8540 } 8541 else if (fmt[i] == 'E') 8542 { 8543 for (j = 0; j < XVECLEN (*x, i); j++) 8544 replace_constant_pool_ref_1 (&XVECEXP (*x, i, j), ref, offset); 8545 } 8546 } 8547} 8548 8549/* Replace every reference to the annotated literal pool 8550 symbol REF in INSN by its base plus OFFSET. 8551 Skip insns which support relative addressing, because they do not use a base 8552 register. 
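   This is roughly the inverse of the annotation above: an address
     (unspec [(symbol_ref ".LC0") (reg base)] UNSPEC_LTREF)
   becomes (plus (reg base) OFFSET), with any extra constant
   displacement folded into the offset (label name illustrative).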
*/ 8553 8554static void 8555replace_constant_pool_ref (rtx_insn *insn, rtx ref, rtx offset) 8556{ 8557 if (s390_safe_relative_long_p (insn)) 8558 return; 8559 replace_constant_pool_ref_1 (&PATTERN (insn), ref, offset); 8560} 8561 8562/* We keep a list of constants which we have to add to internal 8563 constant tables in the middle of large functions. */ 8564 8565#define NR_C_MODES 32 8566machine_mode constant_modes[NR_C_MODES] = 8567{ 8568 TFmode, TImode, TDmode, 8569 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode, 8570 V4SFmode, V2DFmode, V1TFmode, 8571 DFmode, DImode, DDmode, 8572 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode, 8573 SFmode, SImode, SDmode, 8574 V4QImode, V2HImode, V1SImode, V1SFmode, 8575 HImode, 8576 V2QImode, V1HImode, 8577 QImode, 8578 V1QImode 8579}; 8580 8581struct constant 8582{ 8583 struct constant *next; 8584 rtx value; 8585 rtx_code_label *label; 8586}; 8587 8588struct constant_pool 8589{ 8590 struct constant_pool *next; 8591 rtx_insn *first_insn; 8592 rtx_insn *pool_insn; 8593 bitmap insns; 8594 rtx_insn *emit_pool_after; 8595 8596 struct constant *constants[NR_C_MODES]; 8597 struct constant *execute; 8598 rtx_code_label *label; 8599 int size; 8600}; 8601 8602/* Allocate new constant_pool structure. */ 8603 8604static struct constant_pool * 8605s390_alloc_pool (void) 8606{ 8607 struct constant_pool *pool; 8608 int i; 8609 8610 pool = (struct constant_pool *) xmalloc (sizeof *pool); 8611 pool->next = NULL; 8612 for (i = 0; i < NR_C_MODES; i++) 8613 pool->constants[i] = NULL; 8614 8615 pool->execute = NULL; 8616 pool->label = gen_label_rtx (); 8617 pool->first_insn = NULL; 8618 pool->pool_insn = NULL; 8619 pool->insns = BITMAP_ALLOC (NULL); 8620 pool->size = 0; 8621 pool->emit_pool_after = NULL; 8622 8623 return pool; 8624} 8625 8626/* Create new constant pool covering instructions starting at INSN 8627 and chain it to the end of POOL_LIST. */ 8628 8629static struct constant_pool * 8630s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn) 8631{ 8632 struct constant_pool *pool, **prev; 8633 8634 pool = s390_alloc_pool (); 8635 pool->first_insn = insn; 8636 8637 for (prev = pool_list; *prev; prev = &(*prev)->next) 8638 ; 8639 *prev = pool; 8640 8641 return pool; 8642} 8643 8644/* End range of instructions covered by POOL at INSN and emit 8645 placeholder insn representing the pool. */ 8646 8647static void 8648s390_end_pool (struct constant_pool *pool, rtx_insn *insn) 8649{ 8650 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */); 8651 8652 if (!insn) 8653 insn = get_last_insn (); 8654 8655 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn); 8656 INSN_ADDRESSES_NEW (pool->pool_insn, -1); 8657} 8658 8659/* Add INSN to the list of insns covered by POOL. */ 8660 8661static void 8662s390_add_pool_insn (struct constant_pool *pool, rtx insn) 8663{ 8664 bitmap_set_bit (pool->insns, INSN_UID (insn)); 8665} 8666 8667/* Return pool out of POOL_LIST that covers INSN. */ 8668 8669static struct constant_pool * 8670s390_find_pool (struct constant_pool *pool_list, rtx insn) 8671{ 8672 struct constant_pool *pool; 8673 8674 for (pool = pool_list; pool; pool = pool->next) 8675 if (bitmap_bit_p (pool->insns, INSN_UID (insn))) 8676 break; 8677 8678 return pool; 8679} 8680 8681/* Add constant VAL of mode MODE to the constant pool POOL. 
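   Duplicates are shared: adding, say, GEN_INT (42) twice in SImode
   (hypothetical values) creates a single entry and grows pool->size by
   GET_MODE_SIZE (SImode) only once.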
*/ 8682 8683static void 8684s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode) 8685{ 8686 struct constant *c; 8687 int i; 8688 8689 for (i = 0; i < NR_C_MODES; i++) 8690 if (constant_modes[i] == mode) 8691 break; 8692 gcc_assert (i != NR_C_MODES); 8693 8694 for (c = pool->constants[i]; c != NULL; c = c->next) 8695 if (rtx_equal_p (val, c->value)) 8696 break; 8697 8698 if (c == NULL) 8699 { 8700 c = (struct constant *) xmalloc (sizeof *c); 8701 c->value = val; 8702 c->label = gen_label_rtx (); 8703 c->next = pool->constants[i]; 8704 pool->constants[i] = c; 8705 pool->size += GET_MODE_SIZE (mode); 8706 } 8707} 8708 8709/* Return an rtx that represents the offset of X from the start of 8710 pool POOL. */ 8711 8712static rtx 8713s390_pool_offset (struct constant_pool *pool, rtx x) 8714{ 8715 rtx label; 8716 8717 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label); 8718 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label), 8719 UNSPEC_POOL_OFFSET); 8720 return gen_rtx_CONST (GET_MODE (x), x); 8721} 8722 8723/* Find constant VAL of mode MODE in the constant pool POOL. 8724 Return an RTX describing the distance from the start of 8725 the pool to the location of the new constant. */ 8726 8727static rtx 8728s390_find_constant (struct constant_pool *pool, rtx val, 8729 machine_mode mode) 8730{ 8731 struct constant *c; 8732 int i; 8733 8734 for (i = 0; i < NR_C_MODES; i++) 8735 if (constant_modes[i] == mode) 8736 break; 8737 gcc_assert (i != NR_C_MODES); 8738 8739 for (c = pool->constants[i]; c != NULL; c = c->next) 8740 if (rtx_equal_p (val, c->value)) 8741 break; 8742 8743 gcc_assert (c); 8744 8745 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label)); 8746} 8747 8748/* Check whether INSN is an execute. Return the label_ref to its 8749 execute target template if so, NULL_RTX otherwise. */ 8750 8751static rtx 8752s390_execute_label (rtx insn) 8753{ 8754 if (INSN_P (insn) 8755 && GET_CODE (PATTERN (insn)) == PARALLEL 8756 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC 8757 && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE 8758 || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP)) 8759 { 8760 if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE) 8761 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2); 8762 else 8763 { 8764 gcc_assert (JUMP_P (insn)); 8765 /* For jump insns as execute target: 8766 - There is one operand less in the parallel (the 8767 modification register of the execute is always 0). 8768 - The execute target label is wrapped into an 8769 if_then_else in order to hide it from jump analysis. */ 8770 return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0); 8771 } 8772 } 8773 8774 return NULL_RTX; 8775} 8776 8777/* Find execute target for INSN in the constant pool POOL. 8778 Return an RTX describing the distance from the start of 8779 the pool to the location of the execute target. */ 8780 8781static rtx 8782s390_find_execute (struct constant_pool *pool, rtx insn) 8783{ 8784 struct constant *c; 8785 8786 for (c = pool->execute; c != NULL; c = c->next) 8787 if (INSN_UID (insn) == INSN_UID (c->value)) 8788 break; 8789 8790 gcc_assert (c); 8791 8792 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label)); 8793} 8794 8795/* For an execute INSN, extract the execute target template. 
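   The pattern is a PARALLEL whose first element is the UNSPEC holding
   the label; the remaining elements form the template. With exactly
   two elements the template is that single pattern, otherwise the rest
   is re-wrapped into a new PARALLEL below.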
*/ 8796 8797static rtx 8798s390_execute_target (rtx insn) 8799{ 8800 rtx pattern = PATTERN (insn); 8801 gcc_assert (s390_execute_label (insn)); 8802 8803 if (XVECLEN (pattern, 0) == 2) 8804 { 8805 pattern = copy_rtx (XVECEXP (pattern, 0, 1)); 8806 } 8807 else 8808 { 8809 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1); 8810 int i; 8811 8812 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++) 8813 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1)); 8814 8815 pattern = gen_rtx_PARALLEL (VOIDmode, vec); 8816 } 8817 8818 return pattern; 8819} 8820 8821/* Indicate that INSN cannot be duplicated. This is the case for 8822 execute insns that carry a unique label. */ 8823 8824static bool 8825s390_cannot_copy_insn_p (rtx_insn *insn) 8826{ 8827 rtx label = s390_execute_label (insn); 8828 return label && label != const0_rtx; 8829} 8830 8831/* Dump out the constants in POOL. If REMOTE_LABEL is true, 8832 do not emit the pool base label. */ 8833 8834static void 8835s390_dump_pool (struct constant_pool *pool, bool remote_label) 8836{ 8837 struct constant *c; 8838 rtx_insn *insn = pool->pool_insn; 8839 int i; 8840 8841 /* Switch to rodata section. */ 8842 insn = emit_insn_after (gen_pool_section_start (), insn); 8843 INSN_ADDRESSES_NEW (insn, -1); 8844 8845 /* Ensure minimum pool alignment. */ 8846 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn); 8847 INSN_ADDRESSES_NEW (insn, -1); 8848 8849 /* Emit pool base label. */ 8850 if (!remote_label) 8851 { 8852 insn = emit_label_after (pool->label, insn); 8853 INSN_ADDRESSES_NEW (insn, -1); 8854 } 8855 8856 /* Dump constants in descending alignment requirement order, 8857 ensuring proper alignment for every constant. */ 8858 for (i = 0; i < NR_C_MODES; i++) 8859 for (c = pool->constants[i]; c; c = c->next) 8860 { 8861 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */ 8862 rtx value = copy_rtx (c->value); 8863 if (GET_CODE (value) == CONST 8864 && GET_CODE (XEXP (value, 0)) == UNSPEC 8865 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET 8866 && XVECLEN (XEXP (value, 0), 0) == 1) 8867 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0)); 8868 8869 insn = emit_label_after (c->label, insn); 8870 INSN_ADDRESSES_NEW (insn, -1); 8871 8872 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i], 8873 gen_rtvec (1, value), 8874 UNSPECV_POOL_ENTRY); 8875 insn = emit_insn_after (value, insn); 8876 INSN_ADDRESSES_NEW (insn, -1); 8877 } 8878 8879 /* Ensure minimum alignment for instructions. */ 8880 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn); 8881 INSN_ADDRESSES_NEW (insn, -1); 8882 8883 /* Output in-pool execute template insns. */ 8884 for (c = pool->execute; c; c = c->next) 8885 { 8886 insn = emit_label_after (c->label, insn); 8887 INSN_ADDRESSES_NEW (insn, -1); 8888 8889 insn = emit_insn_after (s390_execute_target (c->value), insn); 8890 INSN_ADDRESSES_NEW (insn, -1); 8891 } 8892 8893 /* Switch back to previous section. */ 8894 insn = emit_insn_after (gen_pool_section_end (), insn); 8895 INSN_ADDRESSES_NEW (insn, -1); 8896 8897 insn = emit_barrier_after (insn); 8898 INSN_ADDRESSES_NEW (insn, -1); 8899 8900 /* Remove placeholder insn. */ 8901 remove_insn (pool->pool_insn); 8902} 8903 8904/* Free all memory used by POOL. 
*/ 8905 8906 static void 8907 s390_free_pool (struct constant_pool *pool) 8908 { 8909 struct constant *c, *next; 8910 int i; 8911 8912 for (i = 0; i < NR_C_MODES; i++) 8913 for (c = pool->constants[i]; c; c = next) 8914 { 8915 next = c->next; 8916 free (c); 8917 } 8918 8919 for (c = pool->execute; c; c = next) 8920 { 8921 next = c->next; 8922 free (c); 8923 } 8924 8925 BITMAP_FREE (pool->insns); 8926 free (pool); 8927 } 8928 8929 8930 /* Collect main literal pool. Return NULL on overflow. */ 8931 8932 static struct constant_pool * 8933 s390_mainpool_start (void) 8934 { 8935 struct constant_pool *pool; 8936 rtx_insn *insn; 8937 8938 pool = s390_alloc_pool (); 8939 8940 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 8941 { 8942 if (NONJUMP_INSN_P (insn) 8943 && GET_CODE (PATTERN (insn)) == SET 8944 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE 8945 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL) 8946 { 8947 /* There might be two main_pool instructions if base_reg 8948 is call-clobbered; one for shrink-wrapped code and one 8949 for the rest. We want to keep the first. */ 8950 if (pool->pool_insn) 8951 { 8952 insn = PREV_INSN (insn); 8953 delete_insn (NEXT_INSN (insn)); 8954 continue; 8955 } 8956 pool->pool_insn = insn; 8957 } 8958 8959 if (NONJUMP_INSN_P (insn) || CALL_P (insn)) 8960 { 8961 rtx pool_ref = NULL_RTX; 8962 find_constant_pool_ref (insn, &pool_ref); 8963 if (pool_ref) 8964 { 8965 rtx constant = get_pool_constant (pool_ref); 8966 machine_mode mode = get_pool_mode (pool_ref); 8967 s390_add_constant (pool, constant, mode); 8968 } 8969 } 8970 8971 /* If hot/cold partitioning is enabled we have to make sure that 8972 the literal pool is emitted in the same section where the 8973 initialization of the literal pool base pointer takes place. 8974 emit_pool_after is only used in the non-overflow case on non 8975 Z cpus where we can emit the literal pool at the end of the 8976 function body within the text section. */ 8977 if (NOTE_P (insn) 8978 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS 8979 && !pool->emit_pool_after) 8980 pool->emit_pool_after = PREV_INSN (insn); 8981 } 8982 8983 gcc_assert (pool->pool_insn || pool->size == 0); 8984 8985 if (pool->size >= 4096) 8986 { 8987 /* We're going to chunkify the pool, so remove the main 8988 pool placeholder insn. */ 8989 remove_insn (pool->pool_insn); 8990 8991 s390_free_pool (pool); 8992 pool = NULL; 8993 } 8994 8995 /* If the function ends with the section where the literal pool 8996 should be emitted, set the marker to its end. */ 8997 if (pool && !pool->emit_pool_after) 8998 pool->emit_pool_after = get_last_insn (); 8999 9000 return pool; 9001 } 9002 9003 /* POOL holds the main literal pool as collected by s390_mainpool_start. 9004 Modify the current function to output the pool constants as well as 9005 the pool register setup instruction. */ 9006 9007 static void 9008 s390_mainpool_finish (struct constant_pool *pool) 9009 { 9010 rtx base_reg = cfun->machine->base_reg; 9011 rtx set; 9012 rtx_insn *insn; 9013 9014 /* If the pool is empty, we're done. */ 9015 if (pool->size == 0) 9016 { 9017 /* We don't actually need a base register after all. */ 9018 cfun->machine->base_reg = NULL_RTX; 9019 9020 if (pool->pool_insn) 9021 remove_insn (pool->pool_insn); 9022 s390_free_pool (pool); 9023 return; 9024 } 9025 9026 /* We need correct insn addresses. */ 9027 shorten_branches (get_insns ()); 9028 9029 /* Use a LARL to load the pool register.
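   (In assembly this is a sketch along the lines of "larl %r13,.L5",
   where the register and label names are illustrative.)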
The pool is 9030 located in the .rodata section, so we emit it after the function. */ 9031 set = gen_main_base_64 (base_reg, pool->label); 9032 insn = emit_insn_after (set, pool->pool_insn); 9033 INSN_ADDRESSES_NEW (insn, -1); 9034 remove_insn (pool->pool_insn); 9035 9036 insn = get_last_insn (); 9037 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn); 9038 INSN_ADDRESSES_NEW (pool->pool_insn, -1); 9039 9040 s390_dump_pool (pool, 0); 9041 9042 /* Replace all literal pool references. */ 9043 9044 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn)) 9045 { 9046 if (NONJUMP_INSN_P (insn) || CALL_P (insn)) 9047 { 9048 rtx addr, pool_ref = NULL_RTX; 9049 find_constant_pool_ref (insn, &pool_ref); 9050 if (pool_ref) 9051 { 9052 if (s390_execute_label (insn)) 9053 addr = s390_find_execute (pool, insn); 9054 else 9055 addr = s390_find_constant (pool, get_pool_constant (pool_ref), 9056 get_pool_mode (pool_ref)); 9057 9058 replace_constant_pool_ref (insn, pool_ref, addr); 9059 INSN_CODE (insn) = -1; 9060 } 9061 } 9062 } 9063 9064 9065 /* Free the pool. */ 9066 s390_free_pool (pool); 9067} 9068 9069/* Chunkify the literal pool. */ 9070 9071#define S390_POOL_CHUNK_MIN 0xc00 9072#define S390_POOL_CHUNK_MAX 0xe00 9073 9074static struct constant_pool * 9075s390_chunkify_start (void) 9076{ 9077 struct constant_pool *curr_pool = NULL, *pool_list = NULL; 9078 bitmap far_labels; 9079 rtx_insn *insn; 9080 9081 /* We need correct insn addresses. */ 9082 9083 shorten_branches (get_insns ()); 9084 9085 /* Scan all insns and move literals to pool chunks. */ 9086 9087 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 9088 { 9089 if (NONJUMP_INSN_P (insn) || CALL_P (insn)) 9090 { 9091 rtx pool_ref = NULL_RTX; 9092 find_constant_pool_ref (insn, &pool_ref); 9093 if (pool_ref) 9094 { 9095 rtx constant = get_pool_constant (pool_ref); 9096 machine_mode mode = get_pool_mode (pool_ref); 9097 9098 if (!curr_pool) 9099 curr_pool = s390_start_pool (&pool_list, insn); 9100 9101 s390_add_constant (curr_pool, constant, mode); 9102 s390_add_pool_insn (curr_pool, insn); 9103 } 9104 } 9105 9106 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn)) 9107 { 9108 if (curr_pool) 9109 s390_add_pool_insn (curr_pool, insn); 9110 } 9111 9112 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION) 9113 continue; 9114 9115 if (!curr_pool 9116 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn) 9117 || INSN_ADDRESSES (INSN_UID (insn)) == -1) 9118 continue; 9119 9120 if (curr_pool->size < S390_POOL_CHUNK_MAX) 9121 continue; 9122 9123 s390_end_pool (curr_pool, NULL); 9124 curr_pool = NULL; 9125 } 9126 9127 if (curr_pool) 9128 s390_end_pool (curr_pool, NULL); 9129 9130 /* Find all labels that are branched into 9131 from an insn belonging to a different chunk. */ 9132 9133 far_labels = BITMAP_ALLOC (NULL); 9134 9135 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 9136 { 9137 rtx_jump_table_data *table; 9138 9139 /* Labels marked with LABEL_PRESERVE_P can be target 9140 of non-local jumps, so we have to mark them. 9141 The same holds for named labels. 9142 9143 Don't do that, however, if it is the label before 9144 a jump table. */ 9145 9146 if (LABEL_P (insn) 9147 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn))) 9148 { 9149 rtx_insn *vec_insn = NEXT_INSN (insn); 9150 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn)) 9151 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn)); 9152 } 9153 /* Check potential targets in a table jump (casesi_jump). 
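   For an ADDR_DIFF_VEC the labels sit in operand 1, for a plain
   ADDR_VEC in operand 0; diff_p below selects the right one.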
*/ 9154 else if (tablejump_p (insn, NULL, &table)) 9155 { 9156 rtx vec_pat = PATTERN (table); 9157 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC; 9158 9159 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++) 9160 { 9161 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0); 9162 9163 if (s390_find_pool (pool_list, label) 9164 != s390_find_pool (pool_list, insn)) 9165 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label)); 9166 } 9167 } 9168 /* If we have a direct jump (conditional or unconditional), 9169 check all potential targets. */ 9170 else if (JUMP_P (insn)) 9171 { 9172 rtx pat = PATTERN (insn); 9173 9174 if (GET_CODE (pat) == PARALLEL) 9175 pat = XVECEXP (pat, 0, 0); 9176 9177 if (GET_CODE (pat) == SET) 9178 { 9179 rtx label = JUMP_LABEL (insn); 9180 if (label && !ANY_RETURN_P (label)) 9181 { 9182 if (s390_find_pool (pool_list, label) 9183 != s390_find_pool (pool_list, insn)) 9184 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label)); 9185 } 9186 } 9187 } 9188 } 9189 9190 /* Insert base register reload insns before every pool. */ 9191 9192 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next) 9193 { 9194 rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg, 9195 curr_pool->label); 9196 rtx_insn *insn = curr_pool->first_insn; 9197 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1); 9198 } 9199 9200 /* Insert base register reload insns at every far label. */ 9201 9202 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 9203 if (LABEL_P (insn) 9204 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn))) 9205 { 9206 struct constant_pool *pool = s390_find_pool (pool_list, insn); 9207 if (pool) 9208 { 9209 rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg, 9210 pool->label); 9211 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1); 9212 } 9213 } 9214 9215 9216 BITMAP_FREE (far_labels); 9217 9218 9219 /* Recompute insn addresses. */ 9220 9221 init_insn_lengths (); 9222 shorten_branches (get_insns ()); 9223 9224 return pool_list; 9225} 9226 9227/* POOL_LIST is a chunk list as prepared by s390_chunkify_start. 9228 After we have decided to use this list, finish implementing 9229 all changes to the current function as required. */ 9230 9231static void 9232s390_chunkify_finish (struct constant_pool *pool_list) 9233{ 9234 struct constant_pool *curr_pool = NULL; 9235 rtx_insn *insn; 9236 9237 9238 /* Replace all literal pool references. */ 9239 9240 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 9241 { 9242 curr_pool = s390_find_pool (pool_list, insn); 9243 if (!curr_pool) 9244 continue; 9245 9246 if (NONJUMP_INSN_P (insn) || CALL_P (insn)) 9247 { 9248 rtx addr, pool_ref = NULL_RTX; 9249 find_constant_pool_ref (insn, &pool_ref); 9250 if (pool_ref) 9251 { 9252 if (s390_execute_label (insn)) 9253 addr = s390_find_execute (curr_pool, insn); 9254 else 9255 addr = s390_find_constant (curr_pool, 9256 get_pool_constant (pool_ref), 9257 get_pool_mode (pool_ref)); 9258 9259 replace_constant_pool_ref (insn, pool_ref, addr); 9260 INSN_CODE (insn) = -1; 9261 } 9262 } 9263 } 9264 9265 /* Dump out all literal pools. */ 9266 9267 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next) 9268 s390_dump_pool (curr_pool, 0); 9269 9270 /* Free pool list. */ 9271 9272 while (pool_list) 9273 { 9274 struct constant_pool *next = pool_list->next; 9275 s390_free_pool (pool_list); 9276 pool_list = next; 9277 } 9278} 9279 9280/* Output the constant pool entry EXP in mode MODE with alignment ALIGN. 
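   As an illustration, on a 64-bit ELF target a DImode entry ends up as
   a ".quad" directive via assemble_integer, and a vector entry is
   emitted element by element in its inner mode.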
*/ 9281 9282void 9283s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align) 9284{ 9285 switch (GET_MODE_CLASS (mode)) 9286 { 9287 case MODE_FLOAT: 9288 case MODE_DECIMAL_FLOAT: 9289 gcc_assert (GET_CODE (exp) == CONST_DOUBLE); 9290 9291 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp), 9292 as_a <scalar_float_mode> (mode), align); 9293 break; 9294 9295 case MODE_INT: 9296 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1); 9297 mark_symbol_refs_as_used (exp); 9298 break; 9299 9300 case MODE_VECTOR_INT: 9301 case MODE_VECTOR_FLOAT: 9302 { 9303 int i; 9304 machine_mode inner_mode; 9305 gcc_assert (GET_CODE (exp) == CONST_VECTOR); 9306 9307 inner_mode = GET_MODE_INNER (GET_MODE (exp)); 9308 for (i = 0; i < XVECLEN (exp, 0); i++) 9309 s390_output_pool_entry (XVECEXP (exp, 0, i), 9310 inner_mode, 9311 i == 0 9312 ? align 9313 : GET_MODE_BITSIZE (inner_mode)); 9314 } 9315 break; 9316 9317 default: 9318 gcc_unreachable (); 9319 } 9320} 9321 9322 9323/* Return an RTL expression representing the value of the return address 9324 for the frame COUNT steps up from the current frame. FRAME is the 9325 frame pointer of that frame. */ 9326 9327rtx 9328s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED) 9329{ 9330 int offset; 9331 rtx addr; 9332 9333 /* Without backchain, we fail for all but the current frame. */ 9334 9335 if (!TARGET_BACKCHAIN && count > 0) 9336 return NULL_RTX; 9337 9338 /* For the current frame, we need to make sure the initial 9339 value of RETURN_REGNUM is actually saved. */ 9340 9341 if (count == 0) 9342 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM); 9343 9344 if (TARGET_PACKED_STACK) 9345 offset = -2 * UNITS_PER_LONG; 9346 else 9347 offset = RETURN_REGNUM * UNITS_PER_LONG; 9348 9349 addr = plus_constant (Pmode, frame, offset); 9350 addr = memory_address (Pmode, addr); 9351 return gen_rtx_MEM (Pmode, addr); 9352} 9353 9354/* Return an RTL expression representing the back chain stored in 9355 the current stack frame. */ 9356 9357rtx 9358s390_back_chain_rtx (void) 9359{ 9360 rtx chain; 9361 9362 gcc_assert (TARGET_BACKCHAIN); 9363 9364 if (TARGET_PACKED_STACK) 9365 chain = plus_constant (Pmode, stack_pointer_rtx, 9366 STACK_POINTER_OFFSET - UNITS_PER_LONG); 9367 else 9368 chain = stack_pointer_rtx; 9369 9370 chain = gen_rtx_MEM (Pmode, chain); 9371 return chain; 9372} 9373 9374/* Find first call clobbered register unused in a function. 9375 This could be used as base register in a leaf function 9376 or for holding the return address before epilogue. */ 9377 9378static int 9379find_unused_clobbered_reg (void) 9380{ 9381 int i; 9382 for (i = 0; i < 6; i++) 9383 if (!df_regs_ever_live_p (i)) 9384 return i; 9385 return 0; 9386} 9387 9388 9389/* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all 9390 clobbered hard regs in SETREG. 
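   For example (hypothetical), on a 64-bit target a clobber of
   (reg:TI 6) marks hard regs 6 and 7, and a SUBREG of a GPR or FPR is
   first resolved to its underlying hard register via subreg_regno.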
*/ 9391 9392 static void 9393 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data) 9394 { 9395 char *regs_ever_clobbered = (char *)data; 9396 unsigned int i, regno; 9397 machine_mode mode = GET_MODE (setreg); 9398 9399 if (GET_CODE (setreg) == SUBREG) 9400 { 9401 rtx inner = SUBREG_REG (setreg); 9402 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner)) 9403 return; 9404 regno = subreg_regno (setreg); 9405 } 9406 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg)) 9407 regno = REGNO (setreg); 9408 else 9409 return; 9410 9411 for (i = regno; 9412 i < end_hard_regno (mode, regno); 9413 i++) 9414 regs_ever_clobbered[i] = 1; 9415 } 9416 9417 /* Walks through all basic blocks of the current function looking 9418 for clobbered hard regs using s390_reg_clobbered_rtx. The fields 9419 of the passed char array REGS_EVER_CLOBBERED are set to one for 9420 each of those regs. */ 9421 9422 static void 9423 s390_regs_ever_clobbered (char regs_ever_clobbered[]) 9424 { 9425 basic_block cur_bb; 9426 rtx_insn *cur_insn; 9427 unsigned int i; 9428 9429 memset (regs_ever_clobbered, 0, 32); 9430 9431 /* For non-leaf functions we have to consider all call clobbered regs to be 9432 clobbered. */ 9433 if (!crtl->is_leaf) 9434 { 9435 for (i = 0; i < 32; i++) 9436 regs_ever_clobbered[i] = call_used_regs[i]; 9437 } 9438 9439 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live 9440 this work is done by liveness analysis (mark_regs_live_at_end). 9441 Special care is needed for functions containing landing pads. Landing pads 9442 may use the eh registers, but the code which sets these registers is not 9443 contained in that function. Hence s390_regs_ever_clobbered is not able to 9444 deal with this automatically. */ 9445 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p) 9446 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++) 9447 if (crtl->calls_eh_return 9448 || (cfun->machine->has_landing_pad_p 9449 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i)))) 9450 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1; 9451 9452 /* For nonlocal gotos all call-saved registers have to be saved. 9453 This flag is also set for the unwinding code in libgcc. 9454 See expand_builtin_unwind_init. For regs_ever_live this is done by 9455 reload. */ 9456 if (crtl->saves_all_registers) 9457 for (i = 0; i < 32; i++) 9458 if (!call_used_regs[i]) 9459 regs_ever_clobbered[i] = 1; 9460 9461 FOR_EACH_BB_FN (cur_bb, cfun) 9462 { 9463 FOR_BB_INSNS (cur_bb, cur_insn) 9464 { 9465 rtx pat; 9466 9467 if (!INSN_P (cur_insn)) 9468 continue; 9469 9470 pat = PATTERN (cur_insn); 9471 9472 /* Ignore GPR restore insns. */ 9473 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn)) 9474 { 9475 if (GET_CODE (pat) == SET 9476 && GENERAL_REG_P (SET_DEST (pat))) 9477 { 9478 /* lgdr */ 9479 if (GET_MODE (SET_SRC (pat)) == DImode 9480 && FP_REG_P (SET_SRC (pat))) 9481 continue; 9482 9483 /* l / lg */ 9484 if (GET_CODE (SET_SRC (pat)) == MEM) 9485 continue; 9486 } 9487 9488 /* lm / lmg */ 9489 if (GET_CODE (pat) == PARALLEL 9490 && load_multiple_operation (pat, VOIDmode)) 9491 continue; 9492 } 9493 9494 note_stores (cur_insn, 9495 s390_reg_clobbered_rtx, 9496 regs_ever_clobbered); 9497 } 9498 } 9499 } 9500 9501 /* Determine the frame area which actually has to be accessed 9502 in the function epilogue. The values are stored at the 9503 given pointers AREA_BOTTOM (address of the lowest used stack 9504 address) and AREA_TOP (address of the first item which does 9505 not belong to the stack frame).
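   If nothing needs to be restored, *AREA_BOTTOM stays at INT_MAX and
   *AREA_TOP at INT_MIN, which callers can recognize as an empty area.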
*/ 9506 9507static void 9508s390_frame_area (int *area_bottom, int *area_top) 9509{ 9510 int b, t; 9511 9512 b = INT_MAX; 9513 t = INT_MIN; 9514 9515 if (cfun_frame_layout.first_restore_gpr != -1) 9516 { 9517 b = (cfun_frame_layout.gprs_offset 9518 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG); 9519 t = b + (cfun_frame_layout.last_restore_gpr 9520 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG; 9521 } 9522 9523 if (TARGET_64BIT && cfun_save_high_fprs_p) 9524 { 9525 b = MIN (b, cfun_frame_layout.f8_offset); 9526 t = MAX (t, (cfun_frame_layout.f8_offset 9527 + cfun_frame_layout.high_fprs * 8)); 9528 } 9529 9530 if (!TARGET_64BIT) 9531 { 9532 if (cfun_fpr_save_p (FPR4_REGNUM)) 9533 { 9534 b = MIN (b, cfun_frame_layout.f4_offset); 9535 t = MAX (t, cfun_frame_layout.f4_offset + 8); 9536 } 9537 if (cfun_fpr_save_p (FPR6_REGNUM)) 9538 { 9539 b = MIN (b, cfun_frame_layout.f4_offset + 8); 9540 t = MAX (t, cfun_frame_layout.f4_offset + 16); 9541 } 9542 } 9543 *area_bottom = b; 9544 *area_top = t; 9545} 9546/* Update gpr_save_slots in the frame layout trying to make use of 9547 FPRs as GPR save slots. 9548 This is a helper routine of s390_register_info. */ 9549 9550static void 9551s390_register_info_gprtofpr () 9552{ 9553 int save_reg_slot = FPR0_REGNUM; 9554 int i, j; 9555 9556 if (TARGET_TPF || !TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf) 9557 return; 9558 9559 /* builtin_eh_return needs to be able to modify the return address 9560 on the stack. It could also adjust the FPR save slot instead but 9561 is it worth the trouble?! */ 9562 if (crtl->calls_eh_return) 9563 return; 9564 9565 for (i = 15; i >= 6; i--) 9566 { 9567 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE) 9568 continue; 9569 9570 /* Advance to the next FP register which can be used as a 9571 GPR save slot. */ 9572 while ((!call_used_regs[save_reg_slot] 9573 || df_regs_ever_live_p (save_reg_slot) 9574 || cfun_fpr_save_p (save_reg_slot)) 9575 && FP_REGNO_P (save_reg_slot)) 9576 save_reg_slot++; 9577 if (!FP_REGNO_P (save_reg_slot)) 9578 { 9579 /* We only want to use ldgr/lgdr if we can get rid of 9580 stm/lm entirely. So undo the gpr slot allocation in 9581 case we ran out of FPR save slots. */ 9582 for (j = 6; j <= 15; j++) 9583 if (FP_REGNO_P (cfun_gpr_save_slot (j))) 9584 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK; 9585 break; 9586 } 9587 cfun_gpr_save_slot (i) = save_reg_slot++; 9588 } 9589} 9590 9591/* Set the bits in fpr_bitmap for FPRs which need to be saved due to 9592 stdarg. 9593 This is a helper routine for s390_register_info. */ 9594 9595static void 9596s390_register_info_stdarg_fpr () 9597{ 9598 int i; 9599 int min_fpr; 9600 int max_fpr; 9601 9602 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and 9603 f0-f4 for 64 bit. */ 9604 if (!cfun->stdarg 9605 || !TARGET_HARD_FLOAT 9606 || !cfun->va_list_fpr_size 9607 || crtl->args.info.fprs >= FP_ARG_NUM_REG) 9608 return; 9609 9610 min_fpr = crtl->args.info.fprs; 9611 max_fpr = min_fpr + cfun->va_list_fpr_size - 1; 9612 if (max_fpr >= FP_ARG_NUM_REG) 9613 max_fpr = FP_ARG_NUM_REG - 1; 9614 9615 /* FPR argument regs start at f0. */ 9616 min_fpr += FPR0_REGNUM; 9617 max_fpr += FPR0_REGNUM; 9618 9619 for (i = min_fpr; i <= max_fpr; i++) 9620 cfun_set_fpr_save (i); 9621} 9622 9623/* Reserve the GPR save slots for GPRs which need to be saved due to 9624 stdarg. 9625 This is a helper routine for s390_register_info. 
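   For example (hypothetical), in a stdarg function whose named
   arguments consume r2 and r3, this reserves stack save slots for r4
   through r6 so va_arg can find the remaining argument registers.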

/* Reserve the GPR save slots for GPRs which need to be saved due to
   stdarg.
   This is a helper routine for s390_register_info.  */

static void
s390_register_info_stdarg_gpr ()
{
  int i;
  int min_gpr;
  int max_gpr;

  if (!cfun->stdarg
      || !cfun->va_list_gpr_size
      || crtl->args.info.gprs >= GP_ARG_NUM_REG)
    return;

  min_gpr = crtl->args.info.gprs;
  max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
  if (max_gpr >= GP_ARG_NUM_REG)
    max_gpr = GP_ARG_NUM_REG - 1;

  /* GPR argument regs start at r2.  */
  min_gpr += GPR2_REGNUM;
  max_gpr += GPR2_REGNUM;

  /* If r6 was supposed to be saved into an FPR and now needs to go to
     the stack for vararg we have to adjust the restore range to make
     sure that the restore is done from stack as well.  */
  if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
      && min_gpr <= GPR6_REGNUM
      && max_gpr >= GPR6_REGNUM)
    {
      if (cfun_frame_layout.first_restore_gpr == -1
          || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
        cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
      if (cfun_frame_layout.last_restore_gpr == -1
          || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
        cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
    }

  if (cfun_frame_layout.first_save_gpr == -1
      || cfun_frame_layout.first_save_gpr > min_gpr)
    cfun_frame_layout.first_save_gpr = min_gpr;

  if (cfun_frame_layout.last_save_gpr == -1
      || cfun_frame_layout.last_save_gpr < max_gpr)
    cfun_frame_layout.last_save_gpr = max_gpr;

  for (i = min_gpr; i <= max_gpr; i++)
    cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
}

/* Calculate the save and restore ranges for stm(g) and lm(g) in the
   prologue and epilogue.  */

static void
s390_register_info_set_ranges ()
{
  int i, j;

  /* Find the first and the last save slot supposed to use the stack
     to set the restore range.
     Vararg regs might be marked as save to stack but only the
     call-saved regs really need restoring (i.e. r6).  This code
     assumes that the vararg regs have not yet been recorded in
     cfun_gpr_save_slot.  */
  for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
  for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
  cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
  cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
  cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
  cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
}
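
/* Illustrative example (added for clarity, not part of the original
   sources): if only r11 through r15 have SAVE_SLOT_STACK set, the two
   scans above leave i == 11 and j == 15, so a single stm(g)/lm(g)
   pair covers the whole range; if no slot uses the stack at all, i
   reaches 16 and all four range fields are set to -1.  */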

/* The GPR and FPR save slots in cfun->machine->frame_layout are set
   for registers which need to be saved in function prologue.
   This function can be used until the insns emitted for save/restore
   of the regs are visible in the RTL stream.  */

static void
s390_register_info ()
{
  int i;
  char clobbered_regs[32];

  gcc_assert (!epilogue_completed);

  if (reload_completed)
    /* After reload we rely on our own routine to determine which
       registers need saving.  */
    s390_regs_ever_clobbered (clobbered_regs);
  else
    /* During reload we use regs_ever_live as a base since reload
       does changes in there which we otherwise would not be aware
       of.  */
    for (i = 0; i < 32; i++)
      clobbered_regs[i] = df_regs_ever_live_p (i);

  for (i = 0; i < 32; i++)
    clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];

  /* Mark the call-saved FPRs which need to be saved.
     This needs to be done before checking the special GPRs since the
     stack pointer usage depends on whether high FPRs have to be saved
     or not.  */
  cfun_frame_layout.fpr_bitmap = 0;
  cfun_frame_layout.high_fprs = 0;
  for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
    if (clobbered_regs[i] && !call_used_regs[i])
      {
        cfun_set_fpr_save (i);
        if (i >= FPR8_REGNUM)
          cfun_frame_layout.high_fprs++;
      }

  /* Register 12 is used for GOT address, but also as temp in prologue
     for split-stack stdarg functions (unless r14 is available).  */
  clobbered_regs[12]
    |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
        || (flag_split_stack && cfun->stdarg
            && (crtl->is_leaf || TARGET_TPF_PROFILING
                || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));

  clobbered_regs[BASE_REGNUM]
    |= (cfun->machine->base_reg
        && REGNO (cfun->machine->base_reg) == BASE_REGNUM);

  clobbered_regs[HARD_FRAME_POINTER_REGNUM]
    |= !!frame_pointer_needed;

  /* On pre z900 machines this might take until machine dependent
     reorg to decide.
     save_return_addr_p will only be set on non-zarch machines so
     there is no risk that r14 goes into an FPR instead of a stack
     slot.  */
  clobbered_regs[RETURN_REGNUM]
    |= (!crtl->is_leaf
        || TARGET_TPF_PROFILING
        || cfun_frame_layout.save_return_addr_p
        || crtl->calls_eh_return);

  clobbered_regs[STACK_POINTER_REGNUM]
    |= (!crtl->is_leaf
        || TARGET_TPF_PROFILING
        || cfun_save_high_fprs_p
        || get_frame_size () > 0
        || (reload_completed && cfun_frame_layout.frame_size > 0)
        || cfun->calls_alloca);

  memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);

  for (i = 6; i < 16; i++)
    if (clobbered_regs[i])
      cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;

  s390_register_info_stdarg_fpr ();
  s390_register_info_gprtofpr ();
  s390_register_info_set_ranges ();
  /* stdarg functions might need to save GPRs 2 to 6.  This might
     override the GPR->FPR save decision made by
     s390_register_info_gprtofpr for r6 since vararg regs must go to
     the stack.  */
  s390_register_info_stdarg_gpr ();
}

/* Return true if REGNO is a global register, but not one
   of the special ones that need to be saved/restored anyway.  */

static inline bool
global_not_special_regno_p (int regno)
{
  return (global_regs[regno]
          /* These registers are special and need to be
             restored in any case.  */
          && !(regno == STACK_POINTER_REGNUM
               || regno == RETURN_REGNUM
               || regno == BASE_REGNUM
               || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
}

/* This function is called by s390_optimize_prologue in order to get
   rid of unnecessary GPR save/restore instructions.  The register info
   for the GPRs is re-computed and the ranges are re-calculated.  */

static void
s390_optimize_register_info ()
{
  char clobbered_regs[32];
  int i;

  gcc_assert (epilogue_completed);

  s390_regs_ever_clobbered (clobbered_regs);

  /* Global registers do not need to be saved and restored unless it
     is one of our special regs (r12, r13, r14, or r15).  */
  for (i = 0; i < 32; i++)
    clobbered_regs[i] = clobbered_regs[i] && !global_not_special_regno_p (i);

  /* There is still special treatment needed for cases invisible to
     s390_regs_ever_clobbered.  */
  clobbered_regs[RETURN_REGNUM]
    |= (TARGET_TPF_PROFILING
        /* When expanding builtin_return_addr in ESA mode we do not
           know whether r14 will later be needed as scratch reg when
           doing branch splitting.  So the builtin always accesses the
           r14 save slot and we need to stick to the save/restore
           decision for r14 even if it turns out that it didn't get
           clobbered.  */
        || cfun_frame_layout.save_return_addr_p
        || crtl->calls_eh_return);

  memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);

  for (i = 6; i < 16; i++)
    if (!clobbered_regs[i])
      cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;

  s390_register_info_set_ranges ();
  s390_register_info_stdarg_gpr ();
}

/* Fill cfun->machine with info about frame of current function.  */

static void
s390_frame_info (void)
{
  HOST_WIDE_INT lowest_offset;

  cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
  cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;

  /* The va_arg builtin uses a constant distance of 16 *
     UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
     pointer.  So even if we are going to save the stack pointer in an
     FPR we need the stack space in order to keep the offsets
     correct.  */
  if (cfun->stdarg && cfun_save_arg_fprs_p)
    {
      cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;

      if (cfun_frame_layout.first_save_gpr_slot == -1)
        cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
    }

  cfun_frame_layout.frame_size = get_frame_size ();
  if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
    fatal_error (input_location,
                 "total size of local variables exceeds architecture limit");

  if (!TARGET_PACKED_STACK)
    {
      /* Fixed stack layout.  */
      cfun_frame_layout.backchain_offset = 0;
      cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
      cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
      cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
      cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
                                       * UNITS_PER_LONG);
    }
  else if (TARGET_BACKCHAIN)
    {
      /* Kernel stack layout - packed stack, backchain, no float  */
      gcc_assert (TARGET_SOFT_FLOAT);
      cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
                                            - UNITS_PER_LONG);

      /* The distance between the backchain and the return address
         save slot must not change.  So we always need a slot for the
         stack pointer which resides in between.  */
      cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;

      cfun_frame_layout.gprs_offset
        = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;

      /* FPRs will not be saved.  Nevertheless pick sane values to
         keep area calculations valid.  */
      cfun_frame_layout.f0_offset =
        cfun_frame_layout.f4_offset =
        cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
    }
  else
    {
      int num_fprs;

      /* Packed stack layout without backchain.  */

      /* With stdarg FPRs need their dedicated slots.  */
      num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
                  : (cfun_fpr_save_p (FPR4_REGNUM) +
                     cfun_fpr_save_p (FPR6_REGNUM)));
      cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;

      num_fprs = (cfun->stdarg ? 2
                  : (cfun_fpr_save_p (FPR0_REGNUM)
                     + cfun_fpr_save_p (FPR2_REGNUM)));
      cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;

      cfun_frame_layout.gprs_offset
        = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;

      cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
                                     - cfun_frame_layout.high_fprs * 8);
    }

  if (cfun_save_high_fprs_p)
    cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;

  if (!crtl->is_leaf)
    cfun_frame_layout.frame_size += crtl->outgoing_args_size;

  /* In the following cases we have to allocate a STACK_POINTER_OFFSET
     sized area at the bottom of the stack.  This is required also for
     leaf functions.  When GCC generates a local stack reference it
     will always add STACK_POINTER_OFFSET to all these references.  */
  if (crtl->is_leaf
      && !TARGET_TPF_PROFILING
      && cfun_frame_layout.frame_size == 0
      && !cfun->calls_alloca)
    return;

  /* Calculate the number of bytes we have used in our own register
     save area.  With the packed stack layout we can re-use the
     remaining bytes for normal stack elements.  */

  if (TARGET_PACKED_STACK)
    lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
                              cfun_frame_layout.f4_offset),
                         cfun_frame_layout.gprs_offset);
  else
    lowest_offset = 0;

  if (TARGET_BACKCHAIN)
    lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);

  cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;

  /* If under 31 bit an odd number of gprs has to be saved we have to
     adjust the frame size to sustain 8 byte alignment of stack
     frames.  */
  cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
                                   STACK_BOUNDARY / BITS_PER_UNIT - 1)
                                  & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
}
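
/* Worked example (added for clarity, not part of the original
   sources): with an 8-byte STACK_BOUNDARY the rounding above computes
   (frame_size + 7) & ~7, so a raw frame size of 92 bytes becomes 96,
   keeping stack frames 8-byte aligned when e.g. an odd number of
   31-bit GPR slots was allocated.  */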

/* Generate frame layout.  Fills in register and frame data for the current
   function in cfun->machine.  This routine can be called multiple times;
   it will re-do the complete frame layout every time.  */

static void
s390_init_frame_layout (void)
{
  HOST_WIDE_INT frame_size;
  int base_used;

  /* After LRA the frame layout is supposed to be read-only and should
     not be re-computed.  */
  if (reload_completed)
    return;

  do
    {
      frame_size = cfun_frame_layout.frame_size;

      /* Try to predict whether we'll need the base register.  */
      base_used = crtl->uses_const_pool
                  || (!DISP_IN_RANGE (frame_size)
                      && !CONST_OK_FOR_K (frame_size));

      /* Decide which register to use as literal pool base.  In small
         leaf functions, try to use an unused call-clobbered register
         as base register to avoid save/restore overhead.  */
      if (!base_used)
        cfun->machine->base_reg = NULL_RTX;
      else
        {
          int br = 0;

          if (crtl->is_leaf)
            /* Prefer r5 (most likely to be free).  */
            for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
              ;
          cfun->machine->base_reg =
            gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
        }

      s390_register_info ();
      s390_frame_info ();
    }
  while (frame_size != cfun_frame_layout.frame_size);
}
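
/* Note (added for clarity, not part of the original sources): the
   do/while loop above iterates to a fixed point because committing to
   a base register changes the register info (the base register itself
   may need a save slot), which can change frame_size; the next round
   then re-checks whether the new size still fits the displacement and
   immediate ranges.  */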

/* Remove the FPR clobbers from a tbegin insn if it can be proven that
   the TX is nonescaping.  A transaction is considered escaping if
   there is at least one path from tbegin returning CC0 to the
   function exit block without a tend.

   The check so far has some limitations:
   - only single tbegin/tend BBs are supported
   - the first cond jump after tbegin must separate the CC0 path from ~CC0
   - when CC is copied to a GPR and the CC0 check is done with the GPR
     this is not supported  */

static void
s390_optimize_nonescaping_tx (void)
{
  const unsigned int CC0 = 1 << 3;
  basic_block tbegin_bb = NULL;
  basic_block tend_bb = NULL;
  basic_block bb;
  rtx_insn *insn;
  bool result = true;
  int bb_index;
  rtx_insn *tbegin_insn = NULL;

  if (!cfun->machine->tbegin_p)
    return;

  for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);

      if (!bb)
        continue;

      FOR_BB_INSNS (bb, insn)
        {
          rtx ite, cc, pat, target;
          unsigned HOST_WIDE_INT mask;

          if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
            continue;

          pat = PATTERN (insn);

          if (GET_CODE (pat) == PARALLEL)
            pat = XVECEXP (pat, 0, 0);

          if (GET_CODE (pat) != SET
              || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
            continue;

          if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
            {
              rtx_insn *tmp;

              tbegin_insn = insn;

              /* Just return if the tbegin doesn't have clobbers.  */
              if (GET_CODE (PATTERN (insn)) != PARALLEL)
                return;

              if (tbegin_bb != NULL)
                return;

              /* Find the next conditional jump.  */
              for (tmp = NEXT_INSN (insn);
                   tmp != NULL_RTX;
                   tmp = NEXT_INSN (tmp))
                {
                  if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
                    return;
                  if (!JUMP_P (tmp))
                    continue;

                  ite = SET_SRC (PATTERN (tmp));
                  if (GET_CODE (ite) != IF_THEN_ELSE)
                    continue;

                  cc = XEXP (XEXP (ite, 0), 0);
                  if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
                      || GET_MODE (cc) != CCRAWmode
                      || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
                    return;

                  if (bb->succs->length () != 2)
                    return;

                  mask = INTVAL (XEXP (XEXP (ite, 0), 1));
                  if (GET_CODE (XEXP (ite, 0)) == NE)
                    mask ^= 0xf;

                  if (mask == CC0)
                    target = XEXP (ite, 1);
                  else if (mask == (CC0 ^ 0xf))
                    target = XEXP (ite, 2);
                  else
                    return;

                  {
                    edge_iterator ei;
                    edge e1, e2;

                    ei = ei_start (bb->succs);
                    e1 = ei_safe_edge (ei);
                    ei_next (&ei);
                    e2 = ei_safe_edge (ei);

                    if (e2->flags & EDGE_FALLTHRU)
                      {
                        e2 = e1;
                        e1 = ei_safe_edge (ei);
                      }

                    if (!(e1->flags & EDGE_FALLTHRU))
                      return;

                    tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
                  }
                  if (tmp == BB_END (bb))
                    break;
                }
            }

          if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
            {
              if (tend_bb != NULL)
                return;
              tend_bb = bb;
            }
        }
    }

  /* Either we successfully remove the FPR clobbers here or we are not
     able to do anything for this TX.  Both cases don't qualify for
     another look.  */
  cfun->machine->tbegin_p = false;

  if (tbegin_bb == NULL || tend_bb == NULL)
    return;

  calculate_dominance_info (CDI_POST_DOMINATORS);
  result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
  free_dominance_info (CDI_POST_DOMINATORS);

  if (!result)
    return;

  PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
                            gen_rtvec (2,
                                       XVECEXP (PATTERN (tbegin_insn), 0, 0),
                                       XVECEXP (PATTERN (tbegin_insn), 0, 1)));
  INSN_CODE (tbegin_insn) = -1;
  df_insn_rescan (tbegin_insn);

  return;
}
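
/* Illustrative note (added for clarity, not part of the original
   sources): the mask arithmetic above works on the 4-bit condition
   code mask of the conditional branch.  CC0 is encoded as 1 << 3 == 8;
   for an NE comparison the mask is inverted via mask ^= 0xf, so a
   branch on mask 8 selects the CC0 (transaction-started) path while
   mask 7 (== 8 ^ 0xf) selects the ~CC0 path.  */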

/* Implement TARGET_HARD_REGNO_NREGS.  Because all registers in a class
   have the same size, this is equivalent to CLASS_MAX_NREGS.  */

static unsigned int
s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
}

/* Implement TARGET_HARD_REGNO_MODE_OK.

   Integer modes <= word size fit into any GPR.
   Integer modes > word size fit into successive GPRs, starting with
   an even-numbered register.
   SImode and DImode fit into FPRs as well.

   Floating point modes <= word size fit into any FPR or GPR.
   Floating point modes > word size (i.e. DFmode on 32-bit) fit
   into any FPR, or an even-odd GPR pair.
   TFmode fits only into an even-odd FPR pair.

   Complex floating point modes fit either into two FPRs, or into
   successive GPRs (again starting with an even number).
   TCmode fits only into two successive even-odd FPR pairs.

   Condition code modes fit only into the CC register.  */

static bool
s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
    return false;

  switch (REGNO_REG_CLASS (regno))
    {
    case VEC_REGS:
      return ((GET_MODE_CLASS (mode) == MODE_INT
               && s390_class_max_nregs (VEC_REGS, mode) == 1)
              || mode == DFmode
              || (TARGET_VXE && mode == SFmode)
              || s390_vector_mode_supported_p (mode));
      break;
    case FP_REGS:
      if (TARGET_VX
          && ((GET_MODE_CLASS (mode) == MODE_INT
               && s390_class_max_nregs (FP_REGS, mode) == 1)
              || mode == DFmode
              || s390_vector_mode_supported_p (mode)))
        return true;

      if (REGNO_PAIR_OK (regno, mode))
        {
          if (mode == SImode || mode == DImode)
            return true;

          if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
            return true;
        }
      break;
    case ADDR_REGS:
      if (FRAME_REGNO_P (regno) && mode == Pmode)
        return true;

      /* fallthrough */
    case GENERAL_REGS:
      if (REGNO_PAIR_OK (regno, mode))
        {
          if (TARGET_ZARCH
              || (mode != TFmode && mode != TCmode && mode != TDmode))
            return true;
        }
      break;
    case CC_REGS:
      if (GET_MODE_CLASS (mode) == MODE_CC)
        return true;
      break;
    case ACCESS_REGS:
      if (REGNO_PAIR_OK (regno, mode))
        {
          if (mode == SImode || mode == Pmode)
            return true;
        }
      break;
    default:
      return false;
    }

  return false;
}
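
/* Illustrative example (added for clarity, not part of the original
   sources): on a 31-bit target a DImode value is wider than a word,
   so per the rules above it needs successive GPRs starting at an even
   register; REGNO_PAIR_OK accepts r6 (pair r6/r7) but rejects r7 as a
   starting register.  */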

/* Implement TARGET_MODES_TIEABLE_P.  */

static bool
s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  return ((mode1 == SFmode || mode1 == DFmode)
          == (mode2 == SFmode || mode2 == DFmode));
}

/* Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */

bool
s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
{
  /* Once we've decided upon a register to use as base register, it must
     no longer be used for any other purpose.  */
  if (cfun->machine->base_reg)
    if (REGNO (cfun->machine->base_reg) == old_reg
        || REGNO (cfun->machine->base_reg) == new_reg)
      return false;

  /* Prevent regrename from using call-saved regs which haven't
     actually been saved.  This is necessary since regrename assumes
     the backend save/restore decisions are based on
     df_regs_ever_live.  Since we have our own routine we have to tell
     regrename manually about it.  */
  if (GENERAL_REGNO_P (new_reg)
      && !call_used_regs[new_reg]
      && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
    return false;

  return true;
}

/* Return nonzero if register REGNO can be used as a scratch register
   in peephole2.  */

static bool
s390_hard_regno_scratch_ok (unsigned int regno)
{
  /* See s390_hard_regno_rename_ok.  */
  if (GENERAL_REGNO_P (regno)
      && !call_used_regs[regno]
      && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
    return false;

  return true;
}

/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  When generating
   code that runs in z/Architecture mode, but conforms to the 31-bit
   ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
   bytes are saved across calls, however.  */

static bool
s390_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
                                     machine_mode mode)
{
  if (!TARGET_64BIT
      && TARGET_ZARCH
      && GET_MODE_SIZE (mode) > 4
      && ((regno >= 6 && regno <= 15) || regno == 32))
    return true;

  if (TARGET_VX
      && GET_MODE_SIZE (mode) > 8
      && (((TARGET_64BIT && regno >= 24 && regno <= 31))
          || (!TARGET_64BIT && (regno == 18 || regno == 19))))
    return true;

  return false;
}

/* Maximum number of registers to represent a value of mode MODE
   in a register of class RCLASS.  */

int
s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
{
  int reg_size;
  bool reg_pair_required_p = false;

  switch (rclass)
    {
    case FP_REGS:
    case VEC_REGS:
      reg_size = TARGET_VX ? 16 : 8;

      /* TF and TD modes would fit into a VR but we put them into a
         register pair since we do not have 128bit FP instructions on
         full VRs.  */
      if (TARGET_VX
          && SCALAR_FLOAT_MODE_P (mode)
          && GET_MODE_SIZE (mode) >= 16)
        reg_pair_required_p = true;

      /* Even if complex types would fit into a single FPR/VR we force
         them into a register pair to deal with the parts more easily.
         (FIXME: What about complex ints?)  */
      if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
        reg_pair_required_p = true;
      break;
    case ACCESS_REGS:
      reg_size = 4;
      break;
    default:
      reg_size = UNITS_PER_WORD;
      break;
    }

  if (reg_pair_required_p)
    return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);

  return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
}
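
/* Worked example (added for clarity, not part of the original
   sources): for TFmode (16 bytes) in FP_REGS with TARGET_VX, reg_size
   is 16 and reg_pair_required_p is set, so the result is
   2 * ((16/2 + 15) / 16) == 2, i.e. a register pair.  Without
   TARGET_VX, reg_size is 8 and the plain formula likewise yields
   (16 + 7) / 8 == 2.  */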

/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

static bool
s390_can_change_mode_class (machine_mode from_mode,
                            machine_mode to_mode,
                            reg_class_t rclass)
{
  machine_mode small_mode;
  machine_mode big_mode;

  /* V1TF and TF have different representations in vector
     registers.  */
  if (reg_classes_intersect_p (VEC_REGS, rclass)
      && ((from_mode == V1TFmode && to_mode == TFmode)
          || (from_mode == TFmode && to_mode == V1TFmode)))
    return false;

  if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
    return true;

  if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
    {
      small_mode = from_mode;
      big_mode = to_mode;
    }
  else
    {
      small_mode = to_mode;
      big_mode = from_mode;
    }

  /* Values residing in VRs are little-endian style.  All modes are
     placed left-aligned in a VR.  This means that we cannot allow
     switching between modes with differing sizes.  Also if the vector
     facility is available we still place TFmode values in VR register
     pairs, since the only instructions we have operating on TFmodes
     only deal with register pairs.  Therefore we have to allow DFmode
     subregs of TFmodes to enable the TFmode splitters.  */
  if (reg_classes_intersect_p (VEC_REGS, rclass)
      && (GET_MODE_SIZE (small_mode) < 8
          || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
    return false;

  /* Likewise for access registers, since they have only half the
     word size on 64-bit.  */
  if (reg_classes_intersect_p (ACCESS_REGS, rclass))
    return false;

  return true;
}

/* Return true if we use LRA instead of reload pass.  */
static bool
s390_lra_p (void)
{
  return s390_lra_flag;
}

/* Return true if register FROM can be eliminated via register TO.  */

static bool
s390_can_eliminate (const int from, const int to)
{
  /* We have not marked the base register as fixed.
     Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
     If a function requires the base register, we say here that this
     elimination cannot be performed.  This will cause reload to free
     up the base register (as if it were fixed).  On the other hand,
     if the current function does *not* require the base register, we
     say here the elimination succeeds, which in turn allows reload
     to allocate the base register for any other purpose.  */
  if (from == BASE_REGNUM && to == BASE_REGNUM)
    {
      s390_init_frame_layout ();
      return cfun->machine->base_reg == NULL_RTX;
    }

  /* Everything else must point into the stack frame.  */
  gcc_assert (to == STACK_POINTER_REGNUM
              || to == HARD_FRAME_POINTER_REGNUM);

  gcc_assert (from == FRAME_POINTER_REGNUM
              || from == ARG_POINTER_REGNUM
              || from == RETURN_ADDRESS_POINTER_REGNUM);

  /* Make sure we actually saved the return address.  */
  if (from == RETURN_ADDRESS_POINTER_REGNUM)
    if (!crtl->calls_eh_return
        && !cfun->stdarg
        && !cfun_frame_layout.save_return_addr_p)
      return false;

  return true;
}

/* Return offset between register FROM and TO initially after prolog.  */

HOST_WIDE_INT
s390_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  /* ??? Why are we called for non-eliminable pairs?  */
  if (!s390_can_eliminate (from, to))
    return 0;

  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      offset = (get_frame_size ()
                + STACK_POINTER_OFFSET
                + crtl->outgoing_args_size);
      break;

    case ARG_POINTER_REGNUM:
      s390_init_frame_layout ();
      offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
      break;

    case RETURN_ADDRESS_POINTER_REGNUM:
      s390_init_frame_layout ();

      if (cfun_frame_layout.first_save_gpr_slot == -1)
        {
          /* If it turns out that for stdarg nothing went into the reg
             save area we also do not need the return address
             pointer.  */
          if (cfun->stdarg && !cfun_save_arg_fprs_p)
            return 0;

          gcc_unreachable ();
        }

      /* In order to make the following work it is not necessary for
         r14 to have a save slot.  It is sufficient if one other GPR
         got one.  Since the GPRs are always stored without gaps we
         are able to calculate where the r14 save slot would
         reside.  */
      offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
                (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
                UNITS_PER_LONG);
      break;

    case BASE_REGNUM:
      offset = 0;
      break;

    default:
      gcc_unreachable ();
    }

  return offset;
}

/* Emit insn to save fpr REGNUM at offset OFFSET relative
   to register BASE.  Return generated insn.  */

static rtx
save_fpr (rtx base, int offset, int regnum)
{
  rtx addr;
  addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));

  if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
    set_mem_alias_set (addr, get_varargs_alias_set ());
  else
    set_mem_alias_set (addr, get_frame_alias_set ());

  return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
}

/* Emit insn to restore fpr REGNUM from offset OFFSET relative
   to register BASE.  Return generated insn.  */

static rtx
restore_fpr (rtx base, int offset, int regnum)
{
  rtx addr;
  addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
  set_mem_alias_set (addr, get_frame_alias_set ());

  return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
}
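
/* Worked example (added for clarity, not part of the original
   sources): in s390_initial_elimination_offset above, with
   UNITS_PER_LONG == 8, first_save_gpr_slot == 6 and RETURN_REGNUM ==
   14, the r14 save slot lies at
   frame_size + gprs_offset + (14 - 6) * 8, even when r14 itself was
   not stored, because the GPR slots are contiguous.  */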

/* Generate insn to save registers FIRST to LAST into
   the register save area located at offset OFFSET
   relative to register BASE.  */

static rtx
save_gprs (rtx base, int offset, int first, int last)
{
  rtx addr, insn, note;
  int i;

  addr = plus_constant (Pmode, base, offset);
  addr = gen_rtx_MEM (Pmode, addr);

  set_mem_alias_set (addr, get_frame_alias_set ());

  /* Special-case single register.  */
  if (first == last)
    {
      if (TARGET_64BIT)
        insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
      else
        insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));

      if (!global_not_special_regno_p (first))
        RTX_FRAME_RELATED_P (insn) = 1;
      return insn;
    }

  insn = gen_store_multiple (addr,
                             gen_rtx_REG (Pmode, first),
                             GEN_INT (last - first + 1));

  if (first <= 6 && cfun->stdarg)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
        rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);

        if (first + i <= 6)
          set_mem_alias_set (mem, get_varargs_alias_set ());
      }

  /* We need to set the FRAME_RELATED flag on all SETs
     inside the store-multiple pattern.

     However, we must not emit DWARF records for registers 2..5
     if they are stored for use by variable arguments ...

     ??? Unfortunately, it is not enough to simply not set the
     FRAME_RELATED flags for those SETs, because the first SET
     of the PARALLEL is always treated as if it had the flag
     set, even if it does not.  Therefore we emit a new pattern
     without those registers as REG_FRAME_RELATED_EXPR note.  */

  if (first >= 6 && !global_not_special_regno_p (first))
    {
      rtx pat = PATTERN (insn);

      for (i = 0; i < XVECLEN (pat, 0); i++)
        if (GET_CODE (XVECEXP (pat, 0, i)) == SET
            && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
                                                                     0, i)))))
          RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;

      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (last >= 6)
    {
      int start;

      for (start = first >= 6 ? first : 6; start <= last; start++)
        if (!global_not_special_regno_p (start))
          break;

      if (start > last)
        return insn;

      addr = plus_constant (Pmode, base,
                            offset + (start - first) * UNITS_PER_LONG);

      if (start == last)
        {
          if (TARGET_64BIT)
            note = gen_movdi (gen_rtx_MEM (Pmode, addr),
                              gen_rtx_REG (Pmode, start));
          else
            note = gen_movsi (gen_rtx_MEM (Pmode, addr),
                              gen_rtx_REG (Pmode, start));
          note = PATTERN (note);

          add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
          RTX_FRAME_RELATED_P (insn) = 1;

          return insn;
        }

      note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
                                 gen_rtx_REG (Pmode, start),
                                 GEN_INT (last - start + 1));
      note = PATTERN (note);

      add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);

      for (i = 0; i < XVECLEN (note, 0); i++)
        if (GET_CODE (XVECEXP (note, 0, i)) == SET
            && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
                                                                     0, i)))))
          RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;

      RTX_FRAME_RELATED_P (insn) = 1;
    }

  return insn;
}
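
/* Illustrative example (added for clarity, not part of the original
   sources): a typical 64-bit prologue call
   save_gprs (stack_pointer_rtx, gprs_offset, 6, 15) results in a
   single stmg %r6,%r15,<disp>(%r15), with the frame-related DWARF
   notes covering the SETs for all non-global registers of the
   PARALLEL.  */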

/* Generate insn to restore registers FIRST to LAST from
   the register save area located at offset OFFSET
   relative to register BASE.  */

static rtx
restore_gprs (rtx base, int offset, int first, int last)
{
  rtx addr, insn;

  addr = plus_constant (Pmode, base, offset);
  addr = gen_rtx_MEM (Pmode, addr);
  set_mem_alias_set (addr, get_frame_alias_set ());

  /* Special-case single register.  */
  if (first == last)
    {
      if (TARGET_64BIT)
        insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
      else
        insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);

      RTX_FRAME_RELATED_P (insn) = 1;
      return insn;
    }

  insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
                            addr,
                            GEN_INT (last - first + 1));
  RTX_FRAME_RELATED_P (insn) = 1;
  return insn;
}

/* Return insn sequence to load the GOT register.  */

rtx_insn *
s390_load_got (void)
{
  rtx_insn *insns;

  /* We cannot use pic_offset_table_rtx here since we use this
     function also for non-pic if __tls_get_offset is called and in
     that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
     aren't usable.  */
  rtx got_rtx = gen_rtx_REG (Pmode, 12);

  start_sequence ();

  emit_move_insn (got_rtx, s390_got_symbol ());

  insns = get_insns ();
  end_sequence ();
  return insns;
}

/* This ties together stack memory (MEM with an alias set of frame_alias_set)
   and the change to the stack pointer.  */

static void
s390_emit_stack_tie (void)
{
  rtx mem = gen_frame_mem (BLKmode,
                           gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));

  emit_insn (gen_stack_tie (mem));
}

/* Copy GPRS into FPR save slots.  */

static void
s390_save_gprs_to_fprs (void)
{
  int i;

  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;

  for (i = 6; i < 16; i++)
    {
      if (FP_REGNO_P (cfun_gpr_save_slot (i)))
        {
          rtx_insn *insn =
            emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
                            gen_rtx_REG (DImode, i));
          RTX_FRAME_RELATED_P (insn) = 1;
          /* This prevents dwarf2cfi from interpreting the set.  Doing
             so it might emit def_cfa_register infos setting an FPR as
             new CFA.  */
          add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
        }
    }
}

/* Restore GPRs from FPR save slots.  */

static void
s390_restore_gprs_from_fprs (void)
{
  int i;

  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;

  /* Restore the GPRs starting with the stack pointer.  That way the
     stack pointer already has its original value when it comes to
     restoring the hard frame pointer.  So we can set the cfa reg back
     to the stack pointer.  */
  for (i = STACK_POINTER_REGNUM; i >= 6; i--)
    {
      rtx_insn *insn;

      if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
        continue;

      rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));

      if (i == STACK_POINTER_REGNUM)
        insn = emit_insn (gen_stack_restore_from_fpr (fpr));
      else
        insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);

      df_set_regs_ever_live (i, true);
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));

      /* If either the stack pointer or the frame pointer get restored
         set the CFA value to its value at function start.  Doing this
         for the frame pointer results in .cfi_def_cfa_register 15
         which is ok since if the stack pointer got modified it has
         been restored already.  */
      if (i == STACK_POINTER_REGNUM || i == HARD_FRAME_POINTER_REGNUM)
        add_reg_note (insn, REG_CFA_DEF_CFA,
                      plus_constant (Pmode, stack_pointer_rtx,
                                     STACK_POINTER_OFFSET));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
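
/* Illustrative note (added for clarity, not part of the original
   sources): on z10 and later the GPR->FPR moves emitted above
   typically assemble to ldgr instructions (and the restores to lgdr),
   so a leaf function can keep e.g. r6 in an unused call-clobbered FPR
   instead of touching the stack at all.  */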

/* A pass run immediately before shrink-wrapping and prologue and epilogue
   generation.  */

namespace {

const pass_data pass_data_s390_early_mach =
{
  RTL_PASS, /* type */
  "early_mach", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
};

class pass_s390_early_mach : public rtl_opt_pass
{
public:
  pass_s390_early_mach (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *);

}; // class pass_s390_early_mach

unsigned int
pass_s390_early_mach::execute (function *fun)
{
  rtx_insn *insn;

  /* Try to get rid of the FPR clobbers.  */
  s390_optimize_nonescaping_tx ();

  /* Re-compute register info.  */
  s390_register_info ();

  /* If we're using a base register, ensure that it is always valid for
     the first non-prologue instruction.  */
  if (fun->machine->base_reg)
    emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));

  /* Annotate all constant pool references to let the scheduler know
     they implicitly use the base register.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      {
        annotate_constant_pool_refs (insn);
        df_insn_rescan (insn);
      }
  return 0;
}

} // anon namespace

rtl_opt_pass *
make_pass_s390_early_mach (gcc::context *ctxt)
{
  return new pass_s390_early_mach (ctxt);
}

/* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
   - push too big immediates to the literal pool and annotate the refs
   - emit frame related notes for stack pointer changes.  */

static rtx
s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
{
  rtx_insn *insn;
  rtx orig_offset = offset;

  gcc_assert (REG_P (target));
  gcc_assert (REG_P (reg));
  gcc_assert (CONST_INT_P (offset));

  if (offset == const0_rtx)                            /* lr/lgr */
    {
      insn = emit_move_insn (target, reg);
    }
  else if (DISP_IN_RANGE (INTVAL (offset)))            /* la */
    {
      insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
                                                   offset));
    }
  else
    {
      if (!satisfies_constraint_K (offset)             /* ahi/aghi */
          && (!TARGET_EXTIMM
              || (!satisfies_constraint_Op (offset)    /* alfi/algfi */
                  && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
        offset = force_const_mem (Pmode, offset);

      if (target != reg)
        {
          insn = emit_move_insn (target, reg);
          RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
        }

      insn = emit_insn (gen_add2_insn (target, offset));

      if (!CONST_INT_P (offset))
        {
          annotate_constant_pool_refs (insn);

          if (frame_related_p)
            add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                          gen_rtx_SET (target,
                                       gen_rtx_PLUS (Pmode, target,
                                                     orig_offset)));
        }
    }

  RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;

  /* If this is a stack adjustment and we are generating a stack clash
     prologue, then add a REG_STACK_CHECK note to signal that this insn
     should be left alone.  */
  if (flag_stack_clash_protection && target == stack_pointer_rtx)
    add_reg_note (insn, REG_STACK_CHECK, const0_rtx);

  return insn;
}
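
/* Illustrative note (added for clarity, not part of the original
   sources): the instruction choice above degrades gracefully with the
   offset magnitude: 0 becomes a plain register copy (lr/lgr), an
   offset within displacement range an address load (la), a 16-bit
   signed immediate an ahi/aghi, a 32-bit immediate with z9 extended
   immediates an al(g)fi/sl(g)fi, and only the remaining cases go
   through the literal pool.  */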

/* Emit a compare instruction with a volatile memory access as stack
   probe.  It does not waste store tags and does not clobber any
   registers apart from the condition code.  */
static void
s390_emit_stack_probe (rtx addr)
{
  rtx tmp = gen_rtx_MEM (Pmode, addr);
  MEM_VOLATILE_P (tmp) = 1;
  s390_emit_compare (EQ, gen_rtx_REG (Pmode, 0), tmp);
  emit_insn (gen_blockage ());
}

/* Use a runtime loop if we have to emit more probes than this.  */
#define MIN_UNROLL_PROBES 3

/* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
   if necessary.  LAST_PROBE_OFFSET contains the offset of the closest
   probe relative to the stack pointer.

   Note that SIZE is negative.

   The return value is true if TEMP_REG has been clobbered.  */
static bool
allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
                      rtx temp_reg)
{
  bool temp_reg_clobbered_p = false;
  HOST_WIDE_INT probe_interval
    = 1 << param_stack_clash_protection_probe_interval;
  HOST_WIDE_INT guard_size
    = 1 << param_stack_clash_protection_guard_size;

  if (flag_stack_clash_protection)
    {
      if (last_probe_offset + -INTVAL (size) < guard_size)
        dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      else
        {
          rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
          HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
          HOST_WIDE_INT num_probes = rounded_size / probe_interval;
          HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;

          if (num_probes < MIN_UNROLL_PROBES)
            {
              /* Emit unrolled probe statements.  */

              for (unsigned int i = 0; i < num_probes; i++)
                {
                  s390_prologue_plus_offset (stack_pointer_rtx,
                                             stack_pointer_rtx,
                                             GEN_INT (-probe_interval), true);
                  s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
                                                       stack_pointer_rtx,
                                                       offset));
                }
              dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
            }
          else
            {
              /* Emit a loop probing the pages.  */

              rtx_code_label *loop_start_label = gen_label_rtx ();

              /* From now on temp_reg will be the CFA register.  */
              s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
                                         GEN_INT (-rounded_size), true);
              emit_label (loop_start_label);

              s390_prologue_plus_offset (stack_pointer_rtx,
                                         stack_pointer_rtx,
                                         GEN_INT (-probe_interval), false);
              s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
                                                   stack_pointer_rtx,
                                                   offset));
              emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
                                       GT, NULL_RTX,
                                       Pmode, 1, loop_start_label);

              /* Without this make_edges ICEes.  */
              JUMP_LABEL (get_last_insn ()) = loop_start_label;
              LABEL_NUSES (loop_start_label) = 1;

              /* That's going to be a NOP since stack pointer and
                 temp_reg are supposed to be the same here.  We just
                 emit it to set the CFA reg back to r15.  */
              s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
                                         const0_rtx, true);
              temp_reg_clobbered_p = true;
              dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
            }

          /* Handle any residual allocation request.  */
          s390_prologue_plus_offset (stack_pointer_rtx,
                                     stack_pointer_rtx,
                                     GEN_INT (-residual), true);
          last_probe_offset += residual;
          if (last_probe_offset >= probe_interval)
            s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
                                                 stack_pointer_rtx,
                                                 GEN_INT (residual
                                                          - UNITS_PER_LONG)));

          return temp_reg_clobbered_p;
        }
    }

  /* Subtract frame size from stack pointer.  */
  s390_prologue_plus_offset (stack_pointer_rtx,
                             stack_pointer_rtx,
                             size, true);

  return temp_reg_clobbered_p;
}
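
/* Worked example (added for clarity, not part of the original
   sources; assumes a 4 KiB probe interval): an allocation of 10000
   bytes rounds down to rounded_size == 8192, giving num_probes == 2
   and residual == 1808.  Since 2 < MIN_UNROLL_PROBES, both probes are
   emitted inline, each one decrementing the stack pointer by 4096 and
   touching the last word of the newly allocated interval; the 1808
   residual bytes are then allocated, with one more probe only if the
   distance to the closest earlier probe would otherwise reach the
   probe interval.  */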

/* Expand the prologue into a bunch of separate insns.  */

void
s390_emit_prologue (void)
{
  rtx insn, addr;
  rtx temp_reg;
  int i;
  int offset;
  int next_fpr = 0;

  /* Choose best register to use for temp use within prologue.
     TPF with profiling must avoid the register 14 - the tracing function
     needs the original contents of r14 to be preserved.  */

  if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
      && !crtl->is_leaf
      && !TARGET_TPF_PROFILING)
    temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
  else if (flag_split_stack && cfun->stdarg)
    temp_reg = gen_rtx_REG (Pmode, 12);
  else
    temp_reg = gen_rtx_REG (Pmode, 1);

  /* When probing for stack-clash mitigation, we have to track the distance
     between the stack pointer and closest known reference.

     Most of the time we have to make a worst case assumption.  The
     only exception is when TARGET_BACKCHAIN is active, in which case
     we know *sp (offset 0) was written.  */
  HOST_WIDE_INT probe_interval
    = 1 << param_stack_clash_protection_probe_interval;
  HOST_WIDE_INT last_probe_offset
    = (TARGET_BACKCHAIN
       ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
       : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));

  s390_save_gprs_to_fprs ();

  /* Save call saved gprs.  */
  if (cfun_frame_layout.first_save_gpr != -1)
    {
      insn = save_gprs (stack_pointer_rtx,
                        cfun_frame_layout.gprs_offset +
                        UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
                                          - cfun_frame_layout.first_save_gpr_slot),
                        cfun_frame_layout.first_save_gpr,
                        cfun_frame_layout.last_save_gpr);

      /* This is not 100% correct.  If we have more than one register saved,
         then LAST_PROBE_OFFSET can move even closer to sp.  */
      last_probe_offset
        = (cfun_frame_layout.gprs_offset +
           UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
                             - cfun_frame_layout.first_save_gpr_slot));

      emit_insn (insn);
    }

  /* Dummy insn to mark literal pool slot.  */

  if (cfun->machine->base_reg)
    emit_insn (gen_main_pool (cfun->machine->base_reg));

  offset = cfun_frame_layout.f0_offset;

  /* Save f0 and f2.  */
  for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
    {
      if (cfun_fpr_save_p (i))
        {
          save_fpr (stack_pointer_rtx, offset, i);
          if (offset < last_probe_offset)
            last_probe_offset = offset;
          offset += 8;
        }
      else if (!TARGET_PACKED_STACK || cfun->stdarg)
        offset += 8;
    }

  /* Save f4 and f6.  */
  offset = cfun_frame_layout.f4_offset;
  for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
    {
      if (cfun_fpr_save_p (i))
        {
          insn = save_fpr (stack_pointer_rtx, offset, i);
          if (offset < last_probe_offset)
            last_probe_offset = offset;
          offset += 8;

          /* If f4 and f6 are call clobbered they are saved due to
             stdargs and therefore are not frame related.  */
          if (!call_used_regs[i])
            RTX_FRAME_RELATED_P (insn) = 1;
        }
      else if (!TARGET_PACKED_STACK || call_used_regs[i])
        offset += 8;
    }

  if (TARGET_PACKED_STACK
      && cfun_save_high_fprs_p
      && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
    {
      offset = (cfun_frame_layout.f8_offset
                + (cfun_frame_layout.high_fprs - 1) * 8);

      for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
        if (cfun_fpr_save_p (i))
          {
            insn = save_fpr (stack_pointer_rtx, offset, i);
            if (offset < last_probe_offset)
              last_probe_offset = offset;

            RTX_FRAME_RELATED_P (insn) = 1;
            offset -= 8;
          }
      if (offset >= cfun_frame_layout.f8_offset)
        next_fpr = i;
    }

  if (!TARGET_PACKED_STACK)
    next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;

  if (flag_stack_usage_info)
    current_function_static_stack_size = cfun_frame_layout.frame_size;

  /* Decrement stack pointer.  */

  if (cfun_frame_layout.frame_size > 0)
    {
      rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
      rtx_insn *stack_pointer_backup_loc;
      bool temp_reg_clobbered_p;

      if (s390_stack_size)
        {
          HOST_WIDE_INT stack_guard;

          if (s390_stack_guard)
            stack_guard = s390_stack_guard;
          else
            {
              /* If no value for stack guard is provided the smallest power of 2
                 larger than the current frame size is chosen.  */
              stack_guard = 1;
              while (stack_guard < cfun_frame_layout.frame_size)
                stack_guard <<= 1;
            }
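
          /* Worked example (added for clarity, not part of the
             original sources): with -mstack-size set but no
             -mstack-guard, a frame size of 12000 bytes yields
             stack_guard == 16384, the smallest power of 2 not below
             the frame size.  */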

          if (cfun_frame_layout.frame_size >= s390_stack_size)
            {
              warning (0, "frame size of function %qs is %wd"
                       " bytes exceeding user provided stack limit of "
                       "%d bytes.  "
                       "An unconditional trap is added.",
                       current_function_name(), cfun_frame_layout.frame_size,
                       s390_stack_size);
              emit_insn (gen_trap ());
              emit_barrier ();
            }
          else
            {
              /* stack_guard has to be smaller than s390_stack_size.
                 Otherwise we would emit an AND with zero which would
                 not match the test under mask pattern.  */
              if (stack_guard >= s390_stack_size)
                {
                  warning (0, "frame size of function %qs is %wd"
                           " bytes which is more than half the stack size. "
                           "The dynamic check would not be reliable. "
                           "No check emitted for this function.",
                           current_function_name(),
                           cfun_frame_layout.frame_size);
                }
              else
                {
                  HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
                                                    & ~(stack_guard - 1));

                  rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
                                       GEN_INT (stack_check_mask));
                  if (TARGET_64BIT)
                    emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
                                                         t, const0_rtx),
                                             t, const0_rtx, const0_rtx));
                  else
                    emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
                                                         t, const0_rtx),
                                             t, const0_rtx, const0_rtx));
                }
            }
        }
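
      /* Worked example (added for clarity, not part of the original
         sources): with s390_stack_size == 65536 and stack_guard ==
         4096 the mask is (65535 & ~4095) == 0xf000, so the
         conditional trap fires once all masked stack pointer bits
         become zero, i.e. once the stack pointer has dropped into the
         last stack_guard bytes of the stack area.  */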

      if (s390_warn_framesize > 0
          && cfun_frame_layout.frame_size >= s390_warn_framesize)
        warning (0, "frame size of %qs is %wd bytes",
                 current_function_name (), cfun_frame_layout.frame_size);

      if (s390_warn_dynamicstack_p && cfun->calls_alloca)
        warning (0, "%qs uses dynamic stack allocation", current_function_name ());

      /* Save the location where we could backup the incoming stack
         pointer.  */
      stack_pointer_backup_loc = get_last_insn ();

      temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
                                                   temp_reg);

      if (TARGET_BACKCHAIN || next_fpr)
        {
          if (temp_reg_clobbered_p)
            {
              /* allocate_stack_space had to make use of temp_reg and
                 we need it to hold a backup of the incoming stack
                 pointer.  Calculate back that value from the current
                 stack pointer.  */
              s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
                                         GEN_INT (cfun_frame_layout.frame_size),
                                         false);
            }
          else
            {
              /* allocate_stack_space didn't actually require
                 temp_reg.  Insert the stack pointer backup insn
                 before the stack pointer decrement code - knowing now
                 that the value will survive.  */
              emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
                               stack_pointer_backup_loc);
            }
        }

      /* Set backchain.  */

      if (TARGET_BACKCHAIN)
        {
          if (cfun_frame_layout.backchain_offset)
            addr = gen_rtx_MEM (Pmode,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               cfun_frame_layout.backchain_offset));
          else
            addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
          set_mem_alias_set (addr, get_frame_alias_set ());
          insn = emit_insn (gen_move_insn (addr, temp_reg));
        }

      /* If we support non-call exceptions (e.g. for Java),
         we need to make sure the backchain pointer is set up
         before any possibly trapping memory access.  */
      if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
        {
          addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
          emit_clobber (addr);
        }
    }
  else if (flag_stack_clash_protection)
    dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);

  /* Save fprs 8 - 15 (64 bit ABI).  */

  if (cfun_save_high_fprs_p && next_fpr)
    {
      /* If the stack might be accessed through a different register
         we have to make sure that the stack pointer decrement is not
         moved below the use of the stack slots.  */
      s390_emit_stack_tie ();

      insn = emit_insn (gen_add2_insn (temp_reg,
                                       GEN_INT (cfun_frame_layout.f8_offset)));

      offset = 0;

      for (i = FPR8_REGNUM; i <= next_fpr; i++)
        if (cfun_fpr_save_p (i))
          {
            rtx addr = plus_constant (Pmode, stack_pointer_rtx,
                                      cfun_frame_layout.frame_size
                                      + cfun_frame_layout.f8_offset
                                      + offset);

            insn = save_fpr (temp_reg, offset, i);
            offset += 8;
            RTX_FRAME_RELATED_P (insn) = 1;
            add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                          gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
                                       gen_rtx_REG (DFmode, i)));
          }
    }

  /* Set frame pointer, if needed.  */

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Set up got pointer, if needed.  */

  if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
    {
      rtx_insn *insns = s390_load_got ();

      for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
        annotate_constant_pool_refs (insn);

      emit_insn (insns);
    }

#if TARGET_TPF != 0
  if (TARGET_TPF_PROFILING)
    {
      /* Generate a BAS instruction to serve as a function entry
         intercept to facilitate the use of tracing algorithms located
         at the branch target.  */
      emit_insn (gen_prologue_tpf (
                   GEN_INT (s390_tpf_trace_hook_prologue_check),
                   GEN_INT (s390_tpf_trace_hook_prologue_target)));

      /* Emit a blockage here so that all code lies between the
         profiling mechanisms.  */
      emit_insn (gen_blockage ());
    }
#endif
}
-area_bottom : 0; 11435 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset); 11436 11437 cfa = gen_rtx_SET (frame_pointer, 11438 gen_rtx_PLUS (Pmode, frame_pointer, frame_off)); 11439 if (DISP_IN_RANGE (INTVAL (frame_off))) 11440 { 11441 rtx set; 11442 11443 set = gen_rtx_SET (frame_pointer, 11444 gen_rtx_PLUS (Pmode, frame_pointer, frame_off)); 11445 insn = emit_insn (set); 11446 } 11447 else 11448 { 11449 if (!CONST_OK_FOR_K (INTVAL (frame_off))) 11450 frame_off = force_const_mem (Pmode, frame_off); 11451 11452 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off)); 11453 annotate_constant_pool_refs (insn); 11454 } 11455 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa); 11456 RTX_FRAME_RELATED_P (insn) = 1; 11457 } 11458 11459 /* Restore call saved fprs. */ 11460 11461 if (TARGET_64BIT) 11462 { 11463 if (cfun_save_high_fprs_p) 11464 { 11465 next_offset = cfun_frame_layout.f8_offset; 11466 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++) 11467 { 11468 if (cfun_fpr_save_p (i)) 11469 { 11470 restore_fpr (frame_pointer, 11471 offset + next_offset, i); 11472 cfa_restores 11473 = alloc_reg_note (REG_CFA_RESTORE, 11474 gen_rtx_REG (DFmode, i), cfa_restores); 11475 next_offset += 8; 11476 } 11477 } 11478 } 11479 11480 } 11481 else 11482 { 11483 next_offset = cfun_frame_layout.f4_offset; 11484 /* f4, f6 */ 11485 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++) 11486 { 11487 if (cfun_fpr_save_p (i)) 11488 { 11489 restore_fpr (frame_pointer, 11490 offset + next_offset, i); 11491 cfa_restores 11492 = alloc_reg_note (REG_CFA_RESTORE, 11493 gen_rtx_REG (DFmode, i), cfa_restores); 11494 next_offset += 8; 11495 } 11496 else if (!TARGET_PACKED_STACK) 11497 next_offset += 8; 11498 } 11499 11500 } 11501 11502 /* Restore call saved gprs. */ 11503 11504 if (cfun_frame_layout.first_restore_gpr != -1) 11505 { 11506 rtx insn, addr; 11507 int i; 11508 11509 /* Check for global registers and save them 11510 to the stack location from where they get restored. */ 11511 11512 for (i = cfun_frame_layout.first_restore_gpr; 11513 i <= cfun_frame_layout.last_restore_gpr; 11514 i++) 11515 { 11516 if (global_not_special_regno_p (i)) 11517 { 11518 addr = plus_constant (Pmode, frame_pointer, 11519 offset + cfun_frame_layout.gprs_offset 11520 + (i - cfun_frame_layout.first_save_gpr_slot) 11521 * UNITS_PER_LONG); 11522 addr = gen_rtx_MEM (Pmode, addr); 11523 set_mem_alias_set (addr, get_frame_alias_set ()); 11524 emit_move_insn (addr, gen_rtx_REG (Pmode, i)); 11525 } 11526 else 11527 cfa_restores 11528 = alloc_reg_note (REG_CFA_RESTORE, 11529 gen_rtx_REG (Pmode, i), cfa_restores); 11530 } 11531 11532 /* Fetch the return address from the stack before the load multiple; 11533 this helps scheduling. 11534 11535 Only do this if we already decided that r14 needs to be 11536 saved to a stack slot. (And not just because r14 happens to 11537 be in between two GPRs which need saving.) Otherwise it 11538 would be difficult to take that decision back in 11539 s390_optimize_prologue. 11540 11541 This optimization is only helpful on in-order machines. */ 11542 if (!
sibcall 11543 && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK 11544 && s390_tune <= PROCESSOR_2097_Z10) 11545 { 11546 int return_regnum = find_unused_clobbered_reg (); 11547 if (!return_regnum 11548 || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION 11549 && !TARGET_CPU_Z10 11550 && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM)) 11551 { 11552 gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4); 11553 return_regnum = 4; 11554 } 11555 return_reg = gen_rtx_REG (Pmode, return_regnum); 11556 11557 addr = plus_constant (Pmode, frame_pointer, 11558 offset + cfun_frame_layout.gprs_offset 11559 + (RETURN_REGNUM 11560 - cfun_frame_layout.first_save_gpr_slot) 11561 * UNITS_PER_LONG); 11562 addr = gen_rtx_MEM (Pmode, addr); 11563 set_mem_alias_set (addr, get_frame_alias_set ()); 11564 emit_move_insn (return_reg, addr); 11565 11566 /* Once we did that optimization we have to make sure 11567 s390_optimize_prologue does not try to remove the store 11568 of r14 since we will not be able to find the load issued 11569 here. */ 11570 cfun_frame_layout.save_return_addr_p = true; 11571 } 11572 11573 insn = restore_gprs (frame_pointer, 11574 offset + cfun_frame_layout.gprs_offset 11575 + (cfun_frame_layout.first_restore_gpr 11576 - cfun_frame_layout.first_save_gpr_slot) 11577 * UNITS_PER_LONG, 11578 cfun_frame_layout.first_restore_gpr, 11579 cfun_frame_layout.last_restore_gpr); 11580 insn = emit_insn (insn); 11581 REG_NOTES (insn) = cfa_restores; 11582 add_reg_note (insn, REG_CFA_DEF_CFA, 11583 plus_constant (Pmode, stack_pointer_rtx, 11584 STACK_POINTER_OFFSET)); 11585 RTX_FRAME_RELATED_P (insn) = 1; 11586 } 11587 11588 s390_restore_gprs_from_fprs (); 11589 11590 if (! sibcall) 11591 { 11592 if (!return_reg && !s390_can_use_return_insn ()) 11593 /* We planned to emit (return), but we are not allowed to. */ 11594 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM); 11595 11596 if (return_reg) 11597 /* Emit (return) and (use). */ 11598 emit_jump_insn (gen_return_use (return_reg)); 11599 else 11600 /* The fact that RETURN_REGNUM is used is already reflected by 11601 EPILOGUE_USES. Emit plain (return). */ 11602 emit_jump_insn (gen_return ()); 11603 } 11604} 11605 11606/* Implement TARGET_SET_UP_BY_PROLOGUE. */ 11607 11608static void 11609s390_set_up_by_prologue (hard_reg_set_container *regs) 11610{ 11611 if (cfun->machine->base_reg 11612 && !call_used_regs[REGNO (cfun->machine->base_reg)]) 11613 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg)); 11614} 11615 11616/* -fsplit-stack support. */ 11617 11618/* A SYMBOL_REF for __morestack. */ 11619static GTY(()) rtx morestack_ref; 11620 11621/* When using -fsplit-stack, the allocation routines set a field in 11622 the TCB to the bottom of the stack plus this much space, measured 11623 in bytes. */ 11624 11625#define SPLIT_STACK_AVAILABLE 1024 11626 11627/* Emit the parmblock for __morestack into the .rodata section.
It 11628 consists of 3 pointer size entries: 11629 - frame size 11630 - size of stack arguments 11631 - offset between parm block and __morestack return label */ 11632 11633void 11634s390_output_split_stack_data (rtx parm_block, rtx call_done, 11635 rtx frame_size, rtx args_size) 11636{ 11637 rtx ops[] = { parm_block, call_done }; 11638 11639 switch_to_section (targetm.asm_out.function_rodata_section 11640 (current_function_decl)); 11641 11642 if (TARGET_64BIT) 11643 output_asm_insn (".align\t8", NULL); 11644 else 11645 output_asm_insn (".align\t4", NULL); 11646 11647 (*targetm.asm_out.internal_label) (asm_out_file, "L", 11648 CODE_LABEL_NUMBER (parm_block)); 11649 if (TARGET_64BIT) 11650 { 11651 output_asm_insn (".quad\t%0", &frame_size); 11652 output_asm_insn (".quad\t%0", &args_size); 11653 output_asm_insn (".quad\t%1-%0", ops); 11654 } 11655 else 11656 { 11657 output_asm_insn (".long\t%0", &frame_size); 11658 output_asm_insn (".long\t%0", &args_size); 11659 output_asm_insn (".long\t%1-%0", ops); 11660 } 11661 11662 switch_to_section (current_function_section ()); 11663} 11664 11665/* Emit -fsplit-stack prologue, which goes before the regular function 11666 prologue. */ 11667 11668void 11669s390_expand_split_stack_prologue (void) 11670{ 11671 rtx r1, guard, cc = NULL; 11672 rtx_insn *insn; 11673 /* Offset from thread pointer to __private_ss. */ 11674 int psso = TARGET_64BIT ? 0x38 : 0x20; 11675 11676 /* Frame size and argument size - the two parameters to __morestack. */ 11677 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size; 11678 /* Align argument size to 8 bytes - simplifies __morestack code. */ 11679 HOST_WIDE_INT args_size = crtl->args.size >= 0 11680 ? ((crtl->args.size + 7) & ~7) 11681 : 0; 11682 /* Label to be called by __morestack. */ 11683 rtx_code_label *call_done = NULL; 11684 rtx_code_label *parm_base = NULL; 11685 rtx tmp; 11686 11687 gcc_assert (flag_split_stack && reload_completed); 11688 11689 r1 = gen_rtx_REG (Pmode, 1); 11690 11691 /* If no stack frame will be allocated, don't do anything. */ 11692 if (!frame_size) 11693 { 11694 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) 11695 { 11696 /* If va_start is used, just use r15. */ 11697 emit_move_insn (r1, 11698 gen_rtx_PLUS (Pmode, stack_pointer_rtx, 11699 GEN_INT (STACK_POINTER_OFFSET))); 11700 11701 } 11702 return; 11703 } 11704 11705 if (morestack_ref == NULL_RTX) 11706 { 11707 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack"); 11708 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL 11709 | SYMBOL_FLAG_FUNCTION); 11710 } 11711 11712 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size)) 11713 { 11714 /* If frame_size will fit in an add instruction, do a stack space 11715 check, and only call __morestack if there's not enough space. */ 11716 11717 /* Get thread pointer. r1 is the only register we can always destroy - r0 11718 could contain a static chain (and cannot be used to address memory 11719 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */ 11720 emit_insn (gen_get_thread_pointer (Pmode, r1)); 11721 /* Aim at __private_ss. */ 11722 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso)); 11723 11724 /* If less than 1kiB used, skip addition and compare directly with 11725 __private_ss.
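 The split-stack runtime records __private_ss as the stack bottom plus SPLIT_STACK_AVAILABLE bytes of slack (see above), so a frame of up to that size is known to fit whenever the stack pointer is still above the recorded value.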
*/ 11726 if (frame_size > SPLIT_STACK_AVAILABLE) 11727 { 11728 emit_move_insn (r1, guard); 11729 if (TARGET_64BIT) 11730 emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size))); 11731 else 11732 emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size))); 11733 guard = r1; 11734 } 11735 11736 /* Compare the (maybe adjusted) guard with the stack pointer. */ 11737 cc = s390_emit_compare (LT, stack_pointer_rtx, guard); 11738 } 11739 11740 call_done = gen_label_rtx (); 11741 parm_base = gen_label_rtx (); 11742 LABEL_NUSES (parm_base)++; 11743 LABEL_NUSES (call_done)++; 11744 11745 /* %r1 = litbase. */ 11746 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base)); 11747 add_reg_note (insn, REG_LABEL_OPERAND, parm_base); 11748 LABEL_NUSES (parm_base)++; 11749 11750 /* Now, we need to call __morestack. It has very special calling 11751 conventions: it preserves param/return/static chain registers for 11752 calling main function body, and looks for its own parameters at %r1. */ 11753 if (cc != NULL) 11754 tmp = gen_split_stack_cond_call (Pmode, 11755 morestack_ref, 11756 parm_base, 11757 call_done, 11758 GEN_INT (frame_size), 11759 GEN_INT (args_size), 11760 cc); 11761 else 11762 tmp = gen_split_stack_call (Pmode, 11763 morestack_ref, 11764 parm_base, 11765 call_done, 11766 GEN_INT (frame_size), 11767 GEN_INT (args_size)); 11768 11769 insn = emit_jump_insn (tmp); 11770 JUMP_LABEL (insn) = call_done; 11771 add_reg_note (insn, REG_LABEL_OPERAND, parm_base); 11772 add_reg_note (insn, REG_LABEL_OPERAND, call_done); 11773 11774 if (cc != NULL) 11775 { 11776 /* Mark the jump as very unlikely to be taken. */ 11777 add_reg_br_prob_note (insn, 11778 profile_probability::very_unlikely ()); 11779 11780 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) 11781 { 11782 /* If va_start is used, and __morestack was not called, just use 11783 r15. */ 11784 emit_move_insn (r1, 11785 gen_rtx_PLUS (Pmode, stack_pointer_rtx, 11786 GEN_INT (STACK_POINTER_OFFSET))); 11787 } 11788 } 11789 else 11790 { 11791 emit_barrier (); 11792 } 11793 11794 /* __morestack will call us here. */ 11795 11796 emit_label (call_done); 11797} 11798 11799/* We may have to tell the dataflow pass that the split stack prologue 11800 is initializing a register. */ 11801 11802static void 11803s390_live_on_entry (bitmap regs) 11804{ 11805 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) 11806 { 11807 gcc_assert (flag_split_stack); 11808 bitmap_set_bit (regs, 1); 11809 } 11810} 11811 11812/* Return true if the function can use simple_return to return outside 11813 of a shrink-wrapped region. At present shrink-wrapping is supported 11814 in all cases. */ 11815 11816bool 11817s390_can_use_simple_return_insn (void) 11818{ 11819 return true; 11820} 11821 11822/* Return true if the epilogue is guaranteed to contain only a return 11823 instruction and if a direct return can therefore be used instead. 11824 One of the main advantages of using direct return instructions 11825 is that we can then use conditional returns. 
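 On S/390 a conditional return is simply a bcr with a condition mask on %r14, so the return can be predicated directly on a preceding comparison instead of branching around an epilogue.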
*/ 11826 11827bool 11828s390_can_use_return_insn (void) 11829{ 11830 int i; 11831 11832 if (!reload_completed) 11833 return false; 11834 11835 if (crtl->profile) 11836 return false; 11837 11838 if (TARGET_TPF_PROFILING) 11839 return false; 11840 11841 for (i = 0; i < 16; i++) 11842 if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE) 11843 return false; 11844 11845 /* For 31 bit this is not covered by the frame_size check below 11846 since f4, f6 are saved in the register save area without needing 11847 additional stack space. */ 11848 if (!TARGET_64BIT 11849 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM))) 11850 return false; 11851 11852 if (cfun->machine->base_reg 11853 && !call_used_regs[REGNO (cfun->machine->base_reg)]) 11854 return false; 11855 11856 return cfun_frame_layout.frame_size == 0; 11857} 11858 11859/* The VX ABI differs for vararg functions. Therefore we need the 11860 prototype of the callee to be available when passing vector type 11861 values. */ 11862static const char * 11863s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val) 11864{ 11865 return ((TARGET_VX_ABI 11866 && typelist == 0 11867 && VECTOR_TYPE_P (TREE_TYPE (val)) 11868 && (funcdecl == NULL_TREE 11869 || (TREE_CODE (funcdecl) == FUNCTION_DECL 11870 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD))) 11871 ? N_("vector argument passed to unprototyped function") 11872 : NULL); 11873} 11874 11875 11876/* Return the size in bytes of a function argument of 11877 type TYPE and/or mode MODE. At least one of TYPE or 11878 MODE must be specified. */ 11879 11880static int 11881s390_function_arg_size (machine_mode mode, const_tree type) 11882{ 11883 if (type) 11884 return int_size_in_bytes (type); 11885 11886 /* No type info available for some library calls ... */ 11887 if (mode != BLKmode) 11888 return GET_MODE_SIZE (mode); 11889 11890 /* If we have neither type nor mode, abort. */ 11891 gcc_unreachable (); 11892} 11893 11894/* Return true if a function argument of type TYPE and mode MODE 11895 is to be passed in a vector register, if available. */ 11896 11897bool 11898s390_function_arg_vector (machine_mode mode, const_tree type) 11899{ 11900 if (!TARGET_VX_ABI) 11901 return false; 11902 11903 if (s390_function_arg_size (mode, type) > 16) 11904 return false; 11905 11906 /* No type info available for some library calls ... */ 11907 if (!type) 11908 return VECTOR_MODE_P (mode); 11909 11910 /* The ABI says that record types with a single member are treated 11911 just like that member would be. */ 11912 int empty_base_seen = 0; 11913 const_tree orig_type = type; 11914 while (TREE_CODE (type) == RECORD_TYPE) 11915 { 11916 tree field, single = NULL_TREE; 11917 11918 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) 11919 { 11920 if (TREE_CODE (field) != FIELD_DECL) 11921 continue; 11922 11923 if (DECL_FIELD_ABI_IGNORED (field)) 11924 { 11925 if (lookup_attribute ("no_unique_address", 11926 DECL_ATTRIBUTES (field))) 11927 empty_base_seen |= 2; 11928 else 11929 empty_base_seen |= 1; 11930 continue; 11931 } 11932 11933 if (single == NULL_TREE) 11934 single = TREE_TYPE (field); 11935 else 11936 return false; 11937 } 11938 11939 if (single == NULL_TREE) 11940 return false; 11941 else 11942 { 11943 /* If the field declaration adds extra bytes due to 11944 e.g. padding, this is not accepted as a vector type.
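 For example, a record whose only member is a 16-byte vector but which carries extra alignment (and hence tail padding) fails the size comparison below and is passed like any other aggregate.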
*/ 11945 if (int_size_in_bytes (single) <= 0 11946 || int_size_in_bytes (single) != int_size_in_bytes (type)) 11947 return false; 11948 type = single; 11949 } 11950 } 11951 11952 if (!VECTOR_TYPE_P (type)) 11953 return false; 11954 11955 if (warn_psabi && empty_base_seen) 11956 { 11957 static unsigned last_reported_type_uid; 11958 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type)); 11959 if (uid != last_reported_type_uid) 11960 { 11961 const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base"; 11962 last_reported_type_uid = uid; 11963 if (empty_base_seen & 1) 11964 inform (input_location, 11965 "parameter passing for argument of type %qT when C++17 " 11966 "is enabled changed to match C++14 %{in GCC 10.1%}", 11967 orig_type, url); 11968 else 11969 inform (input_location, 11970 "parameter passing for argument of type %qT with " 11971 "%<[[no_unique_address]]%> members changed " 11972 "%{in GCC 10.1%}", orig_type, url); 11973 } 11974 } 11975 return true; 11976} 11977 11978/* Return true if a function argument of type TYPE and mode MODE 11979 is to be passed in a floating-point register, if available. */ 11980 11981static bool 11982s390_function_arg_float (machine_mode mode, const_tree type) 11983{ 11984 if (s390_function_arg_size (mode, type) > 8) 11985 return false; 11986 11987 /* Soft-float changes the ABI: no floating-point registers are used. */ 11988 if (TARGET_SOFT_FLOAT) 11989 return false; 11990 11991 /* No type info available for some library calls ... */ 11992 if (!type) 11993 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode; 11994 11995 /* The ABI says that record types with a single member are treated 11996 just like that member would be. */ 11997 int empty_base_seen = 0; 11998 const_tree orig_type = type; 11999 while (TREE_CODE (type) == RECORD_TYPE) 12000 { 12001 tree field, single = NULL_TREE; 12002 12003 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) 12004 { 12005 if (TREE_CODE (field) != FIELD_DECL) 12006 continue; 12007 if (DECL_FIELD_ABI_IGNORED (field)) 12008 { 12009 if (lookup_attribute ("no_unique_address", 12010 DECL_ATTRIBUTES (field))) 12011 empty_base_seen |= 2; 12012 else 12013 empty_base_seen |= 1; 12014 continue; 12015 } 12016 12017 if (single == NULL_TREE) 12018 single = TREE_TYPE (field); 12019 else 12020 return false; 12021 } 12022 12023 if (single == NULL_TREE) 12024 return false; 12025 else 12026 type = single; 12027 } 12028 12029 if (TREE_CODE (type) != REAL_TYPE) 12030 return false; 12031 12032 if (warn_psabi && empty_base_seen) 12033 { 12034 static unsigned last_reported_type_uid; 12035 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type)); 12036 if (uid != last_reported_type_uid) 12037 { 12038 const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base"; 12039 last_reported_type_uid = uid; 12040 if (empty_base_seen & 1) 12041 inform (input_location, 12042 "parameter passing for argument of type %qT when C++17 " 12043 "is enabled changed to match C++14 %{in GCC 10.1%}", 12044 orig_type, url); 12045 else 12046 inform (input_location, 12047 "parameter passing for argument of type %qT with " 12048 "%<[[no_unique_address]]%> members changed " 12049 "%{in GCC 10.1%}", orig_type, url); 12050 } 12051 } 12052 12053 return true; 12054} 12055 12056/* Return true if a function argument of type TYPE and mode MODE 12057 is to be passed in an integer register, or a pair of integer 12058 registers, if available. 
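 An 8-byte value needs a single GPR on 64 bit but two consecutive GPRs on 31 bit; the pair case is represented by the PARALLEL built in s390_function_arg below.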
*/ 12059 12060static bool 12061s390_function_arg_integer (machine_mode mode, const_tree type) 12062{ 12063 int size = s390_function_arg_size (mode, type); 12064 if (size > 8) 12065 return false; 12066 12067 /* No type info available for some library calls ... */ 12068 if (!type) 12069 return GET_MODE_CLASS (mode) == MODE_INT 12070 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode)); 12071 12072 /* We accept small integral (and similar) types. */ 12073 if (INTEGRAL_TYPE_P (type) 12074 || POINTER_TYPE_P (type) 12075 || TREE_CODE (type) == NULLPTR_TYPE 12076 || TREE_CODE (type) == OFFSET_TYPE 12077 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE)) 12078 return true; 12079 12080 /* We also accept structs of size 1, 2, 4, 8 that are not 12081 passed in floating-point registers. */ 12082 if (AGGREGATE_TYPE_P (type) 12083 && exact_log2 (size) >= 0 12084 && !s390_function_arg_float (mode, type)) 12085 return true; 12086 12087 return false; 12088} 12089 12090/* Return 1 if a function argument ARG is to be passed by reference. 12091 The ABI specifies that only structures of size 1, 2, 4, or 8 bytes 12092 are passed by value, all other structures (and complex numbers) are 12093 passed by reference. */ 12094 12095static bool 12096s390_pass_by_reference (cumulative_args_t, const function_arg_info &arg) 12097{ 12098 int size = s390_function_arg_size (arg.mode, arg.type); 12099 12100 if (s390_function_arg_vector (arg.mode, arg.type)) 12101 return false; 12102 12103 if (size > 8) 12104 return true; 12105 12106 if (tree type = arg.type) 12107 { 12108 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0) 12109 return true; 12110 12111 if (TREE_CODE (type) == COMPLEX_TYPE 12112 || TREE_CODE (type) == VECTOR_TYPE) 12113 return true; 12114 } 12115 12116 return false; 12117} 12118 12119/* Update the data in CUM to advance over argument ARG. */ 12120 12121static void 12122s390_function_arg_advance (cumulative_args_t cum_v, 12123 const function_arg_info &arg) 12124{ 12125 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 12126 12127 if (s390_function_arg_vector (arg.mode, arg.type)) 12128 { 12129 /* We are called for unnamed vector stdarg arguments which are 12130 passed on the stack. In this case this hook does not have to 12131 do anything since stack arguments are tracked by common 12132 code. */ 12133 if (!arg.named) 12134 return; 12135 cum->vrs += 1; 12136 } 12137 else if (s390_function_arg_float (arg.mode, arg.type)) 12138 { 12139 cum->fprs += 1; 12140 } 12141 else if (s390_function_arg_integer (arg.mode, arg.type)) 12142 { 12143 int size = s390_function_arg_size (arg.mode, arg.type); 12144 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG); 12145 } 12146 else 12147 gcc_unreachable (); 12148} 12149 12150/* Define where to put the arguments to a function. 12151 Value is zero to push the argument on the stack, 12152 or a hard register in which to store the argument. 12153 12154 CUM is a variable of type CUMULATIVE_ARGS which gives info about 12155 the preceding args and about the function being called. 12156 ARG is a description of the argument. 12157 12158 On S/390, we use general purpose registers 2 through 6 to 12159 pass integer, pointer, and certain structure arguments, and 12160 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit) 12161 to pass floating point arguments. All remaining arguments 12162 are pushed to the stack. 
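 For illustration, given void f (int a, double b, int c) with hardware floating point, a arrives in %r2, b in %f0, and c in %r3; the GPR and FPR sequences advance independently of each other.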
*/ 12163 12164static rtx 12165s390_function_arg (cumulative_args_t cum_v, const function_arg_info &arg) 12166{ 12167 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 12168 12169 if (!arg.named) 12170 s390_check_type_for_vector_abi (arg.type, true, false); 12171 12172 if (s390_function_arg_vector (arg.mode, arg.type)) 12173 { 12174 /* Vector arguments being part of the ellipsis are passed on the 12175 stack. */ 12176 if (!arg.named || (cum->vrs + 1 > VEC_ARG_NUM_REG)) 12177 return NULL_RTX; 12178 12179 return gen_rtx_REG (arg.mode, cum->vrs + FIRST_VEC_ARG_REGNO); 12180 } 12181 else if (s390_function_arg_float (arg.mode, arg.type)) 12182 { 12183 if (cum->fprs + 1 > FP_ARG_NUM_REG) 12184 return NULL_RTX; 12185 else 12186 return gen_rtx_REG (arg.mode, cum->fprs + 16); 12187 } 12188 else if (s390_function_arg_integer (arg.mode, arg.type)) 12189 { 12190 int size = s390_function_arg_size (arg.mode, arg.type); 12191 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG; 12192 12193 if (cum->gprs + n_gprs > GP_ARG_NUM_REG) 12194 return NULL_RTX; 12195 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG) 12196 return gen_rtx_REG (arg.mode, cum->gprs + 2); 12197 else if (n_gprs == 2) 12198 { 12199 rtvec p = rtvec_alloc (2); 12200 12201 RTVEC_ELT (p, 0) 12202 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2), 12203 const0_rtx); 12204 RTVEC_ELT (p, 1) 12205 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3), 12206 GEN_INT (4)); 12207 12208 return gen_rtx_PARALLEL (arg.mode, p); 12209 } 12210 } 12211 12212 /* After the real arguments, expand_call calls us once again with an 12213 end marker. Whatever we return here is passed as operand 2 to the 12214 call expanders. 12215 12216 We don't need this feature ... */ 12217 else if (arg.end_marker_p ()) 12218 return const0_rtx; 12219 12220 gcc_unreachable (); 12221} 12222 12223/* Implement TARGET_FUNCTION_ARG_BOUNDARY. Vector arguments are 12224 left-justified when placed on the stack during parameter passing. */ 12225 12226static pad_direction 12227s390_function_arg_padding (machine_mode mode, const_tree type) 12228{ 12229 if (s390_function_arg_vector (mode, type)) 12230 return PAD_UPWARD; 12231 12232 return default_function_arg_padding (mode, type); 12233} 12234 12235/* Return true if return values of type TYPE should be returned 12236 in a memory buffer whose address is passed by the caller as 12237 hidden first argument. */ 12238 12239static bool 12240s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED) 12241{ 12242 /* We accept small integral (and similar) types. */ 12243 if (INTEGRAL_TYPE_P (type) 12244 || POINTER_TYPE_P (type) 12245 || TREE_CODE (type) == OFFSET_TYPE 12246 || TREE_CODE (type) == REAL_TYPE) 12247 return int_size_in_bytes (type) > 8; 12248 12249 /* vector types which fit into a VR. */ 12250 if (TARGET_VX_ABI 12251 && VECTOR_TYPE_P (type) 12252 && int_size_in_bytes (type) <= 16) 12253 return false; 12254 12255 /* Aggregates and similar constructs are always returned 12256 in memory. */ 12257 if (AGGREGATE_TYPE_P (type) 12258 || TREE_CODE (type) == COMPLEX_TYPE 12259 || VECTOR_TYPE_P (type)) 12260 return true; 12261 12262 /* ??? We get called on all sorts of random stuff from 12263 aggregate_value_p. We can't abort, but it's not clear 12264 what's safe to return. Pretend it's a struct I guess. */ 12265 return true; 12266} 12267 12268/* Function arguments and return values are promoted to word size. 
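 A char or short is therefore passed as a full (double-)word, and a promoted pointer is always zero-extended, as flagged via POINTERS_EXTEND_UNSIGNED below.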
*/ 12269 12270static machine_mode 12271s390_promote_function_mode (const_tree type, machine_mode mode, 12272 int *punsignedp, 12273 const_tree fntype ATTRIBUTE_UNUSED, 12274 int for_return ATTRIBUTE_UNUSED) 12275{ 12276 if (INTEGRAL_MODE_P (mode) 12277 && GET_MODE_SIZE (mode) < UNITS_PER_LONG) 12278 { 12279 if (type != NULL_TREE && POINTER_TYPE_P (type)) 12280 *punsignedp = POINTERS_EXTEND_UNSIGNED; 12281 return Pmode; 12282 } 12283 12284 return mode; 12285} 12286 12287/* Define where to return a (scalar) value of type RET_TYPE. 12288 If RET_TYPE is null, define where to return a (scalar) 12289 value of mode MODE from a libcall. */ 12290 12291static rtx 12292s390_function_and_libcall_value (machine_mode mode, 12293 const_tree ret_type, 12294 const_tree fntype_or_decl, 12295 bool outgoing ATTRIBUTE_UNUSED) 12296{ 12297 /* For vector return types it is important to use the RET_TYPE 12298 argument whenever available since the middle-end might have 12299 changed the mode to a scalar mode. */ 12300 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type)) 12301 || (!ret_type && VECTOR_MODE_P (mode))); 12302 12303 /* For normal functions perform the promotion as 12304 promote_function_mode would do. */ 12305 if (ret_type) 12306 { 12307 int unsignedp = TYPE_UNSIGNED (ret_type); 12308 mode = promote_function_mode (ret_type, mode, &unsignedp, 12309 fntype_or_decl, 1); 12310 } 12311 12312 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT 12313 || SCALAR_FLOAT_MODE_P (mode) 12314 || (TARGET_VX_ABI && vector_ret_type_p)); 12315 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8)); 12316 12317 if (TARGET_VX_ABI && vector_ret_type_p) 12318 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO); 12319 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode)) 12320 return gen_rtx_REG (mode, 16); 12321 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG 12322 || UNITS_PER_LONG == UNITS_PER_WORD) 12323 return gen_rtx_REG (mode, 2); 12324 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG) 12325 { 12326 /* This case is triggered when returning a 64 bit value with 12327 -m31 -mzarch. Although the value would fit into a single 12328 register it has to be forced into a 32 bit register pair in 12329 order to match the ABI. */ 12330 rtvec p = rtvec_alloc (2); 12331 12332 RTVEC_ELT (p, 0) 12333 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx); 12334 RTVEC_ELT (p, 1) 12335 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4)); 12336 12337 return gen_rtx_PARALLEL (mode, p); 12338 } 12339 12340 gcc_unreachable (); 12341} 12342 12343/* Define where to return a scalar return value of type RET_TYPE. */ 12344 12345static rtx 12346s390_function_value (const_tree ret_type, const_tree fn_decl_or_type, 12347 bool outgoing) 12348{ 12349 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type, 12350 fn_decl_or_type, outgoing); 12351} 12352 12353/* Define where to return a scalar libcall return value of mode 12354 MODE. */ 12355 12356static rtx 12357s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) 12358{ 12359 return s390_function_and_libcall_value (mode, NULL_TREE, 12360 NULL_TREE, true); 12361} 12362 12363 12364/* Create and return the va_list datatype. 
12365 12366 On S/390, va_list is an array type equivalent to 12367 12368 typedef struct __va_list_tag 12369 { 12370 long __gpr; 12371 long __fpr; 12372 void *__overflow_arg_area; 12373 void *__reg_save_area; 12374 } va_list[1]; 12375 12376 where __gpr and __fpr hold the number of general purpose 12377 or floating point arguments used up to now, respectively, 12378 __overflow_arg_area points to the stack location of the 12379 next argument passed on the stack, and __reg_save_area 12380 always points to the start of the register area in the 12381 call frame of the current function. The function prologue 12382 saves all registers used for argument passing into this 12383 area if the function uses variable arguments. */ 12384 12385static tree 12386s390_build_builtin_va_list (void) 12387{ 12388 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; 12389 12390 record = lang_hooks.types.make_type (RECORD_TYPE); 12391 12392 type_decl = 12393 build_decl (BUILTINS_LOCATION, 12394 TYPE_DECL, get_identifier ("__va_list_tag"), record); 12395 12396 f_gpr = build_decl (BUILTINS_LOCATION, 12397 FIELD_DECL, get_identifier ("__gpr"), 12398 long_integer_type_node); 12399 f_fpr = build_decl (BUILTINS_LOCATION, 12400 FIELD_DECL, get_identifier ("__fpr"), 12401 long_integer_type_node); 12402 f_ovf = build_decl (BUILTINS_LOCATION, 12403 FIELD_DECL, get_identifier ("__overflow_arg_area"), 12404 ptr_type_node); 12405 f_sav = build_decl (BUILTINS_LOCATION, 12406 FIELD_DECL, get_identifier ("__reg_save_area"), 12407 ptr_type_node); 12408 12409 va_list_gpr_counter_field = f_gpr; 12410 va_list_fpr_counter_field = f_fpr; 12411 12412 DECL_FIELD_CONTEXT (f_gpr) = record; 12413 DECL_FIELD_CONTEXT (f_fpr) = record; 12414 DECL_FIELD_CONTEXT (f_ovf) = record; 12415 DECL_FIELD_CONTEXT (f_sav) = record; 12416 12417 TYPE_STUB_DECL (record) = type_decl; 12418 TYPE_NAME (record) = type_decl; 12419 TYPE_FIELDS (record) = f_gpr; 12420 DECL_CHAIN (f_gpr) = f_fpr; 12421 DECL_CHAIN (f_fpr) = f_ovf; 12422 DECL_CHAIN (f_ovf) = f_sav; 12423 12424 layout_type (record); 12425 12426 /* The correct type is an array type of one element. */ 12427 return build_array_type (record, build_index_type (size_zero_node)); 12428} 12429 12430/* Implement va_start by filling the va_list structure VALIST. 12431 STDARG_P is always true, and ignored. 12432 NEXTARG points to the first anonymous stack argument. 12433 12434 The following global variables are used to initialize 12435 the va_list structure: 12436 12437 crtl->args.info: 12438 holds number of gprs and fprs used for named arguments. 12439 crtl->args.arg_offset_rtx: 12440 holds the offset of the first anonymous stack argument 12441 (relative to the virtual arg pointer). */ 12442 12443static void 12444s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) 12445{ 12446 HOST_WIDE_INT n_gpr, n_fpr; 12447 int off; 12448 tree f_gpr, f_fpr, f_ovf, f_sav; 12449 tree gpr, fpr, ovf, sav, t; 12450 12451 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); 12452 f_fpr = DECL_CHAIN (f_gpr); 12453 f_ovf = DECL_CHAIN (f_fpr); 12454 f_sav = DECL_CHAIN (f_ovf); 12455 12456 valist = build_simple_mem_ref (valist); 12457 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE); 12458 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); 12459 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); 12460 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); 12461 12462 /* Count number of gp and fp argument registers used. 
*/ 12463 12464 n_gpr = crtl->args.info.gprs; 12465 n_fpr = crtl->args.info.fprs; 12466 12467 if (cfun->va_list_gpr_size) 12468 { 12469 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, 12470 build_int_cst (NULL_TREE, n_gpr)); 12471 TREE_SIDE_EFFECTS (t) = 1; 12472 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 12473 } 12474 12475 if (cfun->va_list_fpr_size) 12476 { 12477 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, 12478 build_int_cst (NULL_TREE, n_fpr)); 12479 TREE_SIDE_EFFECTS (t) = 1; 12480 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 12481 } 12482 12483 if (flag_split_stack 12484 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl)) 12485 == NULL) 12486 && cfun->machine->split_stack_varargs_pointer == NULL_RTX) 12487 { 12488 rtx reg; 12489 rtx_insn *seq; 12490 12491 reg = gen_reg_rtx (Pmode); 12492 cfun->machine->split_stack_varargs_pointer = reg; 12493 12494 start_sequence (); 12495 emit_move_insn (reg, gen_rtx_REG (Pmode, 1)); 12496 seq = get_insns (); 12497 end_sequence (); 12498 12499 push_topmost_sequence (); 12500 emit_insn_after (seq, entry_of_function ()); 12501 pop_topmost_sequence (); 12502 } 12503 12504 /* Find the overflow area. 12505 FIXME: This currently is too pessimistic when the vector ABI is 12506 enabled. In that case we *always* set up the overflow area 12507 pointer. */ 12508 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG 12509 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG 12510 || TARGET_VX_ABI) 12511 { 12512 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) 12513 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx); 12514 else 12515 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer); 12516 12517 off = INTVAL (crtl->args.arg_offset_rtx); 12518 off = off < 0 ? 0 : off; 12519 if (TARGET_DEBUG_ARG) 12520 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n", 12521 (int)n_gpr, (int)n_fpr, off); 12522 12523 t = fold_build_pointer_plus_hwi (t, off); 12524 12525 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t); 12526 TREE_SIDE_EFFECTS (t) = 1; 12527 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 12528 } 12529 12530 /* Find the register save area. */ 12531 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG) 12532 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG)) 12533 { 12534 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx); 12535 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG); 12536 12537 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t); 12538 TREE_SIDE_EFFECTS (t) = 1; 12539 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 12540 } 12541} 12542 12543/* Implement va_arg by updating the va_list structure 12544 VALIST as required to retrieve an argument of type 12545 TYPE, and returning that argument. 
12546 12547 Generates code equivalent to: 12548 12549 if (integral value) { 12550 if (size <= 4 && args.gpr < 5 || 12551 size > 4 && args.gpr < 4 ) 12552 ret = args.reg_save_area[args.gpr+8] 12553 else 12554 ret = *args.overflow_arg_area++; 12555 } else if (vector value) { 12556 ret = *args.overflow_arg_area; 12557 args.overflow_arg_area += size / 8; 12558 } else if (float value) { 12559 if (args.fpr < 2) 12560 ret = args.reg_save_area[args.fpr+64] 12561 else 12562 ret = *args.overflow_arg_area++; 12563 } else if (aggregate value) { 12564 if (args.gpr < 5) 12565 ret = *args.reg_save_area[args.gpr] 12566 else 12567 ret = **args.overflow_arg_area++; 12568 } */ 12569 12570static tree 12571s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, 12572 gimple_seq *post_p ATTRIBUTE_UNUSED) 12573{ 12574 tree f_gpr, f_fpr, f_ovf, f_sav; 12575 tree gpr, fpr, ovf, sav, reg, t, u; 12576 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg; 12577 tree lab_false, lab_over = NULL_TREE; 12578 tree addr = create_tmp_var (ptr_type_node, "addr"); 12579 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within 12580 a stack slot. */ 12581 12582 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); 12583 f_fpr = DECL_CHAIN (f_gpr); 12584 f_ovf = DECL_CHAIN (f_fpr); 12585 f_sav = DECL_CHAIN (f_ovf); 12586 12587 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE); 12588 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); 12589 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); 12590 12591 /* The tree for args* cannot be shared between gpr/fpr and ovf since 12592 both appear on a lhs. */ 12593 valist = unshare_expr (valist); 12594 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); 12595 12596 size = int_size_in_bytes (type); 12597 12598 s390_check_type_for_vector_abi (type, true, false); 12599 12600 if (pass_va_arg_by_reference (type)) 12601 { 12602 if (TARGET_DEBUG_ARG) 12603 { 12604 fprintf (stderr, "va_arg: aggregate type"); 12605 debug_tree (type); 12606 } 12607 12608 /* Aggregates are passed by reference. */ 12609 indirect_p = 1; 12610 reg = gpr; 12611 n_reg = 1; 12612 12613 /* kernel stack layout on 31 bit: It is assumed here that no padding 12614 will be added by s390_frame_info because for va_args always an even 12615 number of gprs has to be saved r15-r2 = 14 regs. */ 12616 sav_ofs = 2 * UNITS_PER_LONG; 12617 sav_scale = UNITS_PER_LONG; 12618 size = UNITS_PER_LONG; 12619 max_reg = GP_ARG_NUM_REG - n_reg; 12620 left_align_p = false; 12621 } 12622 else if (s390_function_arg_vector (TYPE_MODE (type), type)) 12623 { 12624 if (TARGET_DEBUG_ARG) 12625 { 12626 fprintf (stderr, "va_arg: vector type"); 12627 debug_tree (type); 12628 } 12629 12630 indirect_p = 0; 12631 reg = NULL_TREE; 12632 n_reg = 0; 12633 sav_ofs = 0; 12634 sav_scale = 8; 12635 max_reg = 0; 12636 left_align_p = true; 12637 } 12638 else if (s390_function_arg_float (TYPE_MODE (type), type)) 12639 { 12640 if (TARGET_DEBUG_ARG) 12641 { 12642 fprintf (stderr, "va_arg: float type"); 12643 debug_tree (type); 12644 } 12645 12646 /* FP args go in FP registers, if present. */ 12647 indirect_p = 0; 12648 reg = fpr; 12649 n_reg = 1; 12650 sav_ofs = 16 * UNITS_PER_LONG; 12651 sav_scale = 8; 12652 max_reg = FP_ARG_NUM_REG - n_reg; 12653 left_align_p = false; 12654 } 12655 else 12656 { 12657 if (TARGET_DEBUG_ARG) 12658 { 12659 fprintf (stderr, "va_arg: other type"); 12660 debug_tree (type); 12661 } 12662 12663 /* Otherwise into GP registers.
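 Values smaller than UNITS_PER_LONG are right-justified within their register save slot, which is why sav_ofs is increased by the remainder below.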
*/ 12664 indirect_p = 0; 12665 reg = gpr; 12666 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG; 12667 12668 /* kernel stack layout on 31 bit: It is assumed here that no padding 12669 will be added by s390_frame_info because for va_args always an even 12670 number of gprs has to be saved r15-r2 = 14 regs. */ 12671 sav_ofs = 2 * UNITS_PER_LONG; 12672 12673 if (size < UNITS_PER_LONG) 12674 sav_ofs += UNITS_PER_LONG - size; 12675 12676 sav_scale = UNITS_PER_LONG; 12677 max_reg = GP_ARG_NUM_REG - n_reg; 12678 left_align_p = false; 12679 } 12680 12681 /* Pull the value out of the saved registers ... */ 12682 12683 if (reg != NULL_TREE) 12684 { 12685 /* 12686 if (reg > ((typeof (reg))max_reg)) 12687 goto lab_false; 12688 12689 addr = sav + sav_ofs + reg * save_scale; 12690 12691 goto lab_over; 12692 12693 lab_false: 12694 */ 12695 12696 lab_false = create_artificial_label (UNKNOWN_LOCATION); 12697 lab_over = create_artificial_label (UNKNOWN_LOCATION); 12698 12699 t = fold_convert (TREE_TYPE (reg), size_int (max_reg)); 12700 t = build2 (GT_EXPR, boolean_type_node, reg, t); 12701 u = build1 (GOTO_EXPR, void_type_node, lab_false); 12702 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE); 12703 gimplify_and_add (t, pre_p); 12704 12705 t = fold_build_pointer_plus_hwi (sav, sav_ofs); 12706 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg, 12707 fold_convert (TREE_TYPE (reg), size_int (sav_scale))); 12708 t = fold_build_pointer_plus (t, u); 12709 12710 gimplify_assign (addr, t, pre_p); 12711 12712 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); 12713 12714 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false)); 12715 } 12716 12717 /* ... Otherwise out of the overflow area. */ 12718 12719 t = ovf; 12720 if (size < UNITS_PER_LONG && !left_align_p) 12721 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size); 12722 12723 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); 12724 12725 gimplify_assign (addr, t, pre_p); 12726 12727 if (size < UNITS_PER_LONG && left_align_p) 12728 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG); 12729 else 12730 t = fold_build_pointer_plus_hwi (t, size); 12731 12732 gimplify_assign (ovf, t, pre_p); 12733 12734 if (reg != NULL_TREE) 12735 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); 12736 12737 12738 /* Increment register save count. */ 12739 12740 if (n_reg > 0) 12741 { 12742 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg, 12743 fold_convert (TREE_TYPE (reg), size_int (n_reg))); 12744 gimplify_and_add (u, pre_p); 12745 } 12746 12747 if (indirect_p) 12748 { 12749 t = build_pointer_type_for_mode (build_pointer_type (type), 12750 ptr_mode, true); 12751 addr = fold_convert (t, addr); 12752 addr = build_va_arg_indirect_ref (addr); 12753 } 12754 else 12755 { 12756 t = build_pointer_type_for_mode (type, ptr_mode, true); 12757 addr = fold_convert (t, addr); 12758 } 12759 12760 return build_va_arg_indirect_ref (addr); 12761} 12762 12763/* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX) 12764 expanders. 12765 DEST - Register location where CC will be stored. 12766 TDB - Pointer to a 256 byte area where to store the transaction. 12767 diagnostic block. NULL if TDB is not needed. 12768 RETRY - Retry count value. If non-NULL a retry loop for CC2 12769 is emitted 12770 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part 12771 of the tbegin instruction pattern. 
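 When RETRY is given, the code below loops on CC2 (transient abort): it emits a transaction abort assist (tx_assist) with the number of attempts made so far and branches back to the tbegin via a decrement-and-branch on the retry counter.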
*/ 12772 12773void 12774s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p) 12775{ 12776 rtx retry_plus_two = gen_reg_rtx (SImode); 12777 rtx retry_reg = gen_reg_rtx (SImode); 12778 rtx_code_label *retry_label = NULL; 12779 12780 if (retry != NULL_RTX) 12781 { 12782 emit_move_insn (retry_reg, retry); 12783 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx)); 12784 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx)); 12785 retry_label = gen_label_rtx (); 12786 emit_label (retry_label); 12787 } 12788 12789 if (clobber_fprs_p) 12790 { 12791 if (TARGET_VX) 12792 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), 12793 tdb)); 12794 else 12795 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), 12796 tdb)); 12797 } 12798 else 12799 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), 12800 tdb)); 12801 12802 emit_move_insn (dest, gen_rtx_UNSPEC (SImode, 12803 gen_rtvec (1, gen_rtx_REG (CCRAWmode, 12804 CC_REGNUM)), 12805 UNSPEC_CC_TO_INT)); 12806 if (retry != NULL_RTX) 12807 { 12808 const int CC0 = 1 << 3; 12809 const int CC1 = 1 << 2; 12810 const int CC3 = 1 << 0; 12811 rtx jump; 12812 rtx count = gen_reg_rtx (SImode); 12813 rtx_code_label *leave_label = gen_label_rtx (); 12814 12815 /* Exit for success and permanent failures. */ 12816 jump = s390_emit_jump (leave_label, 12817 gen_rtx_EQ (VOIDmode, 12818 gen_rtx_REG (CCRAWmode, CC_REGNUM), 12819 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3))); 12820 LABEL_NUSES (leave_label) = 1; 12821 12822 /* CC2 - transient failure. Perform retry with ppa. */ 12823 emit_move_insn (count, retry_plus_two); 12824 emit_insn (gen_subsi3 (count, count, retry_reg)); 12825 emit_insn (gen_tx_assist (count)); 12826 jump = emit_jump_insn (gen_doloop_si64 (retry_label, 12827 retry_reg, 12828 retry_reg)); 12829 JUMP_LABEL (jump) = retry_label; 12830 LABEL_NUSES (retry_label) = 1; 12831 emit_label (leave_label); 12832 } 12833} 12834 12835 12836/* Return the decl for the target specific builtin with the function 12837 code FCODE. */ 12838 12839static tree 12840s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED) 12841{ 12842 if (fcode >= S390_BUILTIN_MAX) 12843 return error_mark_node; 12844 12845 return s390_builtin_decls[fcode]; 12846} 12847 12848/* We call mcount before the function prologue. So a profiled leaf 12849 function should stay a leaf function. */ 12850 12851static bool 12852s390_keep_leaf_when_profiled () 12853{ 12854 return true; 12855} 12856 12857/* Output assembly code for the trampoline template to 12858 stdio stream FILE. 12859 12860 On S/390, we use gpr 1 internally in the trampoline code; 12861 gpr 0 is used to hold the static chain. */ 12862 12863static void 12864s390_asm_trampoline_template (FILE *file) 12865{ 12866 rtx op[2]; 12867 op[0] = gen_rtx_REG (Pmode, 0); 12868 op[1] = gen_rtx_REG (Pmode, 1); 12869 12870 if (TARGET_64BIT) 12871 { 12872 output_asm_insn ("basr\t%1,0", op); /* 2 byte */ 12873 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */ 12874 output_asm_insn ("br\t%1", op); /* 2 byte */ 12875 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10)); 12876 } 12877 else 12878 { 12879 output_asm_insn ("basr\t%1,0", op); /* 2 byte */ 12880 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */ 12881 output_asm_insn ("br\t%1", op); /* 2 byte */ 12882 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8)); 12883 } 12884} 12885 12886/* Emit RTL insns to initialize the variable parts of a trampoline. 
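 The template emitted by s390_asm_trampoline_template above loads the static chain register and the branch target from the two pointer-sized slots following its code; those slots are filled in here.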
12887 FNADDR is an RTX for the address of the function's pure code. 12888 CXT is an RTX for the static chain value for the function. */ 12889 12890static void 12891s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt) 12892{ 12893 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); 12894 rtx mem; 12895 12896 emit_block_move (m_tramp, assemble_trampoline_template (), 12897 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL); 12898 12899 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG); 12900 emit_move_insn (mem, cxt); 12901 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG); 12902 emit_move_insn (mem, fnaddr); 12903} 12904 12905static void 12906output_asm_nops (const char *user, int hw) 12907{ 12908 asm_fprintf (asm_out_file, "\t# NOPs for %s (%d halfwords)\n", user, hw); 12909 while (hw > 0) 12910 { 12911 if (hw >= 3) 12912 { 12913 output_asm_insn ("brcl\t0,0", NULL); 12914 hw -= 3; 12915 } 12916 else if (hw >= 2) 12917 { 12918 output_asm_insn ("bc\t0,0", NULL); 12919 hw -= 2; 12920 } 12921 else 12922 { 12923 output_asm_insn ("bcr\t0,0", NULL); 12924 hw -= 1; 12925 } 12926 } 12927} 12928 12929/* Output assembler code to FILE to increment profiler label # LABELNO 12930 for profiling a function entry. */ 12931 12932void 12933s390_function_profiler (FILE *file, int labelno) 12934{ 12935 rtx op[8]; 12936 12937 char label[128]; 12938 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno); 12939 12940 fprintf (file, "# function profiler \n"); 12941 12942 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM); 12943 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); 12944 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG)); 12945 op[7] = GEN_INT (UNITS_PER_LONG); 12946 12947 op[2] = gen_rtx_REG (Pmode, 1); 12948 op[3] = gen_rtx_SYMBOL_REF (Pmode, label); 12949 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL; 12950 12951 op[4] = gen_rtx_SYMBOL_REF (Pmode, flag_fentry ? 
"__fentry__" : "_mcount"); 12952 if (flag_pic) 12953 { 12954 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT); 12955 op[4] = gen_rtx_CONST (Pmode, op[4]); 12956 } 12957 12958 if (flag_record_mcount) 12959 fprintf (file, "1:\n"); 12960 12961 if (flag_fentry) 12962 { 12963 if (flag_nop_mcount) 12964 output_asm_nops ("-mnop-mcount", /* brasl */ 3); 12965 else if (cfun->static_chain_decl) 12966 warning (OPT_Wcannot_profile, "nested functions cannot be profiled " 12967 "with %<-mfentry%> on s390"); 12968 else 12969 output_asm_insn ("brasl\t0,%4", op); 12970 } 12971 else if (TARGET_64BIT) 12972 { 12973 if (flag_nop_mcount) 12974 output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* larl */ 3 + 12975 /* brasl */ 3 + /* lg */ 3); 12976 else 12977 { 12978 output_asm_insn ("stg\t%0,%1", op); 12979 if (flag_dwarf2_cfi_asm) 12980 output_asm_insn (".cfi_rel_offset\t%0,%7", op); 12981 output_asm_insn ("larl\t%2,%3", op); 12982 output_asm_insn ("brasl\t%0,%4", op); 12983 output_asm_insn ("lg\t%0,%1", op); 12984 if (flag_dwarf2_cfi_asm) 12985 output_asm_insn (".cfi_restore\t%0", op); 12986 } 12987 } 12988 else 12989 { 12990 if (flag_nop_mcount) 12991 output_asm_nops ("-mnop-mcount", /* st */ 2 + /* larl */ 3 + 12992 /* brasl */ 3 + /* l */ 2); 12993 else 12994 { 12995 output_asm_insn ("st\t%0,%1", op); 12996 if (flag_dwarf2_cfi_asm) 12997 output_asm_insn (".cfi_rel_offset\t%0,%7", op); 12998 output_asm_insn ("larl\t%2,%3", op); 12999 output_asm_insn ("brasl\t%0,%4", op); 13000 output_asm_insn ("l\t%0,%1", op); 13001 if (flag_dwarf2_cfi_asm) 13002 output_asm_insn (".cfi_restore\t%0", op); 13003 } 13004 } 13005 13006 if (flag_record_mcount) 13007 { 13008 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n"); 13009 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long"); 13010 fprintf (file, "\t.previous\n"); 13011 } 13012} 13013 13014/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF 13015 into its SYMBOL_REF_FLAGS. */ 13016 13017static void 13018s390_encode_section_info (tree decl, rtx rtl, int first) 13019{ 13020 default_encode_section_info (decl, rtl, first); 13021 13022 if (TREE_CODE (decl) == VAR_DECL) 13023 { 13024 /* Store the alignment to be able to check if we can use 13025 a larl/load-relative instruction. We only handle the cases 13026 that can go wrong (i.e. no FUNC_DECLs). */ 13027 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16) 13028 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0)); 13029 else if (DECL_ALIGN (decl) % 32) 13030 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0)); 13031 else if (DECL_ALIGN (decl) % 64) 13032 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0)); 13033 } 13034 13035 /* Literal pool references don't have a decl so they are handled 13036 differently here. We rely on the information in the MEM_ALIGN 13037 entry to decide upon the alignment. */ 13038 if (MEM_P (rtl) 13039 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF 13040 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))) 13041 { 13042 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16) 13043 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0)); 13044 else if (MEM_ALIGN (rtl) % 32) 13045 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0)); 13046 else if (MEM_ALIGN (rtl) % 64) 13047 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0)); 13048 } 13049} 13050 13051/* Output thunk to FILE that implements a C++ virtual function call (with 13052 multiple inheritance) to FUNCTION. 
The thunk adjusts the this pointer 13053 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment 13054 stored at VCALL_OFFSET in the vtable whose address is located at offset 0 13055 relative to the resulting this pointer. */ 13056 13057static void 13058s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, 13059 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, 13060 tree function) 13061{ 13062 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk)); 13063 rtx op[10]; 13064 int nonlocal = 0; 13065 13066 assemble_start_function (thunk, fnname); 13067 /* Make sure unwind info is emitted for the thunk if needed. */ 13068 final_start_function (emit_barrier (), file, 1); 13069 13070 /* Operand 0 is the target function. */ 13071 op[0] = XEXP (DECL_RTL (function), 0); 13072 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0])) 13073 { 13074 nonlocal = 1; 13075 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]), 13076 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT); 13077 op[0] = gen_rtx_CONST (Pmode, op[0]); 13078 } 13079 13080 /* Operand 1 is the 'this' pointer. */ 13081 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) 13082 op[1] = gen_rtx_REG (Pmode, 3); 13083 else 13084 op[1] = gen_rtx_REG (Pmode, 2); 13085 13086 /* Operand 2 is the delta. */ 13087 op[2] = GEN_INT (delta); 13088 13089 /* Operand 3 is the vcall_offset. */ 13090 op[3] = GEN_INT (vcall_offset); 13091 13092 /* Operand 4 is the temporary register. */ 13093 op[4] = gen_rtx_REG (Pmode, 1); 13094 13095 /* Operands 5 to 8 can be used as labels. */ 13096 op[5] = NULL_RTX; 13097 op[6] = NULL_RTX; 13098 op[7] = NULL_RTX; 13099 op[8] = NULL_RTX; 13100 13101 /* Operand 9 can be used for temporary register. */ 13102 op[9] = NULL_RTX; 13103 13104 /* Generate code. */ 13105 if (TARGET_64BIT) 13106 { 13107 /* Setup literal pool pointer if required. */ 13108 if ((!DISP_IN_RANGE (delta) 13109 && !CONST_OK_FOR_K (delta) 13110 && !CONST_OK_FOR_Os (delta)) 13111 || (!DISP_IN_RANGE (vcall_offset) 13112 && !CONST_OK_FOR_K (vcall_offset) 13113 && !CONST_OK_FOR_Os (vcall_offset))) 13114 { 13115 op[5] = gen_label_rtx (); 13116 output_asm_insn ("larl\t%4,%5", op); 13117 } 13118 13119 /* Add DELTA to this pointer. */ 13120 if (delta) 13121 { 13122 if (CONST_OK_FOR_J (delta)) 13123 output_asm_insn ("la\t%1,%2(%1)", op); 13124 else if (DISP_IN_RANGE (delta)) 13125 output_asm_insn ("lay\t%1,%2(%1)", op); 13126 else if (CONST_OK_FOR_K (delta)) 13127 output_asm_insn ("aghi\t%1,%2", op); 13128 else if (CONST_OK_FOR_Os (delta)) 13129 output_asm_insn ("agfi\t%1,%2", op); 13130 else 13131 { 13132 op[6] = gen_label_rtx (); 13133 output_asm_insn ("agf\t%1,%6-%5(%4)", op); 13134 } 13135 } 13136 13137 /* Perform vcall adjustment. */ 13138 if (vcall_offset) 13139 { 13140 if (DISP_IN_RANGE (vcall_offset)) 13141 { 13142 output_asm_insn ("lg\t%4,0(%1)", op); 13143 output_asm_insn ("ag\t%1,%3(%4)", op); 13144 } 13145 else if (CONST_OK_FOR_K (vcall_offset)) 13146 { 13147 output_asm_insn ("lghi\t%4,%3", op); 13148 output_asm_insn ("ag\t%4,0(%1)", op); 13149 output_asm_insn ("ag\t%1,0(%4)", op); 13150 } 13151 else if (CONST_OK_FOR_Os (vcall_offset)) 13152 { 13153 output_asm_insn ("lgfi\t%4,%3", op); 13154 output_asm_insn ("ag\t%4,0(%1)", op); 13155 output_asm_insn ("ag\t%1,0(%4)", op); 13156 } 13157 else 13158 { 13159 op[7] = gen_label_rtx (); 13160 output_asm_insn ("llgf\t%4,%7-%5(%4)", op); 13161 output_asm_insn ("ag\t%4,0(%1)", op); 13162 output_asm_insn ("ag\t%1,0(%4)", op); 13163 } 13164 } 13165 13166 /* Jump to target. 
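 The jump is PC-relative, so the thunk itself leaves all argument registers untouched on the way to the target function.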
*/ 13167 output_asm_insn ("jg\t%0", op); 13168 13169 /* Output literal pool if required. */ 13170 if (op[5]) 13171 { 13172 output_asm_insn (".align\t4", op); 13173 targetm.asm_out.internal_label (file, "L", 13174 CODE_LABEL_NUMBER (op[5])); 13175 } 13176 if (op[6]) 13177 { 13178 targetm.asm_out.internal_label (file, "L", 13179 CODE_LABEL_NUMBER (op[6])); 13180 output_asm_insn (".long\t%2", op); 13181 } 13182 if (op[7]) 13183 { 13184 targetm.asm_out.internal_label (file, "L", 13185 CODE_LABEL_NUMBER (op[7])); 13186 output_asm_insn (".long\t%3", op); 13187 } 13188 } 13189 else 13190 { 13191 /* Setup base pointer if required. */ 13192 if (!vcall_offset 13193 || (!DISP_IN_RANGE (delta) 13194 && !CONST_OK_FOR_K (delta) 13195 && !CONST_OK_FOR_Os (delta)) 13196 || (!DISP_IN_RANGE (delta) 13197 && !CONST_OK_FOR_K (vcall_offset) 13198 && !CONST_OK_FOR_Os (vcall_offset))) 13199 { 13200 op[5] = gen_label_rtx (); 13201 output_asm_insn ("basr\t%4,0", op); 13202 targetm.asm_out.internal_label (file, "L", 13203 CODE_LABEL_NUMBER (op[5])); 13204 } 13205 13206 /* Add DELTA to this pointer. */ 13207 if (delta) 13208 { 13209 if (CONST_OK_FOR_J (delta)) 13210 output_asm_insn ("la\t%1,%2(%1)", op); 13211 else if (DISP_IN_RANGE (delta)) 13212 output_asm_insn ("lay\t%1,%2(%1)", op); 13213 else if (CONST_OK_FOR_K (delta)) 13214 output_asm_insn ("ahi\t%1,%2", op); 13215 else if (CONST_OK_FOR_Os (delta)) 13216 output_asm_insn ("afi\t%1,%2", op); 13217 else 13218 { 13219 op[6] = gen_label_rtx (); 13220 output_asm_insn ("a\t%1,%6-%5(%4)", op); 13221 } 13222 } 13223 13224 /* Perform vcall adjustment. */ 13225 if (vcall_offset) 13226 { 13227 if (CONST_OK_FOR_J (vcall_offset)) 13228 { 13229 output_asm_insn ("l\t%4,0(%1)", op); 13230 output_asm_insn ("a\t%1,%3(%4)", op); 13231 } 13232 else if (DISP_IN_RANGE (vcall_offset)) 13233 { 13234 output_asm_insn ("l\t%4,0(%1)", op); 13235 output_asm_insn ("ay\t%1,%3(%4)", op); 13236 } 13237 else if (CONST_OK_FOR_K (vcall_offset)) 13238 { 13239 output_asm_insn ("lhi\t%4,%3", op); 13240 output_asm_insn ("a\t%4,0(%1)", op); 13241 output_asm_insn ("a\t%1,0(%4)", op); 13242 } 13243 else if (CONST_OK_FOR_Os (vcall_offset)) 13244 { 13245 output_asm_insn ("iilf\t%4,%3", op); 13246 output_asm_insn ("a\t%4,0(%1)", op); 13247 output_asm_insn ("a\t%1,0(%4)", op); 13248 } 13249 else 13250 { 13251 op[7] = gen_label_rtx (); 13252 output_asm_insn ("l\t%4,%7-%5(%4)", op); 13253 output_asm_insn ("a\t%4,0(%1)", op); 13254 output_asm_insn ("a\t%1,0(%4)", op); 13255 } 13256 13257 /* We had to clobber the base pointer register. 13258 Re-setup the base pointer (with a different base). */ 13259 op[5] = gen_label_rtx (); 13260 output_asm_insn ("basr\t%4,0", op); 13261 targetm.asm_out.internal_label (file, "L", 13262 CODE_LABEL_NUMBER (op[5])); 13263 } 13264 13265 /* Jump to target. */ 13266 op[8] = gen_label_rtx (); 13267 13268 if (!flag_pic) 13269 output_asm_insn ("l\t%4,%8-%5(%4)", op); 13270 else if (!nonlocal) 13271 output_asm_insn ("a\t%4,%8-%5(%4)", op); 13272 /* We cannot call through .plt, since .plt requires %r12 loaded. */ 13273 else if (flag_pic == 1) 13274 { 13275 output_asm_insn ("a\t%4,%8-%5(%4)", op); 13276 output_asm_insn ("l\t%4,%0(%4)", op); 13277 } 13278 else if (flag_pic == 2) 13279 { 13280 op[9] = gen_rtx_REG (Pmode, 0); 13281 output_asm_insn ("l\t%9,%8-4-%5(%4)", op); 13282 output_asm_insn ("a\t%4,%8-%5(%4)", op); 13283 output_asm_insn ("ar\t%4,%9", op); 13284 output_asm_insn ("l\t%4,0(%4)", op); 13285 } 13286 13287 output_asm_insn ("br\t%4", op); 13288 13289 /* Output literal pool. 
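 It provides the constants that did not fit into immediate operands: the target address and, when needed, DELTA and VCALL_OFFSET, all addressed relative to the base set up with basr above.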

      /* Output literal pool.  */
      output_asm_insn (".align\t4", op);

      if (nonlocal && flag_pic == 2)
	output_asm_insn (".long\t%0", op);
      if (nonlocal)
	{
	  op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
	  SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
	}

      targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
      if (!flag_pic)
	output_asm_insn (".long\t%0", op);
      else
	output_asm_insn (".long\t%0-%5", op);

      if (op[6])
	{
	  targetm.asm_out.internal_label (file, "L",
					  CODE_LABEL_NUMBER (op[6]));
	  output_asm_insn (".long\t%2", op);
	}
      if (op[7])
	{
	  targetm.asm_out.internal_label (file, "L",
					  CODE_LABEL_NUMBER (op[7]));
	  output_asm_insn (".long\t%3", op);
	}
    }
  final_end_function ();
  assemble_end_function (thunk, fnname);
}

/* Output either an indirect jump or an indirect call
   (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
   using a branch trampoline disabling branch target prediction.  */

void
s390_indirect_branch_via_thunk (unsigned int regno,
				unsigned int return_addr_regno,
				rtx comparison_operator,
				enum s390_indirect_branch_type type)
{
  enum s390_indirect_branch_option option;

  if (type == s390_indirect_branch_type_return)
    {
      if (s390_return_addr_from_memory ())
	option = s390_opt_function_return_mem;
      else
	option = s390_opt_function_return_reg;
    }
  else if (type == s390_indirect_branch_type_jump)
    option = s390_opt_indirect_branch_jump;
  else if (type == s390_indirect_branch_type_call)
    option = s390_opt_indirect_branch_call;
  else
    gcc_unreachable ();

  if (TARGET_INDIRECT_BRANCH_TABLE)
    {
      char label[32];

      ASM_GENERATE_INTERNAL_LABEL (label,
				   indirect_branch_table_label[option],
				   indirect_branch_table_label_no[option]++);
      ASM_OUTPUT_LABEL (asm_out_file, label);
    }

  if (return_addr_regno != INVALID_REGNUM)
    {
      gcc_assert (comparison_operator == NULL_RTX);
      fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
    }
  else
    {
      fputs (" \tjg", asm_out_file);
      if (comparison_operator != NULL_RTX)
	print_operand (asm_out_file, comparison_operator, 'C');

      fputs ("\t", asm_out_file);
    }

  if (TARGET_CPU_Z10)
    fprintf (asm_out_file,
	     TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
	     regno);
  else
    fprintf (asm_out_file,
	     TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
	     INDIRECT_BRANCH_THUNK_REGNUM, regno);

  if ((option == s390_opt_indirect_branch_jump
       && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
      || (option == s390_opt_indirect_branch_call
	  && cfun->machine->indirect_branch_call == indirect_branch_thunk)
      || (option == s390_opt_function_return_reg
	  && cfun->machine->function_return_reg == indirect_branch_thunk)
      || (option == s390_opt_function_return_mem
	  && cfun->machine->function_return_mem == indirect_branch_thunk))
    {
      if (TARGET_CPU_Z10)
	indirect_branch_z10thunk_mask |= (1 << regno);
      else
	indirect_branch_prez10thunk_mask |= (1 << regno);
    }
}
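
/* As an example of the output produced above: an indirect jump via a
   register on z10 or newer comes out as a "jg" (or a conditional
   "jg<cond>" via the 'C' operand modifier) to the thunk symbol built
   from TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL and the register
   number; on older CPUs the thunk name from
   TARGET_INDIRECT_BRANCH_THUNK_NAME_EX additionally encodes
   INDIRECT_BRANCH_THUNK_REGNUM.  */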

/* Output an inline thunk for indirect jumps.  EXECUTE_TARGET can
   either be an address register or a label pointing to the location
   of the jump instruction.  */

void
s390_indirect_branch_via_inline_thunk (rtx execute_target)
{
  if (TARGET_INDIRECT_BRANCH_TABLE)
    {
      char label[32];

      ASM_GENERATE_INTERNAL_LABEL (label,
				   indirect_branch_table_label[s390_opt_indirect_branch_jump],
				   indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
      ASM_OUTPUT_LABEL (asm_out_file, label);
    }

  if (!TARGET_ZARCH)
    fputs ("\t.machinemode zarch\n", asm_out_file);

  if (REG_P (execute_target))
    fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
  else
    output_asm_insn ("\texrl\t%%r0,%0", &execute_target);

  if (!TARGET_ZARCH)
    fputs ("\t.machinemode esa\n", asm_out_file);

  fputs ("0:\tj\t0b\n", asm_out_file);
}

static bool
s390_valid_pointer_mode (scalar_int_mode mode)
{
  return (mode == SImode || (TARGET_64BIT && mode == DImode));
}

/* Checks whether the given CALL_EXPR would use a call-saved
   register.  This is used to decide whether sibling call
   optimization could be performed on the respective function
   call.  */

static bool
s390_call_saved_register_used (tree call_expr)
{
  CUMULATIVE_ARGS cum_v;
  cumulative_args_t cum;
  tree parameter;
  rtx parm_rtx;
  int reg, i;

  INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
  cum = pack_cumulative_args (&cum_v);

  for (i = 0; i < call_expr_nargs (call_expr); i++)
    {
      parameter = CALL_EXPR_ARG (call_expr, i);
      gcc_assert (parameter);

      /* For an undeclared variable passed as parameter we will get
	 an ERROR_MARK node here.  */
      if (TREE_CODE (parameter) == ERROR_MARK)
	return true;

      /* We assume that in the target function all parameters are
	 named.  This only has an impact on vector argument register
	 usage, none of which is call-saved.  */
      function_arg_info arg (TREE_TYPE (parameter), /*named=*/true);
      apply_pass_by_reference_rules (&cum_v, arg);

      parm_rtx = s390_function_arg (cum, arg);

      s390_function_arg_advance (cum, arg);

      if (!parm_rtx)
	continue;

      if (REG_P (parm_rtx))
	{
	  int size = s390_function_arg_size (arg.mode, arg.type);
	  int nregs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;

	  for (reg = 0; reg < nregs; reg++)
	    if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx)))
	      return true;
	}
      else if (GET_CODE (parm_rtx) == PARALLEL)
	{
	  int i;

	  for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
	    {
	      rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);

	      gcc_assert (REG_P (r));
	      gcc_assert (REG_NREGS (r) == 1);

	      if (!call_used_or_fixed_reg_p (REGNO (r)))
		return true;
	    }
	}
    }
  return false;
}

/* Return true if the given call expression can be
   turned into a sibling call.
   DECL holds the declaration of the function to be called whereas
   EXP is the call expression itself.  */

static bool
s390_function_ok_for_sibcall (tree decl, tree exp)
{
  /* The TPF epilogue uses register 1.  */
  if (TARGET_TPF_PROFILING)
    return false;

  /* The 31 bit PLT code uses register 12 (GOT pointer - call-saved)
     which would have to be restored before the sibcall.  */
  if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* The thunks for indirect branches require r1 if no exrl is
     available.  r1 might not be available when doing a sibling
     call.  */
  if (TARGET_INDIRECT_BRANCH_NOBP_CALL
      && !TARGET_CPU_Z10
      && !decl)
    return false;

  /* Register 6 on s390 is available as an argument register but
     unfortunately is call-saved.  This makes functions needing this
     register for arguments not suitable for sibcalls.  */
  return !s390_call_saved_register_used (exp);
}

/* Return the fixed registers used for condition codes.  */

static bool
s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;

  return true;
}

/* This function is used by the call expanders of the machine description.
   It emits the call insn itself together with the necessary operations
   to adjust the target address and returns the emitted insn.
   ADDR_LOCATION is the target address rtx
   TLS_CALL the location of the thread-local symbol
   RESULT_REG the register where the result of the call should be stored
   RETADDR_REG the register where the return address should be stored
	       If this parameter is NULL_RTX the call is considered
	       to be a sibling call.  */

rtx_insn *
s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
		rtx retaddr_reg)
{
  bool plt_call = false;
  rtx_insn *insn;
  rtx vec[4] = { NULL_RTX };
  int elts = 0;
  rtx *call = &vec[0];
  rtx *clobber_ret_reg = &vec[1];
  rtx *use = &vec[2];
  rtx *clobber_thunk_reg = &vec[3];
  int i;

  /* Direct function calls need special treatment.  */
  if (GET_CODE (addr_location) == SYMBOL_REF)
    {
      /* When calling a global routine in PIC mode, we must
	 replace the symbol itself with the PLT stub.  */
      if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
	{
	  if (TARGET_64BIT || retaddr_reg != NULL_RTX)
	    {
	      addr_location = gen_rtx_UNSPEC (Pmode,
					      gen_rtvec (1, addr_location),
					      UNSPEC_PLT);
	      addr_location = gen_rtx_CONST (Pmode, addr_location);
	      plt_call = true;
	    }
	  else
	    /* For -fpic code the PLT entries might use r12 which is
	       call-saved.  Therefore we cannot do a sibcall when
	       calling directly using a symbol ref.  When reaching
	       this point we decided (in s390_function_ok_for_sibcall)
	       to do a sibcall for a function pointer but one of the
	       optimizers was able to get rid of the function pointer
	       by propagating the symbol ref into the call.  This
	       optimization is illegal for S/390 so we turn the direct
	       call into an indirect call again.  */
	    addr_location = force_reg (Pmode, addr_location);
	}
    }
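
  /* The code below wraps the call into a PARALLEL of the shape

	 (parallel [(set RESULT_REG (call (mem:QI ADDR) 0))
		    (clobber RETADDR_REG)
		    (use TLS_CALL)
		    (clobber thunk register)])

     where every element except the call itself is optional and is
     dropped when its operand is NULL (see the ELTS counting
     further down).  */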

  /* If it is already an indirect call or the code above moved the
     SYMBOL_REF to somewhere else make sure the address can be found in
     register 1.  */
  if (retaddr_reg == NULL_RTX
      && GET_CODE (addr_location) != SYMBOL_REF
      && !plt_call)
    {
      emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
      addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
    }

  if (TARGET_INDIRECT_BRANCH_NOBP_CALL
      && GET_CODE (addr_location) != SYMBOL_REF
      && !plt_call)
    {
      /* Indirect branch thunks require the target to be a single GPR.  */
      addr_location = force_reg (Pmode, addr_location);

      /* Without exrl the indirect branch thunks need an additional
	 register for the larl/ex sequence.  */
      if (!TARGET_CPU_Z10)
	{
	  *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM);
	  *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
	}
    }

  addr_location = gen_rtx_MEM (QImode, addr_location);
  *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);

  if (result_reg != NULL_RTX)
    *call = gen_rtx_SET (result_reg, *call);

  if (retaddr_reg != NULL_RTX)
    {
      *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);

      if (tls_call != NULL_RTX)
	*use = gen_rtx_USE (VOIDmode, tls_call);
    }

  for (i = 0; i < 4; i++)
    if (vec[i] != NULL_RTX)
      elts++;

  if (elts > 1)
    {
      rtvec v;
      int e = 0;

      v = rtvec_alloc (elts);
      for (i = 0; i < 4; i++)
	if (vec[i] != NULL_RTX)
	  {
	    RTVEC_ELT (v, e) = vec[i];
	    e++;
	  }

      *call = gen_rtx_PARALLEL (VOIDmode, v);
    }

  insn = emit_call_insn (*call);

  /* 31-bit PLT stubs and tls calls use the GOT register implicitly.  */
  if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
    {
      /* s390_function_ok_for_sibcall should
	 have denied sibcalls in this case.  */
      gcc_assert (retaddr_reg != NULL_RTX);
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
    }
  return insn;
}

/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */

static void
s390_conditional_register_usage (void)
{
  int i;

  if (flag_pic)
    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
  fixed_regs[BASE_REGNUM] = 0;
  fixed_regs[RETURN_REGNUM] = 0;
  if (TARGET_64BIT)
    {
      for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
	call_used_regs[i] = 0;
    }
  else
    {
      call_used_regs[FPR4_REGNUM] = 0;
      call_used_regs[FPR6_REGNUM] = 0;
    }

  if (TARGET_SOFT_FLOAT)
    {
      for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
	fixed_regs[i] = 1;
    }

  /* Disable v16 - v31 for non-vector target.  */
  if (!TARGET_VX)
    {
      for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
	fixed_regs[i] = call_used_regs[i] = 1;
    }
}

/* Corresponding function to eh_return expander.  */

static GTY(()) rtx s390_tpf_eh_return_symbol;
void
s390_emit_tpf_eh_return (rtx target)
{
  rtx_insn *insn;
  rtx reg, orig_ra;

  if (!s390_tpf_eh_return_symbol)
    s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");

  reg = gen_rtx_REG (Pmode, 2);
  orig_ra = gen_rtx_REG (Pmode, 3);

  emit_move_insn (reg, target);
  emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
  insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
			 gen_rtx_REG (Pmode, RETURN_REGNUM));
  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);

  emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
}

/* Rework the prologue/epilogue to avoid saving/restoring
   registers unnecessarily.  */

static void
s390_optimize_prologue (void)
{
  rtx_insn *insn, *new_insn, *next_insn;

  /* Do a final recompute of the frame-related data.  */
  s390_optimize_register_info ();

  /* If all special registers are in fact used, there's nothing we
     can do, so no point in walking the insn list.  */

  if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
      && cfun_frame_layout.last_save_gpr >= BASE_REGNUM)
    return;

  /* Search for prologue/epilogue insns and replace them.  */
  for (insn = get_insns (); insn; insn = next_insn)
    {
      int first, last, off;
      rtx set, base, offset;
      rtx pat;

      next_insn = NEXT_INSN (insn);

      if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
	continue;

      pat = PATTERN (insn);

      /* Remove ldgr/lgdr instructions used for saving and restoring
	 GPRs if possible.  */
      if (TARGET_Z10)
	{
	  rtx tmp_pat = pat;

	  if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
	    tmp_pat = XVECEXP (pat, 0, 0);

	  if (GET_CODE (tmp_pat) == SET
	      && GET_MODE (SET_SRC (tmp_pat)) == DImode
	      && REG_P (SET_SRC (tmp_pat))
	      && REG_P (SET_DEST (tmp_pat)))
	    {
	      int src_regno = REGNO (SET_SRC (tmp_pat));
	      int dest_regno = REGNO (SET_DEST (tmp_pat));
	      int gpr_regno;
	      int fpr_regno;

	      if (!((GENERAL_REGNO_P (src_regno)
		     && FP_REGNO_P (dest_regno))
		    || (FP_REGNO_P (src_regno)
			&& GENERAL_REGNO_P (dest_regno))))
		continue;

	      gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
	      fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;

	      /* GPR must be call-saved, FPR must be call-clobbered.  */
	      if (!call_used_regs[fpr_regno]
		  || call_used_regs[gpr_regno])
		continue;

	      /* It must not happen that what we once saved in an FPR now
		 needs a stack slot.  */
	      gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);

	      if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
		{
		  remove_insn (insn);
		  continue;
		}
	    }
	}

      if (GET_CODE (pat) == PARALLEL
	  && store_multiple_operation (pat, VOIDmode))
	{
	  set = XVECEXP (pat, 0, 0);
	  first = REGNO (SET_SRC (set));
	  last = first + XVECLEN (pat, 0) - 1;
	  offset = const0_rtx;
	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
	  off = INTVAL (offset);

	  if (GET_CODE (base) != REG || off < 0)
	    continue;
	  if (cfun_frame_layout.first_save_gpr != -1
	      && (cfun_frame_layout.first_save_gpr < first
		  || cfun_frame_layout.last_save_gpr > last))
	    continue;
	  if (REGNO (base) != STACK_POINTER_REGNUM
	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
	    continue;
	  if (first > BASE_REGNUM || last < BASE_REGNUM)
	    continue;

	  if (cfun_frame_layout.first_save_gpr != -1)
	    {
	      rtx s_pat = save_gprs (base,
				     off + (cfun_frame_layout.first_save_gpr
					    - first) * UNITS_PER_LONG,
				     cfun_frame_layout.first_save_gpr,
				     cfun_frame_layout.last_save_gpr);
	      new_insn = emit_insn_before (s_pat, insn);
	      INSN_ADDRESSES_NEW (new_insn, -1);
	    }

	  remove_insn (insn);
	  continue;
	}

      if (cfun_frame_layout.first_save_gpr == -1
	  && GET_CODE (pat) == SET
	  && GENERAL_REG_P (SET_SRC (pat))
	  && GET_CODE (SET_DEST (pat)) == MEM)
	{
	  set = pat;
	  first = REGNO (SET_SRC (set));
	  offset = const0_rtx;
	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
	  off = INTVAL (offset);

	  if (GET_CODE (base) != REG || off < 0)
	    continue;
	  if (REGNO (base) != STACK_POINTER_REGNUM
	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
	    continue;

	  remove_insn (insn);
	  continue;
	}

      if (GET_CODE (pat) == PARALLEL
	  && load_multiple_operation (pat, VOIDmode))
	{
	  set = XVECEXP (pat, 0, 0);
	  first = REGNO (SET_DEST (set));
	  last = first + XVECLEN (pat, 0) - 1;
	  offset = const0_rtx;
	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
	  off = INTVAL (offset);

	  if (GET_CODE (base) != REG || off < 0)
	    continue;

	  if (cfun_frame_layout.first_restore_gpr != -1
	      && (cfun_frame_layout.first_restore_gpr < first
		  || cfun_frame_layout.last_restore_gpr > last))
	    continue;
	  if (REGNO (base) != STACK_POINTER_REGNUM
	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
	    continue;
	  if (first > BASE_REGNUM || last < BASE_REGNUM)
	    continue;

	  if (cfun_frame_layout.first_restore_gpr != -1)
	    {
	      rtx rpat = restore_gprs (base,
				       off + (cfun_frame_layout.first_restore_gpr
					      - first) * UNITS_PER_LONG,
				       cfun_frame_layout.first_restore_gpr,
				       cfun_frame_layout.last_restore_gpr);

	      /* Remove REG_CFA_RESTOREs for registers that we no
		 longer need to save.  */
	      REG_NOTES (rpat) = REG_NOTES (insn);
	      for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
		if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
		    && ((int) REGNO (XEXP (*ptr, 0))
			< cfun_frame_layout.first_restore_gpr))
		  *ptr = XEXP (*ptr, 1);
		else
		  ptr = &XEXP (*ptr, 1);
	      new_insn = emit_insn_before (rpat, insn);
	      RTX_FRAME_RELATED_P (new_insn) = 1;
	      INSN_ADDRESSES_NEW (new_insn, -1);
	    }

	  remove_insn (insn);
	  continue;
	}

      if (cfun_frame_layout.first_restore_gpr == -1
	  && GET_CODE (pat) == SET
	  && GENERAL_REG_P (SET_DEST (pat))
	  && GET_CODE (SET_SRC (pat)) == MEM)
	{
	  set = pat;
	  first = REGNO (SET_DEST (set));
	  offset = const0_rtx;
	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
	  off = INTVAL (offset);

	  if (GET_CODE (base) != REG || off < 0)
	    continue;

	  if (REGNO (base) != STACK_POINTER_REGNUM
	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
	    continue;

	  remove_insn (insn);
	  continue;
	}
    }
}
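
/* To illustrate the transformation done by the function below: a loop
   whose backward branch is too far away for the dynamic predictor,

      Lstart:	...
		jcond	Lstart		# more than PREDICT_DISTANCE away

   is rewritten to a short forward branch with inverted condition plus
   an unconditional backward jump,

      Lstart:	...
		jcond'	Lskip
		j	Lstart
      Lskip:

   so that the backward jump becomes unconditional and is statically
   predicted correctly.  This is an illustrative sketch only.  */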

/* On z10 and later the dynamic branch prediction must see the
   backward jump within a certain window.  If not it falls back to
   the static prediction.  This function rearranges the loop backward
   branch in a way which makes the static prediction always correct.
   The function returns true if it added an instruction.  */
static bool
s390_fix_long_loop_prediction (rtx_insn *insn)
{
  rtx set = single_set (insn);
  rtx code_label, label_ref;
  rtx_insn *uncond_jump;
  rtx_insn *cur_insn;
  rtx tmp;
  int distance;

  /* This will exclude branch on count and branch on index patterns
     since these are correctly statically predicted.  */
  if (!set
      || SET_DEST (set) != pc_rtx
      || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
    return false;

  /* Skip conditional returns.  */
  if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
      && XEXP (SET_SRC (set), 2) == pc_rtx)
    return false;

  label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
	       XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));

  gcc_assert (GET_CODE (label_ref) == LABEL_REF);

  code_label = XEXP (label_ref, 0);

  if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
      || INSN_ADDRESSES (INSN_UID (insn)) == -1
      || (INSN_ADDRESSES (INSN_UID (insn))
	  - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
    return false;

  for (distance = 0, cur_insn = PREV_INSN (insn);
       distance < PREDICT_DISTANCE - 6;
       distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
    if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
      return false;

  rtx_code_label *new_label = gen_label_rtx ();
  uncond_jump = emit_jump_insn_after (
    gen_rtx_SET (pc_rtx,
		 gen_rtx_LABEL_REF (VOIDmode, code_label)),
    insn);
  emit_label_after (new_label, uncond_jump);

  tmp = XEXP (SET_SRC (set), 1);
  XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
  XEXP (SET_SRC (set), 2) = tmp;
  INSN_CODE (insn) = -1;

  XEXP (label_ref, 0) = new_label;
  JUMP_LABEL (insn) = new_label;
  JUMP_LABEL (uncond_jump) = code_label;

  return true;
}

/* Returns 1 if INSN reads the value of REG for purposes not related
   to addressing of memory, and 0 otherwise.  */
static int
s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
{
  return reg_referenced_p (reg, PATTERN (insn))
	 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
}

/* Starting from INSN find_cond_jump looks downwards in the insn
   stream for a single jump insn which is the last user of the
   condition code set in INSN.  */
static rtx_insn *
find_cond_jump (rtx_insn *insn)
{
  for (; insn; insn = NEXT_INSN (insn))
    {
      rtx ite, cc;

      if (LABEL_P (insn))
	break;

      if (!JUMP_P (insn))
	{
	  if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
	    break;
	  continue;
	}

      /* This will be triggered by a return.  */
      if (GET_CODE (PATTERN (insn)) != SET)
	break;

      gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
      ite = SET_SRC (PATTERN (insn));

      if (GET_CODE (ite) != IF_THEN_ELSE)
	break;

      cc = XEXP (XEXP (ite, 0), 0);
      if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
	break;

      if (find_reg_note (insn, REG_DEAD, cc))
	return insn;
      break;
    }

  return NULL;
}

/* Swap the condition in COND and the operands in OP0 and OP1 so that
   the semantics does not change.  If NULL_RTX is passed as COND the
   function tries to find the conditional jump starting with INSN.  */
static void
s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
{
  rtx tmp = *op0;

  if (cond == NULL_RTX)
    {
      rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
      rtx set = jump ? single_set (jump) : NULL_RTX;

      if (set == NULL_RTX)
	return;

      cond = XEXP (SET_SRC (set), 0);
    }

  *op0 = *op1;
  *op1 = tmp;
  PUT_CODE (cond, swap_condition (GET_CODE (cond)));
}
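
/* As an example of the adjustment performed below: if the insn
   preceding

	cr	%r1,%r2

   delivers %r2 via a bypass, the compare is rewritten as

	cr	%r2,%r1

   with the condition of the consuming branch swapped accordingly;
   when neither operand order avoids a conflict, a NOP is emitted
   after the compare instead.  */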

/* On z10, instructions of the compare-and-branch family have the
   property to access the register occurring as second operand with
   its bits complemented.  If such a compare is grouped with a second
   instruction that accesses the same register non-complemented, and
   if that register's value is delivered via a bypass, then the
   pipeline recycles, thereby causing significant performance decline.
   This function locates such situations and exchanges the two
   operands of the compare.  The function returns true whenever it
   added an insn.  */
static bool
s390_z10_optimize_cmp (rtx_insn *insn)
{
  rtx_insn *prev_insn, *next_insn;
  bool insn_added_p = false;
  rtx cond, *op0, *op1;

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      /* Handle compare and branch and branch on count
	 instructions.  */
      rtx pattern = single_set (insn);

      if (!pattern
	  || SET_DEST (pattern) != pc_rtx
	  || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
	return false;

      cond = XEXP (SET_SRC (pattern), 0);
      op0 = &XEXP (cond, 0);
      op1 = &XEXP (cond, 1);
    }
  else if (GET_CODE (PATTERN (insn)) == SET)
    {
      rtx src, dest;

      /* Handle normal compare instructions.  */
      src = SET_SRC (PATTERN (insn));
      dest = SET_DEST (PATTERN (insn));

      if (!REG_P (dest)
	  || !CC_REGNO_P (REGNO (dest))
	  || GET_CODE (src) != COMPARE)
	return false;

      /* s390_swap_cmp will try to find the conditional
	 jump when passing NULL_RTX as condition.  */
      cond = NULL_RTX;
      op0 = &XEXP (src, 0);
      op1 = &XEXP (src, 1);
    }
  else
    return false;

  if (!REG_P (*op0) || !REG_P (*op1))
    return false;

  if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
    return false;

  /* Swap the COMPARE arguments and its mask if there is a
     conflicting access in the previous insn.  */
  prev_insn = prev_active_insn (insn);
  if (prev_insn != NULL_RTX && INSN_P (prev_insn)
      && reg_referenced_p (*op1, PATTERN (prev_insn)))
    s390_swap_cmp (cond, op0, op1, insn);

  /* Check if there is a conflict with the next insn.  If there
     was no conflict with the previous insn, then swap the
     COMPARE arguments and its mask.  If we already swapped
     the operands, or if swapping them would cause a conflict
     with the previous insn, issue a NOP after the COMPARE in
     order to separate the two instructions.  */
  next_insn = next_active_insn (insn);
  if (next_insn != NULL_RTX && INSN_P (next_insn)
      && s390_non_addr_reg_read_p (*op1, next_insn))
    {
      if (prev_insn != NULL_RTX && INSN_P (prev_insn)
	  && s390_non_addr_reg_read_p (*op0, prev_insn))
	{
	  if (REGNO (*op1) == 0)
	    emit_insn_after (gen_nop_lr1 (), insn);
	  else
	    emit_insn_after (gen_nop_lr0 (), insn);
	  insn_added_p = true;
	}
      else
	s390_swap_cmp (cond, op0, op1, insn);
    }
  return insn_added_p;
}

/* Number of INSNs to be scanned backward in the last BB of the loop
   and forward in the first BB of the loop.  This usually should be a
   bit more than the number of INSNs which could go into one
   group.  */
#define S390_OSC_SCAN_INSN_NUM 5

/* Scan LOOP for static OSC collisions and return true if an osc_break
   should be issued for this loop.  */
static bool
s390_adjust_loop_scan_osc (struct loop* loop)
{
  HARD_REG_SET modregs, newregs;
  rtx_insn *insn, *store_insn = NULL;
  rtx set;
  struct s390_address addr_store, addr_load;
  subrtx_iterator::array_type array;
  int insn_count;

  CLEAR_HARD_REG_SET (modregs);

  insn_count = 0;
  FOR_BB_INSNS_REVERSE (loop->latch, insn)
    {
      if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
	continue;

      insn_count++;
      if (insn_count > S390_OSC_SCAN_INSN_NUM)
	return false;

      find_all_hard_reg_sets (insn, &newregs, true);
      modregs |= newregs;

      set = single_set (insn);
      if (!set)
	continue;

      if (MEM_P (SET_DEST (set))
	  && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
	{
	  store_insn = insn;
	  break;
	}
    }

  if (store_insn == NULL_RTX)
    return false;

  insn_count = 0;
  FOR_BB_INSNS (loop->header, insn)
    {
      if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
	continue;

      if (insn == store_insn)
	return false;

      insn_count++;
      if (insn_count > S390_OSC_SCAN_INSN_NUM)
	return false;

      find_all_hard_reg_sets (insn, &newregs, true);
      modregs |= newregs;

      set = single_set (insn);
      if (!set)
	continue;

      /* An intermediate store disrupts static OSC checking
	 anyway.  */
      if (MEM_P (SET_DEST (set))
	  && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
	return false;

      FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
	if (MEM_P (*iter)
	    && s390_decompose_address (XEXP (*iter, 0), &addr_load)
	    && rtx_equal_p (addr_load.base, addr_store.base)
	    && rtx_equal_p (addr_load.indx, addr_store.indx)
	    && rtx_equal_p (addr_load.disp, addr_store.disp))
	  {
	    if ((addr_load.base != NULL_RTX
		 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
		|| (addr_load.indx != NULL_RTX
		    && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
	      return true;
	  }
    }
  return false;
}

/* Look for adjustments which can be done on simple innermost
   loops.  */
static void
s390_adjust_loops ()
{
  struct loop *loop = NULL;

  df_analyze ();
  compute_bb_for_insn ();

  /* Find the loops.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      if (dump_file)
	{
	  flow_loop_dump (loop, dump_file, NULL, 0);
	  fprintf (dump_file, ";; OSC loop scan Loop: ");
	}
      if (loop->latch == NULL
	  || pc_set (BB_END (loop->latch)) == NULL_RTX
	  || !s390_adjust_loop_scan_osc (loop))
	{
	  if (dump_file)
	    {
	      if (loop->latch == NULL)
		fprintf (dump_file, " multiple backward jumps\n");
	      else
		{
		  fprintf (dump_file, " header insn: %d latch insn: %d ",
			   INSN_UID (BB_HEAD (loop->header)),
			   INSN_UID (BB_END (loop->latch)));
		  if (pc_set (BB_END (loop->latch)) == NULL_RTX)
		    fprintf (dump_file, " loop does not end with jump\n");
		  else
		    fprintf (dump_file, " not instrumented\n");
		}
	    }
	}
      else
	{
	  rtx_insn *new_insn;

	  if (dump_file)
	    fprintf (dump_file, " adding OSC break insn: ");
	  new_insn = emit_insn_before (gen_osc_break (),
				       BB_END (loop->latch));
	  INSN_ADDRESSES_NEW (new_insn, -1);
	}
    }

  loop_optimizer_finalize ();

  df_finish_pass (false);
}

/* Perform machine-dependent processing.  */

static void
s390_reorg (void)
{
  struct constant_pool *pool;
  rtx_insn *insn;
  int hw_before, hw_after;

  if (s390_tune == PROCESSOR_2964_Z13)
    s390_adjust_loops ();

  /* Make sure all splits have been performed; splits after
     machine_dependent_reorg might confuse insn length counts.  */
  split_all_insns_noflow ();

  /* Install the main literal pool and the associated base
     register load insns.  The literal pool might be > 4096 bytes in
     size, so that some of its elements cannot be directly accessed.

     To fix this, we split the single literal pool into multiple
     pool chunks, reloading the pool base register at various
     points throughout the function to ensure it always points to
     the pool chunk the following code expects.  */

  /* Collect the literal pool.  */
  pool = s390_mainpool_start ();
  if (pool)
    {
      /* Finish up literal pool related changes.  */
      s390_mainpool_finish (pool);
    }
  else
    {
      /* If literal pool overflowed, chunkify it.  */
      pool = s390_chunkify_start ();
      s390_chunkify_finish (pool);
    }

  /* Generate out-of-pool execute target insns.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      rtx label;
      rtx_insn *target;

      label = s390_execute_label (insn);
      if (!label)
	continue;

      gcc_assert (label != const0_rtx);

      target = emit_label (XEXP (label, 0));
      INSN_ADDRESSES_NEW (target, -1);

      if (JUMP_P (insn))
	{
	  target = emit_jump_insn (s390_execute_target (insn));
	  /* This is important in order to keep a table jump
	     pointing at the jump table label.  Only then is it
	     recognized as a table jump.  */
	  JUMP_LABEL (target) = JUMP_LABEL (insn);
	}
      else
	target = emit_insn (s390_execute_target (insn));
      INSN_ADDRESSES_NEW (target, -1);
    }

  /* Try to optimize prologue and epilogue further.  */
  s390_optimize_prologue ();

  /* Walk over the insns and do some >=z10 specific changes.  */
  if (s390_tune >= PROCESSOR_2097_Z10)
    {
      rtx_insn *insn;
      bool insn_added_p = false;

      /* The insn lengths and addresses have to be up to date for the
	 following manipulations.  */
      shorten_branches (get_insns ());

      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	{
	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
	    continue;

	  if (JUMP_P (insn))
	    insn_added_p |= s390_fix_long_loop_prediction (insn);

	  if ((GET_CODE (PATTERN (insn)) == PARALLEL
	       || GET_CODE (PATTERN (insn)) == SET)
	      && s390_tune == PROCESSOR_2097_Z10)
	    insn_added_p |= s390_z10_optimize_cmp (insn);
	}

      /* Adjust branches if we added new instructions.  */
      if (insn_added_p)
	shorten_branches (get_insns ());
    }

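  /* The hotpatch padding below is counted in halfwords (HW_AFTER): a
     6-byte NOP covers three halfwords, a 4-byte NOP two, and a 2-byte
     NOP one.  For instance, a requested hw_after of 5 results in one
     6-byte NOP followed by one 4-byte NOP.  */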
  s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
  if (hw_after > 0)
    {
      rtx_insn *insn;

      /* Insert NOPs for hotpatching.  */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	/* Emit NOPs
	    1. inside the area covered by debug information to allow setting
	       breakpoints at the NOPs,
	    2. before any insn which results in an asm instruction,
	    3. before in-function labels to avoid jumping to the NOPs, for
	       example as part of a loop,
	    4. before any barrier in case the function is completely empty
	       (__builtin_unreachable ()) and has neither internal labels nor
	       active insns.  */
	if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
	  break;
      /* Output a series of NOPs before the first active insn.  */
      while (insn && hw_after > 0)
	{
	  if (hw_after >= 3)
	    {
	      emit_insn_before (gen_nop_6_byte (), insn);
	      hw_after -= 3;
	    }
	  else if (hw_after >= 2)
	    {
	      emit_insn_before (gen_nop_4_byte (), insn);
	      hw_after -= 2;
	    }
	  else
	    {
	      emit_insn_before (gen_nop_2_byte (), insn);
	      hw_after -= 1;
	    }
	}
    }
}

/* Return true if INSN is a fp load insn writing register REGNO.  */
static inline bool
s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
{
  rtx set;
  enum attr_type flag = s390_safe_attr_type (insn);

  if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
    return false;

  set = single_set (insn);

  if (set == NULL_RTX)
    return false;

  if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
    return false;

  if (REGNO (SET_DEST (set)) != regno)
    return false;

  return true;
}

/* This value describes the distance to be avoided between an
   arithmetic fp instruction and an fp load writing the same register.
   Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
   fine but the exact value has to be avoided.  Otherwise the FP
   pipeline will throw an exception causing a major penalty.  */
#define Z10_EARLYLOAD_DISTANCE 7
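
/* Note on the READY array used below: the scheduler issues insns from
   the back of the array, so the insn in slot 0 is the one issued
   last.  Moving a problematic fp load there therefore delays it as
   much as the current ready list allows.  */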

/* Rearrange the ready list in order to avoid the situation described
   for Z10_EARLYLOAD_DISTANCE.  A problematic load instruction is
   moved to the very end of the ready list.  */
static void
s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
{
  unsigned int regno;
  int nready = *nready_p;
  rtx_insn *tmp;
  int i;
  rtx_insn *insn;
  rtx set;
  enum attr_type flag;
  int distance;

  /* Skip DISTANCE - 1 active insns.  */
  for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
       distance > 0 && insn != NULL_RTX;
       distance--, insn = prev_active_insn (insn))
    if (CALL_P (insn) || JUMP_P (insn))
      return;

  if (insn == NULL_RTX)
    return;

  set = single_set (insn);

  if (set == NULL_RTX || !REG_P (SET_DEST (set))
      || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
    return;

  flag = s390_safe_attr_type (insn);

  if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
    return;

  regno = REGNO (SET_DEST (set));
  i = nready - 1;

  while (!s390_fpload_toreg (ready[i], regno) && i > 0)
    i--;

  if (!i)
    return;

  tmp = ready[i];
  memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
  ready[0] = tmp;
}

/* Returns TRUE if BB is entered via a fallthru edge and all other
   incoming edges are less than likely.  */
static bool
s390_bb_fallthru_entry_likely (basic_block bb)
{
  edge e, fallthru_edge;
  edge_iterator ei;

  if (!bb)
    return false;

  fallthru_edge = find_fallthru_edge (bb->preds);
  if (!fallthru_edge)
    return false;

  FOR_EACH_EDGE (e, ei, bb->preds)
    if (e != fallthru_edge
	&& e->probability >= profile_probability::likely ())
      return false;

  return true;
}

struct s390_sched_state
{
  /* Number of insns in the group.  */
  int group_state;
  /* Execution side of the group.  */
  int side;
  /* Group can only hold two insns.  */
  bool group_of_two;
} s390_sched_state;

static struct s390_sched_state sched_state = {0, 1, false};

#define S390_SCHED_ATTR_MASK_CRACKED    0x1
#define S390_SCHED_ATTR_MASK_EXPANDED   0x2
#define S390_SCHED_ATTR_MASK_ENDGROUP   0x4
#define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
#define S390_SCHED_ATTR_MASK_GROUPOFTWO 0x10

static unsigned int
s390_get_sched_attrmask (rtx_insn *insn)
{
  unsigned int mask = 0;

  switch (s390_tune)
    {
    case PROCESSOR_2827_ZEC12:
      if (get_attr_zEC12_cracked (insn))
	mask |= S390_SCHED_ATTR_MASK_CRACKED;
      if (get_attr_zEC12_expanded (insn))
	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
      if (get_attr_zEC12_endgroup (insn))
	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
      if (get_attr_zEC12_groupalone (insn))
	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
      break;
    case PROCESSOR_2964_Z13:
      if (get_attr_z13_cracked (insn))
	mask |= S390_SCHED_ATTR_MASK_CRACKED;
      if (get_attr_z13_expanded (insn))
	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
      if (get_attr_z13_endgroup (insn))
	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
      if (get_attr_z13_groupalone (insn))
	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
      if (get_attr_z13_groupoftwo (insn))
	mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
      break;
    case PROCESSOR_3906_Z14:
      if (get_attr_z14_cracked (insn))
	mask |= S390_SCHED_ATTR_MASK_CRACKED;
      if (get_attr_z14_expanded (insn))
	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
      if (get_attr_z14_endgroup (insn))
	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
      if (get_attr_z14_groupalone (insn))
	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
      if (get_attr_z14_groupoftwo (insn))
	mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
      break;
    case PROCESSOR_8561_Z15:
      if (get_attr_z15_cracked (insn))
	mask |= S390_SCHED_ATTR_MASK_CRACKED;
      if (get_attr_z15_expanded (insn))
	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
      if (get_attr_z15_endgroup (insn))
	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
      if (get_attr_z15_groupalone (insn))
	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
      if (get_attr_z15_groupoftwo (insn))
	mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
      break;
    default:
      gcc_unreachable ();
    }
  return mask;
}
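
/* In the unit masks built below, the units are numbered consistently
   across z13, z14 and z15: bit 0 is the load/store unit (LSU), bit 1
   the fixed-point unit A (FXA), bit 2 the fixed-point unit B (FXB)
   and bit 3 the vector/floating-point unit (VFU).  */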
static unsigned int
s390_get_unit_mask (rtx_insn *insn, int *units)
{
  unsigned int mask = 0;

  switch (s390_tune)
    {
    case PROCESSOR_2964_Z13:
      *units = 4;
      if (get_attr_z13_unit_lsu (insn))
	mask |= 1 << 0;
      if (get_attr_z13_unit_fxa (insn))
	mask |= 1 << 1;
      if (get_attr_z13_unit_fxb (insn))
	mask |= 1 << 2;
      if (get_attr_z13_unit_vfu (insn))
	mask |= 1 << 3;
      break;
    case PROCESSOR_3906_Z14:
      *units = 4;
      if (get_attr_z14_unit_lsu (insn))
	mask |= 1 << 0;
      if (get_attr_z14_unit_fxa (insn))
	mask |= 1 << 1;
      if (get_attr_z14_unit_fxb (insn))
	mask |= 1 << 2;
      if (get_attr_z14_unit_vfu (insn))
	mask |= 1 << 3;
      break;
    case PROCESSOR_8561_Z15:
      *units = 4;
      if (get_attr_z15_unit_lsu (insn))
	mask |= 1 << 0;
      if (get_attr_z15_unit_fxa (insn))
	mask |= 1 << 1;
      if (get_attr_z15_unit_fxb (insn))
	mask |= 1 << 2;
      if (get_attr_z15_unit_vfu (insn))
	mask |= 1 << 3;
      break;
    default:
      gcc_unreachable ();
    }
  return mask;
}

static bool
s390_is_fpd (rtx_insn *insn)
{
  if (insn == NULL_RTX)
    return false;

  return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn)
	 || get_attr_z15_unit_fpd (insn);
}

static bool
s390_is_fxd (rtx_insn *insn)
{
  if (insn == NULL_RTX)
    return false;

  return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn)
	 || get_attr_z15_unit_fxd (insn);
}

/* Returns TRUE if INSN is a long-running instruction.  */
static bool
s390_is_longrunning (rtx_insn *insn)
{
  if (insn == NULL_RTX)
    return false;

  return s390_is_fxd (insn) || s390_is_fpd (insn);
}
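
/* To give a feel for the scoring below: while filling the second slot
   of a group (group_state == 1), an insn that is neither cracked,
   expanded nor group-alone gets +10, and +5 more if it does not end
   the group, i.e. a base score of 15 before the unit-mix bonus is
   added.  */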

/* Return the scheduling score for INSN.  The higher the score the
   better.  The score is calculated from the OOO scheduling attributes
   of INSN and the scheduling state sched_state.  */
static int
s390_sched_score (rtx_insn *insn)
{
  unsigned int mask = s390_get_sched_attrmask (insn);
  int score = 0;

  switch (sched_state.group_state)
    {
    case 0:
      /* Try to put insns into the first slot which would otherwise
	 break a group.  */
      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
	score += 5;
      if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
	score += 10;
      break;
    case 1:
      /* Prefer not cracked insns while trying to put together a
	 group.  */
      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
	score += 10;
      if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
	score += 5;
      /* If we are in a group of two already, try to schedule another
	 group-of-two insn to avoid shortening another group.  */
      if (sched_state.group_of_two
	  && (mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
	score += 15;
      break;
    case 2:
      /* Prefer not cracked insns while trying to put together a
	 group.  */
      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
	score += 10;
      /* Prefer endgroup insns in the last slot.  */
      if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
	score += 10;
      /* Try to avoid group-of-two insns in the last slot as they will
	 shorten this group as well as the next one.  */
      if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
	score = MAX (0, score - 15);
      break;
    }

  if (s390_tune >= PROCESSOR_2964_Z13)
    {
      int units, i;
      unsigned unit_mask, m = 1;

      unit_mask = s390_get_unit_mask (insn, &units);
      gcc_assert (units <= MAX_SCHED_UNITS);

      /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
	 ago the last insn of this unit type got scheduled.  This is
	 supposed to help providing a proper instruction mix to the
	 CPU.  */
      for (i = 0; i < units; i++, m <<= 1)
	if (m & unit_mask)
	  score += (last_scheduled_unit_distance[i][sched_state.side]
		    * MAX_SCHED_MIX_SCORE / MAX_SCHED_MIX_DISTANCE);

      int other_side = 1 - sched_state.side;

      /* Try to delay long-running insns when side is busy.  */
      if (s390_is_longrunning (insn))
	{
	  if (s390_is_fxd (insn))
	    {
	      if (fxd_longrunning[sched_state.side]
		  && fxd_longrunning[other_side]
		     <= fxd_longrunning[sched_state.side])
		score = MAX (0, score - 10);

	      else if (fxd_longrunning[other_side]
		       >= fxd_longrunning[sched_state.side])
		score += 10;
	    }

	  if (s390_is_fpd (insn))
	    {
	      if (fpd_longrunning[sched_state.side]
		  && fpd_longrunning[other_side]
		     <= fpd_longrunning[sched_state.side])
		score = MAX (0, score - 10);

	      else if (fpd_longrunning[other_side]
		       >= fpd_longrunning[sched_state.side])
		score += 10;
	    }
	}
    }

  return score;
}

/* This function is called via hook TARGET_SCHED_REORDER before
   issuing one insn from list READY which contains *NREADYP entries.
   For target z10 it reorders load instructions to avoid early load
   conflicts in the floating point pipeline.  */
static int
s390_sched_reorder (FILE *file, int verbose,
		    rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
{
  if (s390_tune == PROCESSOR_2097_Z10
      && reload_completed
      && *nreadyp > 1)
    s390_z10_prevent_earlyload_conflicts (ready, nreadyp);

  if (s390_tune >= PROCESSOR_2827_ZEC12
      && reload_completed
      && *nreadyp > 1)
    {
      int i;
      int last_index = *nreadyp - 1;
      int max_index = -1;
      int max_score = -1;
      rtx_insn *tmp;

      /* Just move the insn with the highest score to the top (the
	 end) of the list.  A full sort is not needed since a conflict
	 in the hazard recognition cannot happen.  So the top insn in
	 the ready list will always be taken.  */
      for (i = last_index; i >= 0; i--)
	{
	  int score;

	  if (recog_memoized (ready[i]) < 0)
	    continue;

	  score = s390_sched_score (ready[i]);
	  if (score > max_score)
	    {
	      max_score = score;
	      max_index = i;
	    }
	}

      if (max_index != -1)
	{
	  if (max_index != last_index)
	    {
	      tmp = ready[max_index];
	      ready[max_index] = ready[last_index];
	      ready[last_index] = tmp;

	      if (verbose > 5)
		fprintf (file,
			 ";;\t\tBACKEND: move insn %d to the top of list\n",
			 INSN_UID (ready[last_index]));
	    }
	  else if (verbose > 5)
	    fprintf (file,
		     ";;\t\tBACKEND: best insn %d already on top\n",
		     INSN_UID (ready[last_index]));
	}

      if (verbose > 5)
	{
	  fprintf (file, "ready list ooo attributes - sched state: %d\n",
		   sched_state.group_state);

	  for (i = last_index; i >= 0; i--)
	    {
	      unsigned int sched_mask;
	      rtx_insn *insn = ready[i];

	      if (recog_memoized (insn) < 0)
		continue;

	      sched_mask = s390_get_sched_attrmask (insn);
	      fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
		       INSN_UID (insn),
		       s390_sched_score (insn));
#ATTR : ""); 14904 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked); 14905 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded); 14906 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup); 14907 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone); 14908#undef PRINT_SCHED_ATTR 14909 if (s390_tune >= PROCESSOR_2964_Z13) 14910 { 14911 unsigned int unit_mask, m = 1; 14912 int units, j; 14913 14914 unit_mask = s390_get_unit_mask (insn, &units); 14915 fprintf (file, "(units:"); 14916 for (j = 0; j < units; j++, m <<= 1) 14917 if (m & unit_mask) 14918 fprintf (file, " u%d", j); 14919 fprintf (file, ")"); 14920 } 14921 fprintf (file, "\n"); 14922 } 14923 } 14924 } 14925 14926 return s390_issue_rate (); 14927} 14928 14929 14930/* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after 14931 the scheduler has issued INSN. It stores the last issued insn into 14932 last_scheduled_insn in order to make it available for 14933 s390_sched_reorder. */ 14934static int 14935s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more) 14936{ 14937 last_scheduled_insn = insn; 14938 14939 bool ends_group = false; 14940 14941 if (s390_tune >= PROCESSOR_2827_ZEC12 14942 && reload_completed 14943 && recog_memoized (insn) >= 0) 14944 { 14945 unsigned int mask = s390_get_sched_attrmask (insn); 14946 14947 if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0) 14948 sched_state.group_of_two = true; 14949 14950 /* If this is a group-of-two insn, we actually ended the last group 14951 and this insn is the first one of the new group. */ 14952 if (sched_state.group_state == 2 && sched_state.group_of_two) 14953 { 14954 sched_state.side = sched_state.side ? 0 : 1; 14955 sched_state.group_state = 0; 14956 } 14957 14958 /* Longrunning and side bookkeeping. 

/* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
   the scheduler has issued INSN.  It stores the last issued insn into
   last_scheduled_insn in order to make it available for
   s390_sched_reorder.  */
static int
s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
{
  last_scheduled_insn = insn;

  bool ends_group = false;

  if (s390_tune >= PROCESSOR_2827_ZEC12
      && reload_completed
      && recog_memoized (insn) >= 0)
    {
      unsigned int mask = s390_get_sched_attrmask (insn);

      if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
	sched_state.group_of_two = true;

      /* If this is a group-of-two insn, we actually ended the last group
	 and this insn is the first one of the new group.  */
      if (sched_state.group_state == 2 && sched_state.group_of_two)
	{
	  sched_state.side = sched_state.side ? 0 : 1;
	  sched_state.group_state = 0;
	}

      /* Longrunning and side bookkeeping.  */
      for (int i = 0; i < 2; i++)
	{
	  fxd_longrunning[i] = MAX (0, fxd_longrunning[i] - 1);
	  fpd_longrunning[i] = MAX (0, fpd_longrunning[i] - 1);
	}

      unsigned latency = insn_default_latency (insn);
      if (s390_is_longrunning (insn))
	{
	  if (s390_is_fxd (insn))
	    fxd_longrunning[sched_state.side] = latency;
	  else
	    fpd_longrunning[sched_state.side] = latency;
	}

      if (s390_tune >= PROCESSOR_2964_Z13)
	{
	  int units, i;
	  unsigned unit_mask, m = 1;

	  unit_mask = s390_get_unit_mask (insn, &units);
	  gcc_assert (units <= MAX_SCHED_UNITS);

	  for (i = 0; i < units; i++, m <<= 1)
	    if (m & unit_mask)
	      last_scheduled_unit_distance[i][sched_state.side] = 0;
	    else if (last_scheduled_unit_distance[i][sched_state.side]
		     < MAX_SCHED_MIX_DISTANCE)
	      last_scheduled_unit_distance[i][sched_state.side]++;
	}

      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
	  || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0
	  || (mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
	{
	  sched_state.group_state = 0;
	  ends_group = true;
	}
      else
	{
	  switch (sched_state.group_state)
	    {
	    case 0:
	      sched_state.group_state++;
	      break;
	    case 1:
	      sched_state.group_state++;
	      if (sched_state.group_of_two)
		{
		  sched_state.group_state = 0;
		  ends_group = true;
		}
	      break;
	    case 2:
	      sched_state.group_state++;
	      ends_group = true;
	      break;
	    }
	}

      if (verbose > 5)
	{
	  unsigned int sched_mask;

	  sched_mask = s390_get_sched_attrmask (insn);

	  fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
#define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
#undef PRINT_SCHED_ATTR

	  if (s390_tune >= PROCESSOR_2964_Z13)
	    {
	      unsigned int unit_mask, m = 1;
	      int units, j;

	      unit_mask = s390_get_unit_mask (insn, &units);
	      fprintf (file, "(units:");
	      for (j = 0; j < units; j++, m <<= 1)
		if (m & unit_mask)
		  fprintf (file, " %d", j);
	      fprintf (file, ")");
	    }
	  fprintf (file, " sched state: %d\n", sched_state.group_state);

	  if (s390_tune >= PROCESSOR_2964_Z13)
	    {
	      int units, j;

	      s390_get_unit_mask (insn, &units);

	      fprintf (file, ";;\t\tBACKEND: units on this side unused for: ");
	      for (j = 0; j < units; j++)
		fprintf (file, "%d:%d ", j,
			 last_scheduled_unit_distance[j][sched_state.side]);
	      fprintf (file, "\n");
	    }
	}

      /* If this insn ended a group, the next will be on the other side.  */
      if (ends_group)
	{
	  sched_state.group_state = 0;
	  sched_state.side = sched_state.side ? 0 : 1;
	  sched_state.group_of_two = false;
	}
    }

  if (GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER)
    return more - 1;
  else
    return more;
}

static void
s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
		 int verbose ATTRIBUTE_UNUSED,
		 int max_ready ATTRIBUTE_UNUSED)
{
  /* If the next basic block is most likely entered via a fallthru edge
     we keep the last sched state.  Otherwise we start a new group.
     The scheduler traverses basic blocks in "instruction stream" ordering
     so if we see a fallthru edge here, sched_state will be of its
     source block.

     current_sched_info->prev_head is the insn before the first insn of the
     block of insns to be scheduled.  */
  rtx_insn *insn = current_sched_info->prev_head
		   ? NEXT_INSN (current_sched_info->prev_head) : NULL;
  basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
  if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
    {
      last_scheduled_insn = NULL;
      memset (last_scheduled_unit_distance, 0,
	      MAX_SCHED_UNITS * NUM_SIDES * sizeof (int));
      sched_state.group_state = 0;
      sched_state.group_of_two = false;
    }
}

/* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST
   calculates the number of times struct loop *LOOP should be unrolled
   when tuning for CPUs with a built-in stride prefetcher.
   The loop is analyzed for memory accesses by calling check_dpu for
   each rtx of the loop.  Depending on the loop_depth and the amount of
   memory accesses a new number <= nunroll is returned to improve the
   behavior of the hardware prefetch unit.  */
static unsigned
s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
{
  basic_block *bbs;
  rtx_insn *insn;
  unsigned i;
  unsigned mem_count = 0;

  if (s390_tune < PROCESSOR_2097_Z10)
    return nunroll;

  /* Count the number of memory references within the loop body.  */
  bbs = get_loop_body (loop);
  subrtx_iterator::array_type array;
  for (i = 0; i < loop->num_nodes; i++)
    FOR_BB_INSNS (bbs[i], insn)
      if (INSN_P (insn) && INSN_CODE (insn) != -1)
	{
	  rtx set;

	  /* The runtime of small loops with memory block operations
	     will be determined by the memory operation.  Unrolling
	     doesn't help here.  Measurements to confirm this were
	     only done on recent CPU levels.  So better do not change
	     anything for older CPUs.  */
	  if (s390_tune >= PROCESSOR_2964_Z13
	      && loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS
	      && ((set = single_set (insn)) != NULL_RTX)
	      && ((GET_MODE (SET_DEST (set)) == BLKmode
		   && (GET_MODE (SET_SRC (set)) == BLKmode
		       || SET_SRC (set) == const0_rtx))
		  || (GET_CODE (SET_SRC (set)) == COMPARE
		      && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
		      && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
	    return 1;

	  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
	    if (MEM_P (*iter))
	      mem_count += 1;
	}
  free (bbs);
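
  /* The divisors below bound the total number of memory references in
     the unrolled loop body.  For example, a depth-1 loop containing 7
     MEMs is unrolled at most MIN (nunroll, 28 / 7) = 4 times.  */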
*/ 15155 if (mem_count == 0) 15156 return nunroll; 15157 15158 switch (loop_depth(loop)) 15159 { 15160 case 1: 15161 return MIN (nunroll, 28 / mem_count); 15162 case 2: 15163 return MIN (nunroll, 22 / mem_count); 15164 default: 15165 return MIN (nunroll, 16 / mem_count); 15166 } 15167} 15168 15169/* Restore the current options. This is a hook function and also called 15170 internally. */ 15171 15172static void 15173s390_function_specific_restore (struct gcc_options *opts, 15174 struct cl_target_option *ptr ATTRIBUTE_UNUSED) 15175{ 15176 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost; 15177} 15178 15179static void 15180s390_default_align (struct gcc_options *opts) 15181{ 15182 /* Set the default function alignment to 16 in order to get rid of 15183 some unwanted performance effects. */ 15184 if (opts->x_flag_align_functions && !opts->x_str_align_functions 15185 && opts->x_s390_tune >= PROCESSOR_2964_Z13) 15186 opts->x_str_align_functions = "16"; 15187} 15188 15189static void 15190s390_override_options_after_change (void) 15191{ 15192 s390_default_align (&global_options); 15193} 15194 15195static void 15196s390_option_override_internal (struct gcc_options *opts, 15197 const struct gcc_options *opts_set) 15198{ 15199 /* Architecture mode defaults according to ABI. */ 15200 if (!(opts_set->x_target_flags & MASK_ZARCH)) 15201 { 15202 if (TARGET_64BIT) 15203 opts->x_target_flags |= MASK_ZARCH; 15204 else 15205 opts->x_target_flags &= ~MASK_ZARCH; 15206 } 15207 15208 /* Set the march default in case it hasn't been specified on cmdline. */ 15209 if (!opts_set->x_s390_arch) 15210 opts->x_s390_arch = PROCESSOR_2064_Z900; 15211 15212 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch]; 15213 15214 /* Determine processor to tune for. */ 15215 if (!opts_set->x_s390_tune) 15216 opts->x_s390_tune = opts->x_s390_arch; 15217 15218 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune]; 15219 15220 /* Sanity checks. */ 15221 if (opts->x_s390_arch == PROCESSOR_NATIVE 15222 || opts->x_s390_tune == PROCESSOR_NATIVE) 15223 gcc_unreachable (); 15224 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags)) 15225 error ("64-bit ABI not supported in ESA/390 mode"); 15226 15227 if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline 15228 || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline 15229 || opts->x_s390_function_return == indirect_branch_thunk_inline 15230 || opts->x_s390_function_return_reg == indirect_branch_thunk_inline 15231 || opts->x_s390_function_return_mem == indirect_branch_thunk_inline) 15232 error ("thunk-inline is only supported with %<-mindirect-branch-jump%>"); 15233 15234 if (opts->x_s390_indirect_branch != indirect_branch_keep) 15235 { 15236 if (!opts_set->x_s390_indirect_branch_call) 15237 opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch; 15238 15239 if (!opts_set->x_s390_indirect_branch_jump) 15240 opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch; 15241 } 15242 15243 if (opts->x_s390_function_return != indirect_branch_keep) 15244 { 15245 if (!opts_set->x_s390_function_return_reg) 15246 opts->x_s390_function_return_reg = opts->x_s390_function_return; 15247 15248 if (!opts_set->x_s390_function_return_mem) 15249 opts->x_s390_function_return_mem = opts->x_s390_function_return; 15250 } 15251 15252 /* Enable hardware transactions if available and not explicitly 15253 disabled by user. E.g. 
with -m31 -march=zEC12 -mzarch */ 15254 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags)) 15255 { 15256 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags)) 15257 opts->x_target_flags |= MASK_OPT_HTM; 15258 else 15259 opts->x_target_flags &= ~MASK_OPT_HTM; 15260 } 15261 15262 if (TARGET_OPT_VX_P (opts_set->x_target_flags)) 15263 { 15264 if (TARGET_OPT_VX_P (opts->x_target_flags)) 15265 { 15266 if (!TARGET_CPU_VX_P (opts)) 15267 error ("hardware vector support not available on %s", 15268 processor_table[(int)opts->x_s390_arch].name); 15269 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags)) 15270 error ("hardware vector support not available with " 15271 "%<-msoft-float%>"); 15272 } 15273 } 15274 else 15275 { 15276 if (TARGET_CPU_VX_P (opts)) 15277 /* Enable vector support if available and not explicitly disabled 15278 by user. E.g. with -m31 -march=z13 -mzarch */ 15279 opts->x_target_flags |= MASK_OPT_VX; 15280 else 15281 opts->x_target_flags &= ~MASK_OPT_VX; 15282 } 15283 15284 /* Use hardware DFP if available and not explicitly disabled by 15285 user. E.g. with -m31 -march=z10 -mzarch */ 15286 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags)) 15287 { 15288 if (TARGET_DFP_P (opts)) 15289 opts->x_target_flags |= MASK_HARD_DFP; 15290 else 15291 opts->x_target_flags &= ~MASK_HARD_DFP; 15292 } 15293 15294 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts)) 15295 { 15296 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)) 15297 { 15298 if (!TARGET_CPU_DFP_P (opts)) 15299 error ("hardware decimal floating point instructions" 15300 " not available on %s", 15301 processor_table[(int)opts->x_s390_arch].name); 15302 if (!TARGET_ZARCH_P (opts->x_target_flags)) 15303 error ("hardware decimal floating point instructions" 15304 " not available in ESA/390 mode"); 15305 } 15306 else 15307 opts->x_target_flags &= ~MASK_HARD_DFP; 15308 } 15309 15310 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags) 15311 && TARGET_SOFT_FLOAT_P (opts->x_target_flags)) 15312 { 15313 if (TARGET_HARD_DFP_P (opts_set->x_target_flags) 15314 && TARGET_HARD_DFP_P (opts->x_target_flags)) 15315 error ("%<-mhard-dfp%> can%'t be used in conjunction with " 15316 "%<-msoft-float%>"); 15317 15318 opts->x_target_flags &= ~MASK_HARD_DFP; 15319 } 15320 15321 if (TARGET_BACKCHAIN_P (opts->x_target_flags) 15322 && TARGET_PACKED_STACK_P (opts->x_target_flags) 15323 && TARGET_HARD_FLOAT_P (opts->x_target_flags)) 15324 error ("%<-mbackchain%> %<-mpacked-stack%> %<-mhard-float%> are not " 15325 "supported in combination"); 15326 15327 if (opts->x_s390_stack_size) 15328 { 15329 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size) 15330 error ("stack size must be greater than the stack guard value"); 15331 else if (opts->x_s390_stack_size > 1 << 16) 15332 error ("stack size must not be greater than 64k"); 15333 } 15334 else if (opts->x_s390_stack_guard) 15335 error ("%<-mstack-guard%> implies use of %<-mstack-size%>"); 15336 15337 /* Our implementation of the stack probe requires the probe interval 15338 to be used as displacement in an address operand. The maximum 15339 probe interval currently is 64k. This would exceed short 15340 displacements. Trim that value down to 4k if that happens. This 15341 might result in too many probes being generated only on the 15342 oldest supported machine level z900. 
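     (The parameter holds the log2 of the interval, note the "1 <<" in
     the check below, so the value 12 selects a 4k probe interval.)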
*/
  if (!DISP_IN_RANGE ((1 << param_stack_clash_protection_probe_interval)))
    param_stack_clash_protection_probe_interval = 12;

#if TARGET_TPF != 0
  if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_check))
    error ("%<-mtpf-trace-hook-prologue-check%> requires an integer "
	   "in range 0..4095");

  if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_target))
    error ("%<-mtpf-trace-hook-prologue-target%> requires an integer "
	   "in range 0..4095");

  if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_check))
    error ("%<-mtpf-trace-hook-epilogue-check%> requires an integer "
	   "in range 0..4095");

  if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_target))
    error ("%<-mtpf-trace-hook-epilogue-target%> requires an integer "
	   "in range 0..4095");

  if (s390_tpf_trace_skip)
    {
      opts->x_s390_tpf_trace_hook_prologue_target
	= TPF_TRACE_PROLOGUE_SKIP_TARGET;
      opts->x_s390_tpf_trace_hook_epilogue_target
	= TPF_TRACE_EPILOGUE_SKIP_TARGET;
    }
#endif

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
    opts->x_target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
    {
      SET_OPTION_IF_UNSET (opts, opts_set, param_max_unrolled_insns,
			   100);
      SET_OPTION_IF_UNSET (opts, opts_set, param_max_unroll_times, 32);
      SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peeled_insns,
			   2000);
      SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peel_times,
			   64);
    }

  SET_OPTION_IF_UNSET (opts, opts_set, param_max_pending_list_length,
		       256);
  /* Values for loop prefetching.  */
  SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_line_size, 256);
  SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_size, 128);
  /* s390 has more than 2 levels and the size is much larger.  Since
     we are always running virtualized assume that we only get a small
     part of the caches above l1.  */
  SET_OPTION_IF_UNSET (opts, opts_set, param_l2_cache_size, 1500);
  SET_OPTION_IF_UNSET (opts, opts_set,
		       param_prefetch_min_insn_to_mem_ratio, 2);
  SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches, 6);

  /* Use the alternative scheduling-pressure algorithm by default.  */
  SET_OPTION_IF_UNSET (opts, opts_set, param_sched_pressure_algorithm, 2);
  SET_OPTION_IF_UNSET (opts, opts_set, param_min_vect_loop_bound, 2);

  /* Use aggressive inlining parameters.  */
  if (opts->x_s390_tune >= PROCESSOR_2964_Z13)
    {
      SET_OPTION_IF_UNSET (opts, opts_set, param_inline_min_speedup, 2);
      SET_OPTION_IF_UNSET (opts, opts_set, param_max_inline_insns_auto, 80);
    }

  /* Set the default alignment.  */
  s390_default_align (opts);

  /* Call target specific restore function to do post-init work.  At the
     moment, this just sets opts->x_s390_cost_pointer.  */
  s390_function_specific_restore (opts, NULL);

  /* Check whether -mfentry is supported.  It cannot be used in 31-bit
     mode, because 31-bit PLT stubs assume that %r12 contains the GOT
     address, which is not the case when the code runs before the
     prologue.
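     (The fentry call is emitted as the very first instruction of the
     function, ahead of any prologue code.)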
*/ 15416 if (opts->x_flag_fentry && !TARGET_64BIT) 15417 error ("%<-mfentry%> is supported only for 64-bit CPUs"); 15418} 15419 15420static void 15421s390_option_override (void) 15422{ 15423 unsigned int i; 15424 cl_deferred_option *opt; 15425 vec<cl_deferred_option> *v = 15426 (vec<cl_deferred_option> *) s390_deferred_options; 15427 15428 if (v) 15429 FOR_EACH_VEC_ELT (*v, i, opt) 15430 { 15431 switch (opt->opt_index) 15432 { 15433 case OPT_mhotpatch_: 15434 { 15435 int val1; 15436 int val2; 15437 char *s = strtok (ASTRDUP (opt->arg), ","); 15438 char *t = strtok (NULL, "\0"); 15439 15440 if (t != NULL) 15441 { 15442 val1 = integral_argument (s); 15443 val2 = integral_argument (t); 15444 } 15445 else 15446 { 15447 val1 = -1; 15448 val2 = -1; 15449 } 15450 if (val1 == -1 || val2 == -1) 15451 { 15452 /* argument is not a plain number */ 15453 error ("arguments to %qs should be non-negative integers", 15454 "-mhotpatch=n,m"); 15455 break; 15456 } 15457 else if (val1 > s390_hotpatch_hw_max 15458 || val2 > s390_hotpatch_hw_max) 15459 { 15460 error ("argument to %qs is too large (max. %d)", 15461 "-mhotpatch=n,m", s390_hotpatch_hw_max); 15462 break; 15463 } 15464 s390_hotpatch_hw_before_label = val1; 15465 s390_hotpatch_hw_after_label = val2; 15466 break; 15467 } 15468 default: 15469 gcc_unreachable (); 15470 } 15471 } 15472 15473 /* Set up function hooks. */ 15474 init_machine_status = s390_init_machine_status; 15475 15476 s390_option_override_internal (&global_options, &global_options_set); 15477 15478 /* Save the initial options in case the user does function specific 15479 options. */ 15480 target_option_default_node = build_target_option_node (&global_options); 15481 target_option_current_node = target_option_default_node; 15482 15483 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch 15484 requires the arch flags to be evaluated already. Since prefetching 15485 is beneficial on s390, we enable it if available. */ 15486 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3) 15487 flag_prefetch_loop_arrays = 1; 15488 15489 if (!s390_pic_data_is_text_relative && !flag_pic) 15490 error ("%<-mno-pic-data-is-text-relative%> cannot be used without " 15491 "%<-fpic%>/%<-fPIC%>"); 15492 15493 if (TARGET_TPF) 15494 { 15495 /* Don't emit DWARF3/4 unless specifically selected. The TPF 15496 debuggers do not yet support DWARF 3/4. */ 15497 if (!global_options_set.x_dwarf_strict) 15498 dwarf_strict = 1; 15499 if (!global_options_set.x_dwarf_version) 15500 dwarf_version = 2; 15501 } 15502} 15503 15504#if S390_USE_TARGET_ATTRIBUTE 15505/* Inner function to process the attribute((target(...))), take an argument and 15506 set the current options from the argument. If we have a list, recursively go 15507 over the list. 
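   E.g. __attribute__((target ("arch=z13,no-vx"))) arrives here as a
   single string constant that gets split at the commas below.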
*/ 15508 15509static bool 15510s390_valid_target_attribute_inner_p (tree args, 15511 struct gcc_options *opts, 15512 struct gcc_options *new_opts_set, 15513 bool force_pragma) 15514{ 15515 char *next_optstr; 15516 bool ret = true; 15517 15518#define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 } 15519#define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 } 15520 static const struct 15521 { 15522 const char *string; 15523 size_t len; 15524 int opt; 15525 int has_arg; 15526 int only_as_pragma; 15527 } attrs[] = { 15528 /* enum options */ 15529 S390_ATTRIB ("arch=", OPT_march_, 1), 15530 S390_ATTRIB ("tune=", OPT_mtune_, 1), 15531 /* uinteger options */ 15532 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1), 15533 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1), 15534 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1), 15535 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1), 15536 /* flag options */ 15537 S390_ATTRIB ("backchain", OPT_mbackchain, 0), 15538 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0), 15539 S390_ATTRIB ("hard-float", OPT_mhard_float, 0), 15540 S390_ATTRIB ("htm", OPT_mhtm, 0), 15541 S390_ATTRIB ("vx", OPT_mvx, 0), 15542 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0), 15543 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0), 15544 S390_ATTRIB ("soft-float", OPT_msoft_float, 0), 15545 S390_ATTRIB ("mvcle", OPT_mmvcle, 0), 15546 S390_PRAGMA ("zvector", OPT_mzvector, 0), 15547 /* boolean options */ 15548 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0), 15549 }; 15550#undef S390_ATTRIB 15551#undef S390_PRAGMA 15552 15553 /* If this is a list, recurse to get the options. */ 15554 if (TREE_CODE (args) == TREE_LIST) 15555 { 15556 bool ret = true; 15557 int num_pragma_values; 15558 int i; 15559 15560 /* Note: attribs.c:decl_attributes prepends the values from 15561 current_target_pragma to the list of target attributes. To determine 15562 whether we're looking at a value of the attribute or the pragma we 15563 assume that the first [list_length (current_target_pragma)] values in 15564 the list are the values from the pragma. */ 15565 num_pragma_values = (!force_pragma && current_target_pragma != NULL) 15566 ? list_length (current_target_pragma) : 0; 15567 for (i = 0; args; args = TREE_CHAIN (args), i++) 15568 { 15569 bool is_pragma; 15570 15571 is_pragma = (force_pragma || i < num_pragma_values); 15572 if (TREE_VALUE (args) 15573 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args), 15574 opts, new_opts_set, 15575 is_pragma)) 15576 { 15577 ret = false; 15578 } 15579 } 15580 return ret; 15581 } 15582 15583 else if (TREE_CODE (args) != STRING_CST) 15584 { 15585 error ("attribute %<target%> argument not a string"); 15586 return false; 15587 } 15588 15589 /* Handle multiple arguments separated by commas. */ 15590 next_optstr = ASTRDUP (TREE_STRING_POINTER (args)); 15591 15592 while (next_optstr && *next_optstr != '\0') 15593 { 15594 char *p = next_optstr; 15595 char *orig_p = p; 15596 char *comma = strchr (next_optstr, ','); 15597 size_t len, opt_len; 15598 int opt; 15599 bool opt_set_p; 15600 char ch; 15601 unsigned i; 15602 int mask = 0; 15603 enum cl_var_type var_type; 15604 bool found; 15605 15606 if (comma) 15607 { 15608 *comma = '\0'; 15609 len = comma - next_optstr; 15610 next_optstr = comma + 1; 15611 } 15612 else 15613 { 15614 len = strlen (p); 15615 next_optstr = NULL; 15616 } 15617 15618 /* Recognize no-xxx. 
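	 A "no-" prefix negates the option, e.g. "no-vx" clears OPT_mvx
	 instead of setting it.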
*/ 15619 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-') 15620 { 15621 opt_set_p = false; 15622 p += 3; 15623 len -= 3; 15624 } 15625 else 15626 opt_set_p = true; 15627 15628 /* Find the option. */ 15629 ch = *p; 15630 found = false; 15631 for (i = 0; i < ARRAY_SIZE (attrs); i++) 15632 { 15633 opt_len = attrs[i].len; 15634 if (ch == attrs[i].string[0] 15635 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len) 15636 && memcmp (p, attrs[i].string, opt_len) == 0) 15637 { 15638 opt = attrs[i].opt; 15639 if (!opt_set_p && cl_options[opt].cl_reject_negative) 15640 continue; 15641 mask = cl_options[opt].var_value; 15642 var_type = cl_options[opt].var_type; 15643 found = true; 15644 break; 15645 } 15646 } 15647 15648 /* Process the option. */ 15649 if (!found) 15650 { 15651 error ("attribute(target(\"%s\")) is unknown", orig_p); 15652 return false; 15653 } 15654 else if (attrs[i].only_as_pragma && !force_pragma) 15655 { 15656 /* Value is not allowed for the target attribute. */ 15657 error ("value %qs is not supported by attribute %<target%>", 15658 attrs[i].string); 15659 return false; 15660 } 15661 15662 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR) 15663 { 15664 if (var_type == CLVC_BIT_CLEAR) 15665 opt_set_p = !opt_set_p; 15666 15667 if (opt_set_p) 15668 opts->x_target_flags |= mask; 15669 else 15670 opts->x_target_flags &= ~mask; 15671 new_opts_set->x_target_flags |= mask; 15672 } 15673 15674 else if (cl_options[opt].var_type == CLVC_BOOLEAN) 15675 { 15676 int value; 15677 15678 if (cl_options[opt].cl_uinteger) 15679 { 15680 /* Unsigned integer argument. Code based on the function 15681 decode_cmdline_option () in opts-common.c. */ 15682 value = integral_argument (p + opt_len); 15683 } 15684 else 15685 value = (opt_set_p) ? 1 : 0; 15686 15687 if (value != -1) 15688 { 15689 struct cl_decoded_option decoded; 15690 15691 /* Value range check; only implemented for numeric and boolean 15692 options at the moment. */ 15693 generate_option (opt, NULL, value, CL_TARGET, &decoded); 15694 s390_handle_option (opts, new_opts_set, &decoded, input_location); 15695 set_option (opts, new_opts_set, opt, value, 15696 p + opt_len, DK_UNSPECIFIED, input_location, 15697 global_dc); 15698 } 15699 else 15700 { 15701 error ("attribute(target(\"%s\")) is unknown", orig_p); 15702 ret = false; 15703 } 15704 } 15705 15706 else if (cl_options[opt].var_type == CLVC_ENUM) 15707 { 15708 bool arg_ok; 15709 int value; 15710 15711 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET); 15712 if (arg_ok) 15713 set_option (opts, new_opts_set, opt, value, 15714 p + opt_len, DK_UNSPECIFIED, input_location, 15715 global_dc); 15716 else 15717 { 15718 error ("attribute(target(\"%s\")) is unknown", orig_p); 15719 ret = false; 15720 } 15721 } 15722 15723 else 15724 gcc_unreachable (); 15725 } 15726 return ret; 15727} 15728 15729/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */ 15730 15731tree 15732s390_valid_target_attribute_tree (tree args, 15733 struct gcc_options *opts, 15734 const struct gcc_options *opts_set, 15735 bool force_pragma) 15736{ 15737 tree t = NULL_TREE; 15738 struct gcc_options new_opts_set; 15739 15740 memset (&new_opts_set, 0, sizeof (new_opts_set)); 15741 15742 /* Process each of the options on the chain. */ 15743 if (! 
s390_valid_target_attribute_inner_p (args, opts, &new_opts_set, 15744 force_pragma)) 15745 return error_mark_node; 15746 15747 /* If some option was set (even if it has not changed), rerun 15748 s390_option_override_internal, and then save the options away. */ 15749 if (new_opts_set.x_target_flags 15750 || new_opts_set.x_s390_arch 15751 || new_opts_set.x_s390_tune 15752 || new_opts_set.x_s390_stack_guard 15753 || new_opts_set.x_s390_stack_size 15754 || new_opts_set.x_s390_branch_cost 15755 || new_opts_set.x_s390_warn_framesize 15756 || new_opts_set.x_s390_warn_dynamicstack_p) 15757 { 15758 const unsigned char *src = (const unsigned char *)opts_set; 15759 unsigned char *dest = (unsigned char *)&new_opts_set; 15760 unsigned int i; 15761 15762 /* Merge the original option flags into the new ones. */ 15763 for (i = 0; i < sizeof(*opts_set); i++) 15764 dest[i] |= src[i]; 15765 15766 /* Do any overrides, such as arch=xxx, or tune=xxx support. */ 15767 s390_option_override_internal (opts, &new_opts_set); 15768 /* Save the current options unless we are validating options for 15769 #pragma. */ 15770 t = build_target_option_node (opts); 15771 } 15772 return t; 15773} 15774 15775/* Hook to validate attribute((target("string"))). */ 15776 15777static bool 15778s390_valid_target_attribute_p (tree fndecl, 15779 tree ARG_UNUSED (name), 15780 tree args, 15781 int ARG_UNUSED (flags)) 15782{ 15783 struct gcc_options func_options; 15784 tree new_target, new_optimize; 15785 bool ret = true; 15786 15787 /* attribute((target("default"))) does nothing, beyond 15788 affecting multi-versioning. */ 15789 if (TREE_VALUE (args) 15790 && TREE_CODE (TREE_VALUE (args)) == STRING_CST 15791 && TREE_CHAIN (args) == NULL_TREE 15792 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0) 15793 return true; 15794 15795 tree old_optimize = build_optimization_node (&global_options); 15796 15797 /* Get the optimization options of the current function. */ 15798 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); 15799 15800 if (!func_optimize) 15801 func_optimize = old_optimize; 15802 15803 /* Init func_options. */ 15804 memset (&func_options, 0, sizeof (func_options)); 15805 init_options_struct (&func_options, NULL); 15806 lang_hooks.init_options_struct (&func_options); 15807 15808 cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize)); 15809 15810 /* Initialize func_options to the default before its target options can 15811 be set. */ 15812 cl_target_option_restore (&func_options, 15813 TREE_TARGET_OPTION (target_option_default_node)); 15814 15815 new_target = s390_valid_target_attribute_tree (args, &func_options, 15816 &global_options_set, 15817 (args == 15818 current_target_pragma)); 15819 new_optimize = build_optimization_node (&func_options); 15820 if (new_target == error_mark_node) 15821 ret = false; 15822 else if (fndecl && new_target) 15823 { 15824 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target; 15825 if (old_optimize != new_optimize) 15826 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; 15827 } 15828 return ret; 15829} 15830 15831/* Hook to determine if one function can safely inline another. 
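   This boils down to comparing the target options of CALLER and CALLEE:
   the callee must not require a newer architecture level or floating
   point support the caller does not provide; see the individual checks
   below.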
*/ 15832 15833static bool 15834s390_can_inline_p (tree caller, tree callee) 15835{ 15836 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); 15837 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); 15838 15839 if (!callee_tree) 15840 callee_tree = target_option_default_node; 15841 if (!caller_tree) 15842 caller_tree = target_option_default_node; 15843 if (callee_tree == caller_tree) 15844 return true; 15845 15846 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); 15847 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); 15848 bool ret = true; 15849 15850 if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)) 15851 != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))) 15852 ret = false; 15853 15854 /* Don't inline functions to be compiled for a more recent arch into a 15855 function for an older arch. */ 15856 else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch) 15857 ret = false; 15858 15859 /* Inlining a hard float function into a soft float function is only 15860 allowed if the hard float function doesn't actually make use of 15861 floating point. 15862 15863 We are called from FEs for multi-versioning call optimization, so 15864 beware of ipa_fn_summaries not available. */ 15865 else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags) 15866 && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags)) 15867 || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags) 15868 && TARGET_HARD_DFP_P (callee_opts->x_target_flags))) 15869 && (! ipa_fn_summaries 15870 || ipa_fn_summaries->get 15871 (cgraph_node::get (callee))->fp_expressions)) 15872 ret = false; 15873 15874 return ret; 15875} 15876#endif 15877 15878/* Set VAL to correct enum value according to the indirect-branch or 15879 function-return attribute in ATTR. */ 15880 15881static inline void 15882s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val) 15883{ 15884 const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))); 15885 if (strcmp (str, "keep") == 0) 15886 *val = indirect_branch_keep; 15887 else if (strcmp (str, "thunk") == 0) 15888 *val = indirect_branch_thunk; 15889 else if (strcmp (str, "thunk-inline") == 0) 15890 *val = indirect_branch_thunk_inline; 15891 else if (strcmp (str, "thunk-extern") == 0) 15892 *val = indirect_branch_thunk_extern; 15893} 15894 15895/* Memorize the setting for -mindirect-branch* and -mfunction-return* 15896 from either the cmdline or the function attributes in 15897 cfun->machine. */ 15898 15899static void 15900s390_indirect_branch_settings (tree fndecl) 15901{ 15902 tree attr; 15903 15904 if (!fndecl) 15905 return; 15906 15907 /* Initialize with the cmdline options and let the attributes 15908 override it. 
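     The plain "indirect_branch" and "function_return" attributes set
     both of their variants; the more specific _jump/_call and
     _reg/_mem attributes are looked up afterwards and therefore take
     precedence.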
*/ 15909 cfun->machine->indirect_branch_jump = s390_indirect_branch_jump; 15910 cfun->machine->indirect_branch_call = s390_indirect_branch_call; 15911 15912 cfun->machine->function_return_reg = s390_function_return_reg; 15913 cfun->machine->function_return_mem = s390_function_return_mem; 15914 15915 if ((attr = lookup_attribute ("indirect_branch", 15916 DECL_ATTRIBUTES (fndecl)))) 15917 { 15918 s390_indirect_branch_attrvalue (attr, 15919 &cfun->machine->indirect_branch_jump); 15920 s390_indirect_branch_attrvalue (attr, 15921 &cfun->machine->indirect_branch_call); 15922 } 15923 15924 if ((attr = lookup_attribute ("indirect_branch_jump", 15925 DECL_ATTRIBUTES (fndecl)))) 15926 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump); 15927 15928 if ((attr = lookup_attribute ("indirect_branch_call", 15929 DECL_ATTRIBUTES (fndecl)))) 15930 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call); 15931 15932 if ((attr = lookup_attribute ("function_return", 15933 DECL_ATTRIBUTES (fndecl)))) 15934 { 15935 s390_indirect_branch_attrvalue (attr, 15936 &cfun->machine->function_return_reg); 15937 s390_indirect_branch_attrvalue (attr, 15938 &cfun->machine->function_return_mem); 15939 } 15940 15941 if ((attr = lookup_attribute ("function_return_reg", 15942 DECL_ATTRIBUTES (fndecl)))) 15943 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg); 15944 15945 if ((attr = lookup_attribute ("function_return_mem", 15946 DECL_ATTRIBUTES (fndecl)))) 15947 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem); 15948} 15949 15950#if S390_USE_TARGET_ATTRIBUTE 15951/* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl 15952 cache. */ 15953 15954void 15955s390_activate_target_options (tree new_tree) 15956{ 15957 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree)); 15958 if (TREE_TARGET_GLOBALS (new_tree)) 15959 restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); 15960 else if (new_tree == target_option_default_node) 15961 restore_target_globals (&default_target_globals); 15962 else 15963 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); 15964 s390_previous_fndecl = NULL_TREE; 15965} 15966#endif 15967 15968/* Establish appropriate back-end context for processing the function 15969 FNDECL. The argument might be NULL to indicate processing at top 15970 level, outside of any function scope. */ 15971static void 15972s390_set_current_function (tree fndecl) 15973{ 15974#if S390_USE_TARGET_ATTRIBUTE 15975 /* Only change the context if the function changes. This hook is called 15976 several times in the course of compiling a function, and we don't want to 15977 slow things down too much or call target_reinit when it isn't safe. 
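     Note that the indirect branch settings are re-applied even on the
     fast path below since they live in cfun->machine, which is fresh
     for every function.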
*/ 15978 if (fndecl == s390_previous_fndecl) 15979 { 15980 s390_indirect_branch_settings (fndecl); 15981 return; 15982 } 15983 15984 tree old_tree; 15985 if (s390_previous_fndecl == NULL_TREE) 15986 old_tree = target_option_current_node; 15987 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl)) 15988 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl); 15989 else 15990 old_tree = target_option_default_node; 15991 15992 if (fndecl == NULL_TREE) 15993 { 15994 if (old_tree != target_option_current_node) 15995 s390_activate_target_options (target_option_current_node); 15996 return; 15997 } 15998 15999 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); 16000 if (new_tree == NULL_TREE) 16001 new_tree = target_option_default_node; 16002 16003 if (old_tree != new_tree) 16004 s390_activate_target_options (new_tree); 16005 s390_previous_fndecl = fndecl; 16006#endif 16007 s390_indirect_branch_settings (fndecl); 16008} 16009 16010/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */ 16011 16012static bool 16013s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, 16014 unsigned int align ATTRIBUTE_UNUSED, 16015 enum by_pieces_operation op ATTRIBUTE_UNUSED, 16016 bool speed_p ATTRIBUTE_UNUSED) 16017{ 16018 return (size == 1 || size == 2 16019 || size == 4 || (TARGET_ZARCH && size == 8)); 16020} 16021 16022/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */ 16023 16024static void 16025s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) 16026{ 16027 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc]; 16028 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc]; 16029 tree call_efpc = build_call_expr (efpc, 0); 16030 tree fenv_var = create_tmp_var_raw (unsigned_type_node); 16031 16032#define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000) 16033#define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000) 16034#define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00) 16035#define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24) 16036#define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16) 16037#define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8) 16038 16039 /* Generates the equivalent of feholdexcept (&fenv_var) 16040 16041 fenv_var = __builtin_s390_efpc (); 16042 __builtin_s390_sfpc (fenv_var & mask) */ 16043 tree old_fpc = build4 (TARGET_EXPR, unsigned_type_node, fenv_var, call_efpc, 16044 NULL_TREE, NULL_TREE); 16045 tree new_fpc 16046 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, 16047 build_int_cst (unsigned_type_node, 16048 ~(FPC_DXC_MASK | FPC_FLAGS_MASK 16049 | FPC_EXCEPTION_MASK))); 16050 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc); 16051 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc); 16052 16053 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT) 16054 16055 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */ 16056 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc, 16057 build_int_cst (unsigned_type_node, 16058 ~(FPC_DXC_MASK | FPC_FLAGS_MASK))); 16059 *clear = build_call_expr (sfpc, 1, new_fpc); 16060 16061 /* Generates the equivalent of feupdateenv (fenv_var) 16062 16063 old_fpc = __builtin_s390_efpc (); 16064 __builtin_s390_sfpc (fenv_var); 16065 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */ 16066 16067 old_fpc = create_tmp_var_raw (unsigned_type_node); 16068 tree store_old_fpc = build4 (TARGET_EXPR, void_type_node, old_fpc, call_efpc, 16069 NULL_TREE, NULL_TREE); 16070 16071 set_new_fpc = build_call_expr (sfpc, 1, fenv_var); 16072 16073 tree raise_old_except = build2 
(BIT_AND_EXPR, unsigned_type_node, old_fpc,
				     build_int_cst (unsigned_type_node,
						    FPC_FLAGS_MASK));
  raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
			     build_int_cst (unsigned_type_node,
					    FPC_FLAGS_SHIFT));
  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  raise_old_except = build_call_expr (atomic_feraiseexcept,
				      1, raise_old_except);

  *update = build2 (COMPOUND_EXPR, void_type_node,
		    build2 (COMPOUND_EXPR, void_type_node,
			    store_old_fpc, set_new_fpc),
		    raise_old_except);

#undef FPC_EXCEPTION_MASK
#undef FPC_FLAGS_MASK
#undef FPC_DXC_MASK
#undef FPC_EXCEPTION_MASK_SHIFT
#undef FPC_FLAGS_SHIFT
#undef FPC_DXC_SHIFT
}

/* Return the vector mode to be used for inner mode MODE when doing
   vectorization.  */
static machine_mode
s390_preferred_simd_mode (scalar_mode mode)
{
  if (TARGET_VXE)
    switch (mode)
      {
      case E_SFmode:
	return V4SFmode;
      default:;
      }

  if (TARGET_VX)
    switch (mode)
      {
      case E_DFmode:
	return V2DFmode;
      case E_DImode:
	return V2DImode;
      case E_SImode:
	return V4SImode;
      case E_HImode:
	return V8HImode;
      case E_QImode:
	return V16QImode;
      default:;
      }
  return word_mode;
}

/* Our hardware does not require vectors to be strictly aligned.  */
static bool
s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
				  const_tree type ATTRIBUTE_UNUSED,
				  int misalignment ATTRIBUTE_UNUSED,
				  bool is_packed ATTRIBUTE_UNUSED)
{
  if (TARGET_VX)
    return true;

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}

/* The vector ABI requires vector types to be aligned on an 8-byte
   boundary (our stack alignment).  However, we allow this to be
   overridden by the user, even though this definitely breaks the
   ABI.  */
static HOST_WIDE_INT
s390_vector_alignment (const_tree type)
{
  tree size = TYPE_SIZE (type);

  if (!TARGET_VX_ABI)
    return default_vector_alignment (type);

  if (TYPE_USER_ALIGN (type))
    return TYPE_ALIGN (type);

  if (tree_fits_uhwi_p (size)
      && tree_to_uhwi (size) < BIGGEST_ALIGNMENT)
    return tree_to_uhwi (size);

  return BIGGEST_ALIGNMENT;
}

/* Implement TARGET_CONSTANT_ALIGNMENT.  Alignment on even addresses for
   LARL instruction.  */

static HOST_WIDE_INT
s390_constant_alignment (const_tree, HOST_WIDE_INT align)
{
  return MAX (align, 16);
}

#ifdef HAVE_AS_MACHINE_MACHINEMODE
/* Implement TARGET_ASM_FILE_START.  */
static void
s390_asm_file_start (void)
{
  default_file_start ();
  s390_asm_output_machine_for_arch (asm_out_file);
}
#endif

/* Implement TARGET_ASM_FILE_END.
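   Emits the .gnu_attribute recording the vector ABI in use, the
   .note.GNU-stack marker, and the split stack marker if needed.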
*/ 16183static void 16184s390_asm_file_end (void) 16185{ 16186#ifdef HAVE_AS_GNU_ATTRIBUTE 16187 varpool_node *vnode; 16188 cgraph_node *cnode; 16189 16190 FOR_EACH_VARIABLE (vnode) 16191 if (TREE_PUBLIC (vnode->decl)) 16192 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false); 16193 16194 FOR_EACH_FUNCTION (cnode) 16195 if (TREE_PUBLIC (cnode->decl)) 16196 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false); 16197 16198 16199 if (s390_vector_abi != 0) 16200 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n", 16201 s390_vector_abi); 16202#endif 16203 file_end_indicate_exec_stack (); 16204 16205 if (flag_split_stack) 16206 file_end_indicate_split_stack (); 16207} 16208 16209/* Return true if TYPE is a vector bool type. */ 16210static inline bool 16211s390_vector_bool_type_p (const_tree type) 16212{ 16213 return TYPE_VECTOR_OPAQUE (type); 16214} 16215 16216/* Return the diagnostic message string if the binary operation OP is 16217 not permitted on TYPE1 and TYPE2, NULL otherwise. */ 16218static const char* 16219s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2) 16220{ 16221 bool bool1_p, bool2_p; 16222 bool plusminus_p; 16223 bool muldiv_p; 16224 bool compare_p; 16225 machine_mode mode1, mode2; 16226 16227 if (!TARGET_ZVECTOR) 16228 return NULL; 16229 16230 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2)) 16231 return NULL; 16232 16233 bool1_p = s390_vector_bool_type_p (type1); 16234 bool2_p = s390_vector_bool_type_p (type2); 16235 16236 /* Mixing signed and unsigned types is forbidden for all 16237 operators. */ 16238 if (!bool1_p && !bool2_p 16239 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2)) 16240 return N_("types differ in signedness"); 16241 16242 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR); 16243 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR 16244 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR 16245 || op == ROUND_DIV_EXPR); 16246 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR 16247 || op == EQ_EXPR || op == NE_EXPR); 16248 16249 if (bool1_p && bool2_p && (plusminus_p || muldiv_p)) 16250 return N_("binary operator does not support two vector bool operands"); 16251 16252 if (bool1_p != bool2_p && (muldiv_p || compare_p)) 16253 return N_("binary operator does not support vector bool operand"); 16254 16255 mode1 = TYPE_MODE (type1); 16256 mode2 = TYPE_MODE (type2); 16257 16258 if (bool1_p != bool2_p && plusminus_p 16259 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT 16260 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT)) 16261 return N_("binary operator does not support mixing vector " 16262 "bool with floating point vector operands"); 16263 16264 return NULL; 16265} 16266 16267/* Implement TARGET_C_EXCESS_PRECISION. 16268 16269 FIXME: For historical reasons, float_t and double_t are typedef'ed to 16270 double on s390, causing operations on float_t to operate in a higher 16271 precision than is necessary. However, it is not the case that SFmode 16272 operations have implicit excess precision, and we generate more optimal 16273 code if we let the compiler know no implicit extra precision is added. 16274 16275 That means when we are compiling with -fexcess-precision=fast, the value 16276 we set for FLT_EVAL_METHOD will be out of line with the actual precision of 16277 float_t (though they would be correct for -fexcess-precision=standard). 16278 16279 A complete fix would modify glibc to remove the unnecessary typedef 16280 of float_t to double. 
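   Until then the standards compliant case below has to report promotion
   to double to stay consistent with glibc's float_t.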
*/

static enum flt_eval_method
s390_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_IMPLICIT:
    case EXCESS_PRECISION_TYPE_FAST:
      /* The fastest type to promote to will always be the native type,
	 whether that occurs with implicit excess precision or
	 otherwise.  */
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
    case EXCESS_PRECISION_TYPE_STANDARD:
      /* Otherwise, when we are in a standards compliant mode, to
	 ensure consistency with the implementation in glibc, report that
	 float is evaluated to the range and precision of double.  */
      return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
    default:
      gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}

/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
s390_asan_shadow_offset (void)
{
  return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
}

#ifdef HAVE_GAS_HIDDEN
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Output an indirect branch trampoline for target register REGNO.  */

static void
s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
{
  tree decl;
  char thunk_label[32];
  int i;

  if (z10_p)
    sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
  else
    sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
	     INDIRECT_BRANCH_THUNK_REGNUM, regno);

  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
		     get_identifier (thunk_label),
		     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;
  DECL_IGNORED_P (decl) = 1;

  if (USE_HIDDEN_LINKONCE)
    {
      cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

      targetm.asm_out.unique_section (decl, 0);
      switch_to_section (get_named_section (decl, NULL, 0));

      targetm.asm_out.globalize_label (asm_out_file, thunk_label);
      fputs ("\t.hidden\t", asm_out_file);
      assemble_name (asm_out_file, thunk_label);
      putc ('\n', asm_out_file);
      ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
    }
  else
    {
      switch_to_section (text_section);
      ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
    }

  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  cfun->is_thunk = true;
  first_function_block_is_cold = false;
  final_start_function (emit_barrier (), asm_out_file, 1);

  /* This makes CFI at least usable for indirect jumps.

     Stopping in the thunk: backtrace will point to the thunk target
     as if it was interrupted by a signal.  For a call this means that
     the call chain will be: caller->callee->thunk  */
  if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
    {
      fputs ("\t.cfi_signal_frame\n", asm_out_file);
      fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
      for (i = 0; i < FPR15_REGNUM; i++)
	fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
    }

  if (z10_p)
    {
      /* exrl  0,1f  */

      /* We generate a thunk for z10 compiled code although z10 is
	 currently not enabled.  Tell the assembler to accept the
	 instruction.  */
      if (!TARGET_CPU_Z10)
	{
	  fputs ("\t.machine push\n", asm_out_file);
	  fputs ("\t.machine z10\n", asm_out_file);
	}
      /* We use exrl even if -mzarch hasn't been specified on the
	 command line so we have to tell the assembler to accept
	 it.  */
      if (!TARGET_ZARCH)
	fputs ("\t.machinemode zarch\n", asm_out_file);

      fputs ("\texrl\t0,1f\n", asm_out_file);

      if (!TARGET_ZARCH)
	fputs ("\t.machinemode esa\n", asm_out_file);

      if (!TARGET_CPU_Z10)
	fputs ("\t.machine pop\n", asm_out_file);
    }
  else
    {
      /* larl %r1,1f  */
      fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
	       INDIRECT_BRANCH_THUNK_REGNUM);

      /* ex 0,0(%r1)  */
      fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
	       INDIRECT_BRANCH_THUNK_REGNUM);
    }

  /* 0:    j 0b  */
  fputs ("0:\tj\t0b\n", asm_out_file);

  /* 1:    br <regno>  */
  fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);

  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);
  set_cfun (NULL);
  current_function_decl = NULL;
}

/* Implement the asm.code_end target hook.  */

static void
s390_code_end (void)
{
  int i;

  for (i = 1; i < 16; i++)
    {
      if (indirect_branch_z10thunk_mask & (1 << i))
	s390_output_indirect_thunk_function (i, true);

      if (indirect_branch_prez10thunk_mask & (1 << i))
	s390_output_indirect_thunk_function (i, false);
    }

  if (TARGET_INDIRECT_BRANCH_TABLE)
    {
      int o;
      int i;

      for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
	{
	  if (indirect_branch_table_label_no[o] == 0)
	    continue;

	  switch_to_section (get_section (indirect_branch_table_name[o],
					  0,
					  NULL_TREE));
	  for (i = 0; i < indirect_branch_table_label_no[o]; i++)
	    {
	      char label_start[32];

	      ASM_GENERATE_INTERNAL_LABEL (label_start,
					   indirect_branch_table_label[o], i);

	      fputs ("\t.long\t", asm_out_file);
	      assemble_name_raw (asm_out_file, label_start);
	      fputs ("-.\n", asm_out_file);
	    }
	  switch_to_section (current_function_section ());
	}
    }
}

/* Implement the TARGET_CASE_VALUES_THRESHOLD target hook.  */

unsigned int
s390_case_values_threshold (void)
{
  /* Disabling branch prediction for indirect jumps makes jump tables
     much more expensive.  */
  if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
    return 20;

  return default_case_values_threshold ();
}

/* Evaluate the insns between HEAD and TAIL and allow the back-end to
   install back-end specific dependencies.

   Establish an ANTI dependency between the r11 and r15 restores from
   FPRs to prevent the instruction scheduler from reordering them since
   this would break CFI.  No further handling in the sched_reorder
   hook is required since the r11 and r15 restore will never appear in
   the same ready list with that change.  */
void
s390_sched_dependencies_evaluation (rtx_insn *head, rtx_insn *tail)
{
  if (!frame_pointer_needed || !epilogue_completed)
    return;

  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (head);

  rtx_insn *r15_restore = NULL, *r11_restore = NULL;

  for (rtx_insn *insn = tail; insn != head; insn = PREV_INSN (insn))
    {
      rtx set = single_set (insn);
      if (!INSN_P (insn)
	  || !RTX_FRAME_RELATED_P (insn)
	  || set == NULL_RTX
	  || !REG_P (SET_DEST (set))
	  || !FP_REG_P (SET_SRC (set)))
	continue;

      if (REGNO (SET_DEST (set)) == HARD_FRAME_POINTER_REGNUM)
	r11_restore = insn;

      if (REGNO (SET_DEST (set)) == STACK_POINTER_REGNUM)
	r15_restore = insn;
    }

  if (r11_restore == NULL || r15_restore == NULL)
    return;
  add_dependence (r11_restore, r15_restore, REG_DEP_ANTI);
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK for integer shifts.  */

static unsigned HOST_WIDE_INT
s390_shift_truncation_mask (machine_mode mode)
{
  return mode == DImode || mode == SImode ? 63 : 0;
}

/* Initialize GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER s390_assemble_integer

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""

#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE s390_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO s390_encode_section_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY s390_return_in_memory

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS s390_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN s390_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL s390_builtin_decl

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
#undef
TARGET_ASM_CAN_OUTPUT_MI_THUNK 16597#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true 16598 16599#undef TARGET_C_EXCESS_PRECISION 16600#define TARGET_C_EXCESS_PRECISION s390_excess_precision 16601 16602#undef TARGET_SCHED_ADJUST_PRIORITY 16603#define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority 16604#undef TARGET_SCHED_ISSUE_RATE 16605#define TARGET_SCHED_ISSUE_RATE s390_issue_rate 16606#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD 16607#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead 16608 16609#undef TARGET_SCHED_VARIABLE_ISSUE 16610#define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue 16611#undef TARGET_SCHED_REORDER 16612#define TARGET_SCHED_REORDER s390_sched_reorder 16613#undef TARGET_SCHED_INIT 16614#define TARGET_SCHED_INIT s390_sched_init 16615 16616#undef TARGET_CANNOT_COPY_INSN_P 16617#define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p 16618#undef TARGET_RTX_COSTS 16619#define TARGET_RTX_COSTS s390_rtx_costs 16620#undef TARGET_ADDRESS_COST 16621#define TARGET_ADDRESS_COST s390_address_cost 16622#undef TARGET_REGISTER_MOVE_COST 16623#define TARGET_REGISTER_MOVE_COST s390_register_move_cost 16624#undef TARGET_MEMORY_MOVE_COST 16625#define TARGET_MEMORY_MOVE_COST s390_memory_move_cost 16626#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST 16627#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ 16628 s390_builtin_vectorization_cost 16629 16630#undef TARGET_MACHINE_DEPENDENT_REORG 16631#define TARGET_MACHINE_DEPENDENT_REORG s390_reorg 16632 16633#undef TARGET_VALID_POINTER_MODE 16634#define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode 16635 16636#undef TARGET_BUILD_BUILTIN_VA_LIST 16637#define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list 16638#undef TARGET_EXPAND_BUILTIN_VA_START 16639#define TARGET_EXPAND_BUILTIN_VA_START s390_va_start 16640#undef TARGET_ASAN_SHADOW_OFFSET 16641#define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset 16642#undef TARGET_GIMPLIFY_VA_ARG_EXPR 16643#define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg 16644 16645#undef TARGET_PROMOTE_FUNCTION_MODE 16646#define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode 16647#undef TARGET_PASS_BY_REFERENCE 16648#define TARGET_PASS_BY_REFERENCE s390_pass_by_reference 16649 16650#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE 16651#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change 16652 16653#undef TARGET_FUNCTION_OK_FOR_SIBCALL 16654#define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall 16655#undef TARGET_FUNCTION_ARG 16656#define TARGET_FUNCTION_ARG s390_function_arg 16657#undef TARGET_FUNCTION_ARG_ADVANCE 16658#define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance 16659#undef TARGET_FUNCTION_ARG_PADDING 16660#define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding 16661#undef TARGET_FUNCTION_VALUE 16662#define TARGET_FUNCTION_VALUE s390_function_value 16663#undef TARGET_LIBCALL_VALUE 16664#define TARGET_LIBCALL_VALUE s390_libcall_value 16665#undef TARGET_STRICT_ARGUMENT_NAMING 16666#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true 16667 16668#undef TARGET_KEEP_LEAF_WHEN_PROFILED 16669#define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled 16670 16671#undef TARGET_FIXED_CONDITION_CODE_REGS 16672#define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs 16673 16674#undef TARGET_CC_MODES_COMPATIBLE 16675#define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible 16676 16677#undef 
TARGET_INVALID_WITHIN_DOLOOP 16678#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null 16679 16680#ifdef HAVE_AS_TLS 16681#undef TARGET_ASM_OUTPUT_DWARF_DTPREL 16682#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel 16683#endif 16684 16685#undef TARGET_DWARF_FRAME_REG_MODE 16686#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode 16687 16688#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING 16689#undef TARGET_MANGLE_TYPE 16690#define TARGET_MANGLE_TYPE s390_mangle_type 16691#endif 16692 16693#undef TARGET_SCALAR_MODE_SUPPORTED_P 16694#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p 16695 16696#undef TARGET_VECTOR_MODE_SUPPORTED_P 16697#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p 16698 16699#undef TARGET_PREFERRED_RELOAD_CLASS 16700#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class 16701 16702#undef TARGET_SECONDARY_RELOAD 16703#define TARGET_SECONDARY_RELOAD s390_secondary_reload 16704#undef TARGET_SECONDARY_MEMORY_NEEDED 16705#define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed 16706#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE 16707#define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode 16708 16709#undef TARGET_LIBGCC_CMP_RETURN_MODE 16710#define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode 16711 16712#undef TARGET_LIBGCC_SHIFT_COUNT_MODE 16713#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode 16714 16715#undef TARGET_LEGITIMATE_ADDRESS_P 16716#define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p 16717 16718#undef TARGET_LEGITIMATE_CONSTANT_P 16719#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p 16720 16721#undef TARGET_LRA_P 16722#define TARGET_LRA_P s390_lra_p 16723 16724#undef TARGET_CAN_ELIMINATE 16725#define TARGET_CAN_ELIMINATE s390_can_eliminate 16726 16727#undef TARGET_CONDITIONAL_REGISTER_USAGE 16728#define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage 16729 16730#undef TARGET_LOOP_UNROLL_ADJUST 16731#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust 16732 16733#undef TARGET_ASM_TRAMPOLINE_TEMPLATE 16734#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template 16735#undef TARGET_TRAMPOLINE_INIT 16736#define TARGET_TRAMPOLINE_INIT s390_trampoline_init 16737 16738/* PR 79421 */ 16739#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS 16740#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1 16741 16742#undef TARGET_UNWIND_WORD_MODE 16743#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode 16744 16745#undef TARGET_CANONICALIZE_COMPARISON 16746#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison 16747 16748#undef TARGET_HARD_REGNO_SCRATCH_OK 16749#define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok 16750 16751#undef TARGET_HARD_REGNO_NREGS 16752#define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs 16753#undef TARGET_HARD_REGNO_MODE_OK 16754#define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok 16755#undef TARGET_MODES_TIEABLE_P 16756#define TARGET_MODES_TIEABLE_P s390_modes_tieable_p 16757 16758#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED 16759#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \ 16760 s390_hard_regno_call_part_clobbered 16761 16762#undef TARGET_ATTRIBUTE_TABLE 16763#define TARGET_ATTRIBUTE_TABLE s390_attribute_table 16764 16765#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P 16766#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true 16767 16768#undef TARGET_SET_UP_BY_PROLOGUE 16769#define TARGET_SET_UP_BY_PROLOGUE 
s300_set_up_by_prologue 16770 16771#undef TARGET_EXTRA_LIVE_ON_ENTRY 16772#define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry 16773 16774#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P 16775#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ 16776 s390_use_by_pieces_infrastructure_p 16777 16778#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV 16779#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv 16780 16781#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN 16782#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn 16783 16784#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE 16785#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode 16786 16787#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT 16788#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment 16789 16790#undef TARGET_VECTOR_ALIGNMENT 16791#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment 16792 16793#undef TARGET_INVALID_BINARY_OP 16794#define TARGET_INVALID_BINARY_OP s390_invalid_binary_op 16795 16796#ifdef HAVE_AS_MACHINE_MACHINEMODE 16797#undef TARGET_ASM_FILE_START 16798#define TARGET_ASM_FILE_START s390_asm_file_start 16799#endif 16800 16801#undef TARGET_ASM_FILE_END 16802#define TARGET_ASM_FILE_END s390_asm_file_end 16803 16804#undef TARGET_SET_CURRENT_FUNCTION 16805#define TARGET_SET_CURRENT_FUNCTION s390_set_current_function 16806 16807#if S390_USE_TARGET_ATTRIBUTE 16808#undef TARGET_OPTION_VALID_ATTRIBUTE_P 16809#define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p 16810 16811#undef TARGET_CAN_INLINE_P 16812#define TARGET_CAN_INLINE_P s390_can_inline_p 16813#endif 16814 16815#undef TARGET_OPTION_RESTORE 16816#define TARGET_OPTION_RESTORE s390_function_specific_restore 16817 16818#undef TARGET_CAN_CHANGE_MODE_CLASS 16819#define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class 16820 16821#undef TARGET_CONSTANT_ALIGNMENT 16822#define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment 16823 16824#undef TARGET_ASM_CODE_END 16825#define TARGET_ASM_CODE_END s390_code_end 16826 16827#undef TARGET_CASE_VALUES_THRESHOLD 16828#define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold 16829 16830#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK 16831#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \ 16832 s390_sched_dependencies_evaluation 16833 16834#undef TARGET_SHIFT_TRUNCATION_MASK 16835#define TARGET_SHIFT_TRUNCATION_MASK s390_shift_truncation_mask 16836 16837/* Use only short displacement, since long displacement is not available for 16838 the floating point instructions. */ 16839#undef TARGET_MAX_ANCHOR_OFFSET 16840#define TARGET_MAX_ANCHOR_OFFSET 0xfff 16841 16842struct gcc_target targetm = TARGET_INITIALIZER; 16843 16844#include "gt-s390.h" 16845