1/* Subroutines for insn-output.c for SPARC. 2 Copyright (C) 1987-2020 Free Software Foundation, Inc. 3 Contributed by Michael Tiemann (tiemann@cygnus.com) 4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans, 5 at Cygnus Support. 6 7This file is part of GCC. 8 9GCC is free software; you can redistribute it and/or modify 10it under the terms of the GNU General Public License as published by 11the Free Software Foundation; either version 3, or (at your option) 12any later version. 13 14GCC is distributed in the hope that it will be useful, 15but WITHOUT ANY WARRANTY; without even the implied warranty of 16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17GNU General Public License for more details. 18 19You should have received a copy of the GNU General Public License 20along with GCC; see the file COPYING3. If not see 21<http://www.gnu.org/licenses/>. */ 22 23#define IN_TARGET_CODE 1 24 25#include "config.h" 26#include "system.h" 27#include "coretypes.h" 28#include "backend.h" 29#include "target.h" 30#include "rtl.h" 31#include "tree.h" 32#include "memmodel.h" 33#include "gimple.h" 34#include "df.h" 35#include "tm_p.h" 36#include "stringpool.h" 37#include "attribs.h" 38#include "expmed.h" 39#include "optabs.h" 40#include "regs.h" 41#include "emit-rtl.h" 42#include "recog.h" 43#include "diagnostic-core.h" 44#include "alias.h" 45#include "fold-const.h" 46#include "stor-layout.h" 47#include "calls.h" 48#include "varasm.h" 49#include "output.h" 50#include "insn-attr.h" 51#include "explow.h" 52#include "expr.h" 53#include "debug.h" 54#include "cfgrtl.h" 55#include "common/common-target.h" 56#include "gimplify.h" 57#include "langhooks.h" 58#include "reload.h" 59#include "tree-pass.h" 60#include "context.h" 61#include "builtins.h" 62#include "tree-vector-builder.h" 63#include "opts.h" 64 65/* This file should be included last. 
*/
#include "target-def.h"

/* Processor costs.  All latencies are expressed with COSTS_N_INSNS;
   int_mul_bit_factor, shift_penalty and branch_cost are plain counts.  */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply costs is fixed,
     and not variable.  */
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;

  /* cost of a (predictable) branch.  */
  const int branch_cost;
};

static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs leon5_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (18), /* fdivd */
  COSTS_N_INSNS (25), /* fsqrts */
  COSTS_N_INSNS (26), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
  2 /* branch cost */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
  2 /* branch cost */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  4 /* branch cost */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  5 /* branch cost */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  5 /* branch cost */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  2 /* branch cost */
};

static const
struct processor_costs niagara7_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  1 /* branch cost */
};

static const
struct processor_costs m8_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (26), /* fdivs */
  COSTS_N_INSNS (30), /* fdivd */
  COSTS_N_INSNS (33), /* fsqrts */
  COSTS_N_INSNS (41), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (10), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (57), /* udiv/sdiv */
  COSTS_N_INSNS (30), /* udivx/sdivx */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  1 /* branch cost */
};

/* Active cost table; repointed when the target CPU is selected.  */
static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
   somebody does not branch between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.
*/
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

/* Per-function machine-dependent state, hung off cfun->machine.  */
struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

/* Shorthand accessors for the current function's machine state.  */
#define sparc_frame_size cfun->machine->frame_size
#define sparc_apparent_frame_size cfun->machine->apparent_frame_size
#define sparc_frame_base_reg cfun->machine->frame_base_reg
#define sparc_frame_base_offset cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

/* Forward declarations for static functions defined below.  */
static void sparc_option_override (void);
static void sparc_init_modes (void);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
static int leon5_adjust_cost (rtx_insn *, int, rtx_insn *, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *);
static void sparc_asm_function_epilogue (FILE *);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						 int *, const_tree, int);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     const function_arg_info &);
static void sparc_function_arg_advance (cumulative_args_t,
					const function_arg_info &);
static rtx sparc_function_arg (cumulative_args_t, const function_arg_info &);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					const function_arg_info &);
static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    const function_arg_info &);
static bool sparc_return_in_memory (const_tree, const_tree);
static rtx sparc_struct_value_rtx (tree, int);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static void sparc_conditional_register_usage (void);
static bool sparc_use_pseudo_pic_reg (void);
static void sparc_init_pic_reg (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (scalar_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_lra_p (void);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, machine_mode, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
					   reg_class_t);
static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
static unsigned int sparc_min_arithmetic_precision (void);
static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
static bool sparc_modes_tieable_p (machine_mode, machine_mode);
static bool sparc_can_change_mode_class (machine_mode, machine_mode,
					 reg_class_t);
static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
					    const vec_perm_indices &);
static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
      do_diagnostic, handler, exclude } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
#endif

/* NOTE(review): one flag per printed hard register; set and read elsewhere
   in this file (not visible in this chunk) — confirm semantics there.  */
char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.
*/ 754#undef TARGET_ASM_ALIGNED_HI_OP 755#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" 756 757#undef TARGET_ASM_UNALIGNED_HI_OP 758#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t" 759#undef TARGET_ASM_UNALIGNED_SI_OP 760#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t" 761#undef TARGET_ASM_UNALIGNED_DI_OP 762#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t" 763 764/* The target hook has to handle DI-mode values. */ 765#undef TARGET_ASM_INTEGER 766#define TARGET_ASM_INTEGER sparc_assemble_integer 767 768#undef TARGET_ASM_FUNCTION_PROLOGUE 769#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue 770#undef TARGET_ASM_FUNCTION_EPILOGUE 771#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue 772 773#undef TARGET_SCHED_ADJUST_COST 774#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost 775#undef TARGET_SCHED_ISSUE_RATE 776#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate 777#undef TARGET_SCHED_INIT 778#define TARGET_SCHED_INIT sparc_sched_init 779#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD 780#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead 781 782#undef TARGET_FUNCTION_OK_FOR_SIBCALL 783#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall 784 785#undef TARGET_INIT_LIBFUNCS 786#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs 787 788#undef TARGET_LEGITIMIZE_ADDRESS 789#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address 790#undef TARGET_DELEGITIMIZE_ADDRESS 791#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address 792#undef TARGET_MODE_DEPENDENT_ADDRESS_P 793#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p 794 795#undef TARGET_INIT_BUILTINS 796#define TARGET_INIT_BUILTINS sparc_init_builtins 797#undef TARGET_BUILTIN_DECL 798#define TARGET_BUILTIN_DECL sparc_builtin_decl 799#undef TARGET_EXPAND_BUILTIN 800#define TARGET_EXPAND_BUILTIN sparc_expand_builtin 801#undef TARGET_FOLD_BUILTIN 802#define TARGET_FOLD_BUILTIN sparc_fold_builtin 803 804#if TARGET_TLS 
805#undef TARGET_HAVE_TLS 806#define TARGET_HAVE_TLS true 807#endif 808 809#undef TARGET_CANNOT_FORCE_CONST_MEM 810#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem 811 812#undef TARGET_ASM_OUTPUT_MI_THUNK 813#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk 814#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 815#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk 816 817#undef TARGET_RTX_COSTS 818#define TARGET_RTX_COSTS sparc_rtx_costs 819#undef TARGET_ADDRESS_COST 820#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 821#undef TARGET_REGISTER_MOVE_COST 822#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost 823 824#undef TARGET_PROMOTE_FUNCTION_MODE 825#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode 826#undef TARGET_STRICT_ARGUMENT_NAMING 827#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming 828 829#undef TARGET_MUST_PASS_IN_STACK 830#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size 831#undef TARGET_PASS_BY_REFERENCE 832#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference 833#undef TARGET_ARG_PARTIAL_BYTES 834#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes 835#undef TARGET_FUNCTION_ARG_ADVANCE 836#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance 837#undef TARGET_FUNCTION_ARG 838#define TARGET_FUNCTION_ARG sparc_function_arg 839#undef TARGET_FUNCTION_INCOMING_ARG 840#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg 841#undef TARGET_FUNCTION_ARG_PADDING 842#define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding 843#undef TARGET_FUNCTION_ARG_BOUNDARY 844#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary 845 846#undef TARGET_RETURN_IN_MEMORY 847#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory 848#undef TARGET_STRUCT_VALUE_RTX 849#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx 850#undef TARGET_FUNCTION_VALUE 851#define TARGET_FUNCTION_VALUE sparc_function_value 852#undef TARGET_LIBCALL_VALUE 853#define 
TARGET_LIBCALL_VALUE sparc_libcall_value 854#undef TARGET_FUNCTION_VALUE_REGNO_P 855#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p 856 857#undef TARGET_EXPAND_BUILTIN_SAVEREGS 858#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs 859 860#undef TARGET_ASAN_SHADOW_OFFSET 861#define TARGET_ASAN_SHADOW_OFFSET sparc_asan_shadow_offset 862 863#undef TARGET_EXPAND_BUILTIN_VA_START 864#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start 865#undef TARGET_GIMPLIFY_VA_ARG_EXPR 866#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg 867 868#undef TARGET_VECTOR_MODE_SUPPORTED_P 869#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p 870 871#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE 872#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode 873 874#ifdef SUBTARGET_INSERT_ATTRIBUTES 875#undef TARGET_INSERT_ATTRIBUTES 876#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES 877#endif 878 879#ifdef SUBTARGET_ATTRIBUTE_TABLE 880#undef TARGET_ATTRIBUTE_TABLE 881#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table 882#endif 883 884#undef TARGET_OPTION_OVERRIDE 885#define TARGET_OPTION_OVERRIDE sparc_option_override 886 887#ifdef TARGET_THREAD_SSP_OFFSET 888#undef TARGET_STACK_PROTECT_GUARD 889#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null 890#endif 891 892#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL) 893#undef TARGET_ASM_OUTPUT_DWARF_DTPREL 894#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel 895#endif 896 897#undef TARGET_ASM_FILE_END 898#define TARGET_ASM_FILE_END sparc_file_end 899 900#undef TARGET_FRAME_POINTER_REQUIRED 901#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required 902 903#undef TARGET_CAN_ELIMINATE 904#define TARGET_CAN_ELIMINATE sparc_can_eliminate 905 906#undef TARGET_PREFERRED_RELOAD_CLASS 907#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class 908 909#undef TARGET_SECONDARY_RELOAD 910#define TARGET_SECONDARY_RELOAD 
sparc_secondary_reload 911#undef TARGET_SECONDARY_MEMORY_NEEDED 912#define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed 913#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE 914#define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode 915 916#undef TARGET_CONDITIONAL_REGISTER_USAGE 917#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage 918 919#undef TARGET_INIT_PIC_REG 920#define TARGET_INIT_PIC_REG sparc_init_pic_reg 921 922#undef TARGET_USE_PSEUDO_PIC_REG 923#define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg 924 925#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING 926#undef TARGET_MANGLE_TYPE 927#define TARGET_MANGLE_TYPE sparc_mangle_type 928#endif 929 930#undef TARGET_LRA_P 931#define TARGET_LRA_P sparc_lra_p 932 933#undef TARGET_LEGITIMATE_ADDRESS_P 934#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p 935 936#undef TARGET_LEGITIMATE_CONSTANT_P 937#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p 938 939#undef TARGET_TRAMPOLINE_INIT 940#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init 941 942#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P 943#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p 944#undef TARGET_PRINT_OPERAND 945#define TARGET_PRINT_OPERAND sparc_print_operand 946#undef TARGET_PRINT_OPERAND_ADDRESS 947#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address 948 949/* The value stored by LDSTUB. 
*/
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs

#undef TARGET_MIN_ARITHMETIC_PRECISION
#define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const

#undef TARGET_CAN_FOLLOW_JUMP
#define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump

/* The target structure, initialized from the hook macros above.  */
struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  /* Look through a (sign/zero) extension of a memory operand, so that
     extending loads are also recognized as memory references.  */
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}

/* True if any of INSN's source register(s) is REG.
*/ 1005 1006static bool 1007insn_uses_reg_p (rtx_insn *insn, unsigned int reg) 1008{ 1009 extract_insn (insn); 1010 return ((REG_P (recog_data.operand[1]) 1011 && REGNO (recog_data.operand[1]) == reg) 1012 || (recog_data.n_operands == 3 1013 && REG_P (recog_data.operand[2]) 1014 && REGNO (recog_data.operand[2]) == reg)); 1015} 1016 1017/* True if INSN is a floating-point division or square-root. */ 1018 1019static bool 1020div_sqrt_insn_p (rtx_insn *insn) 1021{ 1022 if (GET_CODE (PATTERN (insn)) != SET) 1023 return false; 1024 1025 switch (get_attr_type (insn)) 1026 { 1027 case TYPE_FPDIVS: 1028 case TYPE_FPSQRTS: 1029 case TYPE_FPDIVD: 1030 case TYPE_FPSQRTD: 1031 return true; 1032 default: 1033 return false; 1034 } 1035} 1036 1037/* True if INSN is a floating-point instruction. */ 1038 1039static bool 1040fpop_insn_p (rtx_insn *insn) 1041{ 1042 if (GET_CODE (PATTERN (insn)) != SET) 1043 return false; 1044 1045 switch (get_attr_type (insn)) 1046 { 1047 case TYPE_FPMOVE: 1048 case TYPE_FPCMOVE: 1049 case TYPE_FP: 1050 case TYPE_FPCMP: 1051 case TYPE_FPMUL: 1052 case TYPE_FPDIVS: 1053 case TYPE_FPSQRTS: 1054 case TYPE_FPDIVD: 1055 case TYPE_FPSQRTD: 1056 return true; 1057 default: 1058 return false; 1059 } 1060} 1061 1062/* True if INSN is an atomic instruction. */ 1063 1064static bool 1065atomic_insn_for_leon3_p (rtx_insn *insn) 1066{ 1067 switch (INSN_CODE (insn)) 1068 { 1069 case CODE_FOR_swapsi: 1070 case CODE_FOR_ldstub: 1071 case CODE_FOR_atomic_compare_and_swap_leon3_1: 1072 return true; 1073 default: 1074 return false; 1075 } 1076} 1077 1078/* True if INSN is a store instruction. */ 1079 1080static bool 1081store_insn_p (rtx_insn *insn) 1082{ 1083 if (GET_CODE (PATTERN (insn)) != SET) 1084 return false; 1085 1086 switch (get_attr_type (insn)) 1087 { 1088 case TYPE_STORE: 1089 case TYPE_FPSTORE: 1090 return true; 1091 default: 1092 return false; 1093 } 1094} 1095 1096/* True if INSN is a load instruction. 
*/

static bool
load_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_LOAD:
    case TYPE_SLOAD:
    case TYPE_FPLOAD:
      return true;
    default:
      return false;
    }
}

/* We use a machine specific pass to enable workarounds for errata.

   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  */

/* True if INSN is a md pattern or asm statement.  */
#define USEFUL_INSN_P(INSN)			\
  (NONDEBUG_INSN_P (INSN)			\
   && GET_CODE (PATTERN (INSN)) != USE		\
   && GET_CODE (PATTERN (INSN)) != CLOBBER)

/* Return the next active insn after INSN, skipping empty inline asm
   statements, asm input patterns and unspec-volatile patterns, which do
   not generate any machine instruction.  */

rtx_insn *
next_active_non_empty_insn (rtx_insn *insn)
{
  insn = next_active_insn (insn);

  while (insn
	 && (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	     || GET_CODE (PATTERN (insn)) == ASM_INPUT
	     || (USEFUL_INSN_P (insn)
		 && (asm_noperands (PATTERN (insn)) >= 0)
		 && !strcmp (decode_asm_operands (PATTERN (insn),
						  NULL, NULL, NULL,
						  NULL, NULL), ""))))
    insn = next_active_insn (insn);

  return insn;
}

/* Scan the insn stream and insert NOPs where required by the enabled
   errata workarounds (AT697F, UT699, UT700, GR712RC, back-to-back store
   and lost div/sqrt).  Returns 0 (no TODO flags).  */

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;
  bool find_first_useful = true;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;
      rtx_insn *jump;
      rtx_sequence *seq;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn)
	  && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
	{
	  jump = seq->insn (0);
	  insn = seq->insn (1);
	}
      else if (JUMP_P (insn))
	jump = insn;
      else
	jump = NULL;

      /* Do not begin function with atomic instruction.  */
      if (sparc_fix_ut700
	  && find_first_useful
	  && USEFUL_INSN_P (insn))
	{
	  find_first_useful = false;
	  if (atomic_insn_for_leon3_p (insn))
	    emit_insn_before (gen_nop (), insn);
	}

      /* Place a NOP at the branch target of an integer branch if it is a
	 floating-point operation or a floating-point branch.  */
      if (sparc_fix_gr712rc
	  && jump
	  && jump_to_label_p (jump)
	  && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
	{
	  rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
	  if (target
	      && (fpop_insn_p (target)
		  || (JUMP_P (target)
		      && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
	    emit_insn_before (gen_nop (), target);
	}

      /* Insert a NOP between load instruction and atomic instruction.  Insert
	 a NOP at branch target if there is a load in delay slot and an atomic
	 instruction at branch target.  */
      if (sparc_fix_ut700
	  && NONJUMP_INSN_P (insn)
	  && load_insn_p (insn))
	{
	  if (jump && jump_to_label_p (jump))
	    {
	      rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
	      if (target && atomic_insn_for_leon3_p (target))
		emit_insn_before (gen_nop (), target);
	    }

	  next = next_active_non_empty_insn (insn);
	  if (!next)
	    break;

	  if (atomic_insn_for_leon3_p (next))
	    insert_nop = true;
	}

      /* Look for a sequence that starts with a fdiv or fsqrt instruction and
	 ends with another fdiv or fsqrt instruction with no dependencies on
	 the former, along with an appropriate pattern in between.  */
      if (sparc_fix_lost_divsqrt
	  && NONJUMP_INSN_P (insn)
	  && div_sqrt_insn_p (insn))
	{
	  int i;
	  int fp_found = 0;
	  rtx_insn *after;

	  const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));

	  next = next_active_insn (insn);
	  if (!next)
	    break;

	  /* Scan a window of up to 4 insns after the div/sqrt.  */
	  for (after = next, i = 0; i < 4; i++)
	    {
	      /* Count floating-point operations.  */
	      if (i != 3 && fpop_insn_p (after))
		{
		  /* If the insn uses the destination register of
		     the div/sqrt, then it cannot be problematic.  */
		  if (insn_uses_reg_p (after, dest_reg))
		    break;
		  fp_found++;
		}

	      /* Count floating-point loads.  */
	      if (i != 3
		  && (set = single_set (after)) != NULL_RTX
		  && REG_P (SET_DEST (set))
		  && REGNO (SET_DEST (set)) > 31)
		{
		  /* If the insn uses the destination register of
		     the div/sqrt, then it cannot be problematic.  */
		  if (REGNO (SET_DEST (set)) == dest_reg)
		    break;
		  fp_found++;
		}

	      /* Check if this is a problematic sequence.  */
	      if (i > 1
		  && fp_found >= 2
		  && div_sqrt_insn_p (after))
		{
		  /* If this is the short version of the problematic
		     sequence we add two NOPs in a row to also prevent
		     the long version.  */
		  if (i == 2)
		    emit_insn_before (gen_nop (), next);
		  insert_nop = true;
		  break;
		}

	      /* No need to scan past a second div/sqrt.  */
	      if (div_sqrt_insn_p (after))
		break;

	      /* Insert NOP before branch.  */
	      if (i < 3
		  && (!NONJUMP_INSN_P (after)
		      || GET_CODE (PATTERN (after)) == SEQUENCE))
		{
		  insert_nop = true;
		  break;
		}

	      after = next_active_insn (after);
	      if (!after)
		break;
	    }
	}

      /* Look for either of these two sequences:

	 Sequence A:
	 1. store of word size or less (e.g. st / stb / sth / stf)
	 2. any single instruction that is not a load or store
	 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)

	 Sequence B:
	 1. store of double word size (e.g. std / stdf)
	 2. any store instruction (e.g. st / stb / sth / stf / std / stdf)  */
      if (sparc_fix_b2bst
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && store_insn_p (insn))
	{
	  /* Sequence B begins with a double-word store.  */
	  bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
	  rtx_insn *after;
	  int i;

	  next = next_active_non_empty_insn (insn);
	  if (!next)
	    break;

	  for (after = next, i = 0; i < 2; i++)
	    {
	      /* If the insn is a branch, then it cannot be problematic.  */
	      if (!NONJUMP_INSN_P (after)
		  || GET_CODE (PATTERN (after)) == SEQUENCE)
		break;

	      /* Sequence B is only two instructions long.  */
	      if (seq_b)
		{
		  /* Add NOP if followed by a store.  */
		  if (store_insn_p (after))
		    insert_nop = true;

		  /* Otherwise it is ok.  */
		  break;
		}

	      /* If the second instruction is a load or a store,
		 then the sequence cannot be problematic.  */
	      if (i == 0)
		{
		  if ((set = single_set (after)) != NULL_RTX
		      && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
		    break;

		  after = next_active_non_empty_insn (after);
		  if (!after)
		    break;
		}

	      /* Add NOP if third instruction is a store.  */
	      if (i == 1
		  && store_insn_p (after))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load into an odd-numbered FP register.  */
      else if (sparc_fix_at697f
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && mem_ref (SET_SRC (set))
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31
	       && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
				 ld [address], %fx+1
				 FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
			     ld [address], %fx+1
			     FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
			 ld [address], %fx+1
			 fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && (mem_ref (SET_SRC (set)) != NULL_RTX
		   || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;

	      /* GOT accesses use LD.  */
	      else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
		       && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (rtx_sequence *seq =
			 dyn_cast <rtx_sequence *> (PATTERN (after)))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = seq->insn (1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}

namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  /* Run the pass only if at least one errata workaround is enabled.  */
  virtual bool gate (function *)
    {
      return sparc_fix_at697f
	  || sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc
	  || sparc_fix_b2bst || sparc_fix_lost_divsqrt;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace

/* Create an instance of the errata workaround pass.  */

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}

/* Helpers for TARGET_DEBUG_OPTIONS.
*/ 1637static void 1638dump_target_flag_bits (const int flags) 1639{ 1640 if (flags & MASK_64BIT) 1641 fprintf (stderr, "64BIT "); 1642 if (flags & MASK_APP_REGS) 1643 fprintf (stderr, "APP_REGS "); 1644 if (flags & MASK_FASTER_STRUCTS) 1645 fprintf (stderr, "FASTER_STRUCTS "); 1646 if (flags & MASK_FLAT) 1647 fprintf (stderr, "FLAT "); 1648 if (flags & MASK_FMAF) 1649 fprintf (stderr, "FMAF "); 1650 if (flags & MASK_FSMULD) 1651 fprintf (stderr, "FSMULD "); 1652 if (flags & MASK_FPU) 1653 fprintf (stderr, "FPU "); 1654 if (flags & MASK_HARD_QUAD) 1655 fprintf (stderr, "HARD_QUAD "); 1656 if (flags & MASK_POPC) 1657 fprintf (stderr, "POPC "); 1658 if (flags & MASK_PTR64) 1659 fprintf (stderr, "PTR64 "); 1660 if (flags & MASK_STACK_BIAS) 1661 fprintf (stderr, "STACK_BIAS "); 1662 if (flags & MASK_UNALIGNED_DOUBLES) 1663 fprintf (stderr, "UNALIGNED_DOUBLES "); 1664 if (flags & MASK_V8PLUS) 1665 fprintf (stderr, "V8PLUS "); 1666 if (flags & MASK_VIS) 1667 fprintf (stderr, "VIS "); 1668 if (flags & MASK_VIS2) 1669 fprintf (stderr, "VIS2 "); 1670 if (flags & MASK_VIS3) 1671 fprintf (stderr, "VIS3 "); 1672 if (flags & MASK_VIS4) 1673 fprintf (stderr, "VIS4 "); 1674 if (flags & MASK_VIS4B) 1675 fprintf (stderr, "VIS4B "); 1676 if (flags & MASK_CBCOND) 1677 fprintf (stderr, "CBCOND "); 1678 if (flags & MASK_DEPRECATED_V8_INSNS) 1679 fprintf (stderr, "DEPRECATED_V8_INSNS "); 1680 if (flags & MASK_LEON) 1681 fprintf (stderr, "LEON "); 1682 if (flags & MASK_LEON3) 1683 fprintf (stderr, "LEON3 "); 1684 if (flags & MASK_SPARCLET) 1685 fprintf (stderr, "SPARCLET "); 1686 if (flags & MASK_SPARCLITE) 1687 fprintf (stderr, "SPARCLITE "); 1688 if (flags & MASK_V8) 1689 fprintf (stderr, "V8 "); 1690 if (flags & MASK_V9) 1691 fprintf (stderr, "V9 "); 1692} 1693 1694static void 1695dump_target_flags (const char *prefix, const int flags) 1696{ 1697 fprintf (stderr, "%s: (%08x) [ ", prefix, flags); 1698 dump_target_flag_bits (flags); 1699 fprintf(stderr, "]\n"); 1700} 1701 1702/* 
Validate and override various options, and do some machine dependent 1703 initialization. */ 1704 1705static void 1706sparc_option_override (void) 1707{ 1708 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */ 1709 static struct cpu_default { 1710 const int cpu; 1711 const enum sparc_processor_type processor; 1712 } const cpu_default[] = { 1713 /* There must be one entry here for each TARGET_CPU value. */ 1714 { TARGET_CPU_sparc, PROCESSOR_CYPRESS }, 1715 { TARGET_CPU_v8, PROCESSOR_V8 }, 1716 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC }, 1717 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC }, 1718 { TARGET_CPU_leon, PROCESSOR_LEON }, 1719 { TARGET_CPU_leon3, PROCESSOR_LEON3 }, 1720 { TARGET_CPU_leon5, PROCESSOR_LEON5 }, 1721 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 }, 1722 { TARGET_CPU_sparclite, PROCESSOR_F930 }, 1723 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X }, 1724 { TARGET_CPU_sparclet, PROCESSOR_TSC701 }, 1725 { TARGET_CPU_v9, PROCESSOR_V9 }, 1726 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC }, 1727 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 }, 1728 { TARGET_CPU_niagara, PROCESSOR_NIAGARA }, 1729 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 }, 1730 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 }, 1731 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 }, 1732 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 }, 1733 { TARGET_CPU_m8, PROCESSOR_M8 }, 1734 { -1, PROCESSOR_V7 } 1735 }; 1736 const struct cpu_default *def; 1737 /* Table of values for -m{cpu,tune}=. This must match the order of 1738 the enum processor_type in sparc-opts.h. 
*/ 1739 static struct cpu_table { 1740 const char *const name; 1741 const int disable; 1742 const int enable; 1743 } const cpu_table[] = { 1744 { "v7", MASK_ISA, 0 }, 1745 { "cypress", MASK_ISA, 0 }, 1746 { "v8", MASK_ISA, MASK_V8 }, 1747 /* TI TMS390Z55 supersparc */ 1748 { "supersparc", MASK_ISA, MASK_V8 }, 1749 { "hypersparc", MASK_ISA, MASK_V8 }, 1750 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON }, 1751 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 }, 1752 { "leon5", MASK_ISA, MASK_V8|MASK_LEON3 }, 1753 { "leon3v7", MASK_ISA, MASK_LEON3 }, 1754 { "sparclite", MASK_ISA, MASK_SPARCLITE }, 1755 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */ 1756 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE }, 1757 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */ 1758 { "f934", MASK_ISA, MASK_SPARCLITE }, 1759 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE }, 1760 { "sparclet", MASK_ISA, MASK_SPARCLET }, 1761 /* TEMIC sparclet */ 1762 { "tsc701", MASK_ISA, MASK_SPARCLET }, 1763 { "v9", MASK_ISA, MASK_V9 }, 1764 /* UltraSPARC I, II, IIi */ 1765 { "ultrasparc", MASK_ISA, 1766 /* Although insns using %y are deprecated, it is a clear win. */ 1767 MASK_V9|MASK_DEPRECATED_V8_INSNS }, 1768 /* UltraSPARC III */ 1769 /* ??? Check if %y issue still holds true. 
*/ 1770 { "ultrasparc3", MASK_ISA, 1771 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 }, 1772 /* UltraSPARC T1 */ 1773 { "niagara", MASK_ISA, 1774 MASK_V9|MASK_DEPRECATED_V8_INSNS }, 1775 /* UltraSPARC T2 */ 1776 { "niagara2", MASK_ISA, 1777 MASK_V9|MASK_POPC|MASK_VIS2 }, 1778 /* UltraSPARC T3 */ 1779 { "niagara3", MASK_ISA, 1780 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF }, 1781 /* UltraSPARC T4 */ 1782 { "niagara4", MASK_ISA, 1783 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND }, 1784 /* UltraSPARC M7 */ 1785 { "niagara7", MASK_ISA, 1786 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }, 1787 /* UltraSPARC M8 */ 1788 { "m8", MASK_ISA, 1789 MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC } 1790 }; 1791 const struct cpu_table *cpu; 1792 unsigned int i; 1793 1794 if (sparc_debug_string != NULL) 1795 { 1796 const char *q; 1797 char *p; 1798 1799 p = ASTRDUP (sparc_debug_string); 1800 while ((q = strtok (p, ",")) != NULL) 1801 { 1802 bool invert; 1803 int mask; 1804 1805 p = NULL; 1806 if (*q == '!') 1807 { 1808 invert = true; 1809 q++; 1810 } 1811 else 1812 invert = false; 1813 1814 if (! strcmp (q, "all")) 1815 mask = MASK_DEBUG_ALL; 1816 else if (! strcmp (q, "options")) 1817 mask = MASK_DEBUG_OPTIONS; 1818 else 1819 error ("unknown %<-mdebug-%s%> switch", q); 1820 1821 if (invert) 1822 sparc_debug &= ~mask; 1823 else 1824 sparc_debug |= mask; 1825 } 1826 } 1827 1828 /* Enable the FsMULd instruction by default if not explicitly specified by 1829 the user. It may be later disabled by the CPU (explicitly or not). 
*/ 1830 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD)) 1831 target_flags |= MASK_FSMULD; 1832 1833 if (TARGET_DEBUG_OPTIONS) 1834 { 1835 dump_target_flags("Initial target_flags", target_flags); 1836 dump_target_flags("target_flags_explicit", target_flags_explicit); 1837 } 1838 1839#ifdef SUBTARGET_OVERRIDE_OPTIONS 1840 SUBTARGET_OVERRIDE_OPTIONS; 1841#endif 1842 1843#ifndef SPARC_BI_ARCH 1844 /* Check for unsupported architecture size. */ 1845 if (!TARGET_64BIT != DEFAULT_ARCH32_P) 1846 error ("%s is not supported by this configuration", 1847 DEFAULT_ARCH32_P ? "-m64" : "-m32"); 1848#endif 1849 1850 /* We force all 64bit archs to use 128 bit long double */ 1851 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128) 1852 { 1853 error ("%<-mlong-double-64%> not allowed with %<-m64%>"); 1854 target_flags |= MASK_LONG_DOUBLE_128; 1855 } 1856 1857 /* Check that -fcall-saved-REG wasn't specified for out registers. */ 1858 for (i = 8; i < 16; i++) 1859 if (!call_used_regs [i]) 1860 { 1861 error ("%<-fcall-saved-REG%> is not supported for out registers"); 1862 call_used_regs [i] = 1; 1863 } 1864 1865 /* Set the default CPU if no -mcpu option was specified. */ 1866 if (!global_options_set.x_sparc_cpu_and_features) 1867 { 1868 for (def = &cpu_default[0]; def->cpu != -1; ++def) 1869 if (def->cpu == TARGET_CPU_DEFAULT) 1870 break; 1871 gcc_assert (def->cpu != -1); 1872 sparc_cpu_and_features = def->processor; 1873 } 1874 1875 /* Set the default CPU if no -mtune option was specified. 
*/ 1876 if (!global_options_set.x_sparc_cpu) 1877 sparc_cpu = sparc_cpu_and_features; 1878 1879 cpu = &cpu_table[(int) sparc_cpu_and_features]; 1880 1881 if (TARGET_DEBUG_OPTIONS) 1882 { 1883 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name); 1884 dump_target_flags ("cpu->disable", cpu->disable); 1885 dump_target_flags ("cpu->enable", cpu->enable); 1886 } 1887 1888 target_flags &= ~cpu->disable; 1889 target_flags |= (cpu->enable 1890#ifndef HAVE_AS_FMAF_HPC_VIS3 1891 & ~(MASK_FMAF | MASK_VIS3) 1892#endif 1893#ifndef HAVE_AS_SPARC4 1894 & ~MASK_CBCOND 1895#endif 1896#ifndef HAVE_AS_SPARC5_VIS4 1897 & ~(MASK_VIS4 | MASK_SUBXC) 1898#endif 1899#ifndef HAVE_AS_SPARC6 1900 & ~(MASK_VIS4B) 1901#endif 1902#ifndef HAVE_AS_LEON 1903 & ~(MASK_LEON | MASK_LEON3) 1904#endif 1905 & ~(target_flags_explicit & MASK_FEATURES) 1906 ); 1907 1908 /* FsMULd is a V8 instruction. */ 1909 if (!TARGET_V8 && !TARGET_V9) 1910 target_flags &= ~MASK_FSMULD; 1911 1912 /* -mvis2 implies -mvis. */ 1913 if (TARGET_VIS2) 1914 target_flags |= MASK_VIS; 1915 1916 /* -mvis3 implies -mvis2 and -mvis. */ 1917 if (TARGET_VIS3) 1918 target_flags |= MASK_VIS2 | MASK_VIS; 1919 1920 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */ 1921 if (TARGET_VIS4) 1922 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS; 1923 1924 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */ 1925 if (TARGET_VIS4B) 1926 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS; 1927 1928 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if 1929 FPU is disabled. */ 1930 if (!TARGET_FPU) 1931 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4 1932 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD); 1933 1934 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions 1935 are available; -m64 also implies v9. 
*/ 1936 if (TARGET_VIS || TARGET_ARCH64) 1937 { 1938 target_flags |= MASK_V9; 1939 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE); 1940 } 1941 1942 /* -mvis also implies -mv8plus on 32-bit. */ 1943 if (TARGET_VIS && !TARGET_ARCH64) 1944 target_flags |= MASK_V8PLUS; 1945 1946 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */ 1947 if (TARGET_V9 && TARGET_ARCH32) 1948 target_flags |= MASK_DEPRECATED_V8_INSNS; 1949 1950 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */ 1951 if (!TARGET_V9 || TARGET_ARCH64) 1952 target_flags &= ~MASK_V8PLUS; 1953 1954 /* Don't use stack biasing in 32-bit mode. */ 1955 if (TARGET_ARCH32) 1956 target_flags &= ~MASK_STACK_BIAS; 1957 1958 /* Use LRA instead of reload, unless otherwise instructed. */ 1959 if (!(target_flags_explicit & MASK_LRA)) 1960 target_flags |= MASK_LRA; 1961 1962 /* Enable applicable errata workarounds for LEON3FT. */ 1963 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc) 1964 { 1965 sparc_fix_b2bst = 1; 1966 sparc_fix_lost_divsqrt = 1; 1967 } 1968 1969 /* Disable FsMULd for the UT699 since it doesn't work correctly. */ 1970 if (sparc_fix_ut699) 1971 target_flags &= ~MASK_FSMULD; 1972 1973#ifdef TARGET_DEFAULT_LONG_DOUBLE_128 1974 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128)) 1975 target_flags |= MASK_LONG_DOUBLE_128; 1976#endif 1977 1978 if (TARGET_DEBUG_OPTIONS) 1979 dump_target_flags ("Final target_flags", target_flags); 1980 1981 /* Set the code model if no -mcmodel option was specified. */ 1982 if (global_options_set.x_sparc_code_model) 1983 { 1984 if (TARGET_ARCH32) 1985 error ("%<-mcmodel=%> is not supported in 32-bit mode"); 1986 } 1987 else 1988 { 1989 if (TARGET_ARCH32) 1990 sparc_code_model = CM_32; 1991 else 1992 sparc_code_model = SPARC_DEFAULT_CMODEL; 1993 } 1994 1995 /* Set the memory model if no -mmemory-model option was specified. */ 1996 if (!global_options_set.x_sparc_memory_model) 1997 { 1998 /* Choose the memory model for the operating system. 
*/ 1999 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL; 2000 if (os_default != SMM_DEFAULT) 2001 sparc_memory_model = os_default; 2002 /* Choose the most relaxed model for the processor. */ 2003 else if (TARGET_V9) 2004 sparc_memory_model = SMM_RMO; 2005 else if (TARGET_LEON3) 2006 sparc_memory_model = SMM_TSO; 2007 else if (TARGET_LEON) 2008 sparc_memory_model = SMM_SC; 2009 else if (TARGET_V8) 2010 sparc_memory_model = SMM_PSO; 2011 else 2012 sparc_memory_model = SMM_SC; 2013 } 2014 2015 /* Supply a default value for align_functions. */ 2016 if (flag_align_functions && !str_align_functions) 2017 { 2018 if (sparc_cpu == PROCESSOR_ULTRASPARC 2019 || sparc_cpu == PROCESSOR_ULTRASPARC3 2020 || sparc_cpu == PROCESSOR_NIAGARA 2021 || sparc_cpu == PROCESSOR_NIAGARA2 2022 || sparc_cpu == PROCESSOR_NIAGARA3 2023 || sparc_cpu == PROCESSOR_NIAGARA4) 2024 str_align_functions = "32"; 2025 else if (sparc_cpu == PROCESSOR_NIAGARA7 2026 || sparc_cpu == PROCESSOR_M8) 2027 str_align_functions = "64"; 2028 } 2029 2030 /* Validate PCC_STRUCT_RETURN. */ 2031 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN) 2032 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1); 2033 2034 /* Only use .uaxword when compiling for a 64-bit target. */ 2035 if (!TARGET_ARCH64) 2036 targetm.asm_out.unaligned_op.di = NULL; 2037 2038 /* Set the processor costs. 
*/ 2039 switch (sparc_cpu) 2040 { 2041 case PROCESSOR_V7: 2042 case PROCESSOR_CYPRESS: 2043 sparc_costs = &cypress_costs; 2044 break; 2045 case PROCESSOR_V8: 2046 case PROCESSOR_SPARCLITE: 2047 case PROCESSOR_SUPERSPARC: 2048 sparc_costs = &supersparc_costs; 2049 break; 2050 case PROCESSOR_F930: 2051 case PROCESSOR_F934: 2052 case PROCESSOR_HYPERSPARC: 2053 case PROCESSOR_SPARCLITE86X: 2054 sparc_costs = &hypersparc_costs; 2055 break; 2056 case PROCESSOR_LEON: 2057 sparc_costs = &leon_costs; 2058 break; 2059 case PROCESSOR_LEON3: 2060 case PROCESSOR_LEON3V7: 2061 sparc_costs = &leon3_costs; 2062 break; 2063 case PROCESSOR_LEON5: 2064 sparc_costs = &leon5_costs; 2065 break; 2066 case PROCESSOR_SPARCLET: 2067 case PROCESSOR_TSC701: 2068 sparc_costs = &sparclet_costs; 2069 break; 2070 case PROCESSOR_V9: 2071 case PROCESSOR_ULTRASPARC: 2072 sparc_costs = &ultrasparc_costs; 2073 break; 2074 case PROCESSOR_ULTRASPARC3: 2075 sparc_costs = &ultrasparc3_costs; 2076 break; 2077 case PROCESSOR_NIAGARA: 2078 sparc_costs = &niagara_costs; 2079 break; 2080 case PROCESSOR_NIAGARA2: 2081 sparc_costs = &niagara2_costs; 2082 break; 2083 case PROCESSOR_NIAGARA3: 2084 sparc_costs = &niagara3_costs; 2085 break; 2086 case PROCESSOR_NIAGARA4: 2087 sparc_costs = &niagara4_costs; 2088 break; 2089 case PROCESSOR_NIAGARA7: 2090 sparc_costs = &niagara7_costs; 2091 break; 2092 case PROCESSOR_M8: 2093 sparc_costs = &m8_costs; 2094 break; 2095 case PROCESSOR_NATIVE: 2096 gcc_unreachable (); 2097 }; 2098 2099 /* param_simultaneous_prefetches is the number of prefetches that 2100 can run at the same time. More important, it is the threshold 2101 defining when additional prefetches will be dropped by the 2102 hardware. 2103 2104 The UltraSPARC-III features a documented prefetch queue with a 2105 size of 8. Additional prefetches issued in the cpu are 2106 dropped. 2107 2108 Niagara processors are different. In these processors prefetches 2109 are handled much like regular loads. 
The L1 miss buffer is 32 2110 entries, but prefetches start getting affected when 30 entries 2111 become occupied. That occupation could be a mix of regular loads 2112 and prefetches though. And that buffer is shared by all threads. 2113 Once the threshold is reached, if the core is running a single 2114 thread the prefetch will retry. If more than one thread is 2115 running, the prefetch will be dropped. 2116 2117 All this makes it very difficult to determine how many 2118 simultaneous prefetches can be issued simultaneously, even in a 2119 single-threaded program. Experimental results show that setting 2120 this parameter to 32 works well when the number of threads is not 2121 high. */ 2122 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 2123 param_simultaneous_prefetches, 2124 ((sparc_cpu == PROCESSOR_ULTRASPARC 2125 || sparc_cpu == PROCESSOR_NIAGARA 2126 || sparc_cpu == PROCESSOR_NIAGARA2 2127 || sparc_cpu == PROCESSOR_NIAGARA3 2128 || sparc_cpu == PROCESSOR_NIAGARA4) 2129 ? 2 2130 : (sparc_cpu == PROCESSOR_ULTRASPARC3 2131 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7 2132 || sparc_cpu == PROCESSOR_M8) 2133 ? 32 : 3)))); 2134 2135 /* param_l1_cache_line_size is the size of the L1 cache line, in 2136 bytes. 2137 2138 The Oracle SPARC Architecture (previously the UltraSPARC 2139 Architecture) specification states that when a PREFETCH[A] 2140 instruction is executed an implementation-specific amount of data 2141 is prefetched, and that it is at least 64 bytes long (aligned to 2142 at least 64 bytes). 2143 2144 However, this is not correct. The M7 (and implementations prior 2145 to that) does not guarantee a 64B prefetch into a cache if the 2146 line size is smaller. A single cache line is all that is ever 2147 prefetched. So for the M7, where the L1D$ has 32B lines and the 2148 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the 2149 L2 and L3, but only 32B are brought into the L1D$. 
(Assuming it 2150 is a read_n prefetch, which is the only type which allocates to 2151 the L1.) */ 2152 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 2153 param_l1_cache_line_size, 2154 (sparc_cpu == PROCESSOR_M8 ? 64 : 32)); 2155 2156 /* param_l1_cache_size is the size of the L1D$ (most SPARC chips use 2157 Hardvard level-1 caches) in kilobytes. Both UltraSPARC and 2158 Niagara processors feature a L1D$ of 16KB. */ 2159 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 2160 param_l1_cache_size, 2161 ((sparc_cpu == PROCESSOR_ULTRASPARC 2162 || sparc_cpu == PROCESSOR_ULTRASPARC3 2163 || sparc_cpu == PROCESSOR_NIAGARA 2164 || sparc_cpu == PROCESSOR_NIAGARA2 2165 || sparc_cpu == PROCESSOR_NIAGARA3 2166 || sparc_cpu == PROCESSOR_NIAGARA4 2167 || sparc_cpu == PROCESSOR_NIAGARA7 2168 || sparc_cpu == PROCESSOR_M8) 2169 ? 16 : 64)); 2170 2171 /* param_l2_cache_size is the size fo the L2 in kilobytes. Note 2172 that 512 is the default in params.def. */ 2173 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 2174 param_l2_cache_size, 2175 ((sparc_cpu == PROCESSOR_NIAGARA4 2176 || sparc_cpu == PROCESSOR_M8) 2177 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7 2178 ? 256 : 512))); 2179 2180 2181 /* Disable save slot sharing for call-clobbered registers by default. 2182 The IRA sharing algorithm works on single registers only and this 2183 pessimizes for double floating-point registers. */ 2184 if (!global_options_set.x_flag_ira_share_save_slots) 2185 flag_ira_share_save_slots = 0; 2186 2187 /* Only enable REE by default in 64-bit mode where it helps to eliminate 2188 redundant 32-to-64-bit extensions. */ 2189 if (!global_options_set.x_flag_ree && TARGET_ARCH32) 2190 flag_ree = 0; 2191 2192 /* Do various machine dependent initializations. */ 2193 sparc_init_modes (); 2194 2195 /* Set up function hooks. */ 2196 init_machine_status = sparc_init_machine_status; 2197} 2198 2199/* Miscellaneous utilities. 
*/ 2200 2201/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move 2202 or branch on register contents instructions. */ 2203 2204int 2205v9_regcmp_p (enum rtx_code code) 2206{ 2207 return (code == EQ || code == NE || code == GE || code == LT 2208 || code == LE || code == GT); 2209} 2210 2211/* Nonzero if OP is a floating point constant which can 2212 be loaded into an integer register using a single 2213 sethi instruction. */ 2214 2215int 2216fp_sethi_p (rtx op) 2217{ 2218 if (GET_CODE (op) == CONST_DOUBLE) 2219 { 2220 long i; 2221 2222 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i); 2223 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i); 2224 } 2225 2226 return 0; 2227} 2228 2229/* Nonzero if OP is a floating point constant which can 2230 be loaded into an integer register using a single 2231 mov instruction. */ 2232 2233int 2234fp_mov_p (rtx op) 2235{ 2236 if (GET_CODE (op) == CONST_DOUBLE) 2237 { 2238 long i; 2239 2240 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i); 2241 return SPARC_SIMM13_P (i); 2242 } 2243 2244 return 0; 2245} 2246 2247/* Nonzero if OP is a floating point constant which can 2248 be loaded into an integer register using a high/losum 2249 instruction sequence. */ 2250 2251int 2252fp_high_losum_p (rtx op) 2253{ 2254 /* The constraints calling this should only be in 2255 SFmode move insns, so any constant which cannot 2256 be moved using a single insn will do. */ 2257 if (GET_CODE (op) == CONST_DOUBLE) 2258 { 2259 long i; 2260 2261 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i); 2262 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i); 2263 } 2264 2265 return 0; 2266} 2267 2268/* Return true if the address of LABEL can be loaded by means of the 2269 mov{si,di}_pic_label_ref patterns in PIC mode. 
*/

static bool
can_use_mov_pic_label_ref (rtx label)
{
  /* VxWorks does not impose a fixed gap between segments; the run-time
     gap can be different from the object-file gap.  We therefore can't
     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
     are absolutely sure that X is in the same segment as the GOT.
     Unfortunately, the flexibility of linker scripts means that we
     can't be sure of that in general, so assume that GOT-relative
     accesses are never valid on VxWorks.  */
  if (TARGET_VXWORKS_RTP)
    return false;

  /* Similarly, if the label is non-local, it might end up being placed
     in a different section than the current one; now mov_pic_label_ref
     requires the label and the code to be in the same section.  */
  if (LABEL_REF_NONLOCAL_P (label))
    return false;

  /* Finally, if we are reordering basic blocks and partition into hot
     and cold sections, this might happen for any label.  */
  if (flag_reorder_blocks_and_partition)
    return false;

  return true;
}

/* Expand a move instruction.  Return true if all work is done.
   MODE is the mode of the move; OPERANDS[0] is the destination and
   OPERANDS[1] the source, and both may be rewritten in place.  A false
   return means the caller should emit the (possibly fixed-up) move
   itself.  */

bool
sparc_expand_move (machine_mode mode, rtx *operands)
{
  /* Handle sets of MEM first.  */
  if (GET_CODE (operands[0]) == MEM)
    {
      /* 0 is a register (or a pair of registers) on SPARC.  */
      if (register_or_zero_operand (operands[1], mode))
	return false;

      /* Otherwise force the source into a register; during reload we
	 must not create new pseudos, so leave the operands alone.  */
      if (!reload_in_progress)
	{
	  operands[0] = validize_mem (operands[0]);
	  operands[1] = force_reg (mode, operands[1]);
	}
    }

  /* Fix up TLS cases.  */
  if (TARGET_HAVE_TLS
      && CONSTANT_P (operands[1])
      && sparc_tls_referenced_p (operands [1]))
    {
      operands[1] = sparc_legitimize_tls_address (operands[1]);
      return false;
    }

  /* Fix up PIC cases.  */
  if (flag_pic && CONSTANT_P (operands[1]))
    {
      if (pic_address_needs_scratch (operands[1]))
	operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);

      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
      if ((GET_CODE (operands[1]) == LABEL_REF
	   && can_use_mov_pic_label_ref (operands[1]))
	  || (GET_CODE (operands[1]) == CONST
	      && GET_CODE (XEXP (operands[1], 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
	      && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
	      && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
	{
	  if (mode == SImode)
	    {
	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }

	  if (mode == DImode)
	    {
	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }
	}

      if (symbolic_operand (operands[1], mode))
	{
	  /* During reload, reuse the destination as the scratch register
	     since we may not create a new pseudo.  */
	  operands[1]
	    = sparc_legitimize_pic_address (operands[1],
					    reload_in_progress
					    ? operands[0] : NULL_RTX);
	  return false;
	}
    }

  /* If we are trying to toss an integer constant into FP registers,
     or loading a FP or vector constant, force it into memory.  */
  if (CONSTANT_P (operands[1])
      && REG_P (operands[0])
      && (SPARC_FP_REG_P (REGNO (operands[0]))
	  || SCALAR_FLOAT_MODE_P (mode)
	  || VECTOR_MODE_P (mode)))
    {
      /* emit_group_store will send such bogosity to us when it is
         not storing directly into memory.  So fix this up to avoid
         crashes in output_constant_pool.  */
      if (operands [1] == const0_rtx)
	operands[1] = CONST0_RTX (mode);

      /* We can clear or set to all-ones FP registers if TARGET_VIS, and
	 always other regs.  */
      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
	  && (const_zero_operand (operands[1], mode)
	      || const_all_ones_operand (operands[1], mode)))
	return false;

      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
	  /* We are able to build any SF constant in integer registers
	     with at most 2 instructions.  */
	  && (mode == SFmode
	      /* And any DF constant in integer registers if needed.  */
	      || (mode == DFmode && !can_create_pseudo_p ())))
	return false;

      operands[1] = force_const_mem (mode, operands[1]);
      if (!reload_in_progress)
	operands[1] = validize_mem (operands[1]);
      return false;
    }

  /* Accept non-constants and valid constants unmodified.  */
  if (!CONSTANT_P (operands[1])
      || GET_CODE (operands[1]) == HIGH
      || input_operand (operands[1], mode))
    return false;

  /* The constant needs a multi-insn sequence; dispatch on mode.  */
  switch (mode)
    {
    case E_QImode:
      /* All QImode constants require only one insn, so proceed.  */
      break;

    case E_HImode:
    case E_SImode:
      sparc_emit_set_const32 (operands[0], operands[1]);
      return true;

    case E_DImode:
      /* input_operand should have filtered out 32-bit mode.  */
      sparc_emit_set_const64 (operands[0], operands[1]);
      return true;

    case E_TImode:
      {
	rtx high, low;
	/* TImode isn't available in 32-bit mode.  */
	split_double (operands[1], &high, &low);
	emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
			      high));
	emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
			      low));
      }
      return true;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Load OP1, a 32-bit constant, into OP0, a register.
   We know it can't be done in one insn when we get
   here, the move expander guarantees this.
*/

static void
sparc_emit_set_const32 (rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);
  rtx temp = op0;

  /* Use a fresh intermediate register when we may still create pseudos;
     otherwise (during reload) reuse the destination as scratch.  */
  if (can_create_pseudo_p ())
    temp = gen_reg_rtx (mode);

  if (GET_CODE (op1) == CONST_INT)
    {
      /* The move expander only sends us constants that need two insns:
	 neither a 13-bit immediate nor a pure sethi value.  */
      gcc_assert (!small_int_operand (op1, mode)
		  && !const_high_operand (op1, mode));

      /* Emit them as real moves instead of a HIGH/LO_SUM,
	 this way CSE can see everything and reuse intermediate
	 values if it wants.  */
      emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
					     & ~(HOST_WIDE_INT) 0x3ff)));

      emit_insn (gen_rtx_SET (op0,
			      gen_rtx_IOR (mode, temp,
					   GEN_INT (INTVAL (op1) & 0x3ff))));
    }
  else
    {
      /* A symbol, emit in the traditional way.  */
      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
      emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
    }
}

/* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
   If TEMP is nonzero, we are forbidden to use any other scratch
   registers.  Otherwise, we are allowed to generate them as needed.

   Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
   or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns).  */

void
sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
{
  rtx cst, temp1, temp2, temp3, temp4, temp5;
  rtx ti_temp = 0;

  /* Deal with too large offsets: split SYMBOL+CST into a separate
     64-bit load of CST plus an add when CST does not fit in 32 bits.
     This path recurses on the bare symbol with no fixed scratch.  */
  if (GET_CODE (op1) == CONST
      && GET_CODE (XEXP (op1, 0)) == PLUS
      && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
      && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
    {
      gcc_assert (!temp);
      temp1 = gen_reg_rtx (DImode);
      temp2 = gen_reg_rtx (DImode);
      sparc_emit_set_const64 (temp2, cst);
      sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
				       NULL_RTX);
      emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
      return;
    }

  /* A TImode TEMP is really a pair of DImode scratches; start with the
     low-numbered one and remember the pair in ti_temp.  */
  if (temp && GET_MODE (temp) == TImode)
    {
      ti_temp = temp;
      temp = gen_rtx_REG (DImode, REGNO (temp));
    }

  /* SPARC-V9 code model support.  */
  switch (sparc_code_model)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 4TB of the virtual address
	 space.

	 sethi	%hi(symbol), %temp1
	 or	%temp1, %lo(symbol), %reg  */
      if (temp)
	temp1 = temp;  /* op0 is allowed.  */
      else
	temp1 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 16TB of the virtual address
	 space.

	 sethi	%h44(symbol), %temp1
	 or	%temp1, %m44(symbol), %temp2
	 sllx	%temp2, 12, %temp3
	 or	%temp3, %l44(symbol), %reg  */
      if (temp)
	{
	  /* Each step overwrites its input, so op0 can serve as the
	     intermediate for the first two results.  */
	  temp1 = op0;
	  temp2 = op0;
	  temp3 = temp;  /* op0 is allowed.  */
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_seth44 (temp1, op1));
      emit_insn (gen_setm44 (temp2, temp1, op1));
      emit_insn (gen_rtx_SET (temp3,
			      gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp3, op1));
      break;

    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable can be placed anywhere in the virtual address
	 space.

	 sethi	%hh(symbol), %temp1
	 sethi	%lm(symbol), %temp2
	 or	%temp1, %hm(symbol), %temp3
	 sllx	%temp3, 32, %temp4
	 or	%temp4, %temp2, %temp5
	 or	%temp5, %lo(symbol), %reg  */
      if (temp)
	{
	  /* It is possible that one of the registers we got for operands[2]
	     might coincide with that of operands[0] (which is why we made
	     it TImode).  Pick the other one to use as our scratch.  */
	  if (rtx_equal_p (temp, op0))
	    {
	      gcc_assert (ti_temp);
	      temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
	    }
	  temp1 = op0;
	  temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	  temp3 = op0;
	  temp4 = op0;
	  temp5 = op0;
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	  temp4 = gen_reg_rtx (DImode);
	  temp5 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_sethh (temp1, op1));
      emit_insn (gen_setlm (temp2, op1));
      emit_insn (gen_sethm (temp3, temp1, op1));
      emit_insn (gen_rtx_SET (temp4,
			      gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
      emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
      emit_insn (gen_setlo (op0, temp5, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility kruft here.
	 Essentially it is MEDLOW with a fixed 64-bit
	 virtual base added to all data segment addresses.
	 Text-segment stuff is computed like MEDANY, we can't
	 reuse the code above because the relocation knobs
	 look different.

	 Data segment:	sethi	%hi(symbol), %temp1
			add	%temp1, EMBMEDANY_BASE_REG, %temp2
			or	%temp2, %lo(symbol), %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
	{
	  if (temp)
	    {
	      temp1 = temp;  /* op0 is allowed.  */
	      temp2 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_sethi (temp1, op1));
	  emit_insn (gen_embmedany_brsum (temp2, temp1));
	  emit_insn (gen_embmedany_losum (op0, temp2, op1));
	}

      /* Text segment:	sethi	%uhi(symbol), %temp1
			sethi	%hi(symbol), %temp2
			or	%temp1, %ulo(symbol), %temp3
			sllx	%temp3, 32, %temp4
			or	%temp4, %temp2, %temp5
			or	%temp5, %lo(symbol), %reg  */
      else
	{
	  if (temp)
	    {
	      /* It is possible that one of the registers we got for operands[2]
		 might coincide with that of operands[0] (which is why we made
		 it TImode).  Pick the other one to use as our scratch.  */
	      if (rtx_equal_p (temp, op0))
		{
		  gcc_assert (ti_temp);
		  temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
		}
	      temp1 = op0;
	      temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	      temp3 = op0;
	      temp4 = op0;
	      temp5 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	      temp3 = gen_reg_rtx (DImode);
	      temp4 = gen_reg_rtx (DImode);
	      temp5 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_textuhi (temp1, op1));
	  emit_insn (gen_embmedany_texthi  (temp2, op1));
	  emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
	  emit_insn (gen_rtx_SET (temp4,
				  gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
	  emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
	  emit_insn (gen_embmedany_textlo  (op0, temp5, op1));
	}
      break;

    default:
      gcc_unreachable ();
    }
}

/* These avoid problems when cross compiling.  If we do not
   go through all this hair then the optimizer will see
   invalid REG_EQUAL notes or in some cases none at all.  */
static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);

/* The optimizer is not to assume anything about exactly
   which bits are set for a HIGH, they are unspecified.
   Unfortunately this leads to many missed optimizations
   during CSE.  We mask out the non-HIGH bits, and matches
   a plain movdi, to alleviate this problem.
*/
static rtx
gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
{
  /* Emit a plain move of VAL with the low 10 bits cleared, instead of
     a HIGH rtx, so CSE can reason about the exact bits.  */
  return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
}

static rtx
gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
{
  return gen_rtx_SET (dest, GEN_INT (val));
}

static rtx
gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
{
  return gen_rtx_IOR (DImode, src, GEN_INT (val));
}

static rtx
gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
{
  return gen_rtx_XOR (DImode, src, GEN_INT (val));
}

/* Worker routines for 64-bit constant formation on arch64.
   One of the key things to be doing in these emissions is
   to create as many temp REGs as possible.  This makes it
   possible for half-built constants to be used later when
   such values are similar to something required later on.
   Without doing this, the optimizer cannot see such
   opportunities.  */

static void sparc_emit_set_const64_quick1 (rtx, rtx,
					   unsigned HOST_WIDE_INT, int);

/* Two-insn sequence for a constant whose high 32 bits are all zeros
   (IS_NEG == 0) or all ones (IS_NEG != 0): sethi + or, respectively
   sethi of the complement + xor with a negative immediate that also
   sets the high word.  LOW_BITS are bits 0..31 of the constant and
   TEMP is the scratch register for the sethi result.  */

static void
sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
			       unsigned HOST_WIDE_INT low_bits, int is_neg)
{
  unsigned HOST_WIDE_INT high_bits;

  if (is_neg)
    high_bits = (~low_bits) & 0xffffffff;
  else
    high_bits = low_bits;

  emit_insn (gen_safe_HIGH64 (temp, high_bits));
  if (!is_neg)
    {
      /* high_bits == low_bits here, so this ORs in the low 10 bits of
	 the original constant.  */
      emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
    }
  else
    {
      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if ((low_bits & 0x3ff) == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (op0,
				  gen_safe_XOR64 (temp,
						  (-(HOST_WIDE_INT)0x400
						   | (low_bits & 0x3ff)))));
	}
    }
}

static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
					   unsigned HOST_WIDE_INT, int);

/* Build HIGH_BITS in TEMP (sethi [+ or], or a single mov when it fits
   in 13 bits), shift it left by SHIFT_COUNT into OP0, then OR in
   LOW_IMMEDIATE if it is nonzero.  */

static void
sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
			       unsigned HOST_WIDE_INT high_bits,
			       unsigned HOST_WIDE_INT low_immediate,
			       int shift_count)
{
  rtx temp2 = op0;

  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (op0,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	temp2 = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      temp2 = temp;
    }

  /* Now shift it up into place.  */
  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
					       GEN_INT (shift_count))));

  /* If there is a low immediate part piece, finish up by
     putting that in as well.  */
  if (low_immediate != 0)
    emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
}

static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
					    unsigned HOST_WIDE_INT);

/* Full 64-bit constant decomposition.  Even though this is the
   'worst' case, we still optimize a few things away.
*/
static void
sparc_emit_set_const64_longway (rtx op0, rtx temp,
				unsigned HOST_WIDE_INT high_bits,
				unsigned HOST_WIDE_INT low_bits)
{
  rtx sub_temp = op0;

  if (can_create_pseudo_p ())
    sub_temp = gen_reg_rtx (DImode);

  /* Build the high 32 bits into TEMP/SUB_TEMP first.  */
  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (sub_temp,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	sub_temp = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      sub_temp = temp;
    }

  if (can_create_pseudo_p ())
    {
      /* With pseudos available: build the low word independently and
	 add it to the shifted high word.  */
      rtx temp2 = gen_reg_rtx (DImode);
      rtx temp3 = gen_reg_rtx (DImode);
      rtx temp4 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
						     GEN_INT (32))));

      emit_insn (gen_safe_HIGH64 (temp2, low_bits));
      if ((low_bits & ~0xfffffc00) != 0)
	{
	  emit_insn (gen_rtx_SET (temp3,
				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
	  emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
	}
    }
  else
    {
      /* No pseudos: feed the low 32 bits in as three chunks of
	 12 + 12 + 8 bits, shifting between each OR.  */
      rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
      int to_shift = 12;

      /* We are in the middle of reload, so this is really
	 painful.  However we do still make an attempt to
	 avoid emitting truly stupid code.  */
      if (low1 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
						       GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
	  sub_temp = op0;
	  to_shift = 12;
	}
      else
	{
	  /* Chunk is zero: fold its shift into the next one.  */
	  to_shift += 12;
	}
      if (low2 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
						       GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
	  sub_temp = op0;
	  to_shift = 8;
	}
      else
	{
	  to_shift += 8;
	}
      emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
						   GEN_INT (to_shift))));
      if (low3 != const0_rtx)
	emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
      /* phew...  */
    }
}

/* Analyze a 64-bit constant for certain properties.  */
static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT,
				    int *, int *, int *);

/* Scan the 64-bit constant given as HIGH_BITS:LOW_BITS and store into
   *HBSP/*LBSP the positions (0..63) of the highest and lowest set bit,
   and into *ABBASP whether every bit between them is set.  At least one
   bit must be set.  */

static void
analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
			unsigned HOST_WIDE_INT low_bits,
			int *hbsp, int *lbsp, int *abbasp)
{
  int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
  int i;

  lowest_bit_set = highest_bit_set = -1;
  /* First pass: look for the lowest set bit in the low word and the
     highest set bit in the high word simultaneously.  */
  i = 0;
  do
    {
      if ((lowest_bit_set == -1)
	  && ((low_bits >> i) & 1))
	lowest_bit_set = i;
      if ((highest_bit_set == -1)
	  && ((high_bits >> (32 - i - 1)) & 1))
	highest_bit_set = (64 - i - 1);
    }
  while (++i < 32
	 && ((highest_bit_set == -1)
	     || (lowest_bit_set == -1)));
  if (i == 32)
    {
      /* Second pass: whichever bound was not found must lie in the
	 other word.  */
      i = 0;
      do
	{
	  if ((lowest_bit_set == -1)
	      && ((high_bits >> i) & 1))
	    lowest_bit_set = i + 32;
	  if ((highest_bit_set == -1)
	      && ((low_bits >> (32 - i - 1)) & 1))
	    highest_bit_set = 32 - i - 1;
	}
      while (++i < 32
	     && ((highest_bit_set == -1)
		 || (lowest_bit_set == -1)));
    }
  /* If there are no bits set this should have gone out
     as one instruction!  */
  gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
  all_bits_between_are_set = 1;
  for (i = lowest_bit_set; i <= highest_bit_set; i++)
    {
      /* NOTE(review): 1 << i with i up to 31 relies on the compiler's
	 well-defined behavior for signed shifts; standard C would
	 prefer an unsigned constant here.  */
      if (i < 32)
	{
	  if ((low_bits & (1 << i)) != 0)
	    continue;
	}
      else
	{
	  if ((high_bits & (1 << (i - 32))) != 0)
	    continue;
	}
      all_bits_between_are_set = 0;
      break;
    }
  *hbsp = highest_bit_set;
  *lbsp = lowest_bit_set;
  *abbasp = all_bits_between_are_set;
}

static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);

/* Return nonzero if the 64-bit constant HIGH_BITS:LOW_BITS can be
   loaded with at most two instructions.  */

static int
const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
		   unsigned HOST_WIDE_INT low_bits)
{
  int highest_bit_set, lowest_bit_set, all_bits_between_are_set;

  /* High word all zeros or all ones: sethi+or or sethi+xor works.  */
  if (high_bits == 0
      || high_bits == 0xffffffff)
    return 1;

  analyze_64bit_constant (high_bits, low_bits,
			  &highest_bit_set, &lowest_bit_set,
			  &all_bits_between_are_set);

  /* A solid run of ones anchored at either end: mov -1 + shift.  */
  if ((highest_bit_set == 63
       || lowest_bit_set == 0)
      && all_bits_between_are_set != 0)
    return 1;

  /* All set bits fit in a sethi payload after shifting: sethi + shift.  */
  if ((highest_bit_set - lowest_bit_set) < 21)
    return 1;

  return 0;
}

static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
							unsigned HOST_WIDE_INT,
							int, int);

/* Extract the run of interesting bits of HIGH_BITS:LOW_BITS, starting
   at bit LOWEST_BIT_SET, re-based to bit SHIFT of the result.  */

static unsigned HOST_WIDE_INT
create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
			  unsigned HOST_WIDE_INT low_bits,
			  int lowest_bit_set, int shift)
{
  HOST_WIDE_INT hi, lo;

  if (lowest_bit_set < 32)
    {
      lo = (low_bits >> lowest_bit_set) << shift;
      hi = ((high_bits << (32 - lowest_bit_set)) << shift);
    }
  else
    {
      lo = 0;
      hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
    }
  gcc_assert (!
	      (hi & lo));
  return (hi | lo);
}

/* Here we are sure to be arch64 and this is an integer constant
   being loaded into a register.  Emit the most efficient
   insn sequence possible.  Detection of all the 1-insn cases
   has been done already.  */
static void
sparc_emit_set_const64 (rtx op0, rtx op1)
{
  unsigned HOST_WIDE_INT high_bits, low_bits;
  int lowest_bit_set, highest_bit_set;
  int all_bits_between_are_set;
  rtx temp = 0;

  /* Sanity check that we know what we are working with.  */
  gcc_assert (TARGET_ARCH64
	      && (GET_CODE (op0) == SUBREG
		  || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));

  /* If we cannot create new pseudos (e.g. after reload), reuse OP0
     itself as the scratch register.  */
  if (! can_create_pseudo_p ())
    temp = op0;

  if (GET_CODE (op1) != CONST_INT)
    {
      sparc_emit_set_symbolic_const64 (op0, op1, temp);
      return;
    }

  if (! temp)
    temp = gen_reg_rtx (DImode);

  high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
  low_bits = (INTVAL (op1) & 0xffffffff);

  /* low_bits	bits 0  --> 31
     high_bits	bits 32 --> 63  */

  analyze_64bit_constant (high_bits, low_bits,
			  &highest_bit_set, &lowest_bit_set,
			  &all_bits_between_are_set);

  /* First try for a 2-insn sequence.  */

  /* These situations are preferred because the optimizer can
   * do more things with them:
   * 1) mov	-1, %reg
   *    sllx	%reg, shift, %reg
   * 2) mov	-1, %reg
   *    srlx	%reg, shift, %reg
   * 3) mov	some_small_const, %reg
   *    sllx	%reg, shift, %reg
   */
  if (((highest_bit_set == 63
	|| lowest_bit_set == 0)
       && all_bits_between_are_set != 0)
      || ((highest_bit_set - lowest_bit_set) < 12))
    {
      HOST_WIDE_INT the_const = -1;
      int shift = lowest_bit_set;

      if ((highest_bit_set != 63
	   && lowest_bit_set != 0)
	  || all_bits_between_are_set == 0)
	{
	  the_const =
	    create_simple_focus_bits (high_bits, low_bits,
				      lowest_bit_set, 0);
	}
      else if (lowest_bit_set == 0)
	/* Negative shift encodes a right shift below.  */
	shift = -(63 - highest_bit_set);

      gcc_assert (SPARC_SIMM13_P (the_const));
      /* A zero shift would have been a 1-insn constant, which our
	 caller is assumed to have handled already.  */
      gcc_assert (shift != 0);

      emit_insn (gen_safe_SET64 (temp, the_const));
      if (shift > 0)
	emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
						     GEN_INT (shift))));
      else if (shift < 0)
	emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
						       GEN_INT (-shift))));
      return;
    }

  /* Now a range of 22 or less bits set somewhere.
   * 1) sethi	%hi(focus_bits), %reg
   *    sllx	%reg, shift, %reg
   * 2) sethi	%hi(focus_bits), %reg
   *    srlx	%reg, shift, %reg
   */
  if ((highest_bit_set - lowest_bit_set) < 21)
    {
      unsigned HOST_WIDE_INT focus_bits =
	create_simple_focus_bits (high_bits, low_bits,
				  lowest_bit_set, 10);

      gcc_assert (SPARC_SETHI_P (focus_bits));
      gcc_assert (lowest_bit_set != 10);

      emit_insn (gen_safe_HIGH64 (temp, focus_bits));

      /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
      if (lowest_bit_set < 10)
	emit_insn (gen_rtx_SET (op0,
				gen_rtx_LSHIFTRT (DImode, temp,
						  GEN_INT (10 - lowest_bit_set))));
      else if (lowest_bit_set > 10)
	emit_insn (gen_rtx_SET (op0,
				gen_rtx_ASHIFT (DImode, temp,
						GEN_INT (lowest_bit_set - 10))));
      return;
    }

  /* 1) sethi	%hi(low_bits), %reg
   *    or	%reg, %lo(low_bits), %reg
   * 2) sethi	%hi(~low_bits), %reg
   *    xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
   */
  if (high_bits == 0
      || high_bits == 0xffffffff)
    {
      sparc_emit_set_const64_quick1 (op0, temp, low_bits,
				     (high_bits == 0xffffffff));
      return;
    }

  /* Now, try 3-insn sequences.  */

  /* 1) sethi	%hi(high_bits), %reg
   *    or	%reg, %lo(high_bits), %reg
   *    sllx	%reg, 32, %reg
   */
  if (low_bits == 0)
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
      return;
    }

  /* We may be able to do something quick
     when the constant is negated, so try that.  */
  if (const64_is_2insns ((~high_bits) & 0xffffffff,
			 (~low_bits) & 0xfffffc00))
    {
      /* NOTE: The trailing bits get XOR'd so we need the
	 non-negated bits, not the negated ones.  */
      unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;

      if ((((~high_bits) & 0xffffffff) == 0
	   && ((~low_bits) & 0x80000000) == 0)
	  || (((~high_bits) & 0xffffffff) == 0xffffffff
	      && ((~low_bits) & 0x80000000) != 0))
	{
	  unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);

	  if ((SPARC_SETHI_P (fast_int)
	       && (~high_bits & 0xffffffff) == 0)
	      || SPARC_SIMM13_P (fast_int))
	    emit_insn (gen_safe_SET64 (temp, fast_int));
	  else
	    /* Recurse to synthesize the negated value into TEMP.  */
	    sparc_emit_set_const64 (temp, GEN_INT (fast_int));
	}
      else
	{
	  rtx negated_const;
	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
	  sparc_emit_set_const64 (temp, negated_const);
	}

      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if (trailing_bits == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (op0,
				  gen_safe_XOR64 (temp,
						  (-0x400 | trailing_bits))));
	}
      return;
    }

  /* 1) sethi	%hi(xxx), %reg
   *    or	%reg, %lo(xxx), %reg
   *    sllx	%reg, yyy, %reg
   *
   * ??? This is just a generalized version of the low_bits==0
   * thing above, FIXME...
   */
  if ((highest_bit_set - lowest_bit_set) < 32)
    {
      unsigned HOST_WIDE_INT focus_bits =
	create_simple_focus_bits (high_bits, low_bits,
				  lowest_bit_set, 0);

      /* We can't get here in this state.  */
      gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);

      /* So what we know is that the set bits straddle the
	 middle of the 64-bit word.  */
      sparc_emit_set_const64_quick2 (op0, temp,
				     focus_bits, 0,
				     lowest_bit_set);
      return;
    }

  /* 1) sethi	%hi(high_bits), %reg
   *    or	%reg, %lo(high_bits), %reg
   *    sllx	%reg, 32, %reg
   *    or	%reg, low_bits, %reg
   */
  /* The final 'or' immediate must be positive as a 32-bit value,
     otherwise it would sign-extend and corrupt the high half.  */
  if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
      return;
    }

  /* The easiest way when all else fails, is full decomposition.  */
  sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
}

/* Implement TARGET_FIXED_CONDITION_CODE_REGS.  The fixed CC registers
   are the integer %icc and the float %fcc0.  */

static bool
sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = SPARC_ICC_REG;
  *p2 = SPARC_FCC_REG;
  return true;
}

/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */

static unsigned int
sparc_min_arithmetic_precision (void)
{
  return 32;
}

/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
   return the mode to be used for the comparison.  For floating-point,
   CCFP[E]mode is used.  CCNZmode should be used when the first operand
   is a PLUS, MINUS, NEG, or ASHIFT.  CCmode should be used when no special
   processing is needed.
 */

machine_mode
select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	case LTGT:
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }
  else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
	    || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
	   && y == const0_rtx)
    {
      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
	return CCXNZmode;
      else
	return CCNZmode;
    }
  else
    {
      /* This is for the cmp<mode>_sne pattern.  */
      if (GET_CODE (x) == NOT && y == constm1_rtx)
	{
	  if (TARGET_ARCH64 && GET_MODE (x) == DImode)
	    return CCXCmode;
	  else
	    return CCCmode;
	}

      /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns.  */
      if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
	{
	  if (GET_CODE (y) == UNSPEC
	      && (XINT (y, 1) == UNSPEC_ADDV
		  || XINT (y, 1) == UNSPEC_SUBV
		  || XINT (y, 1) == UNSPEC_NEGV))
	    return CCVmode;
	  else
	    return CCCmode;
	}

      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
	return CCXmode;
      else
	return CCmode;
    }
}

/* Emit the compare insn and return the CC reg for a CODE comparison
   with operands X and Y.  */

static rtx
gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
{
  machine_mode mode;
  rtx cc_reg;

  /* X is already a CC register: nothing to emit.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return x;

  mode = SELECT_CC_MODE (code, x, y);

  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
     fcc regs (cse can't tell they're really call clobbered regs and will
     remove a duplicate comparison even if there is an intervening function
     call - it will then try to reload the cc reg via an int reg which is why
     we need the movcc patterns).  It is possible to provide the movcc
     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
     to tell cse that CCFPE mode registers (even pseudos) are call
     clobbered.  */

  /* ??? This is an experiment.  Rather than making changes to cse which may
     or may not be easy/clean, we do our own cse.  This is possible because
     we will generate hard registers.  Cse knows they're call clobbered (it
     doesn't know the same thing about pseudos).  If we guess wrong, no big
     deal, but if we win, great!  */

  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
#if 1 /* experiment */
    {
      int reg;
      /* We cycle through the registers to ensure they're all exercised.  */
      static int next_fcc_reg = 0;
      /* Previous x,y for each fcc reg.  */
      static rtx prev_args[4][2];

      /* Scan prev_args for x,y: reuse the fcc reg of an identical
	 earlier comparison if there is one.  */
      for (reg = 0; reg < 4; reg++)
	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
	  break;
      if (reg == 4)
	{
	  reg = next_fcc_reg;
	  prev_args[reg][0] = x;
	  prev_args[reg][1] = y;
	  next_fcc_reg = (next_fcc_reg + 1) & 3;
	}
      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
    }
#else
    cc_reg = gen_reg_rtx (mode);
#endif /* ! experiment */
  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
  else
    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);

  /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD.  If we do, this
     will only result in an unrecognizable insn so no point in asserting.  */
  emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));

  return cc_reg;
}


/* Emit the compare insn and return the CC reg for the comparison in CMP.  */

rtx
gen_compare_reg (rtx cmp)
{
  return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
}

/* This function is used for v9 only.
   DEST is the target of the Scc insn.
   CODE is the code for an Scc's comparison.
   X and Y are the values we compare.

   This function is needed to turn

           (set (reg:SI 110)
	       (gt (reg:CCX 100 %icc)
	           (const_int 0)))
   into
           (set (reg:SI 110)
	       (gt:DI (reg:CCX 100 %icc)
	           (const_int 0)))

   IE: The instruction recognizer needs to see the mode of the comparison to
   find the right instruction.  We could use "gt:DI" right in the
   define_expand, but leaving it out allows us to handle DI, SI, etc.  */

static int
gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
{
  if (! TARGET_ARCH64
      && (GET_MODE (x) == DImode
	  || GET_MODE (dest) == DImode))
    return 0;

  /* Try to use the movrCC insns.  */
  if (TARGET_ARCH64
      && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
      && y == const0_rtx
      && v9_regcmp_p (compare_code))
    {
      rtx op0 = x;
      rtx temp;

      /* Special case for op0 != 0.  This can be done with one instruction if
	 dest == x.  */

      if (compare_code == NE
	  && GET_MODE (dest) == DImode
	  && rtx_equal_p (op0, dest))
	{
	  emit_insn (gen_rtx_SET (dest,
			      gen_rtx_IF_THEN_ELSE (DImode,
				       gen_rtx_fmt_ee (compare_code, DImode,
						       op0, const0_rtx),
				       const1_rtx,
				       dest)));
	  return 1;
	}

      if (reg_overlap_mentioned_p (dest, op0))
	{
	  /* Handle the case where dest == x.
	     We "early clobber" the result.  */
	  op0 = gen_reg_rtx (GET_MODE (x));
	  emit_move_insn (op0, x);
	}

      /* Zero DEST first, then conditionally overwrite it with 1.  */
      emit_insn (gen_rtx_SET (dest, const0_rtx));
      if (GET_MODE (op0) != DImode)
	{
	  temp = gen_reg_rtx (DImode);
	  convert_move (temp, op0, 0);
	}
      else
	temp = op0;
      emit_insn (gen_rtx_SET (dest,
			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
				   gen_rtx_fmt_ee (compare_code, DImode,
						   temp, const0_rtx),
					const1_rtx,
					dest)));
      return 1;
    }
  else
    {
      x = gen_compare_reg_1 (compare_code, x, y);
      y = const0_rtx;

      /* Zero DEST first, then conditionally overwrite it with 1.  */
      emit_insn (gen_rtx_SET (dest, const0_rtx));
      emit_insn (gen_rtx_SET (dest,
			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
				   gen_rtx_fmt_ee (compare_code,
						   GET_MODE (x), x, y),
					const1_rtx, dest)));
      return 1;
    }
}


/* Emit an scc insn.  For seq, sne, sgeu, and sltu, we can do this
   without jumps using the addx/subx instructions.  */

bool
emit_scc_insn (rtx operands[])
{
  rtx tem, x, y;
  enum rtx_code code;
  machine_mode mode;

  /* The quad-word fp compare library routines all return nonzero to indicate
     true, which is different from the equivalent libgcc routines, so we must
     handle them specially here.  */
  if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
    {
      operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
					      GET_CODE (operands[1]));
      operands[2] = XEXP (operands[1], 0);
      operands[3] = XEXP (operands[1], 1);
    }

  code = GET_CODE (operands[1]);
  x = operands[2];
  y = operands[3];
  mode = GET_MODE (x);

  /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
     more applications).  The exception to this is "reg != 0" which can
     be done in one instruction on v9 (so we do it).  */
  if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
    {
      /* Reduce "x op y" to "(x ^ y) op 0".  */
      if (y != const0_rtx)
	x = force_reg (mode, gen_rtx_XOR (mode, x, y));

      rtx pat = gen_rtx_SET (operands[0],
			     gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
					     x, const0_rtx));

      /* If we can use addx/subx or addxc, add a clobber for CC.  */
      if (mode == SImode || (code == NE && TARGET_VIS3))
	{
	  rtx clobber
	    = gen_rtx_CLOBBER (VOIDmode,
			       gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
					    SPARC_ICC_REG));
	  pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
	}

      emit_insn (pat);
      return true;
    }

  /* We can do LTU in DImode using the addxc instruction with VIS3.  */
  if (TARGET_ARCH64
      && mode == DImode
      && !((code == LTU || code == GTU) && TARGET_VIS3)
      && gen_v9_scc (operands[0], code, x, y))
    return true;

  /* We can do LTU and GEU using the addx/subx instructions too.  And
     for GTU/LEU, if both operands are registers swap them and fall
     back to the easy case.  */
  if (code == GTU || code == LEU)
    {
      if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
	  && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
	{
	  tem = x;
	  x = y;
	  y = tem;
	  code = swap_condition (code);
	}
    }

  if (code == LTU || code == GEU)
    {
      emit_insn (gen_rtx_SET (operands[0],
			      gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
					      gen_compare_reg_1 (code, x, y),
					      const0_rtx)));
      return true;
    }

  /* All the possibilities to use addx/subx based sequences have been
     exhausted, try for a 3 instruction sequence using v9 conditional
     moves.  */
  if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
    return true;

  /* Nope, do branches.  */
  return false;
}

/* Emit a conditional jump insn for the v9 architecture using comparison code
   CODE and jump target LABEL.
   This function exists to take advantage of the v9 brxx insns.  */

static void
emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
{
  emit_jump_insn (gen_rtx_SET (pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
				    gen_rtx_fmt_ee (code, GET_MODE (op0),
						    op0, const0_rtx),
				    gen_rtx_LABEL_REF (VOIDmode, label),
				    pc_rtx)));
}

/* Emit a conditional jump insn for the UA2011 architecture using
   comparison code CODE and jump target LABEL.  This function exists
   to take advantage of the UA2011 Compare and Branch insns.  */

static void
emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  rtx if_then_else;

  if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
				       gen_rtx_fmt_ee(code, GET_MODE(op0),
						      op0, op1),
				       gen_rtx_LABEL_REF (VOIDmode, label),
				       pc_rtx);

  emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
}

/* Emit a conditional branch.  Operand 0 is the comparison operator,
   operands 1 and 2 are the compared values and operand 3 is the jump
   target label.  */

void
emit_conditional_branch_insn (rtx operands[])
{
  /* The quad-word fp compare library routines all return nonzero to indicate
     true, which is different from the equivalent libgcc routines, so we must
     handle them specially here.  */
  if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
    {
      operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
					      GET_CODE (operands[0]));
      operands[1] = XEXP (operands[0], 0);
      operands[2] = XEXP (operands[0], 1);
    }

  /* If we can tell early on that the comparison is against a constant
     that won't fit in the 5-bit signed immediate field of a cbcond,
     use one of the other v9 conditional branch sequences.  */
  if (TARGET_CBCOND
      && GET_CODE (operands[1]) == REG
      && (GET_MODE (operands[1]) == SImode
	  || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
      && (GET_CODE (operands[2]) != CONST_INT
	  || SPARC_SIMM5_P (INTVAL (operands[2]))))
    {
      emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
      return;
    }

  /* A 64-bit "reg <op> 0" comparison can use the v9 brxx insns.  */
  if (TARGET_ARCH64 && operands[2] == const0_rtx
      && GET_CODE (operands[1]) == REG
      && GET_MODE (operands[1]) == DImode)
    {
      emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
      return;
    }

  /* Fall back to an explicit compare followed by a branch on CC.  */
  operands[1] = gen_compare_reg (operands[0]);
  operands[2] = const0_rtx;
  operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
				operands[1], operands[2]);
  emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
				  operands[3]));
}


/* Generate a DFmode part of a hard TFmode register.
   REG is the TFmode hard register, LOW is 1 for the
   low 64bit of the register and 0 otherwise.
 */
rtx
gen_df_reg (rtx reg, int low)
{
  int regno = REGNO (reg);

  if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
    regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
  return gen_rtx_REG (DFmode, regno);
}

/* Generate a call to FUNC with OPERANDS.  Operand 0 is the return value.
   Unlike normal calls, TFmode operands are passed by reference.  It is
   assumed that no more than 3 operands are required.  */

static void
emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
{
  rtx ret_slot = NULL, arg[3], func_sym;
  int i;

  /* We only expect to be called for conversions, unary, and binary ops.  */
  gcc_assert (nargs == 2 || nargs == 3);

  for (i = 0; i < nargs; ++i)
    {
      rtx this_arg = operands[i];
      rtx this_slot;

      /* TFmode arguments and return values are passed by reference.  */
      if (GET_MODE (this_arg) == TFmode)
	{
	  int force_stack_temp;

	  /* With a buggy QP library, always use a fresh stack slot for
	     the return value (operand 0).  */
	  force_stack_temp = 0;
	  if (TARGET_BUGGY_QP_LIB && i == 0)
	    force_stack_temp = 1;

	  if (GET_CODE (this_arg) == MEM
	      && ! force_stack_temp)
	    {
	      tree expr = MEM_EXPR (this_arg);
	      if (expr)
		mark_addressable (expr);
	      this_arg = XEXP (this_arg, 0);
	    }
	  else if (CONSTANT_P (this_arg)
		   && ! force_stack_temp)
	    {
	      this_slot = force_const_mem (TFmode, this_arg);
	      this_arg = XEXP (this_slot, 0);
	    }
	  else
	    {
	      this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));

	      /* Operand 0 is the return value.  We'll copy it out later.  */
	      if (i > 0)
		emit_move_insn (this_slot, this_arg);
	      else
		ret_slot = this_slot;

	      this_arg = XEXP (this_slot, 0);
	    }
	}

      arg[i] = this_arg;
    }

  func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);

  if (GET_MODE (operands[0]) == TFmode)
    {
      /* A TFmode result is returned through the by-reference slot in
	 arg[0], so the libcall itself returns nothing.  */
      if (nargs == 2)
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]));
      else
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]),
			   arg[2], GET_MODE (arg[2]));

      if (ret_slot)
	emit_move_insn (operands[0], ret_slot);
    }
  else
    {
      rtx ret;

      gcc_assert (nargs == 2);

      ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
				     GET_MODE (operands[0]),
				     arg[1], GET_MODE (arg[1]));

      if (ret != operands[0])
	emit_move_insn (operands[0], ret);
    }
}

/* Expand soft-float TFmode calls to sparc abi routines.
 */

static void
emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
{
  const char *func;

  switch (code)
    {
    case PLUS:
      func = "_Qp_add";
      break;
    case MINUS:
      func = "_Qp_sub";
      break;
    case MULT:
      func = "_Qp_mul";
      break;
    case DIV:
      func = "_Qp_div";
      break;
    default:
      gcc_unreachable ();
    }

  emit_soft_tfmode_libcall (func, 3, operands);
}

static void
emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
{
  const char *func;

  /* SQRT is the only unary TFmode operation we expand this way.  */
  gcc_assert (code == SQRT);
  func = "_Qp_sqrt";

  emit_soft_tfmode_libcall (func, 2, operands);
}

static void
emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
{
  const char *func;

  /* Select the _Qp_* routine from the conversion direction and the
     non-TFmode operand's mode.  */
  switch (code)
    {
    case FLOAT_EXTEND:
      switch (GET_MODE (operands[1]))
	{
	case E_SFmode:
	  func = "_Qp_stoq";
	  break;
	case E_DFmode:
	  func = "_Qp_dtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FLOAT_TRUNCATE:
      switch (GET_MODE (operands[0]))
	{
	case E_SFmode:
	  func = "_Qp_qtos";
	  break;
	case E_DFmode:
	  func = "_Qp_qtod";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FLOAT:
      switch (GET_MODE (operands[1]))
	{
	case E_SImode:
	  func = "_Qp_itoq";
	  if (TARGET_ARCH64)
	    operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
	  break;
	case E_DImode:
	  func = "_Qp_xtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case UNSIGNED_FLOAT:
      switch (GET_MODE (operands[1]))
	{
	case E_SImode:
	  func = "_Qp_uitoq";
	  if (TARGET_ARCH64)
	    operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
	  break;
	case E_DImode:
	  func = "_Qp_uxtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FIX:
      switch (GET_MODE (operands[0]))
	{
	case E_SImode:
	  func = "_Qp_qtoi";
	  break;
	case E_DImode:
	  func = "_Qp_qtox";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case UNSIGNED_FIX:
      switch (GET_MODE (operands[0]))
	{
	case E_SImode:
	  func = "_Qp_qtoui";
	  break;
	case E_DImode:
	  func = "_Qp_qtoux";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  emit_soft_tfmode_libcall (func, 2, operands);
}

/* Expand a hard-float tfmode operation.  All arguments must be in
   registers.  */

static void
emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
{
  rtx op, dest;

  if (GET_RTX_CLASS (code) == RTX_UNARY)
    {
      operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
      op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
    }
  else
    {
      operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
      operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
      op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
			   operands[1], operands[2]);
    }

  if (register_operand (operands[0], VOIDmode))
    dest = operands[0];
  else
    dest = gen_reg_rtx (GET_MODE (operands[0]));

  emit_insn (gen_rtx_SET (dest, op));

  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
}

/* Expand a TFmode binary operation, using hardware quad insns if
   available and library calls otherwise.  */

void
emit_tfmode_binop (enum rtx_code code, rtx *operands)
{
  if (TARGET_HARD_QUAD)
    emit_hard_tfmode_operation (code, operands);
  else
    emit_soft_tfmode_binop (code, operands);
}

/* Likewise for a TFmode unary operation.  */

void
emit_tfmode_unop (enum rtx_code code, rtx *operands)
{
  if (TARGET_HARD_QUAD)
    emit_hard_tfmode_operation (code, operands);
  else
    emit_soft_tfmode_unop (code, operands);
}

/* Likewise for a TFmode conversion.  */

void
emit_tfmode_cvt (enum rtx_code code, rtx *operands)
{
  if (TARGET_HARD_QUAD)
    emit_hard_tfmode_operation (code, operands);
  else
    emit_soft_tfmode_cvt (code, operands);
}

/* Return nonzero if a branch/jump/call instruction will be emitting
   nop into its delay slot.  */

int
empty_delay_slot (rtx_insn *insn)
{
  rtx seq;

  /* If no previous instruction (should not happen), return true.  */
  if (PREV_INSN (insn) == NULL)
    return 1;

  /* A filled delay slot shows up as a SEQUENCE wrapping INSN.  */
  seq = NEXT_INSN (PREV_INSN (insn));
  if (GET_CODE (PATTERN (seq)) == SEQUENCE)
    return 0;

  return 1;
}

/* Return nonzero if we should emit a nop after a cbcond instruction.
   The cbcond instruction does not have a delay slot, however there is
   a severe performance penalty if a control transfer appears right
   after a cbcond.  Therefore we emit a nop when we detect this
   situation.  */

int
emit_cbcond_nop (rtx_insn *insn)
{
  rtx next = next_active_insn (insn);

  if (!next)
    return 1;

  if (NONJUMP_INSN_P (next)
      && GET_CODE (PATTERN (next)) == SEQUENCE)
    next = XVECEXP (PATTERN (next), 0, 0);
  else if (CALL_P (next)
	   && GET_CODE (PATTERN (next)) == PARALLEL)
    {
      rtx delay = XVECEXP (PATTERN (next), 0, 1);

      if (GET_CODE (delay) == RETURN)
	{
	  /* It's a sibling call.  Do not emit the nop if we're going
	     to emit something other than the jump itself as the first
	     instruction of the sibcall sequence.  */
	  if (sparc_leaf_function_p || TARGET_FLAT)
	    return 0;
	}
    }

  if (NONJUMP_INSN_P (next))
    return 0;

  return 1;
}

/* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
   instruction.  RETURN_P is true if the v9 variant 'return' is to be
   considered in the test too.

   TRIAL must be a SET whose destination is a REG appropriate for the
   'restore' instruction or, if RETURN_P is true, for the 'return'
   instruction.
 */

static int
eligible_for_restore_insn (rtx trial, bool return_p)
{
  rtx pat = PATTERN (trial);
  rtx src = SET_SRC (pat);
  bool src_is_freg = false;
  rtx src_reg;

  /* Since we now can do moves between float and integer registers when
     VIS3 is enabled, we have to catch this case.  We can allow such
     moves when doing a 'return' however.  */
  src_reg = src;
  if (GET_CODE (src_reg) == SUBREG)
    src_reg = SUBREG_REG (src_reg);
  if (GET_CODE (src_reg) == REG
      && SPARC_FP_REG_P (REGNO (src_reg)))
    src_is_freg = true;

  /* The 'restore src,%g0,dest' pattern for word mode and below.  */
  if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
      && arith_operand (src, GET_MODE (src))
      && ! src_is_freg)
    {
      if (TARGET_ARCH64)
	return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
      else
	return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
    }

  /* The 'restore src,%g0,dest' pattern for double-word mode.  */
  else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
	   && arith_double_operand (src, GET_MODE (src))
	   && ! src_is_freg)
    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);

  /* The 'restore src,%g0,dest' pattern for float if no FPU.  */
  else if (! TARGET_FPU && register_operand (src, SFmode))
    return 1;

  /* The 'restore src,%g0,dest' pattern for double if no FPU.  */
  else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
    return 1;

  /* If we have the 'return' instruction, anything that does not use
     local or output registers and can go into a delay slot wins.  */
  else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
    return 1;

  /* The 'restore src1,src2,dest' pattern for SImode.  */
  else if (GET_CODE (src) == PLUS
	   && register_operand (XEXP (src, 0), SImode)
	   && arith_operand (XEXP (src, 1), SImode))
    return 1;

  /* The 'restore src1,src2,dest' pattern for DImode.  */
  else if (GET_CODE (src) == PLUS
	   && register_operand (XEXP (src, 0), DImode)
	   && arith_double_operand (XEXP (src, 1), DImode))
    return 1;

  /* The 'restore src1,%lo(src2),dest' pattern.  */
  else if (GET_CODE (src) == LO_SUM
	   && ! TARGET_CM_MEDMID
	   && ((register_operand (XEXP (src, 0), SImode)
		&& immediate_operand (XEXP (src, 1), SImode))
	       || (TARGET_ARCH64
		   && register_operand (XEXP (src, 0), DImode)
		   && immediate_operand (XEXP (src, 1), DImode))))
    return 1;

  /* The 'restore src,src,dest' pattern: a shift left by 1 is the
     same as src + src.  */
  else if (GET_CODE (src) == ASHIFT
	   && (register_operand (XEXP (src, 0), SImode)
	       || register_operand (XEXP (src, 0), DImode))
	   && XEXP (src, 1) == const1_rtx)
    return 1;

  return 0;
}

/* Return nonzero if TRIAL can go into the function return's delay slot.  */

int
eligible_for_return_delay (rtx_insn *trial)
{
  int regno;
  rtx pat;

  /* If the function uses __builtin_eh_return, the eh_return machinery
     occupies the delay slot.  */
  if (crtl->calls_eh_return)
    return 0;

  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
    return 0;

  /* In the case of a leaf or flat function, anything can go into the slot.  */
  if (sparc_leaf_function_p || TARGET_FLAT)
    return 1;

  if (!NONJUMP_INSN_P (trial))
    return 0;

  pat = PATTERN (trial);
  if (GET_CODE (pat) == PARALLEL)
    {
      int i;

      /* A PARALLEL is only eligible on V9, and every SET in it must
	 avoid the window registers checked below.  */
      if (! TARGET_V9)
	return 0;
      for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
	{
	  rtx expr = XVECEXP (pat, 0, i);
	  if (GET_CODE (expr) != SET)
	    return 0;
	  if (GET_CODE (SET_DEST (expr)) != REG)
	    return 0;
	  regno = REGNO (SET_DEST (expr));
	  /* Regnos 8-23 are the %o and %l registers, which are
	     affected by the register window switch.  */
	  if (regno >= 8 && regno < 24)
	    return 0;
	}
      return !epilogue_renumber (&pat, 1);
    }

  if (GET_CODE (pat) != SET)
    return 0;

  if (GET_CODE (SET_DEST (pat)) != REG)
    return 0;

  regno = REGNO (SET_DEST (pat));

  /* Otherwise, only operations which can be done in tandem with
     a `restore' or `return' insn can go into the delay slot.  */
  if (regno >= 8 && regno < 24)
    return 0;

  /* If this instruction sets up floating point register and we have a return
     instruction, it can probably go in.  But restore will not work
     with FP_REGS.  */
  if (! SPARC_INT_REG_P (regno))
    return TARGET_V9 && !epilogue_renumber (&pat, 1);

  return eligible_for_restore_insn (trial, true);
}

/* Return nonzero if TRIAL can go into the sibling call's delay slot.  */

int
eligible_for_sibcall_delay (rtx_insn *trial)
{
  rtx pat;

  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
    return 0;

  if (!NONJUMP_INSN_P (trial))
    return 0;

  pat = PATTERN (trial);

  if (sparc_leaf_function_p || TARGET_FLAT)
    {
      /* If the tail call is done using the call instruction,
	 we have to restore %o7 in the delay slot.  */
      if (LEAF_SIBCALL_SLOT_RESERVED_P)
	return 0;

      /* %g1 is used to build the function address */
      if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
	return 0;

      return 1;
    }

  if (GET_CODE (pat) != SET)
    return 0;

  /* Otherwise, only operations which can be done in tandem with
     a `restore' insn can go into the delay slot.  */
  if (GET_CODE (SET_DEST (pat)) != REG
      || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
      || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
    return 0;

  /* If it mentions %o7, it can't go in, because sibcall will clobber it
     in most cases.  */
  if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
    return 0;

  return eligible_for_restore_insn (trial, false);
}

/* Determine if it's legal to put X into the constant pool.  This
   is not possible if X contains the address of a symbol that is
   not constant (TLS) or not known at final link time (PIC).  */

static bool
sparc_cannot_force_const_mem (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_WIDE_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      /* Accept all non-symbolic constants.  */
      return false;

    case LABEL_REF:
      /* Labels are OK iff we are non-PIC.  */
      return flag_pic != 0;

    case SYMBOL_REF:
      /* 'Naked' TLS symbol references are never OK,
	 non-TLS symbols are OK iff we are non-PIC.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return true;
      else
	return flag_pic != 0;

    case CONST:
      return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
    case PLUS:
    case MINUS:
      /* A compound is rejected if either of its halves is.  */
      return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
	 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
    case UNSPEC:
      return true;
    default:
      gcc_unreachable ();
    }
}

/* Global Offset Table support.  */
static GTY(()) rtx got_symbol_rtx = NULL_RTX;
static GTY(()) rtx got_register_rtx = NULL_RTX;
static GTY(()) rtx got_helper_rtx = NULL_RTX;

static GTY(()) bool got_helper_needed = false;

/* Return the SYMBOL_REF for the Global Offset Table.
*/

static rtx
sparc_got (void)
{
  /* Lazily create and cache the SYMBOL_REF; got_symbol_rtx is a GTY root
     (declared above) so the cached node survives garbage collection.  */
  if (!got_symbol_rtx)
    got_symbol_rtx = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

  return got_symbol_rtx;
}

/* Wrapper around the load_pcrel_sym{si,di} patterns.
   OP0 is the destination register, OP1 the symbol whose address is to be
   loaded and OP2 the helper routine that the emitted sequence calls.  */

static rtx
gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2)
{
  int orig_flag_pic = flag_pic;
  rtx insn;

  /* The load_pcrel_sym{si,di} patterns require absolute addressing, so
     temporarily pretend we are not generating PIC; restored below.  */
  flag_pic = 0;
  if (TARGET_ARCH64)
    insn = gen_load_pcrel_symdi (op0, op1, op2, GEN_INT (REGNO (op0)));
  else
    insn = gen_load_pcrel_symsi (op0, op1, op2, GEN_INT (REGNO (op0)));
  flag_pic = orig_flag_pic;

  return insn;
}

/* Output the load_pcrel_sym{si,di} patterns.
   Emits a sethi/call/add sequence computing operand 0 from the PC-relative
   symbol operand 1, calling helper operand 2.  The +/-4 and -8 byte offsets
   compensate for the distance between the sethi/add pair and the call
   instruction whose address the sequence is relative to — NOTE(review):
   exact offset rationale inferred from the two layouts; confirm against the
   machine description.  */

const char *
output_load_pcrel_sym (rtx *operands)
{
  if (flag_delayed_branch)
    {
      /* Three-insn form: the add sits in the call's delay slot.  */
      output_asm_insn ("sethi\t%%hi(%a1-4), %0", operands);
      output_asm_insn ("call\t%a2", operands);
      output_asm_insn (" add\t%0, %%lo(%a1+4), %0", operands);
    }
  else
    {
      /* No delay slots: the add precedes the call and a nop fills the slot.  */
      output_asm_insn ("sethi\t%%hi(%a1-8), %0", operands);
      output_asm_insn ("add\t%0, %%lo(%a1-4), %0", operands);
      output_asm_insn ("call\t%a2", operands);
      output_asm_insn (" nop", NULL);
    }

  /* Record that the GOT helper thunk really is referenced, so it gets
     emitted at the end of compilation.  */
  if (operands[2] == got_helper_rtx)
    got_helper_needed = true;

  return "";
}

#ifdef HAVE_GAS_HIDDEN
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Emit code to load the GOT register.
*/

void
load_got_register (void)
{
  rtx insn;

  if (TARGET_VXWORKS_RTP)
    {
      if (!got_register_rtx)
	got_register_rtx = pic_offset_table_rtx;

      insn = gen_vxworks_load_got ();
    }
  else
    {
      if (!got_register_rtx)
	got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);

      /* The GOT symbol is subject to a PC-relative relocation so we need a
	 helper function to add the PC value and thus get the final value.  */
      if (!got_helper_rtx)
	{
	  char name[32];

	  /* Skip the leading '%' as that cannot be used in a symbol name.  */
	  if (USE_HIDDEN_LINKONCE)
	    sprintf (name, "__sparc_get_pc_thunk.%s",
		     reg_names[REGNO (got_register_rtx)] + 1);
	  else
	    ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC",
					 REGNO (got_register_rtx));

	  got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
	}

      insn
	= gen_load_pcrel_sym (got_register_rtx, sparc_got (), got_helper_rtx);
    }

  emit_insn (insn);
}

/* Ensure that we are not using patterns that are not OK with PIC.
   Only does real checking for flag_pic == 1 (sun style -fpic); it
   asserts that operand I is not a naked SYMBOL_REF or an unexpected
   CONST.  Always returns 1.  */

int
check_pic (int i)
{
  rtx op;

  switch (flag_pic)
    {
    case 1:
      op = recog_data.operand[i];
      gcc_assert (GET_CODE (op) != SYMBOL_REF
		  && (GET_CODE (op) != CONST
		      || (GET_CODE (XEXP (op, 0)) == MINUS
			  && XEXP (XEXP (op, 0), 0) == sparc_got ()
			  && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
      /* fallthrough */
    case 2:
    default:
      return 1;
    }
}

/* Return true if X is an address which needs a temporary register when
   reloaded while generating PIC code.  */

int
pic_address_needs_scratch (rtx x)
{
  /* An address which is a symbolic plus a non SMALL_INT needs a temp reg,
     i.e. (const (plus (symbol_ref) (const_int))) with an offset that does
     not fit in a 13-bit signed immediate.  */
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
    return 1;

  return 0;
}

/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
sparc_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
    case SYMBOL_REF:
      /* TLS symbols need a specific legitimization sequence and can never
	 be used as immediate constants.  */
      if (sparc_tls_referenced_p (x))
	return false;
      break;

    case CONST_DOUBLE:
      /* Floating point constants are generally not ok.
	 The only exception is 0.0 and all-ones in VIS.  */
      if (TARGET_VIS
	  && SCALAR_FLOAT_MODE_P (mode)
	  && (const_zero_operand (x, mode)
	      || const_all_ones_operand (x, mode)))
	return true;

      return false;

    case CONST_VECTOR:
      /* Vector constants are generally not ok.
	 The only exception is 0 or -1 in VIS.  */
      if (TARGET_VIS
	  && (const_zero_operand (x, mode)
	      || const_all_ones_operand (x, mode)))
	return true;

      return false;

    default:
      break;
    }

  return true;
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case LABEL_REF:
    case CONST_INT:
    case HIGH:
      return true;

    case CONST:
      if (flag_pic && pic_address_needs_scratch (x))
	return false;
      return sparc_legitimate_constant_p (Pmode, x);

    case SYMBOL_REF:
      /* Symbol addresses are never constant addresses under PIC.  */
      return !flag_pic && sparc_legitimate_constant_p (Pmode, x);

    default:
      return false;
    }
}

/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P.
*/

bool
legitimate_pic_operand_p (rtx x)
{
  if (pic_address_needs_scratch (x))
    return false;
  if (sparc_tls_referenced_p (x))
    return false;
  return true;
}

/* Return true if X is a representation of the PIC register.  */

static bool
sparc_pic_register_p (rtx x)
{
  if (!REG_P (x) || !pic_offset_table_rtx)
    return false;

  if (x == pic_offset_table_rtx)
    return true;

  /* After register allocation the PIC pseudo may have been assigned a hard
     register; recognize it through ORIGINAL_REGNO in that case.  */
  if (!HARD_REGISTER_P (pic_offset_table_rtx)
      && (HARD_REGISTER_P (x) || lra_in_progress || reload_in_progress)
      && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
    return true;

  return false;
}

/* 13-bit signed immediate displacement that keeps the whole access
   (address + mode size) inside the addressable range.  */
#define RTX_OK_FOR_OFFSET_P(X, MODE)			\
  (CONST_INT_P (X)					\
   && INTVAL (X) >= -0x1000				\
   && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))

/* Same but for the %lo()+offset (olo10) form, which has a smaller
   positive range.  */
#define RTX_OK_FOR_OLO10_P(X, MODE)			\
  (CONST_INT_P (X)					\
   && INTVAL (X) >= -0x1000				\
   && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))

/* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.

   On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
   ordinarily.  This changes a bit when generating PIC.  */

static bool
sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
{
  rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;

  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
    rs1 = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rs1 = XEXP (addr, 0);
      rs2 = XEXP (addr, 1);

      /* Canonicalize.  REG comes first, if there are no regs,
	 LO_SUM comes first.  */
      if (!REG_P (rs1)
	  && GET_CODE (rs1) != SUBREG
	  && (REG_P (rs2)
	      || GET_CODE (rs2) == SUBREG
	      || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
	{
	  rs1 = XEXP (addr, 1);
	  rs2 = XEXP (addr, 0);
	}

      /* Either PIC-register + legitimate PIC operand, or REG + 13-bit
	 displacement; both are base + immediate forms.  */
      if ((flag_pic == 1
	   && sparc_pic_register_p (rs1)
	   && !REG_P (rs2)
	   && GET_CODE (rs2) != SUBREG
	   && GET_CODE (rs2) != LO_SUM
	   && GET_CODE (rs2) != MEM
	   && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
	   && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
	   && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
	  || ((REG_P (rs1)
	       || GET_CODE (rs1) == SUBREG)
	      && RTX_OK_FOR_OFFSET_P (rs2, mode)))
	{
	  imm1 = rs2;
	  rs2 = NULL;
	}
      else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
	       && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
	{
	  /* We prohibit REG + REG for TFmode when there are no quad move insns
	     and we consequently need to split.  We do this because REG+REG
	     is not an offsettable address.  If we get the situation in reload
	     where source and destination of a movtf pattern are both MEMs with
	     REG+REG address, then only one of them gets converted to an
	     offsettable address.  */
	  if (mode == TFmode
	      && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
	    return 0;

	  /* Likewise for TImode, but in all cases.  */
	  if (mode == TImode)
	    return 0;

	  /* We prohibit REG + REG on ARCH32 if not optimizing for
	     DFmode/DImode because then mem_min_alignment is likely to be zero
	     after reload and the forced split would lack a matching splitter
	     pattern.  */
	  if (TARGET_ARCH32 && !optimize
	      && (mode == DFmode || mode == DImode))
	    return 0;
	}
      else if (USE_AS_OFFSETABLE_LO10
	       && GET_CODE (rs1) == LO_SUM
	       && TARGET_ARCH64
	       && ! TARGET_CM_MEDMID
	       && RTX_OK_FOR_OLO10_P (rs2, mode))
	{
	  /* (LO_SUM reg sym) + small offset: fold into base + imm form.  */
	  rs2 = NULL;
	  imm1 = XEXP (rs1, 1);
	  rs1 = XEXP (rs1, 0);
	  if (!CONSTANT_P (imm1)
	      || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
	    return 0;
	}
    }
  else if (GET_CODE (addr) == LO_SUM)
    {
      rs1 = XEXP (addr, 0);
      imm1 = XEXP (addr, 1);

      if (!CONSTANT_P (imm1)
	  || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
	return 0;

      /* We can't allow TFmode in 32-bit mode, because an offset greater
	 than the alignment (8) may cause the LO_SUM to overflow.  */
      if (mode == TFmode && TARGET_ARCH32)
	return 0;

      /* During reload, accept the HIGH+LO_SUM construct generated by
	 sparc_legitimize_reload_address.  */
      if (reload_in_progress
	  && GET_CODE (rs1) == HIGH
	  && XEXP (rs1, 0) == imm1)
	return 1;
    }
  else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
    return 1;
  else
    return 0;

  /* From here on, validate that the base (and index) really are registers
     acceptable in an address.  */
  if (GET_CODE (rs1) == SUBREG)
    rs1 = SUBREG_REG (rs1);
  if (!REG_P (rs1))
    return 0;

  if (rs2)
    {
      if (GET_CODE (rs2) == SUBREG)
	rs2 = SUBREG_REG (rs2);
      if (!REG_P (rs2))
	return 0;
    }

  if (strict)
    {
      if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
	  || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
	return 0;
    }
  else
    {
      /* Non-strict: accept pseudos and the frame pointer; reject hard
	 registers that are not integer registers.  */
      if ((! SPARC_INT_REG_P (REGNO (rs1))
	   && REGNO (rs1) != FRAME_POINTER_REGNUM
	   && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
	  || (rs2
	      && (! SPARC_INT_REG_P (REGNO (rs2))
		  && REGNO (rs2) != FRAME_POINTER_REGNUM
		  && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
	return 0;
    }
  return 1;
}

/* Return the SYMBOL_REF for the tls_get_addr function.
*/ 4685 4686static GTY(()) rtx sparc_tls_symbol = NULL_RTX; 4687 4688static rtx 4689sparc_tls_get_addr (void) 4690{ 4691 if (!sparc_tls_symbol) 4692 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr"); 4693 4694 return sparc_tls_symbol; 4695} 4696 4697/* Return the Global Offset Table to be used in TLS mode. */ 4698 4699static rtx 4700sparc_tls_got (void) 4701{ 4702 /* In PIC mode, this is just the PIC offset table. */ 4703 if (flag_pic) 4704 { 4705 crtl->uses_pic_offset_table = 1; 4706 return pic_offset_table_rtx; 4707 } 4708 4709 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for 4710 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */ 4711 if (TARGET_SUN_TLS && TARGET_ARCH32) 4712 { 4713 load_got_register (); 4714 return got_register_rtx; 4715 } 4716 4717 /* In all other cases, we load a new pseudo with the GOT symbol. */ 4718 return copy_to_reg (sparc_got ()); 4719} 4720 4721/* Return true if X contains a thread-local symbol. */ 4722 4723static bool 4724sparc_tls_referenced_p (rtx x) 4725{ 4726 if (!TARGET_HAVE_TLS) 4727 return false; 4728 4729 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) 4730 x = XEXP (XEXP (x, 0), 0); 4731 4732 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x)) 4733 return true; 4734 4735 /* That's all we handle in sparc_legitimize_tls_address for now. */ 4736 return false; 4737} 4738 4739/* ADDR contains a thread-local SYMBOL_REF. Generate code to compute 4740 this (thread-local) address. 
*/

static rtx
sparc_legitimize_tls_address (rtx addr)
{
  rtx temp1, temp2, temp3, ret, o0, got;
  rtx_insn *insn;

  gcc_assert (can_create_pseudo_p ());

  if (GET_CODE (addr) == SYMBOL_REF)
    /* Although the various sethi/or sequences generate SImode values, many of
       them can be transformed by the linker when relaxing and, if relaxing to
       local-exec, will become a sethi/xor pair, which is signed and therefore
       a full DImode value in 64-bit mode.  Thus we must use Pmode, lest these
       values be spilled onto the stack in 64-bit mode.  */
    switch (SYMBOL_REF_TLS_MODEL (addr))
      {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	/* GD: build the argument in %o0 and call __tls_get_addr.  */
	start_sequence ();
	temp1 = gen_reg_rtx (Pmode);
	temp2 = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	o0 = gen_rtx_REG (Pmode, 8);
	got = sparc_tls_got ();
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tgd_hi22si (temp1, addr));
	    emit_insn (gen_tgd_lo10si (temp2, temp1, addr));
	    emit_insn (gen_tgd_addsi (o0, got, temp2, addr));
	    insn = emit_call_insn (gen_tgd_callsi (o0, sparc_tls_get_addr (),
						   addr, const1_rtx));
	  }
	else
	  {
	    emit_insn (gen_tgd_hi22di (temp1, addr));
	    emit_insn (gen_tgd_lo10di (temp2, temp1, addr));
	    emit_insn (gen_tgd_adddi (o0, got, temp2, addr));
	    insn = emit_call_insn (gen_tgd_calldi (o0, sparc_tls_get_addr (),
						   addr, const1_rtx));
	  }
	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
	RTL_CONST_CALL_P (insn) = 1;
	insn = get_insns ();
	end_sequence ();
	emit_libcall_block (insn, ret, o0, addr);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	/* LD: one __tls_get_addr call yields the module base; individual
	   symbols are offsets from it.  */
	start_sequence ();
	temp1 = gen_reg_rtx (Pmode);
	temp2 = gen_reg_rtx (Pmode);
	temp3 = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	o0 = gen_rtx_REG (Pmode, 8);
	got = sparc_tls_got ();
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tldm_hi22si (temp1));
	    emit_insn (gen_tldm_lo10si (temp2, temp1));
	    emit_insn (gen_tldm_addsi (o0, got, temp2));
	    insn = emit_call_insn (gen_tldm_callsi (o0, sparc_tls_get_addr (),
						    const1_rtx));
	  }
	else
	  {
	    emit_insn (gen_tldm_hi22di (temp1));
	    emit_insn (gen_tldm_lo10di (temp2, temp1));
	    emit_insn (gen_tldm_adddi (o0, got, temp2));
	    insn = emit_call_insn (gen_tldm_calldi (o0, sparc_tls_get_addr (),
						    const1_rtx));
	  }
	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
	RTL_CONST_CALL_P (insn) = 1;
	insn = get_insns ();
	end_sequence ();
	/* Attach a unique REG_EQUAL, to allow the RTL optimizers to
	   share the LD_BASE result with other LD model accesses.  */
	emit_libcall_block (insn, temp3, o0,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					    UNSPEC_TLSLD_BASE));
	temp1 = gen_reg_rtx (Pmode);
	temp2 = gen_reg_rtx (Pmode);
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tldo_hix22si (temp1, addr));
	    emit_insn (gen_tldo_lox10si (temp2, temp1, addr));
	    emit_insn (gen_tldo_addsi (ret, temp3, temp2, addr));
	  }
	else
	  {
	    emit_insn (gen_tldo_hix22di (temp1, addr));
	    emit_insn (gen_tldo_lox10di (temp2, temp1, addr));
	    emit_insn (gen_tldo_adddi (ret, temp3, temp2, addr));
	  }
	break;

      case TLS_MODEL_INITIAL_EXEC:
	/* IE: load the TP offset from the GOT and add the thread pointer
	   (%g7, register 7).  */
	temp1 = gen_reg_rtx (Pmode);
	temp2 = gen_reg_rtx (Pmode);
	temp3 = gen_reg_rtx (Pmode);
	got = sparc_tls_got ();
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tie_hi22si (temp1, addr));
	    emit_insn (gen_tie_lo10si (temp2, temp1, addr));
	    emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
	  }
	else
	  {
	    emit_insn (gen_tie_hi22di (temp1, addr));
	    emit_insn (gen_tie_lo10di (temp2, temp1, addr));
	    emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
	  }
	if (TARGET_SUN_TLS)
	  {
	    ret = gen_reg_rtx (Pmode);
	    if (TARGET_ARCH32)
	      emit_insn (gen_tie_addsi (ret, gen_rtx_REG (Pmode, 7),
					temp3, addr));
	    else
	      emit_insn (gen_tie_adddi (ret, gen_rtx_REG (Pmode, 7),
					temp3, addr));
	  }
	else
	  ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
	break;

      case TLS_MODEL_LOCAL_EXEC:
	/* LE: the offset is a link-time constant; add it to %g7 directly.  */
	temp1 = gen_reg_rtx (Pmode);
	temp2 = gen_reg_rtx (Pmode);
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tle_hix22si (temp1, addr));
	    emit_insn (gen_tle_lox10si (temp2, temp1, addr));
	  }
	else
	  {
	    emit_insn (gen_tle_hix22di (temp1, addr));
	    emit_insn (gen_tle_lox10di (temp2, temp1, addr));
	  }
	ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
	break;

      default:
	gcc_unreachable ();
      }

  else if (GET_CODE (addr) == CONST)
    {
      rtx base, offset;

      gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);

      /* Legitimize the symbolic part, then re-add the offset.  */
      base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
      offset = XEXP (XEXP (addr, 0), 1);

      base = force_operand (base, NULL_RTX);
      if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
	offset = force_reg (Pmode, offset);
      ret = gen_rtx_PLUS (Pmode, base, offset);
    }

  else
    gcc_unreachable (); /* for now ... */

  return ret;
}

/* Legitimize PIC addresses.  If the address is already position-independent,
   we return ORIG.  Newly generated position-independent addresses go into a
   reg.  This is REG if nonzero, otherwise we allocate register(s) as
   necessary.  */

static rtx
sparc_legitimize_pic_address (rtx orig, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      /* See the comment in sparc_expand_move.  */
      || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
    {
      bool gotdata_op = false;
      rtx pic_ref, address;
      rtx_insn *insn;

      if (!reg)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      if (flag_pic == 2)
	{
	  /* If not during reload, allocate another temp reg here for loading
	     in the address, so that these instructions can be optimized
	     properly.  */
	  rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;

	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
	     won't get confused into thinking that these two instructions
	     are loading in the true address of the symbol.  If in the
	     future a PIC rtx exists, that should be used instead.  */
	  if (TARGET_ARCH64)
	    {
	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }
	  else
	    {
	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }

	  address = temp_reg;
	  gotdata_op = true;
	}
      else
	address = orig;

      crtl->uses_pic_offset_table = 1;
      if (gotdata_op)
	{
	  if (TARGET_ARCH64)
	    insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
							pic_offset_table_rtx,
							address, orig));
	  else
	    insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
							pic_offset_table_rtx,
							address, orig));
	}
      else
	{
	  /* Plain GOT load: MEM (pic_register + address).  */
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_PLUS (Pmode,
					   pic_offset_table_rtx, address));
	  insn = emit_move_insn (reg, pic_ref);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      /* Already in pic_register + offset form: nothing to do.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0)))
	return orig;

      if (!reg)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
      base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
      offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
					     base == reg ? NULL_RTX : reg);

      if (GET_CODE (offset) == CONST_INT)
	{
	  if (SMALL_INT (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	  else if (can_create_pseudo_p ())
	    offset = force_reg (Pmode, offset);
	  else
	    /* If we reach here, then something is seriously wrong.  */
	    gcc_unreachable ();
	}
      return gen_rtx_PLUS (Pmode, base, offset);
    }
  else if (GET_CODE (orig) == LABEL_REF)
    /* ??? We ought to be checking that the register is live instead, in case
       it is eliminated.  */
    crtl->uses_pic_offset_table = 1;

  return orig;
}

/* Try machine-dependent ways of modifying an illegitimate address X
   to be legitimate.  If we find one, return the new, valid address.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE is the mode of the operand pointed to by X.

   On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG.
*/

static rtx
sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			  machine_mode mode)
{
  rtx orig_x = x;

  /* Force MULT and nested PLUS subexpressions into registers so the
     address collapses to REG+REG form.  */
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
		      force_operand (XEXP (x, 0), NULL_RTX));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      force_operand (XEXP (x, 1), NULL_RTX));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
    x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
		      XEXP (x, 1));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      force_operand (XEXP (x, 1), NULL_RTX));

  /* If the rewrites above already produced a valid address, stop here.  */
  if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
    return x;

  if (sparc_tls_referenced_p (x))
    x = sparc_legitimize_tls_address (x);
  else if (flag_pic)
    x = sparc_legitimize_pic_address (x, NULL_RTX);
  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      copy_to_mode_reg (Pmode, XEXP (x, 1)));
  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
		      copy_to_mode_reg (Pmode, XEXP (x, 0)));
  else if (GET_CODE (x) == SYMBOL_REF
	   || GET_CODE (x) == CONST
	   || GET_CODE (x) == LABEL_REF)
    x = copy_to_suggested_reg (x, NULL_RTX, Pmode);

  return x;
}

/* Delegitimize an address that was legitimized by the above function.
*/

static rtx
sparc_delegitimize_address (rtx x)
{
  x = delegitimize_mem_from_attrs (x);

  /* Strip a LO_SUM wrapper to look at the symbolic part.  */
  if (GET_CODE (x) == LO_SUM)
    x = XEXP (x, 1);

  /* Unwrap the UNSPECs used by the PIC/TLS legitimization routines.  */
  if (GET_CODE (x) == UNSPEC)
    switch (XINT (x, 1))
      {
      case UNSPEC_MOVE_PIC:
      case UNSPEC_TLSLE:
	x = XVECEXP (x, 0, 0);
	gcc_assert (GET_CODE (x) == SYMBOL_REF);
	break;
      case UNSPEC_MOVE_GOTDATA:
	/* The original symbol is the third element of this UNSPEC.  */
	x = XVECEXP (x, 0, 2);
	gcc_assert (GET_CODE (x) == SYMBOL_REF);
	break;
      default:
	break;
      }

  /* This is generated by mov{si,di}_pic_label_ref in PIC mode.  */
  if (GET_CODE (x) == MINUS
      && (XEXP (x, 0) == got_register_rtx
	  || sparc_pic_register_p (XEXP (x, 0))))
    {
      rtx y = XEXP (x, 1);

      if (GET_CODE (y) == LO_SUM)
	y = XEXP (y, 1);

      if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MOVE_PIC_LABEL)
	{
	  x = XVECEXP (y, 0, 0);
	  gcc_assert (GET_CODE (x) == LABEL_REF
		      || (GET_CODE (x) == CONST
			  && GET_CODE (XEXP (x, 0)) == PLUS
			  && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
			  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
	}
    }

  return x;
}

/* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
   replace the input X, or the original X if no replacement is called for.
   The output parameter *WIN is 1 if the calling macro should goto WIN,
   0 if it should not.

   For SPARC, we wish to handle addresses by splitting them into
   HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
   This cuts the number of extra insns by one.

   Do nothing when generating PIC code and the address is a symbolic
   operand or requires a scratch register.  */

rtx
sparc_legitimize_reload_address (rtx x, machine_mode mode,
				 int opnum, int type,
				 int ind_levels ATTRIBUTE_UNUSED, int *win)
{
  /* Decompose SImode constants into HIGH+LO_SUM.  */
  if (CONSTANT_P (x)
      && (mode != TFmode || TARGET_ARCH64)
      && GET_MODE (x) == SImode
      && GET_CODE (x) != LO_SUM
      && GET_CODE (x) != HIGH
      && sparc_code_model <= CM_MEDLOW
      && !(flag_pic
	   && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
    {
      x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
      /* Reload the HIGH part into a base register.  */
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type)type);
      *win = 1;
      return x;
    }

  /* We have to recognize what we have already generated above.  */
  if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type)type);
      *win = 1;
      return x;
    }

  *win = 0;
  return x;
}

/* Return true if ADDR (a legitimate address expression)
   has an effect that depends on the machine mode it is used for.

   In PIC mode,

      (mem:HI [%l7+a])

   is not equivalent to

      (mem:QI [%l7+a]) (mem:QI [%l7+a+1])

   because [%l7+a+1] is interpreted as the address of (a+1).  */

static bool
sparc_mode_dependent_address_p (const_rtx addr,
				addr_space_t as ATTRIBUTE_UNUSED)
{
  if (GET_CODE (addr) == PLUS
      && sparc_pic_register_p (XEXP (addr, 0))
      && symbolic_operand (XEXP (addr, 1), VOIDmode))
    return true;

  return false;
}

/* Emit a call instruction with the pattern given by PAT.  ADDR is the
   address of the call target.  */

void
sparc_emit_call_insn (rtx pat, rtx addr)
{
  rtx_insn *insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries, so mark
     it used on calls resolved through the PLT (non-local symbols).  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
      crtl->uses_pic_offset_table = 1;
    }
}

/* Return 1 if RTX is a MEM which is known to be aligned to at
   least a DESIRED byte boundary.  */

int
mem_min_alignment (rtx mem, int desired)
{
  rtx addr, base, offset;

  /* If it's not a MEM we can't accept it.  */
  if (GET_CODE (mem) != MEM)
    return 0;

  /* Obviously...  */
  if (!TARGET_UNALIGNED_DOUBLES
      && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
    return 1;

  /* ??? The rest of the function predates MEM_ALIGN so
     there is probably a bit of redundancy.  */
  addr = XEXP (mem, 0);
  base = offset = NULL_RTX;
  if (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG)
	{
	  base = XEXP (addr, 0);

	  /* What we are saying here is that if the base
	     REG is aligned properly, the compiler will make
	     sure any REG based index upon it will be so
	     as well.  */
	  if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
	    offset = XEXP (addr, 1);
	  else
	    offset = const0_rtx;
	}
    }
  else if (GET_CODE (addr) == REG)
    {
      base = addr;
      offset = const0_rtx;
    }

  if (base != NULL_RTX)
    {
      int regno = REGNO (base);

      if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
	{
	  /* Check if the compiler has recorded some information
	     about the alignment of the base REG.  If reload has
	     completed, we already matched with proper alignments.
	     If not running global_alloc, reload might give us
	     unaligned pointer to local stack though.  */
	  if (((cfun != 0
		&& REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
	       || (optimize && reload_completed))
	      && (INTVAL (offset) & (desired - 1)) == 0)
	    return 1;
	}
      else
	{
	  /* Frame/stack pointer: account for the stack bias on 64-bit.  */
	  if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
	    return 1;
	}
    }
  else if (! TARGET_UNALIGNED_DOUBLES
	   || CONSTANT_P (addr)
	   || GET_CODE (addr) == LO_SUM)
    {
      /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
	 is true, in which case we can only assume that an access is aligned if
	 it is to a constant address, or the address involves a LO_SUM.  */
      return 1;
    }

  /* An obviously unaligned address.  */
  return 0;
}


/* Vectors to keep interesting information about registers where it can easily
   be got.  We used to use the actual mode value as the bit number, but there
   are more than 32 modes now.  Instead we use two tables: one indexed by
   hard register number, and one indexed by mode.  */

/* The purpose of sparc_mode_class is to shrink the range of modes so that
   they all fit (as bit numbers) in a 32-bit word (again).  Each real mode is
   mapped into one sparc_mode_class mode.  */

enum sparc_mode_class {
  H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
  CC_MODE, CCFP_MODE
};

/* Modes for single-word and smaller quantities.  */
#define S_MODES \
  ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-word and smaller quantities.  */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-word and smaller quantities.  */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))

/* Modes for 8-word and smaller quantities.
*/
#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))

/* Modes for single-float quantities.  */
#define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-float and smaller quantities.  */
#define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-float and smaller quantities.  */
#define TF_MODES (DF_MODES | (1 << (int) TF_MODE))

/* Modes for quad-float pairs and smaller quantities.  */
#define OF_MODES (TF_MODES | (1 << (int) OF_MODE))

/* Modes for double-float only quantities (no single-word/single-float);
   used for the upper FP registers, which cannot hold SFmode values.  */
#define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-float and double-float only quantities.  */
#define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))

/* Modes for quad-float pairs and double-float only quantities.  */
#define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))

/* Modes for condition codes.  */
#define CC_MODES (1 << (int) CC_MODE)
#define CCFP_MODES (1 << (int) CCFP_MODE)

/* Value is 1 if register/mode pair is acceptable on sparc.

   The funny mixture of D and T modes is because integer operations
   do not specially operate on tetra quantities, so non-quad-aligned
   registers can hold quadword quantities (except %o4 and %i4 because
   they cross fixed registers).

   ??? Note that, despite the settings, non-double-aligned parameter
   registers can hold double-word quantities in 32-bit mode.  */

/* This points to either the 32-bit or the 64-bit version.
*/
static const int *hard_regno_mode_classes;

static const int hard_32bit_mode_classes[] = {
  /* %g0-%g7, %o0-%o7, %l0-%l7, %i0-%i7 (32-bit view).  */
  S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,

  /* FP regs f0 to f31.  */
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc, %sfp, %gsr */
  CC_MODES, 0, D_MODES
};

static const int hard_64bit_mode_classes[] = {
  /* %g0-%g7, %o0-%o7, %l0-%l7, %i0-%i7 (64-bit view).  */
  D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,

  /* FP regs f0 to f31.  */
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc, %sfp, %gsr */
  CC_MODES, 0, D_MODES
};

/* Per-mode bitmask (one of the sparc_mode_class bits), filled in by
   sparc_init_modes.  */
static int sparc_mode_class [NUM_MACHINE_MODES];

enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];

/* Fill in sparc_mode_class for every machine mode, select the 32-bit or
   64-bit hard-register table, and initialize sparc_regno_reg_class.  */

static void
sparc_init_modes (void)
{
  int i;

  for (i = 0; i < NUM_MACHINE_MODES; i++)
    {
      machine_mode m = (machine_mode) i;
      unsigned int size = GET_MODE_SIZE (m);

      /* Map each real mode onto one sparc_mode_class bit by its class
	 and byte size.  Sizes with no class bit get 0 (not allowed in
	 any hard register).  */
      switch (GET_MODE_CLASS (m))
	{
	case MODE_INT:
	case MODE_PARTIAL_INT:
	case MODE_COMPLEX_INT:
	  if (size < 4)
	    sparc_mode_class[i] = 1 << (int) H_MODE;
	  else if (size == 4)
	    sparc_mode_class[i] = 1 << (int) S_MODE;
	  else if (size == 8)
	    sparc_mode_class[i] = 1 << (int) D_MODE;
	  else if (size == 16)
	    sparc_mode_class[i] = 1 << (int) T_MODE;
	  else if (size == 32)
	    sparc_mode_class[i] = 1 << (int) O_MODE;
	  else
	    sparc_mode_class[i] = 0;
	  break;
	case MODE_VECTOR_INT:
	  /* Vector-int modes live in the FP registers, hence SF/DF bits.  */
	  if (size == 4)
	    sparc_mode_class[i] = 1 << (int) SF_MODE;
	  else if (size == 8)
	    sparc_mode_class[i] = 1 << (int) DF_MODE;
	  else
	    sparc_mode_class[i] = 0;
	  break;
	case MODE_FLOAT:
	case MODE_COMPLEX_FLOAT:
	  if (size == 4)
	    sparc_mode_class[i] = 1 << (int) SF_MODE;
	  else if (size == 8)
	    sparc_mode_class[i] = 1 << (int) DF_MODE;
	  else if (size == 16)
	    sparc_mode_class[i] = 1 << (int) TF_MODE;
	  else if (size == 32)
	    sparc_mode_class[i] = 1 << (int) OF_MODE;
	  else
	    sparc_mode_class[i] = 0;
	  break;
	case MODE_CC:
	  if (m == CCFPmode || m == CCFPEmode)
	    sparc_mode_class[i] = 1 << (int) CCFP_MODE;
	  else
	    sparc_mode_class[i] = 1 << (int) CC_MODE;
	  break;
	default:
	  sparc_mode_class[i] = 0;
	  break;
	}
    }

  if (TARGET_ARCH64)
    hard_regno_mode_classes = hard_64bit_mode_classes;
  else
    hard_regno_mode_classes = hard_32bit_mode_classes;

  /* Initialize the array used by REGNO_REG_CLASS.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (i < 16 && TARGET_V8PLUS)
	sparc_regno_reg_class[i] = I64_REGS;
      else if (i < 32 || i == FRAME_POINTER_REGNUM)
	sparc_regno_reg_class[i] = GENERAL_REGS;
      else if (i < 64)
	sparc_regno_reg_class[i] = FP_REGS;
      else if (i < 96)
	sparc_regno_reg_class[i] = EXTRA_FP_REGS;
      else if (i < 100)
	sparc_regno_reg_class[i] = FPCC_REGS;
      else
	sparc_regno_reg_class[i] = NO_REGS;
    }
}

/* Return whether REGNO, a global or FP register, must be saved/restored.  */

static inline bool
save_global_or_fp_reg_p (unsigned int regno,
			 int leaf_function ATTRIBUTE_UNUSED)
{
  return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno);
}

/* Return whether the return address register (%i7) is needed.  */

static inline bool
return_addr_reg_needed_p (int leaf_function)
{
  /* If it is live, for example because of __builtin_return_address (0).  */
  if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
    return true;

  /* Otherwise, it is needed as save register if %o7 is clobbered.  */
  if (!leaf_function
      /* Loading the GOT register clobbers %o7.
*/
      || crtl->uses_pic_offset_table
      || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
    return true;

  return false;
}

/* Return whether REGNO, a local or in register, must be saved/restored.  */

static bool
save_local_or_in_reg_p (unsigned int regno, int leaf_function)
{
  /* General case: call-saved registers live at some point.  */
  if (!call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno))
    return true;

  /* Frame pointer register (%fp) if needed.  */
  if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
    return true;

  /* Return address register (%i7) if needed.  */
  if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
    return true;

  /* GOT register (%l7) if needed.  */
  if (got_register_rtx && regno == REGNO (got_register_rtx))
    return true;

  /* If the function accesses prior frames, the frame pointer and the return
     address of the previous frame must be saved on the stack.  */
  if (crtl->accesses_prior_frames
      && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
    return true;

  return false;
}

/* Compute the frame size required by the function.  This function is called
   during the reload pass and also by sparc_expand_prologue.  The result is
   also cached in the sparc_* globals below for use by the epilogue.  */

static HOST_WIDE_INT
sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
{
  HOST_WIDE_INT frame_size, apparent_frame_size;
  int args_size, n_global_fp_regs = 0;
  bool save_local_in_regs_p = false;
  unsigned int i;

  /* If the function allocates dynamic stack space, the dynamic offset is
     computed early and contains REG_PARM_STACK_SPACE, so we need to cope.  */
  if (leaf_function && !cfun->calls_alloca)
    args_size = 0;
  else
    args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);

  /* Calculate space needed for global registers.  In 64-bit mode each live
     global costs a full doubleword; in 32-bit mode globals are scanned in
     even/odd pairs so a live register in either half reserves the pair.  */
  if (TARGET_ARCH64)
    {
      for (i = 0; i < 8; i++)
	if (save_global_or_fp_reg_p (i, 0))
	  n_global_fp_regs += 2;
    }
  else
    {
      for (i = 0; i < 8; i += 2)
	if (save_global_or_fp_reg_p (i, 0)
	    || save_global_or_fp_reg_p (i + 1, 0))
	  n_global_fp_regs += 2;
    }

  /* In the flat window model, find out which local and in registers need to
     be saved.  We don't reserve space in the current frame for them as they
     will be spilled into the register window save area of the caller's frame.
     However, as soon as we use this register window save area, we must create
     that of the current frame to make it the live one.  */
  if (TARGET_FLAT)
    for (i = 16; i < 32; i++)
      if (save_local_or_in_reg_p (i, leaf_function))
	{
	  save_local_in_regs_p = true;
	  break;
	}

  /* Calculate space needed for FP registers.  */
  for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
    if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
      n_global_fp_regs += 2;

  if (size == 0
      && n_global_fp_regs == 0
      && args_size == 0
      && !save_local_in_regs_p)
    frame_size = apparent_frame_size = 0;
  else
    {
      /* Start from the apparent frame size.  */
      apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;

      /* We need to add the size of the outgoing argument area.  */
      frame_size = apparent_frame_size + ROUND_UP (args_size, 8);

      /* And that of the register window save area.  */
      frame_size += FIRST_PARM_OFFSET (cfun->decl);

      /* Finally, bump to the appropriate alignment.  */
      frame_size = SPARC_STACK_ALIGN (frame_size);
    }

  /* Set up values for use in prologue and epilogue.  */
  sparc_frame_size = frame_size;
  sparc_apparent_frame_size = apparent_frame_size;
  sparc_n_global_fp_regs = n_global_fp_regs;
  sparc_save_local_in_regs_p = save_local_in_regs_p;

  return frame_size;
}

/* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET.  */

int
sparc_initial_elimination_offset (int to)
{
  int offset;

  if (to == STACK_POINTER_REGNUM)
    offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
  else
    offset = 0;

  /* Both eliminations include the V9 stack bias (0 on 32-bit).  */
  offset += SPARC_STACK_BIAS;
  return offset;
}

/* Output any necessary .register pseudo-ops.  */

void
sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
{
  int i;

  if (TARGET_ARCH32)
    return;

  /* Check if %g[2367] were used without
     .register being printed for them already.  */
  for (i = 2; i < 8; i++)
    {
      if (df_regs_ever_live_p (i)
	  && ! sparc_hard_reg_printed [i])
	{
	  sparc_hard_reg_printed [i] = 1;
	  /* %g7 is used as TLS base register, use #ignore
	     for it instead of #scratch.  */
	  fprintf (file, "\t.register\t%%g%d, #%s\n", i,
		   i == 7 ? "ignore" : "scratch");
	}
      /* Skip %g4 and %g5: only %g2, %g3, %g6 and %g7 need directives.  */
      if (i == 3) i = 5;
    }
}

#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif

/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.

   Note that we don't use the REG+REG addressing mode for the probes because
   of the stack bias in 64-bit mode.  And it doesn't really buy us anything
   so the advantages of having a single code win here.
*/

static void
sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* %g1 is used as the scratch probe address throughout.  */
  rtx g1 = gen_rtx_REG (Pmode, 1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      emit_move_insn (g1, GEN_INT (first));
      emit_insn (gen_rtx_SET (g1,
			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
      emit_stack_probe (plus_constant (Pmode, g1, -size));
    }

  /* The run-time loop is made up of 9 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 4 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
      emit_insn (gen_rtx_SET (g1,
			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
      emit_stack_probe (g1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
	 it exceeds SIZE.  If only two probes are needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  emit_insn (gen_rtx_SET (g1,
				  plus_constant (Pmode, g1, -PROBE_INTERVAL)));
	  emit_stack_probe (g1);
	}

      emit_stack_probe (plus_constant (Pmode, g1,
				       (i - PROBE_INTERVAL) - size));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      rtx g4 = gen_rtx_REG (Pmode, 4);

      emit_move_insn (g1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
      emit_move_insn (g4, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_insn (gen_rtx_SET (g1,
			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));


      /* Step 3: the loop

	 while (TEST_ADDR != LAST_ADDR)
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      if (TARGET_ARCH64)
	emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
      else
	emit_insn (gen_probe_stack_rangesi (g1, g1, g4));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}

/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  Emits the assembly for the probe loop generated by
   gen_probe_stack_range{si,di} above.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (-PROBE_INTERVAL);
  output_asm_insn ("add\t%0, %1, %0", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  /* Probe at TEST_ADDR and branch.
*/
  if (TARGET_ARCH64)
    fputs ("\tbne,pt\t%xcc,", asm_out_file);
  else
    fputs ("\tbne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);
  xops[1] = GEN_INT (SPARC_STACK_BIAS);
  /* The store into the branch delay slot performs the actual probe.  */
  output_asm_insn (" st\t%%g0, [%0+%1]", xops);

  return "";
}

/* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
   needed.  LOW is supposed to be double-word aligned for 32-bit registers.
   SAVE_P decides whether a register must be saved/restored.  ACTION_TRUE
   is the action to be performed if SAVE_P returns true and ACTION_FALSE
   the action to be performed if it returns false.  Return the new offset.  */

typedef bool (*sorr_pred_t) (unsigned int, int);
typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;

static int
emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
			   int offset, int leaf_function, sorr_pred_t save_p,
			   sorr_act_t action_true, sorr_act_t action_false)
{
  unsigned int i;
  rtx mem;
  rtx_insn *insn;

  /* 64-bit integer registers: one DImode slot per register.  */
  if (TARGET_ARCH64 && high <= 32)
    {
      int fp_offset = -1;

      for (i = low; i < high; i++)
	{
	  if (save_p (i, leaf_function))
	    {
	      mem = gen_frame_mem (DImode, plus_constant (Pmode,
							  base, offset));
	      if (action_true == SORR_SAVE)
		{
		  insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	      else  /* action_true == SORR_RESTORE */
		{
		  /* The frame pointer must be restored last since its old
		     value may be used as base address for the frame.  This
		     is problematic in 64-bit mode only because of the lack
		     of double-word load instruction.  */
		  if (i == HARD_FRAME_POINTER_REGNUM)
		    fp_offset = offset;
		  else
		    emit_move_insn (gen_rtx_REG (DImode, i), mem);
		}
	      offset += 8;
	    }
	  else if (action_false == SORR_ADVANCE)
	    offset += 8;
	}

      /* Deferred frame-pointer restore (see above).  */
      if (fp_offset >= 0)
	{
	  mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
	  emit_move_insn (hard_frame_pointer_rtx, mem);
	}
    }
  else
    {
      /* 32-bit registers and FP registers: process in even/odd pairs so a
	 live pair can use a single double-word (DImode/DFmode) access.  */
      for (i = low; i < high; i += 2)
	{
	  bool reg0 = save_p (i, leaf_function);
	  bool reg1 = save_p (i + 1, leaf_function);
	  machine_mode mode;
	  int regno;

	  if (reg0 && reg1)
	    {
	      mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
	      regno = i;
	    }
	  else if (reg0)
	    {
	      mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
	      regno = i;
	    }
	  else if (reg1)
	    {
	      mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
	      regno = i + 1;
	      /* Only the odd half is live: skip the even slot.  */
	      offset += 4;
	    }
	  else
	    {
	      if (action_false == SORR_ADVANCE)
		offset += 8;
	      continue;
	    }

	  mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
	  if (action_true == SORR_SAVE)
	    {
	      insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      if (mode == DImode)
		{
		  /* Describe the double-word save to the unwinder as two
		     word-sized saves, since the hardware store is split.  */
		  rtx set1, set2;
		  mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
							      offset));
		  set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
		  RTX_FRAME_RELATED_P (set1) = 1;
		  mem
		    = gen_frame_mem (SImode, plus_constant (Pmode, base,
							    offset + 4));
		  set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
		  RTX_FRAME_RELATED_P (set2) = 1;
		  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
				gen_rtx_PARALLEL (VOIDmode,
						  gen_rtvec (2, set1, set2)));
		}
	    }
	  else  /* action_true == SORR_RESTORE */
	    emit_move_insn (gen_rtx_REG (mode, regno), mem);

	  /* Bump and round down to double word
	     in case we already bumped by 4.  */
	  offset = ROUND_DOWN (offset + 8, 8);
	}
    }

  return offset;
}

/* Emit code to adjust BASE to OFFSET.  Return the new base.  */

static rtx
emit_adjust_base_to_offset (rtx base, int offset)
{
  /* ??? This might be optimized a little as %g1 might already have a
     value close enough that a single add insn will do.  */
  /* ??? Although, all of this is probably only a temporary fix because
     if %g1 can hold a function result, then sparc_expand_epilogue will
     lose (the result will be clobbered).  */
  rtx new_base = gen_rtx_REG (Pmode, 1);
  emit_move_insn (new_base, GEN_INT (offset));
  emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
  return new_base;
}

/* Emit code to save/restore call-saved global and FP registers.  */

static void
emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
{
  /* Fall back to an explicit base adjustment when the offsets would not
     fit in the 13-bit signed immediate displacement field.  */
  if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
    {
      base = emit_adjust_base_to_offset (base, offset);
      offset = 0;
    }

  offset
    = emit_save_or_restore_regs (0, 8, base, offset, 0,
				 save_global_or_fp_reg_p, action, SORR_NONE);
  emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
			     save_global_or_fp_reg_p, action, SORR_NONE);
}

/* Emit code to save/restore call-saved local and in registers.  */

static void
emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
{
  if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
    {
      base = emit_adjust_base_to_offset (base, offset);
      offset = 0;
    }

  emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
			     save_local_or_in_reg_p, action, SORR_ADVANCE);
}

/* Emit a window_save insn.
*/

static rtx_insn *
emit_window_save (rtx increment)
{
  rtx_insn *insn = emit_insn (gen_window_save (increment));
  RTX_FRAME_RELATED_P (insn) = 1;

  /* The incoming return address (%o7) is saved in %i7.  */
  add_reg_note (insn, REG_CFA_REGISTER,
		gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
			     gen_rtx_REG (Pmode,
					  INCOMING_RETURN_ADDR_REGNUM)));

  /* The window save event.  */
  add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);

  /* The CFA is %fp, the hard frame pointer.  */
  add_reg_note (insn, REG_CFA_DEF_CFA,
		plus_constant (Pmode, hard_frame_pointer_rtx,
			       INCOMING_FRAME_SP_OFFSET));

  return insn;
}

/* Generate an increment for the stack pointer.  */

static rtx
gen_stack_pointer_inc (rtx increment)
{
  return gen_rtx_SET (stack_pointer_rtx,
		      gen_rtx_PLUS (Pmode,
				    stack_pointer_rtx,
				    increment));
}

/* Expand the function prologue.  The prologue is responsible for reserving
   storage for the frame, saving the call-saved registers and loading the
   GOT register if needed.  */

void
sparc_expand_prologue (void)
{
  HOST_WIDE_INT size;
  rtx_insn *insn;

  /* Compute a snapshot of crtl->uses_only_leaf_regs.  Relying
     on the final value of the flag means deferring the prologue/epilogue
     expansion until just before the second scheduling pass, which is too
     late to emit multiple epilogues or return insns.

     Of course we are making the assumption that the value of the flag
     will not change between now and its final value.  Of the three parts
     of the formula, only the last one can reasonably vary.  Let's take a
     closer look, after assuming that the first two ones are set to true
     (otherwise the last value is effectively silenced).

     If only_leaf_regs_used returns false, the global predicate will also
     be false so the actual frame size calculated below will be positive.
     As a consequence, the save_register_window insn will be emitted in
     the instruction stream; now this insn explicitly references %fp
     which is not a leaf register so only_leaf_regs_used will always
     return false subsequently.

     If only_leaf_regs_used returns true, we hope that the subsequent
     optimization passes won't cause non-leaf registers to pop up.  For
     example, the regrename pass has special provisions to not rename to
     non-leaf registers in a leaf function.  */
  sparc_leaf_function_p
    = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();

  size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);

  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
      || flag_stack_clash_protection)
    {
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    sparc_emit_probe_stack_range (get_stack_check_protect (),
					  size - get_stack_check_protect ());
	}
      else if (size > 0)
	sparc_emit_probe_stack_range (get_stack_check_protect (), size);
    }

  if (size == 0)
    ; /* do nothing.  */
  else if (sparc_leaf_function_p)
    {
      /* Leaf function: decrement %sp directly, no register window.  */
      rtx size_int_rtx = GEN_INT (-size);

      if (size <= 4096)
	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
      else if (size <= 8192)
	{
	  /* Split into two adds whose immediates fit the 13-bit field.  */
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* %sp is still the CFA register.  */
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
	}
      else
	{
	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (size_rtx, size_int_rtx);
	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_stack_pointer_inc (size_int_rtx));
	}

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Ensure no memory access is done before the frame is established.  */
      emit_insn (gen_frame_blockage ());
    }
  else
    {
      rtx size_int_rtx = GEN_INT (-size);

      if (size <= 4096)
	emit_window_save (size_int_rtx);
      else if (size <= 8192)
	{
	  emit_window_save (GEN_INT (-4096));

	  /* %sp is not the CFA register anymore.  */
	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));

	  /* Likewise.  */
	  emit_insn (gen_frame_blockage ());
	}
      else
	{
	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (size_rtx, size_int_rtx);
	  emit_window_save (size_rtx);
	}
    }

  if (sparc_leaf_function_p)
    {
      sparc_frame_base_reg = stack_pointer_rtx;
      sparc_frame_base_offset = size + SPARC_STACK_BIAS;
    }
  else
    {
      sparc_frame_base_reg = hard_frame_pointer_rtx;
      sparc_frame_base_offset = SPARC_STACK_BIAS;
    }

  if (sparc_n_global_fp_regs > 0)
    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
				         sparc_frame_base_offset
					   - sparc_apparent_frame_size,
					 SORR_SAVE);

  /* Advertise that the data calculated just above are now valid.  */
  sparc_prologue_data_valid_p = true;
}

/* Expand the function prologue.  The prologue is responsible for reserving
   storage for the frame, saving the call-saved registers and loading the
   GOT register if needed.
*/

void
sparc_flat_expand_prologue (void)
{
  HOST_WIDE_INT size;
  rtx_insn *insn;

  sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;

  size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);

  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
      || flag_stack_clash_protection)
    {
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    sparc_emit_probe_stack_range (get_stack_check_protect (),
					  size - get_stack_check_protect ());
	}
      else if (size > 0)
	sparc_emit_probe_stack_range (get_stack_check_protect (), size);
    }

  if (sparc_save_local_in_regs_p)
    emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
					SORR_SAVE);

  if (size == 0)
    ; /* do nothing.  */
  else
    {
      rtx size_int_rtx, size_rtx;

      size_rtx = size_int_rtx = GEN_INT (-size);

      /* We establish the frame (i.e. decrement the stack pointer) first, even
	 if we use a frame pointer, because we cannot clobber any call-saved
	 registers, including the frame pointer, if we haven't created a new
	 register save area, for the sake of compatibility with the ABI.  */
      if (size <= 4096)
	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
      else if (size <= 8192 && !frame_pointer_needed)
	{
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
	}
      else
	{
	  size_rtx = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (size_rtx, size_int_rtx);
	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			gen_stack_pointer_inc (size_int_rtx));
	}
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Ensure no memory access is done before the frame is established.  */
      emit_insn (gen_frame_blockage ());

      if (frame_pointer_needed)
	{
	  insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
					 gen_rtx_MINUS (Pmode,
							stack_pointer_rtx,
							size_rtx)));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			gen_rtx_SET (hard_frame_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    size)));
	}

      if (return_addr_reg_needed_p (sparc_leaf_function_p))
	{
	  rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
	  rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);

	  insn = emit_move_insn (i7, o7);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));

	  /* Prevent this instruction from ever being considered dead,
	     even if this function has no epilogue.  */
	  emit_use (i7);
	}
    }

  if (frame_pointer_needed)
    {
      sparc_frame_base_reg = hard_frame_pointer_rtx;
      sparc_frame_base_offset = SPARC_STACK_BIAS;
    }
  else
    {
      sparc_frame_base_reg = stack_pointer_rtx;
      sparc_frame_base_offset = size + SPARC_STACK_BIAS;
    }

  if (sparc_n_global_fp_regs > 0)
    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
				         sparc_frame_base_offset
					   - sparc_apparent_frame_size,
					 SORR_SAVE);

  /* Advertise that the data calculated just above are now valid.  */
  sparc_prologue_data_valid_p = true;
}

/* This function generates the assembly code for function entry, which boils
   down to emitting the necessary .register directives.  */

static void
sparc_asm_function_prologue (FILE *file)
{
  /* Check that the assumption we made in sparc_expand_prologue is valid.  */
  if (!TARGET_FLAT)
    gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);

  sparc_output_scratch_registers (file);
}

/* Expand the function epilogue, either normal or part of a sibcall.
   We emit all the instructions except the return or the call.  */

void
sparc_expand_epilogue (bool for_eh)
{
  HOST_WIDE_INT size = sparc_frame_size;

  if (cfun->calls_alloca)
    emit_insn (gen_frame_blockage ());

  if (sparc_n_global_fp_regs > 0)
    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
				         sparc_frame_base_offset
					   - sparc_apparent_frame_size,
					 SORR_RESTORE);

  if (size == 0 || for_eh)
    ; /* do nothing.  */
  else if (sparc_leaf_function_p)
    {
      /* Ensure no memory access is done after the frame is destroyed.
*/ 6323 emit_insn (gen_frame_blockage ()); 6324 6325 if (size <= 4096) 6326 emit_insn (gen_stack_pointer_inc (GEN_INT (size))); 6327 else if (size <= 8192) 6328 { 6329 emit_insn (gen_stack_pointer_inc (GEN_INT (4096))); 6330 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096))); 6331 } 6332 else 6333 { 6334 rtx reg = gen_rtx_REG (Pmode, 1); 6335 emit_move_insn (reg, GEN_INT (size)); 6336 emit_insn (gen_stack_pointer_inc (reg)); 6337 } 6338 } 6339} 6340 6341/* Expand the function epilogue, either normal or part of a sibcall. 6342 We emit all the instructions except the return or the call. */ 6343 6344void 6345sparc_flat_expand_epilogue (bool for_eh) 6346{ 6347 HOST_WIDE_INT size = sparc_frame_size; 6348 6349 if (sparc_n_global_fp_regs > 0) 6350 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg, 6351 sparc_frame_base_offset 6352 - sparc_apparent_frame_size, 6353 SORR_RESTORE); 6354 6355 /* If we have a frame pointer, we'll need both to restore it before the 6356 frame is destroyed and use its current value in destroying the frame. 6357 Since we don't have an atomic way to do that in the flat window model, 6358 we save the current value into a temporary register (%g1). */ 6359 if (frame_pointer_needed && !for_eh) 6360 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx); 6361 6362 if (return_addr_reg_needed_p (sparc_leaf_function_p)) 6363 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM), 6364 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM)); 6365 6366 if (sparc_save_local_in_regs_p) 6367 emit_save_or_restore_local_in_regs (sparc_frame_base_reg, 6368 sparc_frame_base_offset, 6369 SORR_RESTORE); 6370 6371 if (size == 0 || for_eh) 6372 ; /* do nothing. */ 6373 else if (frame_pointer_needed) 6374 { 6375 /* Ensure no memory access is done after the frame is destroyed. */ 6376 emit_insn (gen_frame_blockage ()); 6377 6378 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1)); 6379 } 6380 else 6381 { 6382 /* Likewise. 
*/ 6383 emit_insn (gen_frame_blockage ()); 6384 6385 if (size <= 4096) 6386 emit_insn (gen_stack_pointer_inc (GEN_INT (size))); 6387 else if (size <= 8192) 6388 { 6389 emit_insn (gen_stack_pointer_inc (GEN_INT (4096))); 6390 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096))); 6391 } 6392 else 6393 { 6394 rtx reg = gen_rtx_REG (Pmode, 1); 6395 emit_move_insn (reg, GEN_INT (size)); 6396 emit_insn (gen_stack_pointer_inc (reg)); 6397 } 6398 } 6399} 6400 6401/* Return true if it is appropriate to emit `return' instructions in the 6402 body of a function. */ 6403 6404bool 6405sparc_can_use_return_insn_p (void) 6406{ 6407 return sparc_prologue_data_valid_p 6408 && sparc_n_global_fp_regs == 0 6409 && TARGET_FLAT 6410 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p) 6411 : (sparc_frame_size == 0 || !sparc_leaf_function_p); 6412} 6413 6414/* This function generates the assembly code for function exit. */ 6415 6416static void 6417sparc_asm_function_epilogue (FILE *file) 6418{ 6419 /* If the last two instructions of a function are "call foo; dslot;" 6420 the return address might point to the first instruction in the next 6421 function and we have to output a dummy nop for the sake of sane 6422 backtraces in such cases. This is pointless for sibling calls since 6423 the return address is explicitly adjusted. */ 6424 6425 rtx_insn *insn = get_last_insn (); 6426 6427 rtx last_real_insn = prev_real_insn (insn); 6428 if (last_real_insn 6429 && NONJUMP_INSN_P (last_real_insn) 6430 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE) 6431 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0); 6432 6433 if (last_real_insn 6434 && CALL_P (last_real_insn) 6435 && !SIBLING_CALL_P (last_real_insn)) 6436 fputs("\tnop\n", file); 6437 6438 sparc_output_deferred_case_vectors (); 6439} 6440 6441/* Output a 'restore' instruction. */ 6442 6443static void 6444output_restore (rtx pat) 6445{ 6446 rtx operands[3]; 6447 6448 if (! 
pat)
    {
      /* No pending insn: emit a plain window restore.  */
      fputs ("\t restore\n", asm_out_file);
      return;
    }

  gcc_assert (GET_CODE (pat) == SET);

  /* Fold the delay-slot SET into the restore: the destination becomes the
     third operand and the source supplies the first two.  */
  operands[0] = SET_DEST (pat);
  pat = SET_SRC (pat);

  switch (GET_CODE (pat))
    {
    case PLUS:
      operands[1] = XEXP (pat, 0);
      operands[2] = XEXP (pat, 1);
      output_asm_insn (" restore %r1, %2, %Y0", operands);
      break;
    case LO_SUM:
      operands[1] = XEXP (pat, 0);
      operands[2] = XEXP (pat, 1);
      output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
      break;
    case ASHIFT:
      operands[1] = XEXP (pat, 0);
      /* Only a shift by 1 is expected here; emit it as reg + reg.  */
      gcc_assert (XEXP (pat, 1) == const1_rtx);
      output_asm_insn (" restore %r1, %r1, %Y0", operands);
      break;
    default:
      operands[1] = pat;
      output_asm_insn (" restore %%g0, %1, %Y0", operands);
      break;
    }
}

/* Output a return.  */

const char *
output_return (rtx_insn *insn)
{
  if (crtl->calls_eh_return)
    {
      /* If the function uses __builtin_eh_return, the eh_return
	 machinery occupies the delay slot.  */
      gcc_assert (!final_sequence);

      if (flag_delayed_branch)
	{
	  if (!TARGET_FLAT && TARGET_V9)
	    fputs ("\treturn\t%i7+8\n", asm_out_file);
	  else
	    {
	      if (!TARGET_FLAT)
		fputs ("\trestore\n", asm_out_file);

	      fputs ("\tjmp\t%o7+8\n", asm_out_file);
	    }

	  /* Stack adjustment (%g1) goes in the delay slot.  */
	  fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
	}
      else
	{
	  if (!TARGET_FLAT)
	    fputs ("\trestore\n", asm_out_file);

	  fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
	  fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
	}
    }
  else if (sparc_leaf_function_p || TARGET_FLAT)
    {
      /* This is a leaf or flat function so we don't have to bother restoring
	 the register window, which frees us from dealing with the convoluted
	 semantics of restore/return.  We simply output the jump to the
	 return address and the insn in the delay slot (if any).
 */

      return "jmp\t%%o7+%)%#";
    }
  else
    {
      /* This is a regular function so we have to restore the register window.
	 We may have a pending insn for the delay slot, which will be either
	 combined with the 'restore' instruction or put in the delay slot of
	 the 'return' instruction.  */

      if (final_sequence)
	{
	  rtx_insn *delay;
	  rtx pat;

	  delay = NEXT_INSN (insn);
	  gcc_assert (delay);

	  pat = PATTERN (delay);

	  /* On V9, if the delay-slot insn only uses registers that survive
	     a 'return' (checked by epilogue_renumber in test mode), rename
	     them and let 'return' do the restore.  */
	  if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
	    {
	      epilogue_renumber (&pat, 0);
	      return "return\t%%i7+%)%#";
	    }
	  else
	    {
	      output_asm_insn ("jmp\t%%i7+%)", NULL);

	      /* We're going to output the insn in the delay slot manually.
		 Make sure to output its source location first.  */
	      PATTERN (delay) = gen_blockage ();
	      INSN_CODE (delay) = -1;
	      final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
	      INSN_LOCATION (delay) = UNKNOWN_LOCATION;

	      /* Combine the delay-slot SET with the 'restore'.  */
	      output_restore (pat);
	    }
	}
      else
	{
	  /* The delay slot is empty.  */
	  if (TARGET_V9)
	    return "return\t%%i7+%)\n\t nop";
	  else if (flag_delayed_branch)
	    return "jmp\t%%i7+%)\n\t restore";
	  else
	    return "restore\n\tjmp\t%%o7+%)\n\t nop";
	}
    }

  return "";
}

/* Output a sibling call.  */

const char *
output_sibcall (rtx_insn *insn, rtx call_operand)
{
  rtx operands[1];

  gcc_assert (flag_delayed_branch);

  operands[0] = call_operand;

  if (sparc_leaf_function_p || TARGET_FLAT)
    {
      /* This is a leaf or flat function so we don't have to bother restoring
	 the register window.  We simply output the jump to the function and
	 the insn in the delay slot (if any).
 */

      gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));

      if (final_sequence)
	output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
			 operands);
      else
	/* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
	   it into branch if possible.  */
	output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
			 operands);
    }
  else
    {
      /* This is a regular function so we have to restore the register window.
	 We may have a pending insn for the delay slot, which will be combined
	 with the 'restore' instruction.  */

      output_asm_insn ("call\t%a0, 0", operands);

      if (final_sequence)
	{
	  rtx_insn *delay;
	  rtx pat;

	  delay = NEXT_INSN (insn);
	  gcc_assert (delay);

	  pat = PATTERN (delay);

	  /* We're going to output the insn in the delay slot manually.
	     Make sure to output its source location first.  */
	  PATTERN (delay) = gen_blockage ();
	  INSN_CODE (delay) = -1;
	  final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
	  INSN_LOCATION (delay) = UNKNOWN_LOCATION;

	  output_restore (pat);
	}
      else
	output_restore (NULL_RTX);
    }

  return "";
}

/* Functions for handling argument passing.

   For 32-bit, the first 6 args are normally in registers and the rest are
   pushed.  Any arg that starts within the first 6 words is at least
   partially passed in a register unless its data type forbids.

   For 64-bit, the argument registers are laid out as an array of 16 elements
   and arguments are added sequentially.  The first 6 int args and up to the
   first 16 fp args (depending on size) are passed in regs.
6648 6649 Slot Stack Integral Float Float in structure Double Long Double 6650 ---- ----- -------- ----- ------------------ ------ ----------- 6651 15 [SP+248] %f31 %f30,%f31 %d30 6652 14 [SP+240] %f29 %f28,%f29 %d28 %q28 6653 13 [SP+232] %f27 %f26,%f27 %d26 6654 12 [SP+224] %f25 %f24,%f25 %d24 %q24 6655 11 [SP+216] %f23 %f22,%f23 %d22 6656 10 [SP+208] %f21 %f20,%f21 %d20 %q20 6657 9 [SP+200] %f19 %f18,%f19 %d18 6658 8 [SP+192] %f17 %f16,%f17 %d16 %q16 6659 7 [SP+184] %f15 %f14,%f15 %d14 6660 6 [SP+176] %f13 %f12,%f13 %d12 %q12 6661 5 [SP+168] %o5 %f11 %f10,%f11 %d10 6662 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8 6663 3 [SP+152] %o3 %f7 %f6,%f7 %d6 6664 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4 6665 1 [SP+136] %o1 %f3 %f2,%f3 %d2 6666 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0 6667 6668 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise. 6669 6670 Integral arguments are always passed as 64-bit quantities appropriately 6671 extended. 6672 6673 Passing of floating point values is handled as follows. 6674 If a prototype is in scope: 6675 If the value is in a named argument (i.e. not a stdarg function or a 6676 value not part of the `...') then the value is passed in the appropriate 6677 fp reg. 6678 If the value is part of the `...' and is passed in one of the first 6 6679 slots then the value is passed in the appropriate int reg. 6680 If the value is part of the `...' and is not passed in one of the first 6 6681 slots then the value is passed in memory. 6682 If a prototype is not in scope: 6683 If the value is one of the first 6 arguments the value is passed in the 6684 appropriate integer reg and the appropriate fp reg. 6685 If the value is not one of the first 6 arguments the value is passed in 6686 the appropriate fp reg and in memory. 6687 6688 6689 Summary of the calling conventions implemented by GCC on the SPARC: 6690 6691 32-bit ABI: 6692 size argument return value 6693 6694 small integer <4 int. reg. int. reg. 6695 word 4 int. reg. int. reg. 6696 double word 8 int. reg. 
int. reg. 6697 6698 _Complex small integer <8 int. reg. int. reg. 6699 _Complex word 8 int. reg. int. reg. 6700 _Complex double word 16 memory int. reg. 6701 6702 vector integer <=8 int. reg. FP reg. 6703 vector integer >8 memory memory 6704 6705 float 4 int. reg. FP reg. 6706 double 8 int. reg. FP reg. 6707 long double 16 memory memory 6708 6709 _Complex float 8 memory FP reg. 6710 _Complex double 16 memory FP reg. 6711 _Complex long double 32 memory FP reg. 6712 6713 vector float any memory memory 6714 6715 aggregate any memory memory 6716 6717 6718 6719 64-bit ABI: 6720 size argument return value 6721 6722 small integer <8 int. reg. int. reg. 6723 word 8 int. reg. int. reg. 6724 double word 16 int. reg. int. reg. 6725 6726 _Complex small integer <16 int. reg. int. reg. 6727 _Complex word 16 int. reg. int. reg. 6728 _Complex double word 32 memory int. reg. 6729 6730 vector integer <=16 FP reg. FP reg. 6731 vector integer 16<s<=32 memory FP reg. 6732 vector integer >32 memory memory 6733 6734 float 4 FP reg. FP reg. 6735 double 8 FP reg. FP reg. 6736 long double 16 FP reg. FP reg. 6737 6738 _Complex float 8 FP reg. FP reg. 6739 _Complex double 16 FP reg. FP reg. 6740 _Complex long double 32 memory FP reg. 6741 6742 vector float <=16 FP reg. FP reg. 6743 vector float 16<s<=32 memory FP reg. 6744 vector float >32 memory memory 6745 6746 aggregate <=16 reg. reg. 6747 aggregate 16<s<=32 memory reg. 6748 aggregate >32 memory memory 6749 6750 6751 6752Note #1: complex floating-point types follow the extended SPARC ABIs as 6753implemented by the Sun compiler. 6754 6755Note #2: integer vector types follow the scalar floating-point types 6756conventions to match what is implemented by the Sun VIS SDK. 6757 6758Note #3: floating-point vector types follow the aggregate types 6759conventions. */ 6760 6761 6762/* Maximum number of int regs for args. */ 6763#define SPARC_INT_ARG_MAX 6 6764/* Maximum number of fp regs for args. 
 */
#define SPARC_FP_ARG_MAX 16
/* Number of words (partially) occupied for a given size in units.  */
#define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)

/* Handle the INIT_CUMULATIVE_ARGS macro.
   Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
{
  cum->words = 0;
  cum->prototype_p = fntype && prototype_p (fntype);
  /* A null FNTYPE identifies a library call.  */
  cum->libcall_p = !fntype;
}

/* Handle promotion of pointer and integer arguments.  */

static machine_mode
sparc_promote_function_mode (const_tree type, machine_mode mode,
			     int *punsignedp, const_tree, int)
{
  /* Pointers are promoted to full Pmode width with the extension
     prescribed by POINTERS_EXTEND_UNSIGNED.  */
  if (type && POINTER_TYPE_P (type))
    {
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return Pmode;
    }

  /* Integral arguments are passed as full words, as per the ABI.  */
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
    return word_mode;

  return mode;
}

/* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook.  */

static bool
sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
{
  /* Only the 64-bit ABI cares whether an argument is named.  */
  return TARGET_ARCH64 ? true : false;
}

/* Handle the TARGET_PASS_BY_REFERENCE target hook.
   Specify whether to pass the argument by reference.  */

static bool
sparc_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
{
  tree type = arg.type;
  machine_mode mode = arg.mode;
  if (TARGET_ARCH32)
    /* Original SPARC 32-bit ABI says that structures and unions,
       and quad-precision floats are passed by reference.
       All other base types are passed in registers.

       Extended ABI (as implemented by the Sun compiler) says that all
       complex floats are passed by reference.
       Pass complex integers
       in registers up to 8 bytes.  More generally, enforce the 2-word
       cap for passing arguments in registers.

       Vector ABI (as implemented by the Sun VIS SDK) says that integer
       vectors are passed like floats of the same size, that is in
       registers up to 8 bytes.  Pass all vector floats by reference
       like structure and unions.  */
    return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
	    || mode == SCmode
	    /* Catch CDImode, TFmode, DCmode and TCmode.  */
	    || GET_MODE_SIZE (mode) > 8
	    || (type
		&& VECTOR_TYPE_P (type)
		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
  else
    /* Original SPARC 64-bit ABI says that structures and unions
       smaller than 16 bytes are passed in registers, as well as
       all other base types.

       Extended ABI (as implemented by the Sun compiler) says that
       complex floats are passed in registers up to 16 bytes.  Pass
       all complex integers in registers up to 16 bytes.  More generally,
       enforce the 2-word cap for passing arguments in registers.

       Vector ABI (as implemented by the Sun VIS SDK) says that integer
       vectors are passed like floats of the same size, that is in
       registers (up to 16 bytes).  Pass all vector floats like structure
       and unions.  */
    return ((type
	     && (AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type))
	     && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
	    /* Catch CTImode and TCmode.  */
	    || GET_MODE_SIZE (mode) > 16);
}

/* Traverse the record TYPE recursively and call FUNC on its fields.
   NAMED is true if this is for a named parameter.  DATA is passed
   to FUNC for each field.  OFFSET is the starting position and
   PACKED is true if we are inside a packed record.
 */

template <typename T, void Func (const_tree, int, bool, T*)>
static void
traverse_record_type (const_tree type, bool named, T *data,
		      int offset = 0, bool packed = false)
{
  /* The ABI obviously doesn't specify how packed structures are passed.
     These are passed in integer regs if possible, otherwise memory.  */
  if (!packed)
    for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
      if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
	{
	  packed = true;
	  break;
	}

  /* Walk the real fields, but skip those with no size or a zero size.
     ??? Fields with variable offset are handled as having zero offset.  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (TREE_CODE (field) == FIELD_DECL)
      {
	if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
	  continue;

	int bitpos = offset;
	if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
	  bitpos += int_bit_position (field);

	tree field_type = TREE_TYPE (field);
	/* Recurse into nested records; invoke Func on scalar fields.  */
	if (TREE_CODE (field_type) == RECORD_TYPE)
	  traverse_record_type<T, Func> (field_type, named, data, bitpos,
					 packed);
	else
	  {
	    const bool fp_type
	      = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
	    /* The third argument tells Func whether the field is eligible
	       for FP registers: an FP/vector field of a named argument in
	       an unpacked record, with the FPU enabled.  */
	    Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
		  data);
	  }
      }
}

/* Handle recursive register classifying for structure layout.  */

typedef struct
{
  bool fp_regs;			/* true if field eligible to FP registers.  */
  bool fp_regs_in_first_word;	/* true if such field in first word.  */
} classify_data_t;

/* A subroutine of function_arg_slotno.  Classify the field.
 */

inline void
classify_registers (const_tree, int bitpos, bool fp, classify_data_t *data)
{
  if (fp)
    {
      data->fp_regs = true;
      if (bitpos < BITS_PER_WORD)
	data->fp_regs_in_first_word = true;
    }
}

/* Compute the slot number to pass an argument in.
   Return the slot number or -1 if passing on the stack.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).
   INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
   *PREGNO records the register number to use if scalar type.
   *PPADDING records the amount of padding needed in words.  */

static int
function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
		     const_tree type, bool named, bool incoming,
		     int *pregno, int *ppadding)
{
  const int regbase
    = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
  int slotno = cum->words, regno;
  enum mode_class mclass = GET_MODE_CLASS (mode);

  /* Silence warnings in the callers.  */
  *pregno = -1;
  *ppadding = -1;

  /* Addressable types must live in memory.  */
  if (type && TREE_ADDRESSABLE (type))
    return -1;

  /* In 64-bit mode, objects requiring 16-byte alignment get it.  */
  if (TARGET_ARCH64
      && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
      && (slotno & 1) != 0)
    {
      slotno++;
      *ppadding = 1;
    }
  else
    *ppadding = 0;

  /* Vector types deserve special treatment because they are polymorphic wrt
     their mode, depending upon whether VIS instructions are enabled.
*/ 6972 if (type && VECTOR_TYPE_P (type)) 6973 { 6974 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) 6975 { 6976 /* The SPARC port defines no floating-point vector modes. */ 6977 gcc_assert (mode == BLKmode); 6978 } 6979 else 6980 { 6981 /* Integer vector types should either have a vector 6982 mode or an integral mode, because we are guaranteed 6983 by pass_by_reference that their size is not greater 6984 than 16 bytes and TImode is 16-byte wide. */ 6985 gcc_assert (mode != BLKmode); 6986 6987 /* Integer vectors are handled like floats as per 6988 the Sun VIS SDK. */ 6989 mclass = MODE_FLOAT; 6990 } 6991 } 6992 6993 switch (mclass) 6994 { 6995 case MODE_FLOAT: 6996 case MODE_COMPLEX_FLOAT: 6997 case MODE_VECTOR_INT: 6998 if (TARGET_ARCH64 && TARGET_FPU && named) 6999 { 7000 /* If all arg slots are filled, then must pass on stack. */ 7001 if (slotno >= SPARC_FP_ARG_MAX) 7002 return -1; 7003 7004 regno = SPARC_FP_ARG_FIRST + slotno * 2; 7005 /* Arguments filling only one single FP register are 7006 right-justified in the outer double FP register. */ 7007 if (GET_MODE_SIZE (mode) <= 4) 7008 regno++; 7009 break; 7010 } 7011 /* fallthrough */ 7012 7013 case MODE_INT: 7014 case MODE_COMPLEX_INT: 7015 /* If all arg slots are filled, then must pass on stack. */ 7016 if (slotno >= SPARC_INT_ARG_MAX) 7017 return -1; 7018 7019 regno = regbase + slotno; 7020 break; 7021 7022 case MODE_RANDOM: 7023 /* MODE is VOIDmode when generating the actual call. */ 7024 if (mode == VOIDmode) 7025 return -1; 7026 7027 if (TARGET_64BIT && TARGET_FPU && named 7028 && type 7029 && (TREE_CODE (type) == RECORD_TYPE || VECTOR_TYPE_P (type))) 7030 { 7031 /* If all arg slots are filled, then must pass on stack. 
*/ 7032 if (slotno >= SPARC_FP_ARG_MAX) 7033 return -1; 7034 7035 if (TREE_CODE (type) == RECORD_TYPE) 7036 { 7037 classify_data_t data = { false, false }; 7038 traverse_record_type<classify_data_t, classify_registers> 7039 (type, named, &data); 7040 7041 if (data.fp_regs) 7042 { 7043 /* If all FP slots are filled except for the last one and 7044 there is no FP field in the first word, then must pass 7045 on stack. */ 7046 if (slotno >= SPARC_FP_ARG_MAX - 1 7047 && !data.fp_regs_in_first_word) 7048 return -1; 7049 } 7050 else 7051 { 7052 /* If all int slots are filled, then must pass on stack. */ 7053 if (slotno >= SPARC_INT_ARG_MAX) 7054 return -1; 7055 } 7056 7057 /* PREGNO isn't set since both int and FP regs can be used. */ 7058 return slotno; 7059 } 7060 7061 regno = SPARC_FP_ARG_FIRST + slotno * 2; 7062 } 7063 else 7064 { 7065 /* If all arg slots are filled, then must pass on stack. */ 7066 if (slotno >= SPARC_INT_ARG_MAX) 7067 return -1; 7068 7069 regno = regbase + slotno; 7070 } 7071 break; 7072 7073 default : 7074 gcc_unreachable (); 7075 } 7076 7077 *pregno = regno; 7078 return slotno; 7079} 7080 7081/* Handle recursive register counting/assigning for structure layout. */ 7082 7083typedef struct 7084{ 7085 int slotno; /* slot number of the argument. */ 7086 int regbase; /* regno of the base register. */ 7087 int intoffset; /* offset of the first pending integer field. */ 7088 int nregs; /* number of words passed in registers. */ 7089 bool stack; /* true if part of the argument is on the stack. */ 7090 rtx ret; /* return expression being built. */ 7091} assign_data_t; 7092 7093/* A subroutine of function_arg_record_value. Compute the number of integer 7094 registers to be assigned between PARMS->intoffset and BITPOS. Return 7095 true if at least one integer register is assigned or false otherwise. 
 */

static bool
compute_int_layout (int bitpos, assign_data_t *data, int *pnregs)
{
  /* A negative intoffset means no integer field is pending.  */
  if (data->intoffset < 0)
    return false;

  const int intoffset = data->intoffset;
  data->intoffset = -1;

  const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
  const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
  const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
  int nregs = (endbit - startbit) / BITS_PER_WORD;

  /* Clamp to the number of integer argument slots left.  */
  if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
    {
      nregs = SPARC_INT_ARG_MAX - this_slotno;

      /* We need to pass this field (partly) on the stack.  */
      data->stack = 1;
    }

  if (nregs <= 0)
    return false;

  *pnregs = nregs;
  return true;
}

/* A subroutine of function_arg_record_value.  Compute the number and the mode
   of the FP registers to be assigned for FIELD.  Return true if at least one
   FP register is assigned or false otherwise.  */

static bool
compute_fp_layout (const_tree field, int bitpos, assign_data_t *data,
		   int *pnregs, machine_mode *pmode)
{
  const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
  machine_mode mode = DECL_MODE (field);
  int nregs, nslots;

  /* Slots are counted as words while regs are counted as having the size of
     the (inner) mode.
 */
  if (VECTOR_TYPE_P (TREE_TYPE (field)) && mode == BLKmode)
    {
      /* BLKmode vector: one register per element, in the element mode.  */
      mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
      nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
    }
  else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
    {
      /* Complex: two registers in the component mode.  */
      mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
      nregs = 2;
    }
  else
    nregs = 1;

  nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));

  /* Clamp to the number of FP argument slots left.  */
  if (nslots > SPARC_FP_ARG_MAX - this_slotno)
    {
      nslots = SPARC_FP_ARG_MAX - this_slotno;
      nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);

      /* We need to pass this field (partly) on the stack.  */
      data->stack = 1;

      if (nregs <= 0)
	return false;
    }

  *pnregs = nregs;
  *pmode = mode;
  return true;
}

/* A subroutine of function_arg_record_value.  Count the number of registers
   to be assigned for FIELD and between PARMS->intoffset and BITPOS.  */

inline void
count_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
{
  if (fp)
    {
      int nregs;
      machine_mode mode;

      /* First count the pending integer bits, then the FP field itself.  */
      if (compute_int_layout (bitpos, data, &nregs))
	data->nregs += nregs;

      if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
	data->nregs += nregs;
    }
  else
    {
      /* Start (or extend) a run of pending integer bits.  */
      if (data->intoffset < 0)
	data->intoffset = bitpos;
    }
}

/* A subroutine of function_arg_record_value.  Assign the bits of the
   structure between PARMS->intoffset and BITPOS to integer registers.  */

static void
assign_int_registers (int bitpos, assign_data_t *data)
{
  int intoffset = data->intoffset;
  machine_mode mode;
  int nregs;

  if (!compute_int_layout (bitpos, data, &nregs))
    return;

  /* If this is the trailing part of a word, only load that much into
     the register.  Otherwise load the whole register.  Note that in
     the latter case we may pick up unwanted bits.
It's not a problem
     at the moment but may wish to revisit.  */
  if (intoffset % BITS_PER_WORD != 0)
    mode = smallest_int_mode_for_size (BITS_PER_WORD
				       - intoffset % BITS_PER_WORD);
  else
    mode = word_mode;

  const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
  unsigned int regno = data->regbase + this_slotno;
  intoffset /= BITS_PER_UNIT;

  /* Emit one (reg, offset) pair per register; after the first, always use
     a full word aligned on a word boundary.  */
  do
    {
      rtx reg = gen_rtx_REG (mode, regno);
      XVECEXP (data->ret, 0, data->stack + data->nregs)
	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
      data->nregs += 1;
      mode = word_mode;
      regno += 1;
      intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
    }
  while (--nregs > 0);
}

/* A subroutine of function_arg_record_value.  Assign FIELD at position
   BITPOS to FP registers.  */

static void
assign_fp_registers (const_tree field, int bitpos, assign_data_t *data)
{
  int nregs;
  machine_mode mode;

  if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
    return;

  const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
  int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
  /* A single FP register in the upper half of its slot is the odd
     register of the pair.  */
  if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
    regno++;
  int pos = bitpos / BITS_PER_UNIT;

  do
    {
      rtx reg = gen_rtx_REG (mode, regno);
      XVECEXP (data->ret, 0, data->stack + data->nregs)
	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
      data->nregs += 1;
      regno += GET_MODE_SIZE (mode) / 4;
      pos += GET_MODE_SIZE (mode);
    }
  while (--nregs > 0);
}

/* A subroutine of function_arg_record_value.  Assign FIELD and the bits of
   the structure between PARMS->intoffset and BITPOS to registers.
 */

inline void
assign_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
{
  if (fp)
    {
      /* Flush the pending integer bits before the FP field, mirroring the
	 order used by count_registers.  */
      assign_int_registers (bitpos, data);

      assign_fp_registers (field, bitpos, data);
    }
  else
    {
      if (data->intoffset < 0)
	data->intoffset = bitpos;
    }
}

/* Used by function_arg and function_value to implement the complex
   conventions of the 64-bit ABI for passing and returning structures.
   Return an expression valid as a return value for the FUNCTION_ARG
   and TARGET_FUNCTION_VALUE.

   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   MODE is the argument's machine mode.
   SLOTNO is the index number of the argument's slot in the parameter array.
   NAMED is true if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).
   REGBASE is the regno of the base register for the parameter array.  */

static rtx
function_arg_record_value (const_tree type, machine_mode mode,
			   int slotno, bool named, int regbase)
{
  const int size = int_size_in_bytes (type);
  assign_data_t data;
  int nregs;

  data.slotno = slotno;
  data.regbase = regbase;

  /* Count how many registers we need.  */
  data.nregs = 0;
  data.intoffset = 0;
  data.stack = false;
  traverse_record_type<assign_data_t, count_registers> (type, named, &data);

  /* Take into account pending integer fields.  */
  if (compute_int_layout (size * BITS_PER_UNIT, &data, &nregs))
    data.nregs += nregs;

  /* Allocate the vector and handle some annoying special cases.  */
  nregs = data.nregs;

  if (nregs == 0)
    {
      /* ??? Empty structure has no value?  Duh?
 */
      if (size <= 0)
	{
	  /* Though there's nothing really to store, return a word register
	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
	     leads to breakage due to the fact that there are zero bytes to
	     load.  */
	  return gen_rtx_REG (mode, regbase);
	}

      /* ??? C++ has structures with no fields, and yet a size.  Give up
	 for now and pass everything back in integer registers.  */
      nregs = CEIL_NWORDS (size);
      if (nregs + slotno > SPARC_INT_ARG_MAX)
	nregs = SPARC_INT_ARG_MAX - slotno;
    }

  gcc_assert (nregs > 0);

  data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));

  /* If at least one field must be passed on the stack, generate
     (parallel [(expr_list (nil) ...) ...]) so that all fields will
     also be passed on the stack.  We can't do much better because the
     semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
     of structures for which the fields passed exclusively in registers
     are not at the beginning of the structure.  */
  if (data.stack)
    XVECEXP (data.ret, 0, 0)
      = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);

  /* Assign the registers.  */
  data.nregs = 0;
  data.intoffset = 0;
  traverse_record_type<assign_data_t, assign_registers> (type, named, &data);

  /* Assign pending integer fields.  */
  assign_int_registers (size * BITS_PER_UNIT, &data);

  /* The assignment pass must have produced exactly the registers the
     counting pass predicted.  */
  gcc_assert (data.nregs == nregs);

  return data.ret;
}

/* Used by function_arg and function_value to implement the conventions
   of the 64-bit ABI for passing and returning unions.
   Return an expression valid as a return value for the FUNCTION_ARG
   and TARGET_FUNCTION_VALUE.

   SIZE is the size in bytes of the union.
   MODE is the argument's machine mode.
   SLOTNO is the index number of the argument's slot in the parameter array.
   REGNO is the hard register the union will be passed in.  */

static rtx
function_arg_union_value (int size, machine_mode mode, int slotno, int regno)
{
  unsigned int nwords;

  /* See comment in function_arg_record_value for empty structures.  */
  if (size <= 0)
    return gen_rtx_REG (mode, regno);

  /* A union starting in the last parameter slot cannot spill over into
     a second register, so only a single word can be passed.  */
  if (slotno == SPARC_INT_ARG_MAX - 1)
    nwords = 1;
  else
    nwords = CEIL_NWORDS (size);

  rtx regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));

  /* Unions are passed left-justified.  */
  for (unsigned int i = 0; i < nwords; i++)
    XVECEXP (regs, 0, i)
      = gen_rtx_EXPR_LIST (VOIDmode,
			   gen_rtx_REG (word_mode, regno + i),
			   GEN_INT (UNITS_PER_WORD * i));

  return regs;
}

/* Used by function_arg and function_value to implement the conventions
   of the 64-bit ABI for passing and returning BLKmode vectors.
   Return an expression valid as a return value for the FUNCTION_ARG
   and TARGET_FUNCTION_VALUE.

   SIZE is the size in bytes of the vector.
   SLOTNO is the index number of the argument's slot in the parameter array.
   NAMED is true if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).
   REGNO is the hard register the vector will be passed in.  */

static rtx
function_arg_vector_value (int size, int slotno, bool named, int regno)
{
  /* Named vectors go in FP registers, where consecutive words live two
     register numbers apart; unnamed ones use consecutive registers.  */
  const int mult = (named ? 2 : 1);
  unsigned int nwords;

  if (slotno == (named ? SPARC_FP_ARG_MAX : SPARC_INT_ARG_MAX) - 1)
    nwords = 1;
  else
    nwords = CEIL_NWORDS (size);

  rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nwords));

  if (size < UNITS_PER_WORD)
    XVECEXP (regs, 0, 0)
      = gen_rtx_EXPR_LIST (VOIDmode,
			   gen_rtx_REG (SImode, regno),
			   const0_rtx);
  else
    for (unsigned int i = 0; i < nwords; i++)
      XVECEXP (regs, 0, i)
	= gen_rtx_EXPR_LIST (VOIDmode,
			     gen_rtx_REG (word_mode, regno + i * mult),
			     GEN_INT (i * UNITS_PER_WORD));

  return regs;
}

/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   ARG is a description of the argument.
   INCOMING_P is false for TARGET_FUNCTION_ARG, true for
    TARGET_FUNCTION_INCOMING_ARG.  */

static rtx
sparc_function_arg_1 (cumulative_args_t cum_v, const function_arg_info &arg,
		      bool incoming)
{
  const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  const int regbase
    = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
  int slotno, regno, padding;
  tree type = arg.type;
  machine_mode mode = arg.mode;
  enum mode_class mclass = GET_MODE_CLASS (mode);
  bool named = arg.named;

  slotno
    = function_arg_slotno (cum, mode, type, named, incoming, &regno, &padding);
  if (slotno == -1)
    return 0;

  /* Integer vectors are handled like floats as per the Sun VIS SDK.  */
  if (type && VECTOR_INTEGER_TYPE_P (type))
    mclass = MODE_FLOAT;

  if (TARGET_ARCH32)
    return gen_rtx_REG (mode, regno);

  /* Structures up to 16 bytes in size are passed in arg slots on the stack
     and are promoted to registers if possible.  */
  if (type && TREE_CODE (type) == RECORD_TYPE)
    {
      const int size = int_size_in_bytes (type);
      gcc_assert (size <= 16);

      return function_arg_record_value (type, mode, slotno, named, regbase);
    }

  /* Unions up to 16 bytes in size are passed in integer registers.  */
  else if (type && TREE_CODE (type) == UNION_TYPE)
    {
      const int size = int_size_in_bytes (type);
      gcc_assert (size <= 16);

      return function_arg_union_value (size, mode, slotno, regno);
    }

  /* Floating-point vectors up to 16 bytes are passed in registers.  */
  else if (type && VECTOR_TYPE_P (type) && mode == BLKmode)
    {
      const int size = int_size_in_bytes (type);
      gcc_assert (size <= 16);

      return function_arg_vector_value (size, slotno, named, regno);
    }

  /* v9 fp args in reg slots beyond the int reg slots get passed in regs
     but also have the slot allocated for them.
     If no prototype is in scope fp values in register slots get passed
     in two places, either fp regs and int regs or fp regs and memory.  */
  else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
	   && SPARC_FP_REG_P (regno))
    {
      rtx reg = gen_rtx_REG (mode, regno);
      if (cum->prototype_p || cum->libcall_p)
	return reg;
      else
	{
	  rtx v0, v1;

	  if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
	    {
	      int intreg;

	      /* On incoming, we don't need to know that the value
		 is passed in %f0 and %i0, and it confuses other parts
		 causing needless spillage even on the simplest cases.  */
	      if (incoming)
		return reg;

	      intreg = (SPARC_OUTGOING_INT_ARG_FIRST
			+ (regno - SPARC_FP_ARG_FIRST) / 2);

	      v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
	      v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
				      const0_rtx);
	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
	    }
	  else
	    {
	      /* Beyond the int arg slots: FP register plus memory.  */
	      v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
	      v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
	    }
	}
    }

  /* All other aggregate types are passed in an integer register in a mode
     corresponding to the size of the type.  */
  else if (type && AGGREGATE_TYPE_P (type))
    {
      const int size = int_size_in_bytes (type);
      gcc_assert (size <= 16);

      mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
    }

  return gen_rtx_REG (mode, regno);
}

/* Handle the TARGET_FUNCTION_ARG target hook.  */

static rtx
sparc_function_arg (cumulative_args_t cum, const function_arg_info &arg)
{
  return sparc_function_arg_1 (cum, arg, false);
}

/* Handle the TARGET_FUNCTION_INCOMING_ARG target hook.  */

static rtx
sparc_function_incoming_arg (cumulative_args_t cum,
			     const function_arg_info &arg)
{
  return sparc_function_arg_1 (cum, arg, true);
}

/* For sparc64, objects requiring 16 byte alignment are passed that way.  */

static unsigned int
sparc_function_arg_boundary (machine_mode mode, const_tree type)
{
  return ((TARGET_ARCH64
	   && (GET_MODE_ALIGNMENT (mode) == 128
	       || (type && TYPE_ALIGN (type) == 128)))
	  ? 128
	  : PARM_BOUNDARY);
}

/* For an arg passed partly in registers and partly in memory,
   this is the number of bytes of registers used.
   For args passed entirely in registers or entirely in memory, zero.

   Any arg that starts in the first 6 regs but won't entirely fit in them
   needs partial registers on v8.  On v9, structures with integer
   values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
   values that begin in the last fp reg [where "last fp reg" varies with the
   mode] will be split between that reg and memory.  */

static int
sparc_arg_partial_bytes (cumulative_args_t cum, const function_arg_info &arg)
{
  int slotno, regno, padding;

  /* We pass false for incoming here, it doesn't matter.  */
  slotno = function_arg_slotno (get_cumulative_args (cum), arg.mode, arg.type,
				arg.named, false, &regno, &padding);

  if (slotno == -1)
    return 0;

  if (TARGET_ARCH32)
    {
      /* We are guaranteed by pass_by_reference that the size of the
	 argument is not greater than 8 bytes, so we only need to return
	 one word if the argument is partially passed in registers.  */
      const int size = GET_MODE_SIZE (arg.mode);

      if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
	return UNITS_PER_WORD;
    }
  else
    {
      /* We are guaranteed by pass_by_reference that the size of the
	 argument is not greater than 16 bytes, so we only need to return
	 one word if the argument is partially passed in registers.  */
      if (arg.aggregate_type_p ())
	{
	  const int size = int_size_in_bytes (arg.type);

	  if (size > UNITS_PER_WORD
	      && (slotno == SPARC_INT_ARG_MAX - 1
		  || slotno == SPARC_FP_ARG_MAX - 1))
	    return UNITS_PER_WORD;
	}
      /* This arm covers values passed in integer registers.  */
      else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_INT
	       || ((GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
		    || (arg.type && VECTOR_TYPE_P (arg.type)))
		   && !(TARGET_FPU && arg.named)))
	{
	  const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
			   ? int_size_in_bytes (arg.type)
			   : GET_MODE_SIZE (arg.mode);

	  if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
	    return UNITS_PER_WORD;
	}
      /* And this one covers values passed in FP registers.  */
      else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
	       || (arg.type && VECTOR_TYPE_P (arg.type)))
	{
	  const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
			   ? int_size_in_bytes (arg.type)
			   : GET_MODE_SIZE (arg.mode);

	  if (size > UNITS_PER_WORD && slotno == SPARC_FP_ARG_MAX - 1)
	    return UNITS_PER_WORD;
	}
    }

  return 0;
}

/* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
   Update the data in CUM to advance over argument ARG.  */

static void
sparc_function_arg_advance (cumulative_args_t cum_v,
			    const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  tree type = arg.type;
  machine_mode mode = arg.mode;
  int regno, padding;

  /* We pass false for incoming here, it doesn't matter.  */
  function_arg_slotno (cum, mode, type, arg.named, false, &regno, &padding);

  /* If argument requires leading padding, add it.  */
  cum->words += padding;

  if (TARGET_ARCH32)
    cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
  else
    {
      /* For types that can have BLKmode, get the size from the type.  */
      if (type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
	{
	  const int size = int_size_in_bytes (type);

	  /* See comment in function_arg_record_value for empty structures.  */
	  if (size <= 0)
	    cum->words++;
	  else
	    cum->words += CEIL_NWORDS (size);
	}
      else
	cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
    }
}

/* Implement TARGET_FUNCTION_ARG_PADDING.  For the 64-bit ABI structs
   are always stored left shifted in their argument slot.
*/

static pad_direction
sparc_function_arg_padding (machine_mode mode, const_tree type)
{
  if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
    return PAD_UPWARD;

  /* Fall back to the default.  */
  return default_function_arg_padding (mode, type);
}

/* Handle the TARGET_RETURN_IN_MEMORY target hook.
   Specify whether to return the return value in memory.  */

static bool
sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  if (TARGET_ARCH32)
    /* Original SPARC 32-bit ABI says that structures and unions, and
       quad-precision floats are returned in memory.  But note that the
       first part is implemented through -fpcc-struct-return being the
       default, so here we only implement -freg-struct-return instead.
       All other base types are returned in registers.

       Extended ABI (as implemented by the Sun compiler) says that
       all complex floats are returned in registers (8 FP registers
       at most for '_Complex long double').  Return all complex integers
       in registers (4 at most for '_Complex long long').

       Vector ABI (as implemented by the Sun VIS SDK) says that vector
       integers are returned like floats of the same size, that is in
       registers up to 8 bytes and in memory otherwise.  Return all
       vector floats in memory like structure and unions; note that
       they always have BLKmode like the latter.  */
    return (TYPE_MODE (type) == BLKmode
	    || TYPE_MODE (type) == TFmode
	    || (TREE_CODE (type) == VECTOR_TYPE
		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
  else
    /* Original SPARC 64-bit ABI says that structures and unions
       smaller than 32 bytes are returned in registers, as well as
       all other base types.

       Extended ABI (as implemented by the Sun compiler) says that all
       complex floats are returned in registers (8 FP registers at most
       for '_Complex long double').  Return all complex integers in
       registers (4 at most for '_Complex TItype').

       Vector ABI (as implemented by the Sun VIS SDK) says that vector
       integers are returned like floats of the same size, that is in
       registers.  Return all vector floats like structure and unions;
       note that they always have BLKmode like the latter.  */
    return (TYPE_MODE (type) == BLKmode
	    && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
}

/* Handle the TARGET_STRUCT_VALUE target hook.
   Return where to find the structure return value address.  */

static rtx
sparc_struct_value_rtx (tree fndecl, int incoming)
{
  if (TARGET_ARCH64)
    return NULL_RTX;
  else
    {
      rtx mem;

      if (incoming)
	mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
						   STRUCT_VALUE_OFFSET));
      else
	mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
						   STRUCT_VALUE_OFFSET));

      /* Only follow the SPARC ABI for fixed-size structure returns.
	 Variable size structure returns are handled per the normal
	 procedures in GCC.  This is enabled by -mstd-struct-return.  */
      if (incoming == 2
	  && sparc_std_struct_return
	  && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
	  && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
	{
	  /* We must check and adjust the return address, as it is optional
	     as to whether the return object is really provided.  */
	  rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
	  rtx scratch = gen_reg_rtx (SImode);
	  rtx_code_label *endlab = gen_label_rtx ();

	  /* Calculate the return object size.  */
	  tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
	  rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
	  /* Construct a temporary return value.  */
	  rtx temp_val
	    = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);

	  /* Implement SPARC 32-bit psABI callee return struct checking:

	     Fetch the instruction where we will return to and see if
	     it's an unimp instruction (the most significant 10 bits
	     will be zero).  */
	  emit_move_insn (scratch, gen_rtx_MEM (SImode,
						plus_constant (Pmode,
							       ret_reg, 8)));
	  /* Assume the size is valid and pre-adjust.  */
	  emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
	  emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
				   0, endlab);
	  emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
	  /* Write the address of the memory pointed to by temp_val into
	     the memory pointed to by mem.  */
	  emit_move_insn (mem, XEXP (temp_val, 0));
	  emit_label (endlab);
	}

      return mem;
    }
}

/* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
   For v9, function return values are subject to the same rules as arguments,
   except that up to 32 bytes may be returned in registers.  */

static rtx
sparc_function_value_1 (const_tree type, machine_mode mode, bool outgoing)
{
  /* Beware that the two values are swapped here wrt function_arg.  */
  const int regbase
    = outgoing ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
  enum mode_class mclass = GET_MODE_CLASS (mode);
  int regno;

  /* Integer vectors are handled like floats as per the Sun VIS SDK.
     Note that integer vectors larger than 16 bytes have BLKmode so
     they need to be handled like floating-point vectors below.  */
  if (type && VECTOR_INTEGER_TYPE_P (type) && mode != BLKmode)
    mclass = MODE_FLOAT;

  if (TARGET_ARCH64 && type)
    {
      /* Structures up to 32 bytes in size are returned in registers.  */
      if (TREE_CODE (type) == RECORD_TYPE)
	{
	  const int size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  return function_arg_record_value (type, mode, 0, true, regbase);
	}

      /* Unions up to 32 bytes in size are returned in integer registers.  */
      else if (TREE_CODE (type) == UNION_TYPE)
	{
	  const int size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  return function_arg_union_value (size, mode, 0, regbase);
	}

      /* Vectors up to 32 bytes are returned in FP registers.  */
      else if (VECTOR_TYPE_P (type) && mode == BLKmode)
	{
	  const int size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  return function_arg_vector_value (size, 0, true, SPARC_FP_ARG_FIRST);
	}

      /* Objects that require it are returned in FP registers.  */
      else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
	;

      /* All other aggregate types are returned in an integer register in a
	 mode corresponding to the size of the type.  */
      else if (AGGREGATE_TYPE_P (type))
	{
	  /* All other aggregate types are passed in an integer register
	     in a mode corresponding to the size of the type.  */
	  const int size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();

	  /* ??? We probably should have made the same ABI change in
	     3.4.0 as the one we made for unions.   The latter was
	     required by the SCD though, while the former is not
	     specified, so we favored compatibility and efficiency.

	     Now we're stuck for aggregates larger than 16 bytes,
	     because OImode vanished in the meantime.  Let's not
	     try to be unduly clever, and simply follow the ABI
	     for unions in that case.  */
	  if (mode == BLKmode)
	    return function_arg_union_value (size, mode, 0, regbase);
	  else
	    mclass = MODE_INT;
	}

      /* We should only have pointer and integer types at this point.  This
	 must match sparc_promote_function_mode.  */
      else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
	mode = word_mode;
    }

  /* We should only have pointer and integer types at this point, except with
     -freg-struct-return.  This must match sparc_promote_function_mode.  */
  else if (TARGET_ARCH32
	   && !(type && AGGREGATE_TYPE_P (type))
	   && mclass == MODE_INT
	   && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
    mode = word_mode;

  if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
    regno = SPARC_FP_ARG_FIRST;
  else
    regno = regbase;

  return gen_rtx_REG (mode, regno);
}

/* Handle TARGET_FUNCTION_VALUE.
   On the SPARC, the value is found in the first "output" register, but the
   called function leaves it in the first "input" register.  */

static rtx
sparc_function_value (const_tree valtype,
		      const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
		      bool outgoing)
{
  return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
}

/* Handle TARGET_LIBCALL_VALUE.  */

static rtx
sparc_libcall_value (machine_mode mode,
		     const_rtx fun ATTRIBUTE_UNUSED)
{
  return sparc_function_value_1 (NULL_TREE, mode, false);
}

/* Handle FUNCTION_VALUE_REGNO_P.
   On the SPARC, the first "output" reg is used for integer values, and the
   first floating point register is used for floating point values.  */

static bool
sparc_function_value_regno_p (const unsigned int regno)
{
  /* %o0 (reg 8) for integers, %f0 (reg 32) for floats when an FPU exists.  */
  return (regno == 8 || (TARGET_FPU && regno == 32));
}

/* Do what is necessary for `va_start'.
   We look at the current function
   to determine if stdarg or varargs is used and return the address of
   the first unnamed parameter.  */

static rtx
sparc_builtin_saveregs (void)
{
  int first_reg = crtl->args.info.words;
  rtx address;
  int regno;

  /* Spill the remaining incoming integer argument registers to their
     reserved stack slots so va_arg can walk them in memory.  */
  for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
    emit_move_insn (gen_rtx_MEM (word_mode,
				 gen_rtx_PLUS (Pmode,
					       frame_pointer_rtx,
					       GEN_INT (FIRST_PARM_OFFSET (0)
							+ (UNITS_PER_WORD
							   * regno)))),
		    gen_rtx_REG (word_mode,
				 SPARC_INCOMING_INT_ARG_FIRST + regno));

  address = gen_rtx_PLUS (Pmode,
			  frame_pointer_rtx,
			  GEN_INT (FIRST_PARM_OFFSET (0)
				   + UNITS_PER_WORD * first_reg));

  return address;
}

/* Implement `va_start' for stdarg.  */

static void
sparc_va_start (tree valist, rtx nextarg)
{
  nextarg = expand_builtin_saveregs ();
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement `va_arg' for stdarg.  */

static tree
sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		       gimple_seq *post_p)
{
  HOST_WIDE_INT size, rsize, align;
  tree addr, incr;
  bool indirect;
  tree ptrtype = build_pointer_type (type);

  if (pass_va_arg_by_reference (type))
    {
      indirect = true;
      size = rsize = UNITS_PER_WORD;
      align = 0;
    }
  else
    {
      indirect = false;
      size = int_size_in_bytes (type);
      rsize = ROUND_UP (size, UNITS_PER_WORD);
      align = 0;

      if (TARGET_ARCH64)
	{
	  /* For SPARC64, objects requiring 16-byte alignment get it.  */
	  if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
	    align = 2 * UNITS_PER_WORD;

	  /* SPARC-V9 ABI states that structures up to 16 bytes in size
	     are left-justified in their slots.  */
	  if (AGGREGATE_TYPE_P (type))
	    {
	      if (size == 0)
		size = rsize = UNITS_PER_WORD;
	      else
		size = rsize;
	    }
	}
    }

  incr = valist;
  if (align)
    {
      /* Round the argument pointer up to the required alignment.  */
      incr = fold_build_pointer_plus_hwi (incr, align - 1);
      incr = fold_convert (sizetype, incr);
      incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
			  size_int (-align));
      incr = fold_convert (ptr_type_node, incr);
    }

  gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
  addr = incr;

  /* Small big-endian values sit at the end of their word-sized slot.  */
  if (BYTES_BIG_ENDIAN && size < rsize)
    addr = fold_build_pointer_plus_hwi (incr, rsize - size);

  if (indirect)
    {
      addr = fold_convert (build_pointer_type (ptrtype), addr);
      addr = build_va_arg_indirect_ref (addr);
    }

  /* If the address isn't aligned properly for the type, we need a temporary.
     FIXME: This is inefficient, usually we can do this in registers.  */
  else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
    {
      tree tmp = create_tmp_var (type, "va_arg_tmp");
      tree dest_addr = build_fold_addr_expr (tmp);
      tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				   3, dest_addr, addr, size_int (rsize));
      TREE_ADDRESSABLE (tmp) = 1;
      gimplify_and_add (copy, pre_p);
      addr = dest_addr;
    }

  else
    addr = fold_convert (ptrtype, addr);

  incr = fold_build_pointer_plus_hwi (incr, rsize);
  gimplify_assign (valist, incr, post_p);

  return build_va_arg_indirect_ref (addr);
}

/* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
   Specify whether the vector mode is supported by the hardware.  */

static bool
sparc_vector_mode_supported_p (machine_mode mode)
{
  return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
}

/* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook.
*/

static machine_mode
sparc_preferred_simd_mode (scalar_mode mode)
{
  if (TARGET_VIS)
    switch (mode)
      {
      case E_SImode:
	return V2SImode;
      case E_HImode:
	return V4HImode;
      case E_QImode:
	return V8QImode;

      default:;
      }

  return word_mode;
}

/* Implement TARGET_CAN_FOLLOW_JUMP.  */

static bool
sparc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
{
  /* Do not fold unconditional jumps that have been created for crossing
     partition boundaries.  */
  if (CROSSING_JUMP_P (followee) && !CROSSING_JUMP_P (follower))
    return false;

  return true;
}

/* Return the string to output an unconditional branch to LABEL, which is
   the operand number of the label.

   DEST is the destination insn (i.e. the label), INSN is the source.  */

const char *
output_ubranch (rtx dest, rtx_insn *insn)
{
  static char string[64];
  bool v9_form = false;
  int delta;
  char *p;

  /* Even if we are trying to use cbcond for this, evaluate
     whether we can use V9 branches as our backup plan.  */
  delta = 5000000;
  if (!CROSSING_JUMP_P (insn) && INSN_ADDRESSES_SET_P ())
    delta = (INSN_ADDRESSES (INSN_UID (dest))
	     - INSN_ADDRESSES (INSN_UID (insn)));

  /* Leave some instructions for "slop".  */
  if (TARGET_V9 && delta >= -260000 && delta < 260000)
    v9_form = true;

  if (TARGET_CBCOND)
    {
      bool emit_nop = emit_cbcond_nop (insn);
      bool far = false;
      const char *rval;

      /* cbcond branches reach only +-1KB; fall back beyond that.  */
      if (delta < -500 || delta > 500)
	far = true;

      if (far)
	{
	  if (v9_form)
	    rval = "ba,a,pt\t%%xcc, %l0";
	  else
	    rval = "b,a\t%l0";
	}
      else
	{
	  if (emit_nop)
	    rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
	  else
	    rval = "cwbe\t%%g0, %%g0, %l0";
	}
      return rval;
    }

  if (v9_form)
    strcpy (string, "ba%*,pt\t%%xcc, ");
  else
    strcpy (string, "b%*\t");

  /* Append the label operand and the delay-slot specifier.  */
  p = strchr (string, '\0');
  *p++ = '%';
  *p++ = 'l';
  *p++ = '0';
  *p++ = '%';
  *p++ = '(';
  *p = '\0';

  return string;
}

/* Return the string to output a conditional branch to LABEL, which is
   the operand number of the label.  OP is the conditional expression.
   XEXP (OP, 0) is assumed to be a condition code register (integer or
   floating point) and its mode specifies what kind of comparison we made.

   DEST is the destination insn (i.e. the label), INSN is the source.

   REVERSED is nonzero if we should reverse the sense of the comparison.

   ANNUL is nonzero if we should generate an annulling branch.  */

const char *
output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
		rtx_insn *insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  machine_mode mode = GET_MODE (cc_reg);
  const char *labelno, *branch;
  int spaces = 8, far;
  char *p;

  /* v9 branches are limited to +-1MB.  If it is too far away,
     change

     bne,pt %xcc, .LC30

     to

     be,pn %xcc, .+12
      nop
     ba .LC30

     and

     fbne,a,pn %fcc2, .LC29

     to

     fbe,pt %fcc2, .+16
      nop
     ba .LC29  */

  far = TARGET_V9 && (get_attr_length (insn) >= 3);
  if (reversed ^ far)
    {
      /* Reversal of FP compares takes care -- an ordered compare
	 becomes an unordered compare and vice versa.  */
      if (mode == CCFPmode || mode == CCFPEmode)
	code = reverse_condition_maybe_unordered (code);
      else
	code = reverse_condition (code);
    }

  /* Start by writing the branch condition.  */
  if (mode == CCFPmode || mode == CCFPEmode)
    {
      switch (code)
	{
	case NE:
	  branch = "fbne";
	  break;
	case EQ:
	  branch = "fbe";
	  break;
	case GE:
	  branch = "fbge";
	  break;
	case GT:
	  branch = "fbg";
	  break;
	case LE:
	  branch = "fble";
	  break;
	case LT:
	  branch = "fbl";
	  break;
	case UNORDERED:
	  branch = "fbu";
	  break;
	case ORDERED:
	  branch = "fbo";
	  break;
	case UNGT:
	  branch = "fbug";
	  break;
	case UNLT:
	  branch = "fbul";
	  break;
	case UNEQ:
	  branch = "fbue";
	  break;
	case UNGE:
	  branch = "fbuge";
	  break;
	case UNLE:
	  branch = "fbule";
	  break;
	case LTGT:
	  branch = "fblg";
	  break;
	default:
	  gcc_unreachable ();
	}

      /* ??? !v9: FP branches cannot be preceded by another floating point
	 insn.  Because there is currently no concept of pre-delay slots,
	 we can fix this only by always emitting a nop before a floating
	 point branch.  */

      string[0] = '\0';
      if (! TARGET_V9)
	strcpy (string, "nop\n\t");
      strcat (string, branch);
    }
  else
    {
      switch (code)
	{
	case NE:
	  if (mode == CCVmode || mode == CCXVmode)
	    branch = "bvs";
	  else
	    branch = "bne";
	  break;
	case EQ:
	  if (mode == CCVmode || mode == CCXVmode)
	    branch = "bvc";
	  else
	    branch = "be";
	  break;
	case GE:
	  if (mode == CCNZmode || mode == CCXNZmode)
	    branch = "bpos";
	  else
	    branch = "bge";
	  break;
	case GT:
	  branch = "bg";
	  break;
	case LE:
	  branch = "ble";
	  break;
	case LT:
	  if (mode == CCNZmode || mode == CCXNZmode)
	    branch = "bneg";
	  else
	    branch = "bl";
	  break;
	case GEU:
	  branch = "bgeu";
	  break;
	case GTU:
	  branch = "bgu";
	  break;
	case LEU:
	  branch = "bleu";
	  break;
	case LTU:
	  branch = "blu";
	  break;
	default:
	  gcc_unreachable ();
	}
      strcpy (string, branch);
    }
  spaces -= strlen (branch);
  p = strchr (string, '\0');

  /* Now add the annulling, the label, and a possible noop.  */
  if (annul && ! far)
    {
      strcpy (p, ",a");
      p += 2;
      spaces -= 2;
    }

  if (TARGET_V9)
    {
      rtx note;
      int v8 = 0;

      if (! far && insn && INSN_ADDRESSES_SET_P ())
	{
	  int delta = (INSN_ADDRESSES (INSN_UID (dest))
		       - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta < -260000 || delta >= 260000)
	    v8 = 1;
	}

      switch (mode)
	{
	case E_CCmode:
	case E_CCNZmode:
	case E_CCCmode:
	case E_CCVmode:
	  labelno = "%%icc, ";
	  if (v8)
	    labelno = "";
	  break;
	case E_CCXmode:
	case E_CCXNZmode:
	case E_CCXCmode:
	case E_CCXVmode:
	  labelno = "%%xcc, ";
	  gcc_assert (!v8);
	  break;
	case E_CCFPmode:
	case E_CCFPEmode:
	  {
	    static char v9_fcc_labelno[] = "%%fccX, ";
	    /* Set the char indicating the number of the fcc reg to use.  */
	    v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
	    labelno = v9_fcc_labelno;
	    if (v8)
	      {
		gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
		labelno = "";
	      }
	  }
	  break;
	default:
	  gcc_unreachable ();
	}

      if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
	{
	  strcpy (p,
		  ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
		    >= profile_probability::even ()) ^ far)
		  ? ",pt" : ",pn");
	  p += 3;
	  spaces -= 3;
	}
    }
  else
    labelno = "";

  if (spaces > 0)
    *p++ = '\t';
  else
    *p++ = ' ';
  strcpy (p, labelno);
  p = strchr (p, '\0');
  if (far)
    {
      strcpy (p, ".+12\n\t nop\n\tb\t");
      /* Skip the next insn if requested or
	 if we know that it will be a nop.  */
      if (annul || ! final_sequence)
	p[3] = '6';
      p += 14;
    }
  *p++ = '%';
  *p++ = 'l';
  *p++ = label + '0';
  *p++ = '%';
  *p++ = '#';
  *p = '\0';

  return string;
}

/* Emit a library call comparison between floating point X and Y.
   COMPARISON is the operator to compare with (EQ, NE, GT, etc).
   Return the new operator to be used in the comparison sequence.

   TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
   values as arguments instead of the TFmode registers themselves,
   that's why we cannot call emit_float_lib_cmp.  */

rtx
sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
{
  const char *qpfunc;
  rtx slot0, slot1, result, tem, tem2, libfunc;
  machine_mode mode;
  enum rtx_code new_comparison;

  /* Pick the libcall matching COMPARISON; the unordered-aware codes all
     share the generic _Q{p}_cmp routine and decode its result below.  */
  switch (comparison)
    {
    case EQ:
      qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
      break;

    case NE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
      break;

    case GT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
      break;

    case GE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
      break;

    case LT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
      break;

    case LE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
      break;

    case ORDERED:
    case UNORDERED:
    case UNGT:
    case UNLT:
    case UNEQ:
    case UNGE:
    case UNLE:
    case LTGT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_ARCH64)
    {
      /* The _Qp_* routines take pointers, so force X and Y into memory.  */
      if (MEM_P (x))
	{
	  tree expr = MEM_EXPR (x);
	  if (expr)
	    mark_addressable (expr);
	  slot0 = x;
	}
      else
	{
	  slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
	  emit_move_insn (slot0, x);
	}

      if (MEM_P (y))
	{
	  tree expr = MEM_EXPR (y);
	  if (expr)
	    mark_addressable (expr);
	  slot1 = y;
	}
      else
	{
	  slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
	  emit_move_insn (slot1, y);
	}

      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
      emit_library_call (libfunc, LCT_NORMAL,
			 DImode,
			 XEXP (slot0, 0), Pmode,
			 XEXP (slot1, 0), Pmode);
      mode = DImode;
    }
  else
    {
      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
      emit_library_call (libfunc, LCT_NORMAL,
			 SImode,
			 x, TFmode, y, TFmode);
      mode = SImode;
    }


  /* Immediately move the result of the libcall into a pseudo
     register so reload doesn't clobber the value if it needs
     the return register for a spill reg.  */
  result = gen_reg_rtx (mode);
  emit_move_insn (result, hard_libcall_value (mode, libfunc));

  /* Translate the _Q{p}_cmp result (0: =, 1: <, 2: >, 3: unordered)
     into a condition on RESULT.  */
  switch (comparison)
    {
    default:
      return gen_rtx_NE (VOIDmode, result, const0_rtx);
    case ORDERED:
    case UNORDERED:
      new_comparison = (comparison == UNORDERED ? EQ : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
    case UNGT:
    case UNGE:
      new_comparison = (comparison == UNGT ? GT : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
    case UNLE:
      return gen_rtx_NE (VOIDmode, result, const2_rtx);
    case UNLT:
      tem = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_andsi3 (tem, result, const1_rtx));
      else
	emit_insn (gen_anddi3 (tem, result, const1_rtx));
      return gen_rtx_NE (VOIDmode, tem, const0_rtx);
    case UNEQ:
    case LTGT:
      tem = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_addsi3 (tem, result, const1_rtx));
      else
	emit_insn (gen_adddi3 (tem, result, const1_rtx));
      tem2 = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
      else
	emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
      new_comparison = (comparison == UNEQ ? EQ : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
    }

  gcc_unreachable ();
}

/* Generate an unsigned DImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.
*/

void
sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
{
  rtx dst = operands[0];
  rtx src = force_reg (DImode, operands[1]);
  rtx_code_label *is_neg = gen_label_rtx ();
  rtx_code_label *done = gen_label_rtx ();
  rtx half = gen_reg_rtx (DImode);
  rtx lsb = gen_reg_rtx (DImode);
  rtx ftmp = gen_reg_rtx (mode);

  /* If the sign bit is set, the value does not fit in a signed DImode
     and we need the fixup path below.  */
  emit_cmp_and_jump_insns (src, const0_rtx, LT, const0_rtx, DImode, 0, is_neg);

  /* Fast path: a plain signed conversion is correct.  */
  emit_insn (gen_rtx_SET (dst, gen_rtx_FLOAT (mode, src)));
  emit_jump_insn (gen_jump (done));
  emit_barrier ();

  emit_label (is_neg);

  /* Shift the value right by one while folding the dropped low bit
     back in, convert the now-nonnegative half, and double the result.  */
  emit_insn (gen_lshrdi3 (half, src, const1_rtx));
  emit_insn (gen_anddi3 (lsb, src, const1_rtx));
  emit_insn (gen_iordi3 (half, half, lsb));
  emit_insn (gen_rtx_SET (ftmp, gen_rtx_FLOAT (mode, half)));
  emit_insn (gen_rtx_SET (dst, gen_rtx_PLUS (mode, ftmp, ftmp)));

  emit_label (done);
}

/* Generate an FP to unsigned DImode conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.
*/

void
sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
{
  rtx i0, i1, f0, in, out, limit;

  out = operands[0];
  in = force_reg (mode, operands[1]);
  rtx_code_label *neglab = gen_label_rtx ();
  rtx_code_label *donelab = gen_label_rtx ();
  i0 = gen_reg_rtx (DImode);
  i1 = gen_reg_rtx (DImode);
  limit = gen_reg_rtx (mode);
  f0 = gen_reg_rtx (mode);

  /* LIMIT is 2^63, the first value that does not fit in a signed
     DImode.  */
  emit_move_insn (limit,
		  const_double_from_real_value (
		    REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
  emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);

  /* Small values: a plain signed fix is correct.  */
  emit_insn (gen_rtx_SET (out,
			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* Large values: subtract 2^63 first, convert, then add 2^63 back
     by flipping the sign bit of the integer result.  */
  emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
  emit_insn (gen_rtx_SET (i0,
			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
  emit_insn (gen_movdi (i1, const1_rtx));
  emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
  emit_insn (gen_xordi3 (out, i0, i1));

  emit_label (donelab);
}

/* Return the string to output a compare and branch instruction to DEST.
   DEST is the destination insn (i.e. the label), INSN is the source,
   and OP is the conditional expression.  */

const char *
output_cbcond (rtx op, rtx dest, rtx_insn *insn)
{
  machine_mode mode = GET_MODE (XEXP (op, 0));
  enum rtx_code code = GET_CODE (op);
  const char *cond_str, *tmpl;
  int far, emit_nop, len;
  static char string[64];
  char size_char;

  /* Compare and Branch is limited to +-2KB.  If it is too far away,
     change

     cxbne X, Y, .LC30

     to

     cxbe X, Y, .+16
     nop
     ba,pt xcc, .LC30
     nop  */

  len = get_attr_length (insn);

  /* The length attribute encodes which expansion is needed: 4 words
     means the out-of-range sequence, 2 words means a trailing nop.  */
  far = len == 4;
  emit_nop = len == 2;

  /* The far sequence branches around the real target, so the condition
     must be inverted.  */
  if (far)
    code = reverse_condition (code);

  /* 'w' selects the 32-bit form, 'x' the 64-bit form of cbcond.  */
  size_char = ((mode == SImode) ? 'w' : 'x');

  switch (code)
    {
    case NE:
      cond_str = "ne";
      break;

    case EQ:
      cond_str = "e";
      break;

    case GE:
      cond_str = "ge";
      break;

    case GT:
      cond_str = "g";
      break;

    case LE:
      cond_str = "le";
      break;

    case LT:
      cond_str = "l";
      break;

    case GEU:
      cond_str = "cc";
      break;

    case GTU:
      cond_str = "gu";
      break;

    case LEU:
      cond_str = "leu";
      break;

    case LTU:
      cond_str = "cs";
      break;

    default:
      gcc_unreachable ();
    }

  if (far)
    {
      int veryfar = 1, delta;

      if (INSN_ADDRESSES_SET_P ())
	{
	  delta = (INSN_ADDRESSES (INSN_UID (dest))
		   - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta >= -260000 && delta < 260000)
	    veryfar = 0;
	}

      /* Use a plain "b" when the target is within range of the 19-bit
	 displacement branch, otherwise the predicted "ba,pt %xcc".  */
      if (veryfar)
	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
      else
	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
    }
  else
    {
      if (emit_nop)
	tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
      else
	tmpl = "c%cb%s\t%%1, %%2, %%3";
    }

  /* Substitute the size character and condition string; the %N operand
     references survive into the returned template.  */
  snprintf (string, sizeof(string), tmpl, size_char, cond_str);

  return string;
}

/* Return the string to output a conditional branch to LABEL, testing
   register REG.  LABEL is the operand number of the label; REG is the
   operand number of the reg.  OP is the conditional expression.  The mode
   of REG says what kind of comparison we made.

   DEST is the destination insn (i.e.
   the label), INSN is the source.

   REVERSED is nonzero if we should reverse the sense of the comparison.

   ANNUL is nonzero if we should generate an annulling branch.  */

const char *
output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
		 int annul, rtx_insn *insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  machine_mode mode = GET_MODE (XEXP (op, 0));
  rtx note;
  int far;
  char *p;

  /* branch on register are limited to +-128KB.  If it is too far away,
     change

     brnz,pt %g1, .LC30

     to

     brz,pn %g1, .+12
      nop
     ba,pt %xcc, .LC30

     and

     brgez,a,pn %o1, .LC29

     to

     brlz,pt %o1, .+16
      nop
     ba,pt %xcc, .LC29  */

  far = get_attr_length (insn) >= 3;

  /* If not floating-point or if EQ or NE, we can just reverse the code.  */
  if (reversed ^ far)
    code = reverse_condition (code);

  /* Only 64-bit versions of these instructions exist.  */
  gcc_assert (mode == DImode);

  /* Start by writing the branch condition.  */

  switch (code)
    {
    case NE:
      strcpy (string, "brnz");
      break;

    case EQ:
      strcpy (string, "brz");
      break;

    case GE:
      strcpy (string, "brgez");
      break;

    case LT:
      strcpy (string, "brlz");
      break;

    case LE:
      strcpy (string, "brlez");
      break;

    case GT:
      strcpy (string, "brgz");
      break;

    default:
      gcc_unreachable ();
    }

  p = strchr (string, '\0');

  /* Now add the annulling, reg, label, and nop.  */
  if (annul && ! far)
    {
      strcpy (p, ",a");
      p += 2;
    }

  /* Add the prediction bit from the branch-probability note, flipping
     it when the branch itself has been inverted for the far form.  */
  if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
    {
      strcpy (p,
	      ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
	       >= profile_probability::even ()) ^ far)
	      ? ",pt" : ",pn");
      p += 3;
    }

  /* Separate opcode and operands; keep opcodes column-aligned by using
     a space once the mnemonic has grown past the tab stop.  */
  *p = p < string + 8 ? '\t' : ' ';
  p++;
  *p++ = '%';
  *p++ = '0' + reg;
  *p++ = ',';
  *p++ = ' ';
  if (far)
    {
      int veryfar = 1, delta;

      if (INSN_ADDRESSES_SET_P ())
	{
	  delta = (INSN_ADDRESSES (INSN_UID (dest))
		   - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta >= -260000 && delta < 260000)
	    veryfar = 0;
	}

      strcpy (p, ".+12\n\t nop\n\t");
      /* Skip the next insn if requested or
	 if we know that it will be a nop.  */
      if (annul || ! final_sequence)
	p[3] = '6';	/* patch ".+12" into ".+16" in place */
      p += 12;
      if (veryfar)
	{
	  strcpy (p, "b\t");
	  p += 2;
	}
      else
	{
	  strcpy (p, "ba,pt\t%%xcc, ");
	  p += 13;
	}
    }
  *p++ = '%';
  *p++ = 'l';
  *p++ = '0' + label;
  *p++ = '%';
  *p++ = '#';
  *p = '\0';

  return string;
}

/* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
   Such instructions cannot be used in the delay slot of return insn on v9.
   If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
 */

static int
epilogue_renumber (register rtx *where, int test)
{
  register const char *fmt;
  register int i;
  register enum rtx_code code;

  if (*where == 0)
    return 0;

  code = GET_CODE (*where);

  switch (code)
    {
    case REG:
      if (REGNO (*where) >= 8 && REGNO (*where) < 24)      /* oX or lX */
	return 1;
      if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
	{
	  /* Rename %iN to %oN, preserving ORIGINAL_REGNO when the reg
	     carries one so debug info stays coherent.  */
	  if (ORIGINAL_REGNO (*where))
	    {
	      rtx n = gen_raw_REG (GET_MODE (*where),
				   OUTGOING_REGNO (REGNO (*where)));
	      ORIGINAL_REGNO (n) = ORIGINAL_REGNO (*where);
	      *where = n;
	    }
	  else
	    *where = gen_rtx_REG (GET_MODE (*where),
				  OUTGOING_REGNO (REGNO (*where)));
	}
      return 0;

    case SCRATCH:
    case CC0:
    case PC:
    case CONST_INT:
    case CONST_WIDE_INT:
    case CONST_DOUBLE:
      return 0;

      /* Do not replace the frame pointer with the stack pointer because
	 it can cause the delayed instruction to load below the stack.
	 This occurs when instructions like:

	 (set (reg/i:SI 24 %i0)
	     (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
			(const_int -20 [0xffffffec])) 0))

	 are in the return delayed slot.  */
    case PLUS:
      if (GET_CODE (XEXP (*where, 0)) == REG
	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
	  && (GET_CODE (XEXP (*where, 1)) != CONST_INT
	      || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
	return 1;
      break;

    case MEM:
      if (SPARC_STACK_BIAS
	  && GET_CODE (XEXP (*where, 0)) == REG
	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
	return 1;
      break;

    default:
      break;
    }

  /* Recurse over all sub-expressions and vectors of the pattern.  */
  fmt = GET_RTX_FORMAT (code);

  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;
	  for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
	    if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
	      return 1;
	}
      else if (fmt[i] == 'e'
	       && epilogue_renumber (&(XEXP (*where, i)), test))
	return 1;
    }
  return 0;
}

/* Leaf functions and non-leaf functions have different needs.
 */

static const int
reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;

static const int
reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;

/* Index 0: leaf order, index 1: non-leaf order (see below).  */
static const int *const reg_alloc_orders[] = {
  reg_leaf_alloc_order,
  reg_nonleaf_alloc_order};

void
order_regs_for_local_alloc (void)
{
  static int last_order_nonleaf = 1;

  /* Hard register 15 (%o7) holds the return address written by call
     insns, so it is ever-live exactly when the function makes a call,
     i.e. is not a leaf.  Only copy the table when the answer changes
     from the last time we were called.  */
  if (df_regs_ever_live_p (15) != last_order_nonleaf)
    {
      last_order_nonleaf = !last_order_nonleaf;
      memcpy ((char *) reg_alloc_order,
	      (const char *) reg_alloc_orders[last_order_nonleaf],
	      FIRST_PSEUDO_REGISTER * sizeof (int));
    }
}

/* Return 1 if REG and MEM are legitimate enough to allow the various
   MEM<-->REG splits to be run.  */

int
sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
{
  /* Punt if we are here by mistake.  */
  gcc_assert (reload_completed);

  /* We must have an offsettable memory reference.  */
  if (!offsettable_memref_p (mem))
    return 0;

  /* If we have legitimate args for ldd/std, we do not want
     the split to happen.  */
  if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
    return 0;

  /* Success.  */
  return 1;
}

/* Split a REG <-- MEM move into a pair of moves in MODE.  */

void
sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
{
  rtx high_part = gen_highpart (mode, dest);
  rtx low_part = gen_lowpart (mode, dest);
  rtx word0 = adjust_address (src, mode, 0);
  rtx word1 = adjust_address (src, mode, 4);

  /* If loading the high part would clobber a register used in the
     second word's address, load the low part first.  */
  if (reg_overlap_mentioned_p (high_part, word1))
    {
      emit_move_insn_1 (low_part, word1);
      emit_move_insn_1 (high_part, word0);
    }
  else
    {
      emit_move_insn_1 (high_part, word0);
      emit_move_insn_1 (low_part, word1);
    }
}

/* Split a MEM <-- REG move into a pair of moves in MODE.
*/ 9094 9095void 9096sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode) 9097{ 9098 rtx word0 = adjust_address (dest, mode, 0); 9099 rtx word1 = adjust_address (dest, mode, 4); 9100 rtx high_part = gen_highpart (mode, src); 9101 rtx low_part = gen_lowpart (mode, src); 9102 9103 emit_move_insn_1 (word0, high_part); 9104 emit_move_insn_1 (word1, low_part); 9105} 9106 9107/* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */ 9108 9109int 9110sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2) 9111{ 9112 /* Punt if we are here by mistake. */ 9113 gcc_assert (reload_completed); 9114 9115 if (GET_CODE (reg1) == SUBREG) 9116 reg1 = SUBREG_REG (reg1); 9117 if (GET_CODE (reg1) != REG) 9118 return 0; 9119 const int regno1 = REGNO (reg1); 9120 9121 if (GET_CODE (reg2) == SUBREG) 9122 reg2 = SUBREG_REG (reg2); 9123 if (GET_CODE (reg2) != REG) 9124 return 0; 9125 const int regno2 = REGNO (reg2); 9126 9127 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2)) 9128 return 1; 9129 9130 if (TARGET_VIS3) 9131 { 9132 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2)) 9133 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2))) 9134 return 1; 9135 } 9136 9137 return 0; 9138} 9139 9140/* Split a REG <--> REG move into a pair of moves in MODE. */ 9141 9142void 9143sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode) 9144{ 9145 rtx dest1 = gen_highpart (mode, dest); 9146 rtx dest2 = gen_lowpart (mode, dest); 9147 rtx src1 = gen_highpart (mode, src); 9148 rtx src2 = gen_lowpart (mode, src); 9149 9150 /* Now emit using the real source and destination we found, swapping 9151 the order if we detect overlap. */ 9152 if (reg_overlap_mentioned_p (dest1, src2)) 9153 { 9154 emit_move_insn_1 (dest2, src2); 9155 emit_move_insn_1 (dest1, src1); 9156 } 9157 else 9158 { 9159 emit_move_insn_1 (dest1, src1); 9160 emit_move_insn_1 (dest2, src2); 9161 } 9162} 9163 9164/* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1. 
   This makes them candidates for using ldd and std insns.

   Note reg1 and reg2 *must* be hard registers.  */

int
registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
{
  /* We might have been passed a SUBREG.  */
  if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
    return 0;

  /* The pair must start on an even register number.  */
  if (REGNO (reg1) % 2 != 0)
    return 0;

  /* Integer ldd is deprecated in SPARC V9 */
  if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
    return 0;

  return (REGNO (reg1) == REGNO (reg2) - 1);
}

/* Return 1 if the addresses in mem1 and mem2 are suitable for use in
   an ldd or std insn.

   This can only happen when addr1 and addr2, the addresses in mem1
   and mem2, are consecutive memory locations (addr1 + 4 == addr2).
   addr1 must also be aligned on a 64-bit boundary.

   Also iff dependent_reg_rtx is not null it should not be used to
   compute the address for mem1, i.e. we cannot optimize a sequence
   like:
	ld [%o0], %o0
	ld [%o0 + 4], %o1
   to
	ldd [%o0], %o0
   nor:
	ld [%g3 + 4], %g3
	ld [%g3], %g2
   to
	ldd [%g3], %g2

   But, note that the transformation from:
	ld [%g2 + 4], %g3
	ld [%g2], %g2
   to
	ldd [%g2], %g2
   is perfectly fine.  Thus, the peephole2 patterns always pass us
   the destination register of the first load, never the second one.

   For stores we don't have a similar problem, so dependent_reg_rtx is
   NULL_RTX.  */

int
mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
{
  rtx addr1, addr2;
  unsigned int reg1;
  HOST_WIDE_INT offset1;

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
    return 0;

  /* MEM1 should be aligned on a 64-bit boundary.  */
  if (MEM_ALIGN (mem1) < 64)
    return 0;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  /* Extract a register number and offset (if used) from the first addr.  */
  if (GET_CODE (addr1) == PLUS)
    {
      /* If not a REG, return zero.  */
      if (GET_CODE (XEXP (addr1, 0)) != REG)
	return 0;
      else
	{
	  reg1 = REGNO (XEXP (addr1, 0));
	  /* The offset must be constant!  */
	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
	    return 0;
	  offset1 = INTVAL (XEXP (addr1, 1));
	}
    }
  else if (GET_CODE (addr1) != REG)
    return 0;
  else
    {
      reg1 = REGNO (addr1);
      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
      offset1 = 0;
    }

  /* Make sure the second address is a (mem (plus (reg) (const_int).  */
  if (GET_CODE (addr2) != PLUS)
    return 0;

  if (GET_CODE (XEXP (addr2, 0)) != REG
      || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
    return 0;

  /* Both addresses must use the same base register.  */
  if (reg1 != REGNO (XEXP (addr2, 0)))
    return 0;

  /* See the function comment: the base of MEM1 must not be the
     destination of the first load.  */
  if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
    return 0;

  /* The first offset must be evenly divisible by 8 to ensure the
     address is 64-bit aligned.  */
  if (offset1 % 8 != 0)
    return 0;

  /* The offset for the second addr must be 4 more than the first addr.  */
  if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
    return 0;

  /* All the tests passed.  addr1 and addr2 are valid for ldd and std
     instructions.  */
  return 1;
}

/* Return the widened memory access made of MEM1 and MEM2 in MODE.  */

rtx
widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
{
  rtx x = widen_memory_access (mem1, mode, 0);
  /* The widened access may trap if either original access could.  */
  MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
  return x;
}

/* Return 1 if reg is a pseudo, or is the first register in
   a hard register pair.
   This makes it suitable for use in
   ldd and std insns.  */

int
register_ok_for_ldd (rtx reg)
{
  /* We might have been passed a SUBREG.  */
  if (!REG_P (reg))
    return 0;

  /* A hard register must be even-numbered to start an ldd/std pair.  */
  if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
    return (REGNO (reg) % 2 == 0);

  /* A pseudo will be given a suitable hard register later.  */
  return 1;
}

/* Return 1 if OP, a MEM, has an address which is known to be
   aligned to an 8-byte boundary.  */

int
memory_ok_for_ldd (rtx op)
{
  /* In 64-bit mode, we assume that the address is word-aligned.  */
  if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
    return 0;

  /* After reload the address must already be strictly valid.  */
  if (! can_create_pseudo_p ()
      && !strict_memory_address_p (Pmode, XEXP (op, 0)))
    return 0;

  return 1;
}

/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */

static bool
sparc_print_operand_punct_valid_p (unsigned char code)
{
  if (code == '#'
      || code == '*'
      || code == '('
      || code == ')'
      || code == '_'
      || code == '&')
    return true;

  return false;
}

/* Implement TARGET_PRINT_OPERAND.
   Print operand X (an rtx) in assembler syntax to file FILE.
   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
   For `%' followed by punctuation, CODE is the punctuation and X is null.  */

static void
sparc_print_operand (FILE *file, rtx x, int code)
{
  const char *s;

  switch (code)
    {
    case '#':
      /* Output an insn in a delay slot.  */
      if (final_sequence)
	sparc_indent_opcode = 1;
      else
	fputs ("\n\t nop", file);
      return;
    case '*':
      /* Output an annul flag if there's nothing for the delay slot and we
	 are optimizing.  This is always used with '(' below.
	 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
	 this is a dbx bug.  So, we only do this when optimizing.
	 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
	 Always emit a nop in case the next instruction is a branch.  */
      if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
	fputs (",a", file);
      return;
    case '(':
      /* Output a 'nop' if there's nothing for the delay slot and we are
	 not optimizing.  This is always used with '*' above.  */
      if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
	fputs ("\n\t nop", file);
      else if (final_sequence)
	sparc_indent_opcode = 1;
      return;
    case ')':
      /* Output the right displacement from the saved PC on function return.
	 The caller may have placed an "unimp" insn immediately after the call
	 so we have to account for it.  This insn is used in the 32-bit ABI
	 when calling a function that returns a non zero-sized structure.  The
	 64-bit ABI doesn't have it.  Be careful to have this test be the same
	 as that for the call.  The exception is when sparc_std_struct_return
	 is enabled, the psABI is followed exactly and the adjustment is made
	 by the code in sparc_struct_value_rtx.  The call emitted is the same
	 when sparc_std_struct_return is enabled.  */
      if (!TARGET_ARCH64
	  && cfun->returns_struct
	  && !sparc_std_struct_return
	  && DECL_SIZE (DECL_RESULT (current_function_decl))
	  && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
	     == INTEGER_CST
	  && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
	fputs ("12", file);
      else
	fputc ('8', file);
      return;
    case '_':
      /* Output the Embedded Medium/Anywhere code model base register.  */
      fputs (EMBMEDANY_BASE_REG, file);
      return;
    case '&':
      /* Print some local dynamic TLS name.  */
      if (const char *name = get_some_local_dynamic_name ())
	assemble_name (file, name);
      else
	output_operand_lossage ("'%%&' used without any "
				"local dynamic TLS references");
      return;

    case 'Y':
      /* Adjust the operand to take into account a RESTORE operation.  */
      if (GET_CODE (x) == CONST_INT)
	break;
      else if (GET_CODE (x) != REG)
	output_operand_lossage ("invalid %%Y operand");
      else if (REGNO (x) < 8)
	fputs (reg_names[REGNO (x)], file);
      else if (REGNO (x) >= 24 && REGNO (x) < 32)
	/* Map %i0-%i7 (regs 24-31) to %o0-%o7 (regs 8-15).  */
	fputs (reg_names[REGNO (x)-16], file);
      else
	output_operand_lossage ("invalid %%Y operand");
      return;
    case 'L':
      /* Print out the low order register name of a register pair.  */
      if (WORDS_BIG_ENDIAN)
	fputs (reg_names[REGNO (x)+1], file);
      else
	fputs (reg_names[REGNO (x)], file);
      return;
    case 'H':
      /* Print out the high order register name of a register pair.  */
      if (WORDS_BIG_ENDIAN)
	fputs (reg_names[REGNO (x)], file);
      else
	fputs (reg_names[REGNO (x)+1], file);
      return;
    case 'R':
      /* Print out the second register name of a register pair or quad.
	 I.e., R (%o0) => %o1.  */
      fputs (reg_names[REGNO (x)+1], file);
      return;
    case 'S':
      /* Print out the third register name of a register quad.
	 I.e., S (%o0) => %o2.  */
      fputs (reg_names[REGNO (x)+2], file);
      return;
    case 'T':
      /* Print out the fourth register name of a register quad.
	 I.e., T (%o0) => %o3.  */
      fputs (reg_names[REGNO (x)+3], file);
      return;
    case 'x':
      /* Print a condition code register.  */
      if (REGNO (x) == SPARC_ICC_REG)
	{
	  /* The integer condition code register is %icc or %xcc
	     depending on the width of the comparison mode.  */
	  switch (GET_MODE (x))
	    {
	    case E_CCmode:
	    case E_CCNZmode:
	    case E_CCCmode:
	    case E_CCVmode:
	      s = "%icc";
	      break;
	    case E_CCXmode:
	    case E_CCXNZmode:
	    case E_CCXCmode:
	    case E_CCXVmode:
	      s = "%xcc";
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  fputs (s, file);
	}
      else
	/* %fccN register */
	fputs (reg_names[REGNO (x)], file);
      return;
    case 'm':
      /* Print the operand's address only.  */
      output_address (GET_MODE (x), XEXP (x, 0));
      return;
    case 'r':
      /* In this case we need a register.  Use %g0 if the
	 operand is const0_rtx.  */
      if (x == const0_rtx
	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
	{
	  fputs ("%g0", file);
	  return;
	}
      else
	break;

    case 'A':
      /* Print the mnemonic of a two-operand logical operation.  */
      switch (GET_CODE (x))
	{
	case IOR:
	  s = "or";
	  break;
	case AND:
	  s = "and";
	  break;
	case XOR:
	  s = "xor";
	  break;
	default:
	  output_operand_lossage ("invalid %%A operand");
	  s = "";
	  break;
	}
      fputs (s, file);
      return;

    case 'B':
      /* Like %A but the negated forms of the logical operations.  */
      switch (GET_CODE (x))
	{
	case IOR:
	  s = "orn";
	  break;
	case AND:
	  s = "andn";
	  break;
	case XOR:
	  s = "xnor";
	  break;
	default:
	  output_operand_lossage ("invalid %%B operand");
	  s = "";
	  break;
	}
      fputs (s, file);
      return;

      /* This is used by the conditional move instructions.  */
    case 'C':
      {
	machine_mode mode = GET_MODE (XEXP (x, 0));
	switch (GET_CODE (x))
	  {
	  case NE:
	    if (mode == CCVmode || mode == CCXVmode)
	      s = "vs";
	    else
	      s = "ne";
	    break;
	  case EQ:
	    if (mode == CCVmode || mode == CCXVmode)
	      s = "vc";
	    else
	      s = "e";
	    break;
	  case GE:
	    if (mode == CCNZmode || mode == CCXNZmode)
	      s = "pos";
	    else
	      s = "ge";
	    break;
	  case GT:
	    s = "g";
	    break;
	  case LE:
	    s = "le";
	    break;
	  case LT:
	    if (mode == CCNZmode || mode == CCXNZmode)
	      s = "neg";
	    else
	      s = "l";
	    break;
	  case GEU:
	    s = "geu";
	    break;
	  case GTU:
	    s = "gu";
	    break;
	  case LEU:
	    s = "leu";
	    break;
	  case LTU:
	    s = "lu";
	    break;
	  case LTGT:
	    s = "lg";
	    break;
	  case UNORDERED:
	    s = "u";
	    break;
	  case ORDERED:
	    s = "o";
	    break;
	  case UNLT:
	    s = "ul";
	    break;
	  case UNLE:
	    s = "ule";
	    break;
	  case UNGT:
	    s = "ug";
	    break;
	  case UNGE:
	    s = "uge";
	    break;
	  case UNEQ:
	    s = "ue";
	    break;
	  default:
	    output_operand_lossage ("invalid %%C operand");
	    s = "";
	    break;
	  }
	fputs (s, file);
	return;
      }

      /* This are used by the movr instruction pattern.  */
    case 'D':
      {
	switch (GET_CODE (x))
	  {
	  case NE:
	    s = "ne";
	    break;
	  case EQ:
	    s = "e";
	    break;
	  case GE:
	    s = "gez";
	    break;
	  case LT:
	    s = "lz";
	    break;
	  case LE:
	    s = "lez";
	    break;
	  case GT:
	    s = "gz";
	    break;
	  default:
	    output_operand_lossage ("invalid %%D operand");
	    s = "";
	    break;
	  }
	fputs (s, file);
	return;
      }

    case 'b':
      {
	/* Print a sign-extended character.  */
	int i = trunc_int_for_mode (INTVAL (x), QImode);
	fprintf (file, "%d", i);
	return;
      }

    case 'f':
      /* Operand must be a MEM; write its address.  */
      if (GET_CODE (x) != MEM)
	output_operand_lossage ("invalid %%f operand");
      output_address (GET_MODE (x), XEXP (x, 0));
      return;

    case 's':
      {
	/* Print a sign-extended 32-bit value.  */
	HOST_WIDE_INT i;
	if (GET_CODE(x) == CONST_INT)
	  i = INTVAL (x);
	else
	  {
	    output_operand_lossage ("invalid %%s operand");
	    return;
	  }
	i = trunc_int_for_mode (i, SImode);
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
	return;
      }

    case 0:
      /* Do nothing special.  */
      break;

    default:
      /* Undocumented flag.  */
      output_operand_lossage ("invalid operand output code");
    }

  /* Fall-through: print X itself in the default way.  */
  if (GET_CODE (x) == REG)
    fputs (reg_names[REGNO (x)], file);
  else if (GET_CODE (x) == MEM)
    {
      fputc ('[', file);
	/* Poor Sun assembler doesn't understand absolute addressing.  */
      if (CONSTANT_P (XEXP (x, 0)))
	fputs ("%g0+", file);
      output_address (GET_MODE (x), XEXP (x, 0));
      fputc (']', file);
    }
  else if (GET_CODE (x) == HIGH)
    {
      fputs ("%hi(", file);
      output_addr_const (file, XEXP (x, 0));
      fputc (')', file);
    }
  else if (GET_CODE (x) == LO_SUM)
    {
      sparc_print_operand (file, XEXP (x, 0), 0);
      if (TARGET_CM_MEDMID)
	fputs ("+%l44(", file);
      else
	fputs ("+%lo(", file);
      output_addr_const (file, XEXP (x, 1));
      fputc (')', file);
    }
  else if (GET_CODE (x) == CONST_DOUBLE)
    output_operand_lossage ("floating-point constant not a valid immediate operand");
  else
    output_addr_const (file, x);
}

/* Implement TARGET_PRINT_OPERAND_ADDRESS.
 */

static void
sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
{
  register rtx base, index = 0;
  int offset = 0;
  register rtx addr = x;

  if (REG_P (addr))
    fputs (reg_names[REGNO (addr)], file);
  else if (GET_CODE (addr) == PLUS)
    {
      /* Split the PLUS into a base plus either a constant offset or an
	 index expression.  */
      if (CONST_INT_P (XEXP (addr, 0)))
	offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
      else if (CONST_INT_P (XEXP (addr, 1)))
	offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
      else
	base = XEXP (addr, 0), index = XEXP (addr, 1);
      if (GET_CODE (base) == LO_SUM)
	{
	  gcc_assert (USE_AS_OFFSETABLE_LO10
		      && TARGET_ARCH64
		      && ! TARGET_CM_MEDMID);
	  output_operand (XEXP (base, 0), 0);
	  fputs ("+%lo(", file);
	  output_address (VOIDmode, XEXP (base, 1));
	  fprintf (file, ")+%d", offset);
	}
      else
	{
	  fputs (reg_names[REGNO (base)], file);
	  if (index == 0)
	    fprintf (file, "%+d", offset);
	  else if (REG_P (index))
	    fprintf (file, "+%s", reg_names[REGNO (index)]);
	  else if (GET_CODE (index) == SYMBOL_REF
		   || GET_CODE (index) == LABEL_REF
		   || GET_CODE (index) == CONST)
	    fputc ('+', file), output_addr_const (file, index);
	  else gcc_unreachable ();
	}
    }
  else if (GET_CODE (addr) == MINUS
	   && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
    {
      /* Label-relative address: print "expr-(label-.)".  */
      output_addr_const (file, XEXP (addr, 0));
      fputs ("-(", file);
      output_addr_const (file, XEXP (addr, 1));
      fputs ("-.)", file);
    }
  else if (GET_CODE (addr) == LO_SUM)
    {
      output_operand (XEXP (addr, 0), 0);
      if (TARGET_CM_MEDMID)
	fputs ("+%l44(", file);
      else
	fputs ("+%lo(", file);
      output_address (VOIDmode, XEXP (addr, 1));
      fputc (')', file);
    }
  else if (flag_pic
	   && GET_CODE (addr) == CONST
	   && GET_CODE (XEXP (addr, 0)) == MINUS
	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
	   && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
	   && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
    {
      /* PIC pc-relative form: (const (minus expr (const (minus expr pc)))).  */
      addr = XEXP (addr, 0);
      output_addr_const (file, XEXP (addr, 0));
      /* Group the args of the second CONST in parenthesis.  */
      fputs ("-(", file);
      /* Skip past the second CONST--it does nothing for us.  */
      output_addr_const (file, XEXP (XEXP (addr, 1), 0));
      /* Close the parenthesis.  */
      fputc (')', file);
    }
  else
    {
      output_addr_const (file, addr);
    }
}

/* Target hook for assembling integer objects.  The sparc version has
   special handling for aligned DI-mode objects.  */

static bool
sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  /* ??? We only output .xword's for symbols and only then in environments
     where the assembler can handle them.  */
  if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
    {
      if (TARGET_V9)
	{
	  assemble_integer_with_op ("\t.xword\t", x);
	  return true;
	}
      else
	{
	  /* No .xword available: emit a zero high word followed by the
	     32-bit symbolic value.  */
	  assemble_aligned_integer (4, const0_rtx);
	  assemble_aligned_integer (4, x);
	  return true;
	}
    }
  return default_assemble_integer (x, size, aligned_p);
}

/* Return the value of a code used in the .proc pseudo-op that says
   what kind of result this function returns.  For non-C types, we pick
   the closest C type.
*/

#ifndef SHORT_TYPE_SIZE
#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
#endif

#ifndef INT_TYPE_SIZE
#define INT_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef LONG_TYPE_SIZE
#define LONG_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef LONG_LONG_TYPE_SIZE
#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#ifndef FLOAT_TYPE_SIZE
#define FLOAT_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef DOUBLE_TYPE_SIZE
#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#ifndef LONG_DOUBLE_TYPE_SIZE
#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

/* Encode TYPE for the .proc pseudo-op: a base-type code in the low
   bits with up to twelve 2-bit derivation qualifiers (pointer, function,
   array) packed above it, innermost first.  */

unsigned long
sparc_type_code (register tree type)
{
  register unsigned long qualifiers = 0;
  register unsigned shift;

  /* Only the first 30 bits of the qualifier are valid.  We must refrain from
     setting more, since some assemblers will give an error for this.  Also,
     we must be careful to avoid shifts of 32 bits or more to avoid getting
     unpredictable results.  */

  for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
    {
      switch (TREE_CODE (type))
	{
	case ERROR_MARK:
	  return qualifiers;

	case ARRAY_TYPE:
	  qualifiers |= (3 << shift);
	  break;

	case FUNCTION_TYPE:
	case METHOD_TYPE:
	  qualifiers |= (2 << shift);
	  break;

	case POINTER_TYPE:
	case REFERENCE_TYPE:
	case OFFSET_TYPE:
	  qualifiers |= (1 << shift);
	  break;

	case RECORD_TYPE:
	  return (qualifiers | 8);

	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  return (qualifiers | 9);

	case ENUMERAL_TYPE:
	  return (qualifiers | 10);

	case VOID_TYPE:
	  return (qualifiers | 16);

	case INTEGER_TYPE:
	  /* If this is a range type, consider it to be the underlying
	     type.  */
	  if (TREE_TYPE (type) != 0)
	    break;

	  /* Carefully distinguish all the standard types of C,
	     without messing up if the language is not C.  We do this by
	     testing TYPE_PRECISION and TYPE_UNSIGNED.  The old code used to
	     look at both the names and the above fields, but that's redundant.
	     Any type whose size is between two C types will be considered
	     to be the wider of the two types.  Also, we do not have a
	     special code to use for "long long", so anything wider than
	     long is treated the same.  Note that we can't distinguish
	     between "int" and "long" in this code if they are the same
	     size, but that's fine, since neither can the assembler.  */

	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));

	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));

	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));

	  else
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));

	case REAL_TYPE:
	  /* If this is a range type, consider it to be the underlying
	     type.  */
	  if (TREE_TYPE (type) != 0)
	    break;

	  /* Carefully distinguish all the standard types of C,
	     without messing up if the language is not C.  */

	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
	    return (qualifiers | 6);

	  else
	    return (qualifiers | 7);

	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
	  /* ??? We need to distinguish between double and float complex types,
	     but I don't know how yet because I can't reach this code from
	     existing front-ends.  */
	  return (qualifiers | 7);	/* Who knows? */

	case VECTOR_TYPE:
	case BOOLEAN_TYPE:	/* Boolean truth value type.  */
	case LANG_TYPE:
	case NULLPTR_TYPE:
	  return qualifiers;

	default:
	  gcc_unreachable ();		/* Not a type! */
	}
    }

  return qualifiers;
}

/* Nested function support.  */

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
   (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
   (to store insns).  This is a bit excessive.  Perhaps a different
   mechanism would be better here.

   Emit enough FLUSH insns to synchronize the data and instruction caches.  */

static void
sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
{
  /* SPARC 32-bit trampoline:

	sethi	%hi(fn), %g1
	sethi	%hi(static), %g2
	jmp	%g1+%lo(fn)
	or	%g2, %lo(static), %g2

    SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
    JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
   */

  /* Word 0: sethi %hi(fn), %g1 — opcode 0x03000000 ORed with the high
     22 bits of FNADDR.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 0),
     expand_binop (SImode, ior_optab,
		   expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
		   GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 1: sethi %hi(static), %g2.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 4),
     expand_binop (SImode, ior_optab,
		   expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
		   GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 2: jmp %g1+%lo(fn) — opcode 0x81c06000 ORed with the low
     10 bits of FNADDR.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 8),
     expand_binop (SImode, ior_optab,
		   expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
		   GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 3: or %g2, %lo(static), %g2 (delay slot).  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 12),
     expand_binop (SImode, ior_optab,
		   expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
		   GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
     aligned on a 16 byte boundary so one flush clears it all.  */
  emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
  if (sparc_cpu != PROCESSOR_ULTRASPARC
      && sparc_cpu != PROCESSOR_ULTRASPARC3
      && sparc_cpu != PROCESSOR_NIAGARA
      && sparc_cpu != PROCESSOR_NIAGARA2
      && sparc_cpu != PROCESSOR_NIAGARA3
      && sparc_cpu != PROCESSOR_NIAGARA4
      && sparc_cpu != PROCESSOR_NIAGARA7
      && sparc_cpu != PROCESSOR_M8)
    emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));

  /* Call __enable_execute_stack after writing onto the stack to make sure
     the stack address is accessible.  */
#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif

}

/* The 64-bit version is simpler because it makes more sense to load the
   values as "immediate" data out of the trampoline.  It's also easier since
   we can read the PC without clobbering a register.
*/

static void
sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
{
  /* SPARC 64-bit trampoline:

	rd	%pc, %g1
	ldx	[%g1+24], %g5
	jmp	%g5
	ldx	[%g1+16], %g5
	+16 bytes data
   */

  /* The four fixed instruction words, followed by CXT at offset 16 and
     FNADDR at offset 24, which the code loads PC-relatively.  */
  emit_move_insn (adjust_address (m_tramp, SImode, 0),
		  GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
  emit_move_insn (adjust_address (m_tramp, SImode, 4),
		  GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
  emit_move_insn (adjust_address (m_tramp, SImode, 8),
		  GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
  emit_move_insn (adjust_address (m_tramp, SImode, 12),
		  GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
  emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
  emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
  emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));

  /* On CPUs where one flush covers the whole (16-byte-aligned) trampoline,
     the second flush is unnecessary.  */
  if (sparc_cpu != PROCESSOR_ULTRASPARC
      && sparc_cpu != PROCESSOR_ULTRASPARC3
      && sparc_cpu != PROCESSOR_NIAGARA
      && sparc_cpu != PROCESSOR_NIAGARA2
      && sparc_cpu != PROCESSOR_NIAGARA3
      && sparc_cpu != PROCESSOR_NIAGARA4
      && sparc_cpu != PROCESSOR_NIAGARA7
      && sparc_cpu != PROCESSOR_M8)
    emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));

  /* Call __enable_execute_stack after writing onto the stack to make sure
     the stack address is accessible.  */
#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif
}

/* Worker for TARGET_TRAMPOLINE_INIT.
*/

static void
sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
{
  rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
  cxt = force_reg (Pmode, cxt);
  if (TARGET_ARCH64)
    sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
  else
    sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
}

/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */

static int
supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
			int cost)
{
  enum attr_type insn_type;

  if (recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);

  if (dep_type == 0)
    {
      /* Data dependency; DEP_INSN writes a register that INSN reads some
	 cycles later.  */

      /* if a load, then the dependence must be on the memory address;
	 add an extra "cycle".  Note that the cost could be two cycles
	 if the reg was written late in an instruction group; we cannot tell
	 here.  */
      if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
	return cost + 3;

      /* Get the delay only if the address of the store is the dependence.  */
      if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
	{
	  rtx pat = PATTERN(insn);
	  rtx dep_pat = PATTERN (dep_insn);

	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    return cost;  /* This should not happen!  */

	  /* The dependency between the two instructions was on the data that
	     is being stored.  Assume that this implies that the address of the
	     store is not dependent.  */
	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
	    return cost;

	  return cost + 3;  /* An approximation.  */
	}

      /* A shift instruction cannot receive its data from an instruction
	 in the same cycle; add a one cycle penalty.  */
      if (insn_type == TYPE_SHIFT)
	return cost + 3;   /* Split before cascade into shift.  */
    }
  else
    {
      /* Anti- or output- dependency; DEP_INSN reads/writes a register that
	 INSN writes some cycles later.  */

      /* These are only significant for the fpu unit; writing a fp reg before
	 the fpu has finished with it stalls the processor.  */

      /* Reusing an integer register causes no problems.  */
      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
	return 0;
    }

  return cost;
}

/* Likewise, but for the HyperSPARC pipeline.  DTYPE is the kind of
   dependence (0 for a true data dependence).  */

static int
hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
			int cost)
{
  enum attr_type insn_type, dep_type;
  rtx pat = PATTERN(insn);
  rtx dep_pat = PATTERN (dep_insn);

  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_type = get_attr_type (dep_insn);

  switch (dtype)
    {
    case 0:
      /* Data dependency; DEP_INSN writes a register that INSN reads some
	 cycles later.  */

      switch (insn_type)
	{
	case TYPE_STORE:
	case TYPE_FPSTORE:
	  /* Get the delay iff the address of the store is the dependence.  */
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    return cost;

	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
	    return cost;
	  return cost + 3;

	case TYPE_LOAD:
	case TYPE_SLOAD:
	case TYPE_FPLOAD:
	  /* If a load, then the dependence must be on the memory address.  If
	     the addresses aren't equal, then it might be a false dependency */
	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
	    {
	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
		  || GET_CODE (SET_DEST (dep_pat)) != MEM
		  || GET_CODE (SET_SRC (pat)) != MEM
		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
				    XEXP (SET_SRC (pat), 0)))
		return cost + 2;

	      return cost + 8;
	    }
	  break;

	case TYPE_BRANCH:
	  /* Compare to branch latency is 0.  There is no benefit from
	     separating compare and branch.  */
	  if (dep_type == TYPE_COMPARE)
	    return 0;
	  /* Floating point compare to branch latency is less than
	     compare to conditional move.  */
	  if (dep_type == TYPE_FPCMP)
	    return cost - 1;
	  break;
	default:
	  break;
	}
      break;

    case REG_DEP_ANTI:
      /* Anti-dependencies only penalize the fpu unit.  */
      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
	return 0;
      break;

    default:
      break;
    }

  return cost;
}

/* Likewise, but for the LEON5 pipeline.  */

static int
leon5_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
		   int cost)
{
  enum attr_type insn_type, dep_type;
  rtx pat = PATTERN (insn);
  rtx dep_pat = PATTERN (dep_insn);

  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_type = get_attr_type (dep_insn);

  switch (dtype)
    {
    case REG_DEP_TRUE:
      /* Data dependency; DEP_INSN writes a register that INSN reads some
	 cycles later.  */

      switch (insn_type)
	{
	case TYPE_STORE:
	  /* Try to schedule three instructions between the store and
	     the ALU instruction that generated the data.  */
	  if (dep_type == TYPE_IALU || dep_type == TYPE_SHIFT)
	    {
	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
		break;

	      if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
		return 4;
	    }
	  break;
	default:
	  break;
	}
      break;
    case REG_DEP_ANTI:
      /* Penalize anti-dependencies for FPU instructions.  */
      if (fpop_insn_p (insn) || insn_type == TYPE_FPLOAD)
	return 4;
      break;
    default:
      break;
    }

  return cost;
}

/* Adjust the cost of the dependency of INSN on DEP of kind DEP_TYPE,
   dispatching to the routine for the scheduled CPU.  */

static int
sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
		   unsigned int)
{
  switch (sparc_cpu)
    {
    case PROCESSOR_LEON5:
      cost = leon5_adjust_cost (insn, dep_type, dep, cost);
      break;
    case PROCESSOR_SUPERSPARC:
      cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
      break;
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
      break;
    default:
      break;
    }
  return cost;
}

/* Scheduler initialization hook; nothing to do for SPARC.  */

static void
sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		  int sched_verbose ATTRIBUTE_UNUSED,
		  int max_ready ATTRIBUTE_UNUSED)
{}

/* Return the CPU-specific scheduling lookahead depth.  */

static int
sparc_use_sched_lookahead (void)
{
  switch (sparc_cpu)
    {
    case PROCESSOR_ULTRASPARC:
    case PROCESSOR_ULTRASPARC3:
      return 4;
    case PROCESSOR_SUPERSPARC:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      return 3;
    case PROCESSOR_NIAGARA4:
    case PROCESSOR_NIAGARA7:
    case PROCESSOR_M8:
      return 2;
    case PROCESSOR_NIAGARA:
    case PROCESSOR_NIAGARA2:
    case PROCESSOR_NIAGARA3:
    default:
      return 0;
    }
}

/* Return the number of instructions the scheduled CPU can issue
   per cycle.  */

static int
sparc_issue_rate (void)
{
  switch (sparc_cpu)
    {
    case PROCESSOR_ULTRASPARC:
    case PROCESSOR_ULTRASPARC3:
    case PROCESSOR_M8:
      return 4;
    case PROCESSOR_SUPERSPARC:
      return 3;
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
    case PROCESSOR_V9:
      /* Assume V9 processors are capable of at least dual-issue.  */
    case PROCESSOR_NIAGARA4:
    case PROCESSOR_NIAGARA7:
      return 2;
    case PROCESSOR_NIAGARA:
    case PROCESSOR_NIAGARA2:
    case PROCESSOR_NIAGARA3:
    default:
      return 1;
    }
}

/* Return the cost of a branch.  SPEED_P is true for code optimized for
   speed; PREDICTABLE_P is true if the branch is easily predicted.  */

int
sparc_branch_cost (bool speed_p, bool predictable_p)
{
  if (!speed_p)
    return 2;

  /* For pre-V9 processors we use a single value (usually 3) to take into
     account the potential annulling of the delay slot (which ends up being
     a bubble in the pipeline slot) plus a cycle to take into consideration
     the instruction cache effects.

     On V9 and later processors, which have branch prediction facilities,
     we take into account whether the branch is (easily) predictable.  */
  const int cost = sparc_costs->branch_cost;

  switch (sparc_cpu)
    {
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
    case PROCESSOR_ULTRASPARC3:
    case PROCESSOR_NIAGARA:
    case PROCESSOR_NIAGARA2:
    case PROCESSOR_NIAGARA3:
    case PROCESSOR_NIAGARA4:
    case PROCESSOR_NIAGARA7:
    case PROCESSOR_M8:
      return cost + (predictable_p ? 0 : 2);

    default:
      return cost;
    }
}

/* Classify the SET in INSN by how it affects the high 32 bits of its
   destination: return 1 if they are known zero, -1 if the value is
   sign-extended from 32 bits, and 0 if unknown (cf. sparc_check_64).  */

static int
set_extends (rtx_insn *insn)
{
  register rtx pat = PATTERN (insn);

  switch (GET_CODE (SET_SRC (pat)))
    {
      /* Load and some shift instructions zero extend.  */
    case MEM:
    case ZERO_EXTEND:
      /* sethi clears the high bits */
    case HIGH:
      /* LO_SUM is used with sethi.  sethi cleared the high
	 bits and the values used with lo_sum are positive */
    case LO_SUM:
      /* Store flag stores 0 or 1 */
    case LT: case LTU:
    case GT: case GTU:
    case LE: case LEU:
    case GE: case GEU:
    case EQ:
    case NE:
      return 1;
    case AND:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	if (GET_CODE (op0) != REG)
	  return 0;
	if (sparc_check_64 (op0, insn) == 1)
	  return 1;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case IOR:
    case XOR:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
	  return 0;
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case LSHIFTRT:
      return GET_MODE (SET_SRC (pat)) == SImode;
      /* Positive integers leave the high bits zero.  */
    case CONST_INT:
      return !(INTVAL (SET_SRC (pat)) & 0x80000000);
    case ASHIFTRT:
    case SIGN_EXTEND:
      return - (GET_MODE (SET_SRC (pat)) == SImode);
    case REG:
      return sparc_check_64 (SET_SRC (pat), insn);
    default:
      return 0;
    }
}

/* We _ought_ to have only one kind per function, but...
*/
/* Deferred ADDR_VEC / ADDR_DIFF_VEC case vectors, emitted at the end of
   the function by sparc_output_deferred_case_vectors.  */
static GTY(()) rtx sparc_addr_diff_list;
static GTY(()) rtx sparc_addr_list;

/* Queue the case vector VEC with label LAB for later output.  DIFF is
   nonzero for an ADDR_DIFF_VEC.  */

void
sparc_defer_case_vector (rtx lab, rtx vec, int diff)
{
  vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
  if (diff)
    sparc_addr_diff_list
      = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
  else
    sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
}

/* Output one deferred absolute-address case vector.  */

static void
sparc_output_addr_vec (rtx vec)
{
  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
  int idx, vlen = XVECLEN (body, 0);

#ifdef ASM_OUTPUT_ADDR_VEC_START
  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
#endif

#ifdef ASM_OUTPUT_CASE_LABEL
  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
			 NEXT_INSN (lab));
#else
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
#endif

  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_VEC_ELT
	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
    }

#ifdef ASM_OUTPUT_ADDR_VEC_END
  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
#endif
}

/* Output one deferred address-difference case vector.  */

static void
sparc_output_addr_diff_vec (rtx vec)
{
  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
  rtx base = XEXP (XEXP (body, 0), 0);
  int idx, vlen = XVECLEN (body, 1);

#ifdef ASM_OUTPUT_ADDR_VEC_START
  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
#endif

#ifdef ASM_OUTPUT_CASE_LABEL
  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
			 NEXT_INSN (lab));
#else
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
#endif

  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_DIFF_ELT
	(asm_out_file,
	 body,
	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
	 CODE_LABEL_NUMBER (base));
    }

#ifdef ASM_OUTPUT_ADDR_VEC_END
  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
#endif
}

/* Output all the case vectors deferred by sparc_defer_case_vector,
   then clear the lists.  */

static void
sparc_output_deferred_case_vectors (void)
{
  rtx t;
  int align;

  if (sparc_addr_list == NULL_RTX
      && sparc_addr_diff_list == NULL_RTX)
    return;

  /* Align to cache line in the function's code section.  */
  switch_to_section (current_function_section ());

  align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
  if (align > 0)
    ASM_OUTPUT_ALIGN (asm_out_file, align);

  for (t = sparc_addr_list; t ; t = XEXP (t, 1))
    sparc_output_addr_vec (XEXP (t, 0));
  for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
    sparc_output_addr_diff_vec (XEXP (t, 0));

  sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
}

/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
   unknown.  Return 1 if the high bits are zero, -1 if the register is
   sign extended.  */
int
sparc_check_64 (rtx x, rtx_insn *insn)
{
  /* If a register is set only once it is safe to ignore insns this
     code does not know how to handle.  The loop will either recognize
     the single set and return the correct value or fail to recognize
     it and return 0.  */
  int set_once = 0;
  rtx y = x;

  gcc_assert (GET_CODE (x) == REG);

  if (GET_MODE (x) == DImode)
    y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);

  if (flag_expensive_optimizations
      && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
    set_once = 1;

  if (insn == 0)
    {
      if (set_once)
	insn = get_last_insn_anywhere ();
      else
	return 0;
    }

  /* Scan backwards for the insn that sets X (or its SImode low word Y).  */
  while ((insn = PREV_INSN (insn)))
    {
      switch (GET_CODE (insn))
	{
	case JUMP_INSN:
	case NOTE:
	  break;
	case CODE_LABEL:
	case CALL_INSN:
	default:
	  if (! set_once)
	    return 0;
	  break;
	case INSN:
	  {
	    rtx pat = PATTERN (insn);
	    if (GET_CODE (pat) != SET)
	      return 0;
	    if (rtx_equal_p (x, SET_DEST (pat)))
	      return set_extends (insn);
	    if (y && rtx_equal_p (y, SET_DEST (pat)))
	      return set_extends (insn);
	    if (reg_overlap_mentioned_p (SET_DEST (pat), y))
	      return 0;
	  }
	}
    }
  return 0;
}

/* Output a wide shift instruction in V8+ mode.  INSN is the instruction,
   OPERANDS are its operands and OPCODE is the mnemonic to be used.  */

const char *
output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
{
  static char asm_code[60];

  /* The scratch register is only required when the destination
     register is not a 64-bit global or out register.  */
  if (which_alternative != 2)
    operands[3] = operands[0];

  /* We can only shift by constants <= 63. */
  if (GET_CODE (operands[2]) == CONST_INT)
    operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);

  if (GET_CODE (operands[1]) == CONST_INT)
    {
      output_asm_insn ("mov\t%1, %3", operands);
    }
  else
    {
      /* Assemble the 64-bit source value in the scratch register from
	 the two 32-bit halves of operand 1.  */
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      if (sparc_check_64 (operands[1], insn) <= 0)
	output_asm_insn ("srl\t%L1, 0, %L1", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
    }

  strcpy (asm_code, opcode);

  if (which_alternative != 2)
    return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
  else
    return
      strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
}

/* Output rtl to increment the profiler label LABELNO
   for profiling a function entry.
*/

void
sparc_profile_hook (int labelno)
{
  char buf[32];
  rtx lab, fun;

  fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
  if (NO_PROFILE_COUNTERS)
    {
      emit_library_call (fun, LCT_NORMAL, VOIDmode);
    }
  else
    {
      /* Pass the address of the per-call-site counter label to mcount.  */
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
      emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
    }
}

#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
				     tree decl ATTRIBUTE_UNUSED)
{
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  fprintf (asm_out_file, "\t.section\t\"%s\"", name);

  if (!(flags & SECTION_DEBUG))
    fputs (",#alloc", asm_out_file);
#if HAVE_GAS_SECTION_EXCLUDE
  if (flags & SECTION_EXCLUDE)
    fputs (",#exclude", asm_out_file);
#endif
  if (flags & SECTION_WRITE)
    fputs (",#write", asm_out_file);
  if (flags & SECTION_TLS)
    fputs (",#tls", asm_out_file);
  if (flags & SECTION_CODE)
    fputs (",#execinstr", asm_out_file);

  if (flags & SECTION_NOTYPE)
    ;
  else if (flags & SECTION_BSS)
    fputs (",#nobits", asm_out_file);
  else
    fputs (",#progbits", asm_out_file);

  fputc ('\n', asm_out_file);
}
#endif /* TARGET_SOLARIS */

/* We do not allow indirect calls to be optimized into sibling calls.

   We cannot use sibling calls when delayed branches are disabled
   because they will likely require the call delay slot to be filled.

   Also, on SPARC 32-bit we cannot emit a sibling call when the
   current function returns a structure.  This is because the "unimp
   after call" convention would cause the callee to return to the
   wrong place.  The generic code already disallows cases where the
   function being called returns a structure.

   It may seem strange how this last case could occur.  Usually there
   is code after the call which jumps to epilogue code which dumps the
   return value into the struct return area.  That ought to invalidate
   the sibling call right?  Well, in the C++ case we can end up passing
   the pointer to the struct return area to a constructor (which returns
   void) and then nothing else happens.  Such a sibling call would look
   valid without the added check here.

   VxWorks PIC PLT entries require the global pointer to be initialized
   on entry.  We therefore can't emit sibling calls to them.  */
static bool
sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return (decl
	  && flag_delayed_branch
	  && (TARGET_ARCH64 || ! cfun->returns_struct)
	  && !(TARGET_VXWORKS_RTP
	       && flag_pic
	       && !targetm.binds_local_p (decl)));
}

/* libfunc renaming.  */

static void
sparc_init_libfuncs (void)
{
  if (TARGET_ARCH32)
    {
      /* Use the subroutines that Sun's library provides for integer
	 multiply and divide.  The `*' prevents an underscore from
	 being prepended by the compiler.  .umul is a little faster
	 than .mul.  */
      set_optab_libfunc (smul_optab, SImode, "*.umul");
      set_optab_libfunc (sdiv_optab, SImode, "*.div");
      set_optab_libfunc (udiv_optab, SImode, "*.udiv");
      set_optab_libfunc (smod_optab, SImode, "*.rem");
      set_optab_libfunc (umod_optab, SImode, "*.urem");

      /* TFmode arithmetic.  These names are part of the SPARC 32bit ABI.  */
      set_optab_libfunc (add_optab, TFmode, "_Q_add");
      set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
      set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
      set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
      set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");

      /* We can define the TFmode sqrt optab only if TARGET_FPU.  This
	 is because with soft-float, the SFmode and DFmode sqrt
	 instructions will be absent, and the compiler will notice and
	 try to use the TFmode sqrt instruction for calls to the
	 builtin function sqrt, but this fails.  */
      if (TARGET_FPU)
	set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");

      set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
      set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
      set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
      set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
      set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
      set_optab_libfunc (le_optab, TFmode, "_Q_fle");

      set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
      set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
      set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
      set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");

      set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
      set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
      set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
      set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");

      if (DITF_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
	  set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
	  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
	  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
	}

      if (SUN_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
	}
    }
  if (TARGET_ARCH64)
    {
      /* In the SPARC 64bit ABI, SImode multiply and divide functions
	 do not exist in the library.  Make sure the compiler does not
	 emit calls to them by accident.  (It should always use the
         hardware instructions.)  */
      set_optab_libfunc (smul_optab, SImode, 0);
      set_optab_libfunc (sdiv_optab, SImode, 0);
      set_optab_libfunc (udiv_optab, SImode, 0);
      set_optab_libfunc (smod_optab, SImode, 0);
      set_optab_libfunc (umod_optab, SImode, 0);

      if (SUN_INTEGER_MULTIPLY_64)
	{
	  set_optab_libfunc (smul_optab, DImode, "__mul64");
	  set_optab_libfunc (sdiv_optab, DImode, "__div64");
	  set_optab_libfunc (udiv_optab, DImode, "__udiv64");
	  set_optab_libfunc (smod_optab, DImode, "__rem64");
	  set_optab_libfunc (umod_optab, DImode, "__urem64");
	}

      if (SUN_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
	}
    }
}

/* SPARC builtins.  */
enum sparc_builtins
{
  /* FPU builtins.  */
  SPARC_BUILTIN_LDFSR,
  SPARC_BUILTIN_STFSR,

  /* VIS 1.0 builtins.
*/ 10875 SPARC_BUILTIN_FPACK16, 10876 SPARC_BUILTIN_FPACK32, 10877 SPARC_BUILTIN_FPACKFIX, 10878 SPARC_BUILTIN_FEXPAND, 10879 SPARC_BUILTIN_FPMERGE, 10880 SPARC_BUILTIN_FMUL8X16, 10881 SPARC_BUILTIN_FMUL8X16AU, 10882 SPARC_BUILTIN_FMUL8X16AL, 10883 SPARC_BUILTIN_FMUL8SUX16, 10884 SPARC_BUILTIN_FMUL8ULX16, 10885 SPARC_BUILTIN_FMULD8SUX16, 10886 SPARC_BUILTIN_FMULD8ULX16, 10887 SPARC_BUILTIN_FALIGNDATAV4HI, 10888 SPARC_BUILTIN_FALIGNDATAV8QI, 10889 SPARC_BUILTIN_FALIGNDATAV2SI, 10890 SPARC_BUILTIN_FALIGNDATADI, 10891 SPARC_BUILTIN_WRGSR, 10892 SPARC_BUILTIN_RDGSR, 10893 SPARC_BUILTIN_ALIGNADDR, 10894 SPARC_BUILTIN_ALIGNADDRL, 10895 SPARC_BUILTIN_PDIST, 10896 SPARC_BUILTIN_EDGE8, 10897 SPARC_BUILTIN_EDGE8L, 10898 SPARC_BUILTIN_EDGE16, 10899 SPARC_BUILTIN_EDGE16L, 10900 SPARC_BUILTIN_EDGE32, 10901 SPARC_BUILTIN_EDGE32L, 10902 SPARC_BUILTIN_FCMPLE16, 10903 SPARC_BUILTIN_FCMPLE32, 10904 SPARC_BUILTIN_FCMPNE16, 10905 SPARC_BUILTIN_FCMPNE32, 10906 SPARC_BUILTIN_FCMPGT16, 10907 SPARC_BUILTIN_FCMPGT32, 10908 SPARC_BUILTIN_FCMPEQ16, 10909 SPARC_BUILTIN_FCMPEQ32, 10910 SPARC_BUILTIN_FPADD16, 10911 SPARC_BUILTIN_FPADD16S, 10912 SPARC_BUILTIN_FPADD32, 10913 SPARC_BUILTIN_FPADD32S, 10914 SPARC_BUILTIN_FPSUB16, 10915 SPARC_BUILTIN_FPSUB16S, 10916 SPARC_BUILTIN_FPSUB32, 10917 SPARC_BUILTIN_FPSUB32S, 10918 SPARC_BUILTIN_ARRAY8, 10919 SPARC_BUILTIN_ARRAY16, 10920 SPARC_BUILTIN_ARRAY32, 10921 10922 /* VIS 2.0 builtins. */ 10923 SPARC_BUILTIN_EDGE8N, 10924 SPARC_BUILTIN_EDGE8LN, 10925 SPARC_BUILTIN_EDGE16N, 10926 SPARC_BUILTIN_EDGE16LN, 10927 SPARC_BUILTIN_EDGE32N, 10928 SPARC_BUILTIN_EDGE32LN, 10929 SPARC_BUILTIN_BMASK, 10930 SPARC_BUILTIN_BSHUFFLEV4HI, 10931 SPARC_BUILTIN_BSHUFFLEV8QI, 10932 SPARC_BUILTIN_BSHUFFLEV2SI, 10933 SPARC_BUILTIN_BSHUFFLEDI, 10934 10935 /* VIS 3.0 builtins. 
*/ 10936 SPARC_BUILTIN_CMASK8, 10937 SPARC_BUILTIN_CMASK16, 10938 SPARC_BUILTIN_CMASK32, 10939 SPARC_BUILTIN_FCHKSM16, 10940 SPARC_BUILTIN_FSLL16, 10941 SPARC_BUILTIN_FSLAS16, 10942 SPARC_BUILTIN_FSRL16, 10943 SPARC_BUILTIN_FSRA16, 10944 SPARC_BUILTIN_FSLL32, 10945 SPARC_BUILTIN_FSLAS32, 10946 SPARC_BUILTIN_FSRL32, 10947 SPARC_BUILTIN_FSRA32, 10948 SPARC_BUILTIN_PDISTN, 10949 SPARC_BUILTIN_FMEAN16, 10950 SPARC_BUILTIN_FPADD64, 10951 SPARC_BUILTIN_FPSUB64, 10952 SPARC_BUILTIN_FPADDS16, 10953 SPARC_BUILTIN_FPADDS16S, 10954 SPARC_BUILTIN_FPSUBS16, 10955 SPARC_BUILTIN_FPSUBS16S, 10956 SPARC_BUILTIN_FPADDS32, 10957 SPARC_BUILTIN_FPADDS32S, 10958 SPARC_BUILTIN_FPSUBS32, 10959 SPARC_BUILTIN_FPSUBS32S, 10960 SPARC_BUILTIN_FUCMPLE8, 10961 SPARC_BUILTIN_FUCMPNE8, 10962 SPARC_BUILTIN_FUCMPGT8, 10963 SPARC_BUILTIN_FUCMPEQ8, 10964 SPARC_BUILTIN_FHADDS, 10965 SPARC_BUILTIN_FHADDD, 10966 SPARC_BUILTIN_FHSUBS, 10967 SPARC_BUILTIN_FHSUBD, 10968 SPARC_BUILTIN_FNHADDS, 10969 SPARC_BUILTIN_FNHADDD, 10970 SPARC_BUILTIN_UMULXHI, 10971 SPARC_BUILTIN_XMULX, 10972 SPARC_BUILTIN_XMULXHI, 10973 10974 /* VIS 4.0 builtins. */ 10975 SPARC_BUILTIN_FPADD8, 10976 SPARC_BUILTIN_FPADDS8, 10977 SPARC_BUILTIN_FPADDUS8, 10978 SPARC_BUILTIN_FPADDUS16, 10979 SPARC_BUILTIN_FPCMPLE8, 10980 SPARC_BUILTIN_FPCMPGT8, 10981 SPARC_BUILTIN_FPCMPULE16, 10982 SPARC_BUILTIN_FPCMPUGT16, 10983 SPARC_BUILTIN_FPCMPULE32, 10984 SPARC_BUILTIN_FPCMPUGT32, 10985 SPARC_BUILTIN_FPMAX8, 10986 SPARC_BUILTIN_FPMAX16, 10987 SPARC_BUILTIN_FPMAX32, 10988 SPARC_BUILTIN_FPMAXU8, 10989 SPARC_BUILTIN_FPMAXU16, 10990 SPARC_BUILTIN_FPMAXU32, 10991 SPARC_BUILTIN_FPMIN8, 10992 SPARC_BUILTIN_FPMIN16, 10993 SPARC_BUILTIN_FPMIN32, 10994 SPARC_BUILTIN_FPMINU8, 10995 SPARC_BUILTIN_FPMINU16, 10996 SPARC_BUILTIN_FPMINU32, 10997 SPARC_BUILTIN_FPSUB8, 10998 SPARC_BUILTIN_FPSUBS8, 10999 SPARC_BUILTIN_FPSUBUS8, 11000 SPARC_BUILTIN_FPSUBUS16, 11001 11002 /* VIS 4.0B builtins. 
*/ 11003 11004 /* Note that all the DICTUNPACK* entries should be kept 11005 contiguous. */ 11006 SPARC_BUILTIN_FIRST_DICTUNPACK, 11007 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK, 11008 SPARC_BUILTIN_DICTUNPACK16, 11009 SPARC_BUILTIN_DICTUNPACK32, 11010 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32, 11011 11012 /* Note that all the FPCMP*SHL entries should be kept 11013 contiguous. */ 11014 SPARC_BUILTIN_FIRST_FPCMPSHL, 11015 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL, 11016 SPARC_BUILTIN_FPCMPGT8SHL, 11017 SPARC_BUILTIN_FPCMPEQ8SHL, 11018 SPARC_BUILTIN_FPCMPNE8SHL, 11019 SPARC_BUILTIN_FPCMPLE16SHL, 11020 SPARC_BUILTIN_FPCMPGT16SHL, 11021 SPARC_BUILTIN_FPCMPEQ16SHL, 11022 SPARC_BUILTIN_FPCMPNE16SHL, 11023 SPARC_BUILTIN_FPCMPLE32SHL, 11024 SPARC_BUILTIN_FPCMPGT32SHL, 11025 SPARC_BUILTIN_FPCMPEQ32SHL, 11026 SPARC_BUILTIN_FPCMPNE32SHL, 11027 SPARC_BUILTIN_FPCMPULE8SHL, 11028 SPARC_BUILTIN_FPCMPUGT8SHL, 11029 SPARC_BUILTIN_FPCMPULE16SHL, 11030 SPARC_BUILTIN_FPCMPUGT16SHL, 11031 SPARC_BUILTIN_FPCMPULE32SHL, 11032 SPARC_BUILTIN_FPCMPUGT32SHL, 11033 SPARC_BUILTIN_FPCMPDE8SHL, 11034 SPARC_BUILTIN_FPCMPDE16SHL, 11035 SPARC_BUILTIN_FPCMPDE32SHL, 11036 SPARC_BUILTIN_FPCMPUR8SHL, 11037 SPARC_BUILTIN_FPCMPUR16SHL, 11038 SPARC_BUILTIN_FPCMPUR32SHL, 11039 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL, 11040 11041 SPARC_BUILTIN_MAX 11042}; 11043 11044static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX]; 11045static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX]; 11046 11047/* Return true if OPVAL can be used for operand OPNUM of instruction ICODE. 11048 The instruction should require a constant operand of some sort. The 11049 function prints an error if OPVAL is not valid. 
*/ 11050 11051static int 11052check_constant_argument (enum insn_code icode, int opnum, rtx opval) 11053{ 11054 if (GET_CODE (opval) != CONST_INT) 11055 { 11056 error ("%qs expects a constant argument", insn_data[icode].name); 11057 return false; 11058 } 11059 11060 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode)) 11061 { 11062 error ("constant argument out of range for %qs", insn_data[icode].name); 11063 return false; 11064 } 11065 return true; 11066} 11067 11068/* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the 11069 function decl or NULL_TREE if the builtin was not added. */ 11070 11071static tree 11072def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code, 11073 tree type) 11074{ 11075 tree t 11076 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE); 11077 11078 if (t) 11079 { 11080 sparc_builtins[code] = t; 11081 sparc_builtins_icode[code] = icode; 11082 } 11083 11084 return t; 11085} 11086 11087/* Likewise, but also marks the function as "const". */ 11088 11089static tree 11090def_builtin_const (const char *name, enum insn_code icode, 11091 enum sparc_builtins code, tree type) 11092{ 11093 tree t = def_builtin (name, icode, code, type); 11094 11095 if (t) 11096 TREE_READONLY (t) = 1; 11097 11098 return t; 11099} 11100 11101/* Implement the TARGET_INIT_BUILTINS target hook. 11102 Create builtin functions for special SPARC instructions. */ 11103 11104static void 11105sparc_init_builtins (void) 11106{ 11107 if (TARGET_FPU) 11108 sparc_fpu_init_builtins (); 11109 11110 if (TARGET_VIS) 11111 sparc_vis_init_builtins (); 11112} 11113 11114/* Create builtin functions for FPU instructions. 
*/ 11115 11116static void 11117sparc_fpu_init_builtins (void) 11118{ 11119 tree ftype 11120 = build_function_type_list (void_type_node, 11121 build_pointer_type (unsigned_type_node), 0); 11122 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr, 11123 SPARC_BUILTIN_LDFSR, ftype); 11124 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr, 11125 SPARC_BUILTIN_STFSR, ftype); 11126} 11127 11128/* Create builtin functions for VIS instructions. */ 11129 11130static void 11131sparc_vis_init_builtins (void) 11132{ 11133 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4); 11134 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8); 11135 tree v4hi = build_vector_type (intHI_type_node, 4); 11136 tree v2hi = build_vector_type (intHI_type_node, 2); 11137 tree v2si = build_vector_type (intSI_type_node, 2); 11138 tree v1si = build_vector_type (intSI_type_node, 1); 11139 11140 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0); 11141 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0); 11142 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0); 11143 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0); 11144 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0); 11145 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0); 11146 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0); 11147 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0); 11148 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0); 11149 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0); 11150 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0); 11151 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0); 11152 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0); 11153 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0); 
11154 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node, 11155 v8qi, v8qi, 11156 intDI_type_node, 0); 11157 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node, 11158 v8qi, v8qi, 0); 11159 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node, 11160 v8qi, v8qi, 0); 11161 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node, 11162 intSI_type_node, 0); 11163 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node, 11164 intSI_type_node, 0); 11165 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node, 11166 intDI_type_node, 0); 11167 tree di_ftype_di_di = build_function_type_list (intDI_type_node, 11168 intDI_type_node, 11169 intDI_type_node, 0); 11170 tree si_ftype_si_si = build_function_type_list (intSI_type_node, 11171 intSI_type_node, 11172 intSI_type_node, 0); 11173 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node, 11174 ptr_type_node, 11175 intSI_type_node, 0); 11176 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node, 11177 ptr_type_node, 11178 intDI_type_node, 0); 11179 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node, 11180 ptr_type_node, 11181 ptr_type_node, 0); 11182 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node, 11183 ptr_type_node, 11184 ptr_type_node, 0); 11185 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node, 11186 v4hi, v4hi, 0); 11187 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node, 11188 v2si, v2si, 0); 11189 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node, 11190 v4hi, v4hi, 0); 11191 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node, 11192 v2si, v2si, 0); 11193 tree void_ftype_di = build_function_type_list (void_type_node, 11194 intDI_type_node, 0); 11195 tree di_ftype_void = build_function_type_list (intDI_type_node, 11196 void_type_node, 0); 11197 tree void_ftype_si = build_function_type_list 
(void_type_node, 11198 intSI_type_node, 0); 11199 tree sf_ftype_sf_sf = build_function_type_list (float_type_node, 11200 float_type_node, 11201 float_type_node, 0); 11202 tree df_ftype_df_df = build_function_type_list (double_type_node, 11203 double_type_node, 11204 double_type_node, 0); 11205 11206 /* Packing and expanding vectors. */ 11207 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis, 11208 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi); 11209 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis, 11210 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi); 11211 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis, 11212 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si); 11213 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis, 11214 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi); 11215 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis, 11216 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi); 11217 11218 /* Multiplications. */ 11219 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis, 11220 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi); 11221 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis, 11222 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi); 11223 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis, 11224 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi); 11225 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis, 11226 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi); 11227 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis, 11228 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi); 11229 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis, 11230 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi); 11231 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis, 11232 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi); 11233 11234 /* Data aligning. 
*/ 11235 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis, 11236 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi); 11237 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis, 11238 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi); 11239 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis, 11240 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si); 11241 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis, 11242 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di); 11243 11244 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis, 11245 SPARC_BUILTIN_WRGSR, void_ftype_di); 11246 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis, 11247 SPARC_BUILTIN_RDGSR, di_ftype_void); 11248 11249 if (TARGET_ARCH64) 11250 { 11251 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis, 11252 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di); 11253 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis, 11254 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di); 11255 } 11256 else 11257 { 11258 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis, 11259 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si); 11260 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis, 11261 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si); 11262 } 11263 11264 /* Pixel distance. */ 11265 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis, 11266 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di); 11267 11268 /* Edge handling. 
*/ 11269 if (TARGET_ARCH64) 11270 { 11271 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis, 11272 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr); 11273 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis, 11274 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr); 11275 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis, 11276 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr); 11277 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis, 11278 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr); 11279 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis, 11280 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr); 11281 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis, 11282 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr); 11283 } 11284 else 11285 { 11286 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis, 11287 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr); 11288 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis, 11289 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr); 11290 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis, 11291 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr); 11292 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis, 11293 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr); 11294 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis, 11295 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr); 11296 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis, 11297 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr); 11298 } 11299 11300 /* Pixel compare. 
*/ 11301 if (TARGET_ARCH64) 11302 { 11303 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis, 11304 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi); 11305 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis, 11306 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si); 11307 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis, 11308 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi); 11309 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis, 11310 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si); 11311 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis, 11312 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi); 11313 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis, 11314 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si); 11315 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis, 11316 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi); 11317 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis, 11318 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si); 11319 } 11320 else 11321 { 11322 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis, 11323 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi); 11324 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis, 11325 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si); 11326 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis, 11327 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi); 11328 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis, 11329 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si); 11330 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis, 11331 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi); 11332 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis, 11333 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si); 11334 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis, 11335 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi); 11336 def_builtin_const 
("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis, 11337 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si); 11338 } 11339 11340 /* Addition and subtraction. */ 11341 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3, 11342 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi); 11343 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3, 11344 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi); 11345 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3, 11346 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si); 11347 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3, 11348 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si); 11349 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3, 11350 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi); 11351 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3, 11352 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi); 11353 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3, 11354 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si); 11355 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3, 11356 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si); 11357 11358 /* Three-dimensional array addressing. 
*/ 11359 if (TARGET_ARCH64) 11360 { 11361 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis, 11362 SPARC_BUILTIN_ARRAY8, di_ftype_di_di); 11363 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis, 11364 SPARC_BUILTIN_ARRAY16, di_ftype_di_di); 11365 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis, 11366 SPARC_BUILTIN_ARRAY32, di_ftype_di_di); 11367 } 11368 else 11369 { 11370 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis, 11371 SPARC_BUILTIN_ARRAY8, si_ftype_si_si); 11372 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis, 11373 SPARC_BUILTIN_ARRAY16, si_ftype_si_si); 11374 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis, 11375 SPARC_BUILTIN_ARRAY32, si_ftype_si_si); 11376 } 11377 11378 if (TARGET_VIS2) 11379 { 11380 /* Edge handling. */ 11381 if (TARGET_ARCH64) 11382 { 11383 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis, 11384 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr); 11385 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis, 11386 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr); 11387 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis, 11388 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr); 11389 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis, 11390 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr); 11391 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis, 11392 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr); 11393 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis, 11394 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr); 11395 } 11396 else 11397 { 11398 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis, 11399 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr); 11400 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis, 11401 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr); 11402 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis, 11403 SPARC_BUILTIN_EDGE16N, 
si_ftype_ptr_ptr); 11404 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis, 11405 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr); 11406 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis, 11407 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr); 11408 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis, 11409 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr); 11410 } 11411 11412 /* Byte mask and shuffle. */ 11413 if (TARGET_ARCH64) 11414 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis, 11415 SPARC_BUILTIN_BMASK, di_ftype_di_di); 11416 else 11417 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis, 11418 SPARC_BUILTIN_BMASK, si_ftype_si_si); 11419 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis, 11420 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi); 11421 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis, 11422 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi); 11423 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis, 11424 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si); 11425 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis, 11426 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di); 11427 } 11428 11429 if (TARGET_VIS3) 11430 { 11431 if (TARGET_ARCH64) 11432 { 11433 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis, 11434 SPARC_BUILTIN_CMASK8, void_ftype_di); 11435 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis, 11436 SPARC_BUILTIN_CMASK16, void_ftype_di); 11437 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis, 11438 SPARC_BUILTIN_CMASK32, void_ftype_di); 11439 } 11440 else 11441 { 11442 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis, 11443 SPARC_BUILTIN_CMASK8, void_ftype_si); 11444 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis, 11445 SPARC_BUILTIN_CMASK16, void_ftype_si); 11446 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis, 11447 SPARC_BUILTIN_CMASK32, void_ftype_si); 11448 } 
11449 11450 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis, 11451 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi); 11452 11453 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3, 11454 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi); 11455 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3, 11456 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi); 11457 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3, 11458 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi); 11459 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3, 11460 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi); 11461 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3, 11462 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si); 11463 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3, 11464 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si); 11465 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3, 11466 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si); 11467 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3, 11468 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si); 11469 11470 if (TARGET_ARCH64) 11471 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis, 11472 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi); 11473 else 11474 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis, 11475 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi); 11476 11477 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis, 11478 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi); 11479 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis, 11480 SPARC_BUILTIN_FPADD64, di_ftype_di_di); 11481 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis, 11482 SPARC_BUILTIN_FPSUB64, di_ftype_di_di); 11483 11484 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3, 11485 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi); 11486 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3, 11487 SPARC_BUILTIN_FPADDS16S, 
v2hi_ftype_v2hi_v2hi); 11488 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3, 11489 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi); 11490 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3, 11491 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi); 11492 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3, 11493 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si); 11494 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3, 11495 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si); 11496 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3, 11497 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si); 11498 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3, 11499 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si); 11500 11501 if (TARGET_ARCH64) 11502 { 11503 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis, 11504 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi); 11505 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis, 11506 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi); 11507 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis, 11508 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi); 11509 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis, 11510 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi); 11511 } 11512 else 11513 { 11514 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis, 11515 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi); 11516 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis, 11517 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi); 11518 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis, 11519 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi); 11520 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis, 11521 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi); 11522 } 11523 11524 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis, 11525 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf); 11526 
def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis, 11527 SPARC_BUILTIN_FHADDD, df_ftype_df_df); 11528 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis, 11529 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf); 11530 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis, 11531 SPARC_BUILTIN_FHSUBD, df_ftype_df_df); 11532 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis, 11533 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf); 11534 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis, 11535 SPARC_BUILTIN_FNHADDD, df_ftype_df_df); 11536 11537 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis, 11538 SPARC_BUILTIN_UMULXHI, di_ftype_di_di); 11539 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis, 11540 SPARC_BUILTIN_XMULX, di_ftype_di_di); 11541 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis, 11542 SPARC_BUILTIN_XMULXHI, di_ftype_di_di); 11543 } 11544 11545 if (TARGET_VIS4) 11546 { 11547 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3, 11548 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi); 11549 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3, 11550 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi); 11551 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3, 11552 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi); 11553 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3, 11554 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi); 11555 11556 11557 if (TARGET_ARCH64) 11558 { 11559 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis, 11560 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi); 11561 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis, 11562 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi); 11563 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis, 11564 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi); 11565 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis, 11566 
SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi); 11567 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis, 11568 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si); 11569 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis, 11570 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si); 11571 } 11572 else 11573 { 11574 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis, 11575 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi); 11576 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis, 11577 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi); 11578 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis, 11579 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi); 11580 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis, 11581 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi); 11582 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis, 11583 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si); 11584 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis, 11585 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si); 11586 } 11587 11588 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3, 11589 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi); 11590 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3, 11591 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi); 11592 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3, 11593 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si); 11594 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3, 11595 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi); 11596 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3, 11597 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi); 11598 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3, 11599 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si); 11600 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3, 11601 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi); 11602 
def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3, 11603 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi); 11604 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3, 11605 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si); 11606 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3, 11607 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi); 11608 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3, 11609 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi); 11610 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3, 11611 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si); 11612 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3, 11613 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi); 11614 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3, 11615 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi); 11616 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3, 11617 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi); 11618 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3, 11619 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi); 11620 } 11621 11622 if (TARGET_VIS4B) 11623 { 11624 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8, 11625 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si); 11626 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16, 11627 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si); 11628 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32, 11629 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si); 11630 11631 if (TARGET_ARCH64) 11632 { 11633 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node, 11634 v8qi, v8qi, 11635 intSI_type_node, 0); 11636 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node, 11637 v4hi, v4hi, 11638 intSI_type_node, 0); 11639 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node, 11640 v2si, v2si, 11641 intSI_type_node, 0); 11642 11643 def_builtin_const 
("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl, 11644 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si); 11645 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl, 11646 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si); 11647 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl, 11648 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si); 11649 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl, 11650 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si); 11651 11652 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl, 11653 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si); 11654 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl, 11655 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si); 11656 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl, 11657 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si); 11658 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl, 11659 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si); 11660 11661 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl, 11662 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si); 11663 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl, 11664 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si); 11665 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl, 11666 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si); 11667 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl, 11668 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si); 11669 11670 11671 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl, 11672 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si); 11673 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl, 11674 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si); 11675 11676 def_builtin_const ("__builtin_vis_fpcmpule16shl", 
CODE_FOR_fpcmpule16dishl, 11677 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si); 11678 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl, 11679 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si); 11680 11681 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl, 11682 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si); 11683 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl, 11684 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si); 11685 11686 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl, 11687 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si); 11688 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl, 11689 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si); 11690 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl, 11691 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si); 11692 11693 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl, 11694 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si); 11695 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl, 11696 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si); 11697 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl, 11698 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si); 11699 11700 } 11701 else 11702 { 11703 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node, 11704 v8qi, v8qi, 11705 intSI_type_node, 0); 11706 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node, 11707 v4hi, v4hi, 11708 intSI_type_node, 0); 11709 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node, 11710 v2si, v2si, 11711 intSI_type_node, 0); 11712 11713 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl, 11714 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si); 11715 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl, 11716 
SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si); 11717 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl, 11718 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si); 11719 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl, 11720 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si); 11721 11722 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl, 11723 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si); 11724 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl, 11725 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si); 11726 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl, 11727 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si); 11728 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl, 11729 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si); 11730 11731 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl, 11732 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si); 11733 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl, 11734 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si); 11735 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl, 11736 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si); 11737 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl, 11738 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si); 11739 11740 11741 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl, 11742 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si); 11743 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl, 11744 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si); 11745 11746 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl, 11747 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si); 11748 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl, 11749 SPARC_BUILTIN_FPCMPUGT16SHL, 
si_ftype_v4hi_v4hi_si); 11750 11751 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl, 11752 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si); 11753 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl, 11754 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si); 11755 11756 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl, 11757 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si); 11758 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl, 11759 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si); 11760 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl, 11761 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si); 11762 11763 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl, 11764 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si); 11765 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl, 11766 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si); 11767 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl, 11768 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si); 11769 } 11770 } 11771} 11772 11773/* Implement TARGET_BUILTIN_DECL hook. */ 11774 11775static tree 11776sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) 11777{ 11778 if (code >= SPARC_BUILTIN_MAX) 11779 return error_mark_node; 11780 11781 return sparc_builtins[code]; 11782} 11783 11784/* Implemented TARGET_EXPAND_BUILTIN hook. 
 */

static rtx
sparc_expand_builtin (tree exp, rtx target,
		      rtx subtarget ATTRIBUTE_UNUSED,
		      machine_mode tmode ATTRIBUTE_UNUSED,
		      int ignore ATTRIBUTE_UNUSED)
{
  /* Recover which builtin is being called and the insn pattern that
     implements it from the tables built at initialization time.  */
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  enum sparc_builtins code
    = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
  enum insn_code icode = sparc_builtins_icode[code];
  bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
  call_expr_arg_iterator iter;
  int arg_count = 0;
  rtx pat, op[4];
  tree arg;

  /* For value-returning builtins, operand 0 of the pattern is the
     destination; reuse TARGET when its mode and predicate allow it,
     otherwise grab a fresh pseudo.  */
  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	op[0] = gen_reg_rtx (tmode);
      else
	op[0] = target;
    }
  else
    op[0] = NULL_RTX;

  /* Expand each call argument into op[1..].  IDX is the corresponding
     operand slot in the insn pattern: for void builtins operand 0 is
     the first input, so the index is shifted down by one.  */
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      const struct insn_operand_data *insn_op;
      int idx;

      if (arg == error_mark_node)
	return NULL_RTX;

      arg_count++;
      idx = arg_count - !nonvoid;
      insn_op = &insn_data[icode].operand[idx];
      op[arg_count] = expand_normal (arg);

      /* Some of the builtins require constant arguments.  We check
	 for this here.  */
      if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
	   && code <= SPARC_BUILTIN_LAST_FPCMPSHL
	   && arg_count == 3)
	  || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
	      && code <= SPARC_BUILTIN_LAST_DICTUNPACK
	      && arg_count == 2))
	{
	  if (!check_constant_argument (icode, idx, op[arg_count]))
	    return const0_rtx;
	}

      /* The FSR load/store builtins take an address; wrap it in a
	 SImode MEM, legitimizing the address into a register first
	 if needed.  */
      if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
	{
	  if (!address_operand (op[arg_count], SImode))
	    {
	      op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
	      op[arg_count] = copy_addr_to_reg (op[arg_count]);
	    }
	  op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
	}

      /* Bridge the scalar/single-element-vector mode mismatch between
	 the front-end types and the insn patterns.  */
      else if (insn_op->mode == V1DImode
	       && GET_MODE (op[arg_count]) == DImode)
	op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);

      else if (insn_op->mode == V1SImode
	       && GET_MODE (op[arg_count]) == SImode)
	op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);

      /* Last resort: force the operand into a register of the mode the
	 pattern expects.  */
      if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
							insn_op->mode))
	op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
    }

  /* Emit the pattern with the number of operands actually collected.  */
  switch (arg_count)
    {
    case 0:
      pat = GEN_FCN (icode) (op[0]);
      break;
    case 1:
      if (nonvoid)
	pat = GEN_FCN (icode) (op[0], op[1]);
      else
	pat = GEN_FCN (icode) (op[1]);
      break;
    case 2:
      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 3:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  return (nonvoid ? op[0] : const0_rtx);
}

/* Return the upper 16 bits of the 8x16 multiplication.
*/ 11894 11895static int 11896sparc_vis_mul8x16 (int e8, int e16) 11897{ 11898 return (e8 * e16 + 128) / 256; 11899} 11900 11901/* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put 11902 the result into the array N_ELTS, whose elements are of INNER_TYPE. */ 11903 11904static void 11905sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode, 11906 tree inner_type, tree cst0, tree cst1) 11907{ 11908 unsigned i, num = VECTOR_CST_NELTS (cst0); 11909 int scale; 11910 11911 switch (fncode) 11912 { 11913 case SPARC_BUILTIN_FMUL8X16: 11914 for (i = 0; i < num; ++i) 11915 { 11916 int val 11917 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)), 11918 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i))); 11919 n_elts->quick_push (build_int_cst (inner_type, val)); 11920 } 11921 break; 11922 11923 case SPARC_BUILTIN_FMUL8X16AU: 11924 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0)); 11925 11926 for (i = 0; i < num; ++i) 11927 { 11928 int val 11929 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)), 11930 scale); 11931 n_elts->quick_push (build_int_cst (inner_type, val)); 11932 } 11933 break; 11934 11935 case SPARC_BUILTIN_FMUL8X16AL: 11936 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1)); 11937 11938 for (i = 0; i < num; ++i) 11939 { 11940 int val 11941 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)), 11942 scale); 11943 n_elts->quick_push (build_int_cst (inner_type, val)); 11944 } 11945 break; 11946 11947 default: 11948 gcc_unreachable (); 11949 } 11950} 11951 11952/* Implement TARGET_FOLD_BUILTIN hook. 11953 11954 Fold builtin functions for SPARC intrinsics. If IGNORE is true the 11955 result of the function call is ignored. NULL_TREE is returned if the 11956 function could not be folded. 
 */

static tree
sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
		    tree *args, bool ignore)
{
  enum sparc_builtins code
    = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
  tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
  tree arg0, arg1, arg2;

  /* If the result is unused, fold most builtins to zero outright.  The
     listed builtins are kept because expanding them matters even when
     the value is dropped (they touch machine state such as the FSR,
     GSR or alignment registers -- NOTE(review): presumed from their
     names; confirm against the expanders).  */
  if (ignore)
    switch (code)
      {
      case SPARC_BUILTIN_LDFSR:
      case SPARC_BUILTIN_STFSR:
      case SPARC_BUILTIN_ALIGNADDR:
      case SPARC_BUILTIN_WRGSR:
      case SPARC_BUILTIN_BMASK:
      case SPARC_BUILTIN_CMASK8:
      case SPARC_BUILTIN_CMASK16:
      case SPARC_BUILTIN_CMASK32:
	break;

      default:
	return build_zero_cst (rtype);
      }

  switch (code)
    {
    case SPARC_BUILTIN_FEXPAND:
      /* Fold fexpand of a constant vector: each 8-bit element becomes a
	 16-bit element shifted left by 4.  */
      arg0 = args[0];
      STRIP_NOPS (arg0);

      if (TREE_CODE (arg0) == VECTOR_CST)
	{
	  tree inner_type = TREE_TYPE (rtype);
	  unsigned i;

	  tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
	    {
	      unsigned HOST_WIDE_INT val
		= TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
	      n_elts.quick_push (build_int_cst (inner_type, val << 4));
	    }
	  return n_elts.build ();
	}
      break;

    case SPARC_BUILTIN_FMUL8X16:
    case SPARC_BUILTIN_FMUL8X16AU:
    case SPARC_BUILTIN_FMUL8X16AL:
      /* Fold the 8x16 multiplies of two constant vectors; the variant-
	 specific element handling lives in sparc_handle_vis_mul8x16.  */
      arg0 = args[0];
      arg1 = args[1];
      STRIP_NOPS (arg0);
      STRIP_NOPS (arg1);

      if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
	{
	  tree inner_type = TREE_TYPE (rtype);
	  tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
	  sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
	  return n_elts.build ();
	}
      break;

    case SPARC_BUILTIN_FPMERGE:
      /* Fold fpmerge of two constant vectors by interleaving their
	 elements.  */
      arg0 = args[0];
      arg1 = args[1];
      STRIP_NOPS (arg0);
      STRIP_NOPS (arg1);

      if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
	{
	  tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
	  unsigned i;
	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
	    {
	      n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
	      n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
	    }

	  return n_elts.build ();
	}
      break;

    case SPARC_BUILTIN_PDIST:
    case SPARC_BUILTIN_PDISTN:
      /* Fold pdist[n] of constant vectors: sum of absolute differences,
	 accumulated on top of arg2 (PDIST has an accumulator argument,
	 PDISTN starts from zero).  */
      arg0 = args[0];
      arg1 = args[1];
      STRIP_NOPS (arg0);
      STRIP_NOPS (arg1);
      if (code == SPARC_BUILTIN_PDIST)
	{
	  arg2 = args[2];
	  STRIP_NOPS (arg2);
	}
      else
	arg2 = integer_zero_node;

      if (TREE_CODE (arg0) == VECTOR_CST
	  && TREE_CODE (arg1) == VECTOR_CST
	  && TREE_CODE (arg2) == INTEGER_CST)
	{
	  bool overflow = false;
	  widest_int result = wi::to_widest (arg2);
	  widest_int tmp;
	  unsigned i;

	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
	    {
	      tree e0 = VECTOR_CST_ELT (arg0, i);
	      tree e1 = VECTOR_CST_ELT (arg1, i);

	      wi::overflow_type neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;

	      /* tmp = |e0 - e1|, tracking overflow at every step.  */
	      tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
	      tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
	      if (wi::neg_p (tmp))
		tmp = wi::neg (tmp, &neg2_ovf);
	      else
		neg2_ovf = wi::OVF_NONE;
	      result = wi::add (result, tmp, SIGNED, &add2_ovf);
	      overflow |= ((neg1_ovf != wi::OVF_NONE)
			   | (neg2_ovf != wi::OVF_NONE)
			   | (add1_ovf != wi::OVF_NONE)
			   | (add2_ovf != wi::OVF_NONE));
	    }

	  /* widest_int is wide enough that none of the above can
	     actually overflow for these element types.  */
	  gcc_assert (!overflow);

	  return wide_int_to_tree (rtype, result);
	}
      /* FALLTHRU -- arguments were not all constants.  */

    default:
      break;
    }

  /* Not foldable.  */
  return NULL_TREE;
}

/* ??? This duplicates information provided to the compiler by the
   ??? scheduler description.  Some day, teach genautomata to output
   ??? the latencies and then CSE will just use that.
 */

/* Implement TARGET_RTX_COSTS.  Set *TOTAL to the cost of expression X
   appearing under OUTER_CODE in mode MODE.  Returning true means *TOTAL
   is final; returning false lets the caller recurse into operands.  */

static bool
sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
		 int opno ATTRIBUTE_UNUSED,
		 int *total, bool speed ATTRIBUTE_UNUSED)
{
  int code = GET_CODE (x);
  bool float_mode_p = FLOAT_MODE_P (mode);

  switch (code)
    {
    case CONST_INT:
      /* Immediates that fit in the 13-bit simm13 field are free.  */
      if (SMALL_INT (x))
	*total = 0;
      else
	*total = 2;
      return true;

    case CONST_WIDE_INT:
      /* Charge 2 for each half that does not fit in simm13.  */
      *total = 0;
      if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
	*total += 2;
      if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
	*total += 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      *total = 8;
      return true;

    case MEM:
      /* If outer-code was a sign or zero extension, a cost
	 of COSTS_N_INSNS (1) was already added in.  This is
	 why we are subtracting it back out.  */
      if (outer_code == ZERO_EXTEND)
	{
	  *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
	}
      else if (outer_code == SIGN_EXTEND)
	{
	  *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
	}
      else if (float_mode_p)
	{
	  *total = sparc_costs->float_load;
	}
      else
	{
	  *total = sparc_costs->int_load;
	}

      return true;

    case PLUS:
    case MINUS:
      if (float_mode_p)
	*total = sparc_costs->float_plusminus;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case FMA:
      {
	rtx sub;

	gcc_assert (float_mode_p);
	*total = sparc_costs->float_mul;

	/* Negated operands are free: fnmadd/fnmsub-style patterns fold
	   the negation in, so cost only the operand underneath.  */
	sub = XEXP (x, 0);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, mode, FMA, 0, speed);

	sub = XEXP (x, 2);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, mode, FMA, 2, speed);
	return true;
      }

    case MULT:
      if (float_mode_p)
	*total = sparc_costs->float_mul;
      else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
	/* Software multiply (library call).  */
	*total = COSTS_N_INSNS (25);
      else
	{
	  int bit_cost;

	  /* For CPUs with early-out multipliers, add a per-bit cost
	     based on the population count of a constant multiplier
	     (see the int_mul_bit_factor description in processor_costs);
	     assume 7 significant bits for a non-constant one.  */
	  bit_cost = 0;
	  if (sparc_costs->int_mul_bit_factor)
	    {
	      int nbits;

	      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
		{
		  unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
		  for (nbits = 0; value != 0; value &= value - 1)
		    nbits++;
		}
	      else
		nbits = 7;

	      if (nbits < 3)
		nbits = 3;
	      bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
	      bit_cost = COSTS_N_INSNS (bit_cost);
	    }

	  if (mode == DImode || !TARGET_HARD_MUL)
	    *total = sparc_costs->int_mulX + bit_cost;
	  else
	    *total = sparc_costs->int_mul + bit_cost;
	}
      return false;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (float_mode_p)
	{
	  if (mode == DFmode)
	    *total = sparc_costs->float_div_df;
	  else
	    *total = sparc_costs->float_div_sf;
	}
      else
	{
	  if (mode == DImode)
	    *total = sparc_costs->int_divX;
	  else
	    *total = sparc_costs->int_div;
	}
      return false;

    case NEG:
      if (! float_mode_p)
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}
      /* FALLTHRU */

    case ABS:
    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
    case FLOAT_EXTEND:
    case FLOAT_TRUNCATE:
      *total = sparc_costs->float_move;
      return false;

    case SQRT:
      if (mode == DFmode)
	*total = sparc_costs->float_sqrt_df;
      else
	*total = sparc_costs->float_sqrt_sf;
      return false;

    case COMPARE:
      if (float_mode_p)
	*total = sparc_costs->float_cmp;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case IF_THEN_ELSE:
      if (float_mode_p)
	*total = sparc_costs->float_cmove;
      else
	*total = sparc_costs->int_cmove;
      return false;

    case IOR:
      /* Handle the NAND vector patterns.  */
      if (sparc_vector_mode_supported_p (mode)
	  && GET_CODE (XEXP (x, 0)) == NOT
	  && GET_CODE (XEXP (x, 1)) == NOT)
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else
	return false;

    default:
      return false;
    }
}

/* Return true if CLASS is either GENERAL_REGS or I64_REGS.  */

static inline bool
general_or_i64_p (reg_class_t rclass)
{
  return (rclass == GENERAL_REGS || rclass == I64_REGS);
}

/* Implement TARGET_REGISTER_MOVE_COST.
 */

static int
sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			  reg_class_t from, reg_class_t to)
{
  bool need_memory = false;

  /* This helps postreload CSE to eliminate redundant comparisons.  */
  if (from == NO_REGS || to == NO_REGS)
    return 100;

  /* Moves involving the FP condition codes, and int<->FP moves without
     VIS3, must go through memory.  The baseline cost of a plain
     register-to-register move is 2 (see the final return).  */
  if (from == FPCC_REGS || to == FPCC_REGS)
    need_memory = true;
  else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
	   || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
    {
      if (TARGET_VIS3)
	{
	  /* VIS3 has direct int<->FP move instructions for 4-byte
	     values, and for 8-byte values in 64-bit mode; an 8-byte
	     move in 32-bit mode needs two of them.  */
	  int size = GET_MODE_SIZE (mode);
	  if (size == 8 || size == 4)
	    {
	      if (! TARGET_ARCH32 || size == 4)
		return 4;
	      else
		return 6;
	    }
	}
      need_memory = true;
    }

  if (need_memory)
    {
      /* Memory-mediated moves are extra painful on these CPUs.  */
      if (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4
	  || sparc_cpu == PROCESSOR_NIAGARA7
	  || sparc_cpu == PROCESSOR_M8)
	return 12;

      return 6;
    }

  return 2;
}

/* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
   This is achieved by means of a manual dynamic stack space allocation in
   the current frame.  We make the assumption that SEQ doesn't contain any
   function calls, with the possible exception of calls to the GOT helper.  */

static void
emit_and_preserve (rtx seq, rtx reg, rtx reg2)
{
  /* We must preserve the lowest 16 words for the register save area.  */
  HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
  /* We really need only 2 words of fresh stack space.  */
  HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);

  rtx slot
    = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
					     SPARC_STACK_BIAS + offset));

  /* Allocate, spill REG (and REG2) into the slot, run SEQ, reload
     them, and deallocate -- strictly in that order.  */
  emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
  emit_insn (gen_rtx_SET (slot, reg));
  if (reg2)
    emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
			    reg2));
  emit_insn (seq);
  if (reg2)
    emit_insn (gen_rtx_SET (reg2,
			    adjust_address (slot, word_mode, UNITS_PER_WORD)));
  emit_insn (gen_rtx_SET (reg, slot));
  emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
}

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at address
   (*THIS + VCALL_OFFSET) should be additionally added to THIS.  */

static void
sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
		       HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		       tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
  rtx this_rtx, funexp;
  rtx_insn *insn;
  unsigned int int_arg_first;

  /* Pretend register allocation is done so that we can emit hard
     registers directly; undone at the end of the function.  */
  reload_completed = 1;
  epilogue_completed = 1;

  emit_note (NOTE_INSN_PROLOGUE_END);

  if (TARGET_FLAT)
    {
      sparc_leaf_function_p = 1;

      int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
    }
  else if (flag_delayed_branch)
    {
      /* We will emit a regular sibcall below, so we need to instruct
	 output_sibcall that we are in a leaf function.  */
      sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;

      /* This will cause final.c to invoke leaf_renumber_regs so we
	 must behave as if we were in a not-yet-leafified function.  */
      int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
    }
  else
    {
      /* We will emit the sibcall manually below, so we will need to
	 manually spill non-leaf registers.  */
      sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;

      /* We really are in a leaf function.  */
      int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
    }

  /* Find the "this" pointer.  Normally in %o0, but in ARCH64 if the function
     returns a structure, the structure return pointer is there instead.  */
  if (TARGET_ARCH64
      && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
  else
    this_rtx = gen_rtx_REG (Pmode, int_arg_first);

  /* Add DELTA.  When possible use a plain add, otherwise load it into
     a register first.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (! SPARC_SIMM13_P (delta))
	{
	  rtx scratch = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (scratch, delta_rtx);
	  delta_rtx = scratch;
	}

      /* THIS_RTX += DELTA.  */
      emit_insn (gen_add2_insn (this_rtx, delta_rtx));
    }

  /* Add the word at address (*THIS_RTX + VCALL_OFFSET).  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx scratch = gen_rtx_REG (Pmode, 1);

      gcc_assert (vcall_offset < 0);

      /* SCRATCH = *THIS_RTX.  */
      emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));

      /* Prepare for adding VCALL_OFFSET.  The difficulty is that we
	 may not have any available scratch register at this point.  */
      if (SPARC_SIMM13_P (vcall_offset))
	;
      /* This is the case if ARCH64 (unless -ffixed-g5 is passed).  */
      else if (! fixed_regs[5]
	       /* The below sequence is made up of at least 2 insns,
		  while the default method may need only one.  */
	       && vcall_offset < -8192)
	{
	  rtx scratch2 = gen_rtx_REG (Pmode, 5);
	  emit_move_insn (scratch2, vcall_offset_rtx);
	  vcall_offset_rtx = scratch2;
	}
      else
	{
	  rtx increment = GEN_INT (-4096);

	  /* VCALL_OFFSET is a negative number whose typical range can be
	     estimated as -32768..0 in 32-bit mode.  In almost all cases
	     it is therefore cheaper to emit multiple add insns than
	     spilling and loading the constant into a register (at least
	     6 insns).  */
	  while (! SPARC_SIMM13_P (vcall_offset))
	    {
	      emit_insn (gen_add2_insn (scratch, increment));
	      vcall_offset += 4096;
	    }
	  vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
	}

      /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (scratch, gen_rtx_MEM (Pmode,
					    gen_rtx_PLUS (Pmode,
							  scratch,
							  vcall_offset_rtx)));

      /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET).  */
      emit_insn (gen_add2_insn (this_rtx, scratch));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);

  if (flag_delayed_branch)
    {
      funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
      insn = emit_call_insn (gen_sibcall (funexp));
      SIBLING_CALL_P (insn) = 1;
    }
  else
    {
      /* The hoops we have to jump through in order to generate a sibcall
	 without using delay slots...  Load the function address into %g1
	 and jump indirectly through it.  */
      rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);

      if (flag_pic)
	{
	  spill_reg = gen_rtx_REG (word_mode, 15);  /* %o7 */
	  start_sequence ();
	  load_got_register ();	/* clobbers %o7 */
	  if (!TARGET_VXWORKS_RTP)
	    pic_offset_table_rtx = got_register_rtx;
	  scratch = sparc_legitimize_pic_address (funexp, scratch);
	  seq = get_insns ();
	  end_sequence ();
	  emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
	}
      else if (TARGET_ARCH32)
	{
	  emit_insn (gen_rtx_SET (scratch,
				  gen_rtx_HIGH (SImode, funexp)));
	  emit_insn (gen_rtx_SET (scratch,
				  gen_rtx_LO_SUM (SImode, scratch, funexp)));
	}
      else  /* TARGET_ARCH64 */
	{
	  switch (sparc_code_model)
	    {
	    case CM_MEDLOW:
	    case CM_MEDMID:
	      /* The destination can serve as a temporary.  */
	      sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
	      break;

	    case CM_MEDANY:
	    case CM_EMBMEDANY:
	      /* The destination cannot serve as a temporary.  */
	      spill_reg = gen_rtx_REG (DImode, 15);  /* %o7 */
	      start_sequence ();
	      sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
	      seq = get_insns ();
	      end_sequence ();
	      emit_and_preserve (seq, spill_reg, 0);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}

      emit_jump_insn (gen_indirect_jump (scratch));
    }

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  */
  insn = get_insns ();
  shorten_branches (insn);
  assemble_start_function (thunk_fndecl, fnname);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();
  assemble_end_function (thunk_fndecl, fnname);

  reload_completed = 0;
  epilogue_completed = 0;
}

/* Return true if sparc_output_mi_thunk would be able to output the
   assembler code for the thunk function specified by the arguments
   it is passed, and false otherwise.  */
static bool
sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
			   HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			   HOST_WIDE_INT vcall_offset,
			   const_tree function ATTRIBUTE_UNUSED)
{
  /* Bound the loop used in the default method above.  */
  return (vcall_offset >= -32768 || ! fixed_regs[5]);
}

/* How to allocate a 'struct machine_function'.  */

static struct machine_function *
sparc_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
sparc_asan_shadow_offset (void)
{
  /* Shadow memory base: 1<<43 for 64-bit, 1<<29 for 32-bit.  */
  return TARGET_ARCH64 ? (HOST_WIDE_INT_1 << 43) : (HOST_WIDE_INT_1 << 29);
}

/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void
sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  switch (size)
    {
    case 4:
      fputs ("\t.word\t%r_tls_dtpoff32(", file);
      break;
    case 8:
      fputs ("\t.xword\t%r_tls_dtpoff64(", file);
      break;
    default:
      gcc_unreachable ();
    }
  output_addr_const (file, x);
  fputs (")", file);
}

/* Do whatever processing is required at the end of a file.
 */

static void
sparc_file_end (void)
{
  /* If we need to emit the special GOT helper function, do so now.  */
  if (got_helper_needed)
    {
      const char *name = XSTR (got_helper_rtx, 0);
#ifdef DWARF2_UNWIND_INFO
      bool do_cfi;
#endif

      if (USE_HIDDEN_LINKONCE)
	{
	  /* Emit the helper as a hidden, one-only function so that
	     multiple objects can each carry a copy without clashing.  */
	  tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
				  get_identifier (name),
				  build_function_type_list (void_type_node,
							    NULL_TREE));
	  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
					   NULL_TREE, void_type_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
	  DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
	  DECL_VISIBILITY_SPECIFIED (decl) = 1;
	  resolve_unique_section (decl, 0, flag_function_sections);
	  allocate_struct_function (decl, true);
	  cfun->is_thunk = 1;
	  current_function_decl = decl;
	  init_varasm_status ();
	  assemble_start_function (decl, name);
	}
      else
	{
	  /* Otherwise emit a plain local label in the text section.  */
	  const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
          switch_to_section (text_section);
	  if (align > 0)
	    ASM_OUTPUT_ALIGN (asm_out_file, align);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

#ifdef DWARF2_UNWIND_INFO
      do_cfi = dwarf2out_do_cfi_asm ();
      if (do_cfi)
	output_asm_insn (".cfi_startproc", NULL);
#endif
      if (flag_delayed_branch)
	{
	  /* Put the ADD in the delay slot of the return JMP.  */
	  output_asm_insn ("jmp\t%%o7+8", NULL);
	  output_asm_insn (" add\t%%o7, %0, %0", &got_register_rtx);
	}
      else
	{
	  output_asm_insn ("add\t%%o7, %0, %0", &got_register_rtx);
	  output_asm_insn ("jmp\t%%o7+8", NULL);
	  output_asm_insn (" nop", NULL);
	}
#ifdef DWARF2_UNWIND_INFO
      if (do_cfi)
	output_asm_insn (".cfi_endproc", NULL);
#endif
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();

#ifdef TARGET_SOLARIS
  solaris_file_end ();
#endif
}

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
/* Implement TARGET_MANGLE_TYPE.  */

static const char *
sparc_mangle_type (const_tree type)
{
  /* 128-bit long double on 32-bit SPARC mangles as "g" (__float128
     mangling) instead of the default "e".  */
  if (TARGET_ARCH32
      && TYPE_MAIN_VARIANT (type) == long_double_type_node
      && TARGET_LONG_DOUBLE_128)
    return "g";

  /* For all other types, use normal C++ mangling.  */
  return NULL;
}
#endif

/* Expand a membar instruction for various use cases.  Both the LOAD_STORE
   and BEFORE_AFTER arguments are of the form X_Y.  They are two-bit masks
   where bit 0 indicates that X is true, and bit 1 indicates Y is true.  */

void
sparc_emit_membar_for_model (enum memmodel model,
			     int load_store, int before_after)
{
  /* Bits for the MEMBAR mmask field.  */
  const int LoadLoad = 1;
  const int StoreLoad = 2;
  const int LoadStore = 4;
  const int StoreStore = 8;

  int mm = 0, implied = 0;

  switch (sparc_memory_model)
    {
    case SMM_SC:
      /* Sequential Consistency.  All memory transactions are immediately
	 visible in sequential execution order.  No barriers needed.  */
      implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
      break;

    case SMM_TSO:
      /* Total Store Ordering: all memory transactions with store semantics
	 are followed by an implied StoreStore.  */
      implied |= StoreStore;

      /* If we're not looking for a raw barrier (before+after), then atomic
	 operations get the benefit of being both load and store.  */
      if (load_store == 3 && before_after == 1)
	implied |= StoreLoad;
      /* FALLTHRU */

    case SMM_PSO:
      /* Partial Store Ordering: all memory transactions with load semantics
	 are followed by an implied LoadLoad | LoadStore.  */
      implied |= LoadLoad | LoadStore;

      /* If we're not looking for a raw barrier (before+after), then atomic
	 operations get the benefit of being both load and store.  */
      if (load_store == 3 && before_after == 2)
	implied |= StoreLoad | StoreStore;
      /* FALLTHRU */

    case SMM_RMO:
      /* Relaxed Memory Ordering: no implicit bits.  */
      break;

    default:
      gcc_unreachable ();
    }

  /* Compute the barrier bits required by MODEL on top of the hardware's
     guarantees.  */
  if (before_after & 1)
    {
      if (is_mm_release (model) || is_mm_acq_rel (model)
	  || is_mm_seq_cst (model))
	{
	  if (load_store & 1)
	    mm |= LoadLoad | StoreLoad;
	  if (load_store & 2)
	    mm |= LoadStore | StoreStore;
	}
    }
  if (before_after & 2)
    {
      if (is_mm_acquire (model) || is_mm_acq_rel (model)
	  || is_mm_seq_cst (model))
	{
	  if (load_store & 1)
	    mm |= LoadLoad | LoadStore;
	  if (load_store & 2)
	    mm |= StoreLoad | StoreStore;
	}
    }

  /* Remove the bits implied by the system memory model.  */
  mm &= ~implied;

  /* For raw barriers (before+after), always emit a barrier.
     This will become a compile-time barrier if needed.  */
  if (mm || before_after == 3)
    emit_insn (gen_membar (GEN_INT (mm)));
}

/* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit
   compare and swap on the word containing the byte or half-word.
 */

static void
sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
				  rtx oldval, rtx newval)
{
  rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
  rtx addr = gen_reg_rtx (Pmode);
  rtx off = gen_reg_rtx (SImode);
  rtx oldv = gen_reg_rtx (SImode);
  rtx newv = gen_reg_rtx (SImode);
  rtx oldvalue = gen_reg_rtx (SImode);
  rtx newvalue = gen_reg_rtx (SImode);
  rtx res = gen_reg_rtx (SImode);
  rtx resv = gen_reg_rtx (SImode);
  rtx memsi, val, mask, cc;

  /* ADDR is the containing word: the original address with the low two
     bits masked off.  */
  emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));

  /* OFF is the byte offset of the sub-word within that word.  */
  if (Pmode != SImode)
    addr1 = gen_lowpart (SImode, addr1);
  emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));

  memsi = gen_rtx_MEM (SImode, addr);
  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);

  val = copy_to_reg (memsi);

  /* Convert the byte offset into a big-endian bit shift: XOR with 3 (QI)
     or 2 (HI), then scale by 8 bits per byte.  */
  emit_insn (gen_rtx_SET (off,
			  gen_rtx_XOR (SImode, off,
				       GEN_INT (GET_MODE (mem) == QImode
						? 3 : 2))));

  emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));

  if (GET_MODE (mem) == QImode)
    mask = force_reg (SImode, GEN_INT (0xff));
  else
    mask = force_reg (SImode, GEN_INT (0xffff));

  /* MASK selects the sub-word lanes within the full word.  */
  emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));

  /* VAL keeps only the bytes of the word outside the sub-word.  */
  emit_insn (gen_rtx_SET (val,
			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
				       val)));

  oldval = gen_lowpart (SImode, oldval);
  emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)))); 

  newval = gen_lowpart_common (SImode, newval);
  emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));

  emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));

  emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));

  /* Loop until the word-wide CAS does not fail because the bytes outside
     the sub-word changed under us.  */
  rtx_code_label *end_label = gen_label_rtx ();
  rtx_code_label *loop_label = gen_label_rtx ();
  emit_label (loop_label);

  emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));

  emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));

  emit_move_insn (bool_result, const1_rtx);

  emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));

  emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);

  emit_insn (gen_rtx_SET (resv,
			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
				       res)));

  emit_move_insn (bool_result, const0_rtx);

  cc = gen_compare_reg_1 (NE, resv, val);
  emit_insn (gen_rtx_SET (val, resv));

  /* Use cbranchcc4 to separate the compare and branch!  */
  emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
				  cc, const0_rtx, loop_label));

  emit_label (end_label);

  /* Extract the sub-word result from the full word.  */
  emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));

  emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));

  emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
}

/* Expand code to perform a compare-and-swap.  */

void
sparc_expand_compare_and_swap (rtx operands[])
{
  rtx bval, retval, mem, oldval, newval;
  machine_mode mode;
  enum memmodel model;

  bval = operands[0];
  retval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  model = (enum memmodel) INTVAL (operands[6]);
  mode = GET_MODE (mem);

  sparc_emit_membar_for_model (model, 3, 1);

  if (reg_overlap_mentioned_p (retval, oldval))
    oldval = copy_to_reg (oldval);

  if (mode == QImode || mode == HImode)
    sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx);
      rtx x;

      if (mode == SImode)
	gen = gen_atomic_compare_and_swapsi_1;
      else
	gen = gen_atomic_compare_and_swapdi_1;
      emit_insn (gen (retval, mem, oldval, newval));

      /* The boolean result is whether the CAS returned the expected
	 old value.  */
      x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
      if (x != bval)
	convert_move (bval, x, 1);
    }

  sparc_emit_membar_for_model (model, 3, 2);
}

/* Expand the VIS2 BMASK operand SEL for a vector permutation of mode
   VMODE, packing the per-element indices into the byte-selector format
   the bmask insn expects.  */

void
sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
{
  rtx t_1, t_2, t_3;

  sel = gen_lowpart (DImode, sel);
  switch (vmode)
    {
    case E_V2SImode:
      /* inp = xxxxxxxAxxxxxxxB */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....xxxxxxxAxxx.
	 */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
				 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
				 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......B */
      /* t_1 = ...A.... */
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      /* sel = ...A...B */
      sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
      /* sel = AAAABBBB * 4 */
      t_1 = force_reg (SImode, GEN_INT (0x01230123));
      /* sel = { A*4, A*4+1, A*4+2, ... } */
      break;

    case E_V4HImode:
      /* inp = xxxAxxxBxxxCxxxD */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..xxxAxxxBxxxCxx */
      /* t_2 = ....xxxAxxxBxxxC */
      /* t_3 = ......xxxAxxxBxx */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
				 GEN_INT (0x07),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
				 GEN_INT (0x0700),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
				 GEN_INT (0x070000),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
				 GEN_INT (0x07000000),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......D */
      /* t_1 = .....C.. */
      /* t_2 = ...B.... */
      /* t_3 = .A...... */
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
      sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D */
      sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
      /* sel = AABBCCDD * 2 */
      t_1 = force_reg (SImode, GEN_INT (0x01010101));
      /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
      break;

    case E_V8QImode:
      /* input = xAxBxCxDxExFxGxH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
					  | 0x0f0f0f0f),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D.E.F.G.H */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..A.B.C.D.E.F.G. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .AABBCCDDEEFFGGH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
					  | 0xff00ff),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..AB..CD..EF..GH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....AB..CD..EF.. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..ABABCDCDEFEFGH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ....ABCD....EFGH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ........ABCD.... */
      sel = gen_lowpart (SImode, sel);
      t_1 = gen_lowpart (SImode, t_1);
      break;

    default:
      gcc_unreachable ();
    }

  /* Always perform the final addition/merge within the bmask insn.  */
  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
}

/* Implement TARGET_VEC_PERM_CONST.  */

static bool
sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
				rtx op1, const vec_perm_indices &sel)
{
  if (!TARGET_VIS2)
    return false;

  /* All 8-byte permutes are supported.  */
  if (!target)
    return GET_MODE_SIZE (vmode) == 8;

  /* Force target-independent code to convert constant permutations on other
     modes down to V8QI.  Rely on this to avoid the complexity of the byte
     order of the permutation.  */
  if (vmode != V8QImode)
    return false;

  /* Pack the 8 nibble-sized selector indices, most significant first,
     into the BMASK immediate.  */
  unsigned int i, mask;
  for (i = mask = 0; i < 8; ++i)
    mask |= (sel[i] & 0xf) << (28 - i*4);
  rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode));

  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx));
  emit_insn (gen_bshufflev8qi_vis (target, op0, op1));
  return true;
}

/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
sparc_frame_pointer_required (void)
{
  /* If the stack pointer is dynamically modified in the function, it cannot
     serve as the frame pointer.  */
  if (cfun->calls_alloca)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* In flat mode, that's it.  */
  if (TARGET_FLAT)
    return false;

  /* Otherwise, the frame pointer is required if the function isn't leaf, but
     we cannot use sparc_leaf_function_p since it hasn't been computed yet.
     */
  return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
}

/* The way this is structured, we can't eliminate SFP in favor of SP
   if the frame pointer is required: we want to use the SFP->HFP elimination
   in that case.  But the test in update_eliminables doesn't know we are
   assuming below that we only do the former elimination.  */

static bool
sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
}

/* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
   they won't be allocated.  */

static void
sparc_conditional_register_usage (void)
{
  if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
  /* If the user has passed -f{fixed,call-{used,saved}}-g5, then honor it.
     NOTE(review): the value 2 in fixed_regs[] appears to be the "not set
     explicitly by the user" sentinel — confirm against the option
     machinery.  */
  if (TARGET_ARCH32 && fixed_regs[5])
    fixed_regs[5] = 1;
  else if (TARGET_ARCH64 && fixed_regs[5] == 2)
    fixed_regs[5] = 0;
  if (! TARGET_V9)
    {
      int regno;
      /* The upper FP registers only exist on V9.  */
      for (regno = SPARC_FIRST_V9_FP_REG;
	   regno <= SPARC_LAST_V9_FP_REG;
	   regno++)
	fixed_regs[regno] = 1;
      /* %fcc0 is used by v8 and v9.  */
      for (regno = SPARC_FIRST_V9_FCC_REG + 1;
	   regno <= SPARC_LAST_V9_FCC_REG;
	   regno++)
	fixed_regs[regno] = 1;
    }
  if (! TARGET_FPU)
    {
      int regno;
      for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
	fixed_regs[regno] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g2, then honor it.
     Likewise with g3 and g4.  */
  if (fixed_regs[2] == 2)
    fixed_regs[2] = ! TARGET_APP_REGS;
  if (fixed_regs[3] == 2)
    fixed_regs[3] = ! TARGET_APP_REGS;
  if (TARGET_ARCH32 && fixed_regs[4] == 2)
    fixed_regs[4] = ! TARGET_APP_REGS;
  else if (TARGET_CM_EMBMEDANY)
    /* EMBMEDANY reserves %g4 as the data segment base register.  */
    fixed_regs[4] = 1;
  else if (fixed_regs[4] == 2)
    fixed_regs[4] = 0;
  if (TARGET_FLAT)
    {
      int regno;
      /* Disable leaf functions.  */
      memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	leaf_reg_remap [regno] = regno;
    }
  if (TARGET_VIS)
    global_regs[SPARC_GSR_REG] = 1;
}

/* Implement TARGET_USE_PSEUDO_PIC_REG.  */

static bool
sparc_use_pseudo_pic_reg (void)
{
  return !TARGET_VXWORKS_RTP && flag_pic;
}

/* Implement TARGET_INIT_PIC_REG.  */

static void
sparc_init_pic_reg (void)
{
  edge entry_edge;
  rtx_insn *seq;

  /* In PIC mode, we need to always initialize the PIC register if optimization
     is enabled, because we are called from IRA and LRA may later force things
     to the constant pool for optimization purposes.  */
  if (!flag_pic || (!crtl->uses_pic_offset_table && !optimize))
    return;

  start_sequence ();
  load_got_register ();
  if (!TARGET_VXWORKS_RTP)
    emit_move_insn (pic_offset_table_rtx, got_register_rtx);
  seq = get_insns ();
  end_sequence ();

  /* Insert the GOT setup on the unique edge out of the entry block so it
     runs exactly once, before any user code.  */
  entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  insert_insn_on_edge (seq, entry_edge);
  commit_one_edge_insertion (entry_edge);
}

/* Implement TARGET_PREFERRED_RELOAD_CLASS:

   - We can't load constants into FP registers.
   - We can't load FP constants into integer registers when soft-float,
     because there is no soft-float pattern with a r/F constraint.
   - We can't load FP constants into integer registers for TFmode unless
     it is 0.0L, because there is no movtf pattern with a r/F constraint.
   - Try and reload integer constants (symbolic or otherwise) back into
     registers directly, rather than having them dumped to memory.  */

static reg_class_t
sparc_preferred_reload_class (rtx x, reg_class_t rclass)
{
  machine_mode mode = GET_MODE (x);
  if (CONSTANT_P (x))
    {
      if (FP_REG_CLASS_P (rclass)
	  || rclass == GENERAL_OR_FP_REGS
	  || rclass == GENERAL_OR_EXTRA_FP_REGS
	  || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
	  || (mode == TFmode && ! const_zero_operand (x, mode)))
	return NO_REGS;

      if (GET_MODE_CLASS (mode) == MODE_INT)
	return GENERAL_REGS;

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  /* Vector constants other than all-zeros/all-ones can't be
	     materialized directly in FP registers.  */
	  if (! FP_REG_CLASS_P (rclass)
	      || !(const_zero_operand (x, mode)
		   || const_all_ones_operand (x, mode)))
	    return NO_REGS;
	}
    }

  if (TARGET_VIS3
      && ! TARGET_ARCH64
      && (rclass == EXTRA_FP_REGS
	  || rclass == GENERAL_OR_EXTRA_FP_REGS))
    {
      int regno = true_regnum (x);

      /* Steer int<->fp moves away from the upper FP registers, which
	 32-bit VIS3 moves cannot reach.  */
      if (SPARC_INT_REG_P (regno))
	return (rclass == EXTRA_FP_REGS
		? FP_REGS : GENERAL_OR_FP_REGS);
    }

  return rclass;
}

/* Return true if we use LRA instead of reload pass.  */

static bool
sparc_lra_p (void)
{
  return TARGET_LRA;
}

/* Output a wide multiply instruction in V8+ mode.  INSN is the instruction,
   OPERANDS are its operands and OPCODE is the mnemonic to be used.  */

const char *
output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
{
  char mulstr[32];

  gcc_assert (! TARGET_ARCH64);

  /* Zero-extend the low words if they are not already known to be
     64-bit clean.  */
  if (sparc_check_64 (operands[1], insn) <= 0)
    output_asm_insn ("srl\t%L1, 0, %L1", operands);
  if (which_alternative == 1)
    output_asm_insn ("sllx\t%H1, 32, %H1", operands);
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      if (which_alternative == 1)
	{
	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
	  sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
	  output_asm_insn (mulstr, operands);
	  return "srlx\t%L0, 32, %H0";
	}
      else
	{
	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
	  output_asm_insn ("or\t%L1, %3, %3", operands);
	  sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
	  output_asm_insn (mulstr, operands);
	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
	  return "mov\t%3, %L0";
	}
    }
  else if (rtx_equal_p (operands[1], operands[2]))
    {
      /* Squaring: reuse the single combined operand.  */
      if (which_alternative == 1)
	{
	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
	  sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
	  output_asm_insn (mulstr, operands);
	  return "srlx\t%L0, 32, %H0";
	}
      else
	{
	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
	  output_asm_insn ("or\t%L1, %3, %3", operands);
	  sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
	  output_asm_insn (mulstr, operands);
	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
	  return "mov\t%3, %L0";
	}
    }
  if (sparc_check_64 (operands[2], insn) <= 0)
    output_asm_insn ("srl\t%L2, 0, %L2", operands);
  if (which_alternative == 1)
    {
      output_asm_insn ("or\t%L1, %H1, %H1", operands);
      output_asm_insn ("sllx\t%H2, 32, %L1", operands);
      output_asm_insn ("or\t%L2, %L1, %L1", operands);
      sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
      output_asm_insn (mulstr, operands);
      return "srlx\t%L0, 32, %H0";
    }
  else
    {
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      output_asm_insn ("sllx\t%H2, 32, %4", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
      output_asm_insn ("or\t%L2, %4, %4", operands);
      sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
      output_asm_insn (mulstr, operands);
      output_asm_insn ("srlx\t%3, 32, %H0", operands);
      return "mov\t%3, %L0";
    }
}

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn.  MODE
   and INNER_MODE are the modes describing TARGET.  */

static void
vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
		      machine_mode inner_mode)
{
  rtx t1, final_insn, sel;
  int bmask;

  t1 = gen_reg_rtx (mode);

  elt = convert_modes (SImode, inner_mode, elt, true);
  emit_move_insn (gen_lowpart(SImode, t1), elt);

  /* The BMASK value replicates the element sitting in the low part of
     T1 across every lane of the result.  */
  switch (mode)
    {
    case E_V2SImode:
      final_insn = gen_bshufflev2si_vis (target, t1, t1);
      bmask = 0x45674567;
      break;
    case E_V4HImode:
      final_insn = gen_bshufflev4hi_vis (target, t1, t1);
      bmask = 0x67676767;
      break;
    case E_V8QImode:
      final_insn = gen_bshufflev8qi_vis (target, t1, t1);
      bmask = 0x77777777;
      break;
    default:
      gcc_unreachable ();
    }

  sel = force_reg (SImode, GEN_INT (bmask));
  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
  emit_insn (final_insn);
}

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn.
   */

static void
vector_init_fpmerge (rtx target, rtx elt)
{
  rtx t1, t2, t2_low, t3, t3_low;

  t1 = gen_reg_rtx (V4QImode);
  elt = convert_modes (SImode, QImode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  /* Each FPMERGE doubles the number of copies of the byte.  */
  t2 = gen_reg_rtx (V8QImode);
  t2_low = gen_lowpart (V4QImode, t2);
  emit_insn (gen_fpmerge_vis (t2, t1, t1));

  t3 = gen_reg_rtx (V8QImode);
  t3_low = gen_lowpart (V4QImode, t3);
  emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));

  emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
}

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn.  */

static void
vector_init_faligndata (rtx target, rtx elt)
{
  rtx t1 = gen_reg_rtx (V4HImode);
  int i;

  elt = convert_modes (SImode, HImode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
				  force_reg (SImode, GEN_INT (6)),
				  const0_rtx));

  /* Rotate the half-word into every lane, one FALIGNDATA per lane.  */
  for (i = 0; i < 4; i++)
    emit_insn (gen_faligndatav4hi_vis (target, t1, target));
}

/* Emit code to initialize TARGET to values for individual fields VALS.  */

void
sparc_expand_vector_init (rtx target, rtx vals)
{
  const machine_mode mode = GET_MODE (target);
  const machine_mode inner_mode = GET_MODE_INNER (mode);
  const int n_elts = GET_MODE_NUNITS (mode);
  int i, n_var = 0;
  bool all_same = true;
  rtx mem;

  /* Count the non-constant elements and detect the broadcast case.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx x = XVECEXP (vals, 0, i);
      if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
	n_var++;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* All-constant vectors can just be moved as a CONST_VECTOR.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
    {
      /* Single-element vector: a scalar move in the right width.  */
      if (GET_MODE_SIZE (inner_mode) == 4)
	{
	  emit_move_insn (gen_lowpart (SImode, target),
			  gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
	  return;
	}
      else if (GET_MODE_SIZE (inner_mode) == 8)
	{
	  emit_move_insn (gen_lowpart (DImode, target),
			  gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
	  return;
	}
    }
  else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
	   && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
    {
      /* Two word-sized elements: move them as high and low parts.  */
      emit_move_insn (gen_highpart (word_mode, target),
		      gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
      emit_move_insn (gen_lowpart (word_mode, target),
		      gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
      return;
    }

  /* 8-byte broadcast: use the best VIS idiom available.  */
  if (all_same && GET_MODE_SIZE (mode) == 8)
    {
      if (TARGET_VIS2)
	{
	  vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
	  return;
	}
      if (mode == V8QImode)
	{
	  vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
	  return;
	}
      if (mode == V4HImode)
	{
	  vector_init_faligndata (target, XVECEXP (vals, 0, 0));
	  return;
	}
    }

  /* Fallback: build the vector in a stack temporary element by element.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}

/* Implement TARGET_SECONDARY_RELOAD.  */

static reg_class_t
sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
			machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  sri->icode = CODE_FOR_nothing;
  sri->extra_cost = 0;

  /* We need a temporary when loading/storing a HImode/QImode value
     between memory and the FPU registers.  This can happen when combine puts
     a paradoxical subreg in a float/fix conversion insn.  */
  if (FP_REG_CLASS_P (rclass)
      && (mode == HImode || mode == QImode)
      && (GET_CODE (x) == MEM
	  || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
	      && true_regnum (x) == -1)))
    return GENERAL_REGS;

  /* On 32-bit we need a temporary when loading/storing a DFmode value
     between unaligned memory and the upper FPU registers.  */
  if (TARGET_ARCH32
      && rclass == EXTRA_FP_REGS
      && mode == DFmode
      && GET_CODE (x) == MEM
      && ! mem_min_alignment (x, 8))
    return FP_REGS;

  if (((TARGET_CM_MEDANY
	&& symbolic_operand (x, mode))
       || (TARGET_CM_EMBMEDANY
	   && text_segment_operand (x, mode)))
      && ! flag_pic)
    {
      if (in_p)
	sri->icode = direct_optab_handler (reload_in_optab, mode);
      else
	sri->icode = direct_optab_handler (reload_out_optab, mode);
      return NO_REGS;
    }

  if (TARGET_VIS3 && TARGET_ARCH32)
    {
      int regno = true_regnum (x);

      /* When using VIS3 fp<-->int register moves, on 32-bit we have
	 to move 8-byte values in 4-byte pieces.  This only works via
	 FP_REGS, and not via EXTRA_FP_REGS.
Therefore if we try to 13574 move between EXTRA_FP_REGS and GENERAL_REGS, we will need 13575 an FP_REGS intermediate move. */ 13576 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno)) 13577 || ((general_or_i64_p (rclass) 13578 || rclass == GENERAL_OR_FP_REGS) 13579 && SPARC_FP_REG_P (regno))) 13580 { 13581 sri->extra_cost = 2; 13582 return FP_REGS; 13583 } 13584 } 13585 13586 return NO_REGS; 13587} 13588 13589/* Implement TARGET_SECONDARY_MEMORY_NEEDED. 13590 13591 On SPARC when not VIS3 it is not possible to directly move data 13592 between GENERAL_REGS and FP_REGS. */ 13593 13594static bool 13595sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1, 13596 reg_class_t class2) 13597{ 13598 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2)) 13599 && (! TARGET_VIS3 13600 || GET_MODE_SIZE (mode) > 8 13601 || GET_MODE_SIZE (mode) < 4)); 13602} 13603 13604/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. 13605 13606 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9 13607 because the movsi and movsf patterns don't handle r/f moves. 13608 For v8 we copy the default definition. */ 13609 13610static machine_mode 13611sparc_secondary_memory_needed_mode (machine_mode mode) 13612{ 13613 if (TARGET_ARCH64) 13614 { 13615 if (GET_MODE_BITSIZE (mode) < 32) 13616 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require (); 13617 return mode; 13618 } 13619 else 13620 { 13621 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD) 13622 return mode_for_size (BITS_PER_WORD, 13623 GET_MODE_CLASS (mode), 0).require (); 13624 return mode; 13625 } 13626} 13627 13628/* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into 13629 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. 
*/ 13630 13631bool 13632sparc_expand_conditional_move (machine_mode mode, rtx *operands) 13633{ 13634 enum rtx_code rc = GET_CODE (operands[1]); 13635 machine_mode cmp_mode; 13636 rtx cc_reg, dst, cmp; 13637 13638 cmp = operands[1]; 13639 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64) 13640 return false; 13641 13642 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD) 13643 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc); 13644 13645 cmp_mode = GET_MODE (XEXP (cmp, 0)); 13646 rc = GET_CODE (cmp); 13647 13648 dst = operands[0]; 13649 if (! rtx_equal_p (operands[2], dst) 13650 && ! rtx_equal_p (operands[3], dst)) 13651 { 13652 if (reg_overlap_mentioned_p (dst, cmp)) 13653 dst = gen_reg_rtx (mode); 13654 13655 emit_move_insn (dst, operands[3]); 13656 } 13657 else if (operands[2] == dst) 13658 { 13659 operands[2] = operands[3]; 13660 13661 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT) 13662 rc = reverse_condition_maybe_unordered (rc); 13663 else 13664 rc = reverse_condition (rc); 13665 } 13666 13667 if (XEXP (cmp, 1) == const0_rtx 13668 && GET_CODE (XEXP (cmp, 0)) == REG 13669 && cmp_mode == DImode 13670 && v9_regcmp_p (rc)) 13671 cc_reg = XEXP (cmp, 0); 13672 else 13673 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1)); 13674 13675 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx); 13676 13677 emit_insn (gen_rtx_SET (dst, 13678 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst))); 13679 13680 if (dst != operands[0]) 13681 emit_move_insn (operands[0], dst); 13682 13683 return true; 13684} 13685 13686/* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2] 13687 into OPERANDS[0] in MODE, depending on the outcome of the comparison of 13688 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition. 13689 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine 13690 code to be used for the condition mask. 
*/

void
sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
{
  rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
  enum rtx_code code = GET_CODE (operands[3]);

  mask = gen_reg_rtx (Pmode);
  cop0 = operands[4];
  cop1 = operands[5];
  /* Canonicalize LT/GE into GT/LE by swapping the comparison
     operands, so only one insn form is needed per relation.  */
  if (code == LT || code == GE)
    {
      rtx t;

      code = swap_condition (code);
      t = cop0; cop0 = cop1; cop1 = t;
    }

  gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);

  /* The vector comparison producing the mask, wrapped in FCODE.  */
  fcmp = gen_rtx_UNSPEC (Pmode,
			 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
			 fcode);

  /* Turn the mask into a condition mask in the GSR, via CCODE.  */
  cmask = gen_rtx_UNSPEC (DImode,
			  gen_rtvec (2, mask, gsr),
			  ccode);

  /* BSHUFFLE selects between OPERANDS[1] and OPERANDS[2] under the
     control of the GSR mask.  */
  bshuf = gen_rtx_UNSPEC (mode,
			  gen_rtvec (3, operands[1], operands[2], gsr),
			  UNSPEC_BSHUFFLE);

  /* Emission order matters: compute the mask, load it into the GSR,
     then perform the shuffle that reads the GSR.  */
  emit_insn (gen_rtx_SET (mask, fcmp));
  emit_insn (gen_rtx_SET (gsr, cmask));

  emit_insn (gen_rtx_SET (operands[0], bshuf));
}

/* On sparc, any mode which naturally allocates into the float
   registers should return 4 here.  */

unsigned int
sparc_regmode_natural_size (machine_mode mode)
{
  int size = UNITS_PER_WORD;

  if (TARGET_ARCH64)
    {
      enum mode_class mclass = GET_MODE_CLASS (mode);

      /* Float and integer-vector modes live in FP registers, which
	 are 4 bytes wide even on 64-bit.  */
      if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
	size = 4;
    }

  return size;
}

/* Implement TARGET_HARD_REGNO_NREGS.

   On SPARC, ordinary registers hold 32 bits worth; this means both
   integer and floating point registers.  On v9, integer regs hold 64
   bits worth; floating point regs hold 32 bits worth (this includes the
   new fp regs as even the odd ones are included in the hard register
   count).
*/

static unsigned int
sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  /* The GSR is a single register regardless of mode.  */
  if (regno == SPARC_GSR_REG)
    return 1;
  if (TARGET_ARCH64)
    {
      /* 64-bit integer registers (and the soft frame pointer) hold a
	 full word; FP registers still hold only 32 bits each.  */
      if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
	return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
      return CEIL (GET_MODE_SIZE (mode), 4);
    }
  /* arch32: every register holds one 32-bit word.  */
  return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
}

/* Implement TARGET_HARD_REGNO_MODE_OK.

   ??? Because of the funny way we pass parameters we should allow certain
   ??? types of float/complex values to be in integer registers during
   ??? RTL generation.  This only matters on arch32.  */

static bool
sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  /* Table lookup: the class bits precomputed per register must
     intersect the class bits precomputed per mode.  */
  return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
}

/* Implement TARGET_MODES_TIEABLE_P.

   For V9 we have to deal with the fact that only the lower 32 floating
   point registers are 32-bit addressable.  */

static bool
sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  enum mode_class mclass1, mclass2;
  unsigned short size1, size2;

  /* Identical modes are trivially tieable.  */
  if (mode1 == mode2)
    return true;

  mclass1 = GET_MODE_CLASS (mode1);
  mclass2 = GET_MODE_CLASS (mode2);
  if (mclass1 != mclass2)
    return false;

  if (! TARGET_V9)
    return true;

  /* Classes are the same and we are V9 so we have to deal with upper
     vs. lower floating point registers.  If one of the modes is a
     4-byte mode, and the other is not, we have to mark them as not
     tieable because only the lower 32 floating point register are
     addressable 32-bits at a time.

     We can't just test explicitly for SFmode, otherwise we won't
     cover the vector mode cases properly.  */

  if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
    return true;

  size1 = GET_MODE_SIZE (mode1);
  size2 = GET_MODE_SIZE (mode2);
  if ((size1 > 4 && size2 == 4)
      || (size2 > 4 && size1 == 4))
    return false;

  return true;
}

/* Implement TARGET_CSTORE_MODE.  */

static scalar_int_mode
sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
{
  /* Store-flag results are word-sized: DImode on 64-bit, else SImode.  */
  return (TARGET_ARCH64 ? DImode : SImode);
}

/* Return the compound expression made of T1 and T2.  */

static inline tree
compound_expr (tree t1, tree t2)
{
  return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
}

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  /* Nothing to do without an FPU (and hence no FSR).  */
  if (!TARGET_FPU)
    return;

  /* FSR fields, per the shift amounts: accrued-exception (aexc) bits
     start at bit 5, trap-enable (TEM) bits at bit 23, 5 bits each.  */
  const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
  const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;

  /* We generate the equivalent of feholdexcept (&fenv_var):

       unsigned int fenv_var;
       __builtin_store_fsr (&fenv_var);

       unsigned int tmp1_var;
       tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);

       __builtin_load_fsr (&tmp1_var);  */

  tree fenv_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (fenv_var) = 1;
  tree fenv_addr = build_fold_addr_expr (fenv_var);
  tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
  tree hold_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
	      build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);

  tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp1_var) = 1;
  tree masked_fenv_var
    = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
	      build_int_cst (unsigned_type_node,
			     ~(accrued_exception_mask | trap_enable_mask)));
  tree hold_mask
    = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
	      NULL_TREE, NULL_TREE);

  tree tmp1_addr = build_fold_addr_expr (tmp1_var);
  tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
  tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);

  *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);

  /* We reload the value of tmp1_var to clear the exceptions:

       __builtin_load_fsr (&tmp1_var);  */

  *clear = build_call_expr (ldfsr, 1, tmp1_addr);

  /* We generate the equivalent of feupdateenv (&fenv_var):

       unsigned int tmp2_var;
       __builtin_store_fsr (&tmp2_var);

       __builtin_load_fsr (&fenv_var);

       if (SPARC_LOW_FE_EXCEPT_VALUES)
	 tmp2_var >>= 5;
       __atomic_feraiseexcept ((int) tmp2_var);  */

  tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp2_var) = 1;
  tree tmp2_addr = build_fold_addr_expr (tmp2_var);
  tree update_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
	      build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);

  tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);

  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree update_call
    = build_call_expr (atomic_feraiseexcept, 1,
		       fold_convert (integer_type_node, tmp2_var));

  /* When FE_* values are at the low bits, shift the accrued field down
     before raising; the shift is prepended to the call.  */
  if (SPARC_LOW_FE_EXCEPT_VALUES)
    {
      tree shifted_tmp2_var
	= build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
		  build_int_cst (unsigned_type_node, 5));
      tree update_shift
	= build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
      update_call = compound_expr (update_shift, update_call);
    }

  *update
    = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
}
13931 13932/* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port. 13933 13934 SImode loads to floating-point registers are not zero-extended. 13935 The definition for LOAD_EXTEND_OP specifies that integer loads 13936 narrower than BITS_PER_WORD will be zero-extended. As a result, 13937 we inhibit changes from SImode unless they are to a mode that is 13938 identical in size. 13939 13940 Likewise for SFmode, since word-mode paradoxical subregs are 13941 problematic on big-endian architectures. */ 13942 13943static bool 13944sparc_can_change_mode_class (machine_mode from, machine_mode to, 13945 reg_class_t rclass) 13946{ 13947 if (TARGET_ARCH64 13948 && GET_MODE_SIZE (from) == 4 13949 && GET_MODE_SIZE (to) != 4) 13950 return !reg_classes_intersect_p (rclass, FP_REGS); 13951 return true; 13952} 13953 13954/* Implement TARGET_CONSTANT_ALIGNMENT. */ 13955 13956static HOST_WIDE_INT 13957sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align) 13958{ 13959 if (TREE_CODE (exp) == STRING_CST) 13960 return MAX (align, FASTEST_ALIGNMENT); 13961 return align; 13962} 13963 13964#include "gt-sparc.h" 13965