i386.c revision 218895
1/* Subroutines used for code generation on IA-32. 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify 8it under the terms of the GNU General Public License as published by 9the Free Software Foundation; either version 2, or (at your option) 10any later version. 11 12GCC is distributed in the hope that it will be useful, 13but WITHOUT ANY WARRANTY; without even the implied warranty of 14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15GNU General Public License for more details. 16 17You should have received a copy of the GNU General Public License 18along with GCC; see the file COPYING. If not, write to 19the Free Software Foundation, 51 Franklin Street, Fifth Floor, 20Boston, MA 02110-1301, USA. */ 21 22/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 218895 2011-02-20 22:25:23Z mm $ */ 23 24#include "config.h" 25#include "system.h" 26#include "coretypes.h" 27#include "tm.h" 28#include "rtl.h" 29#include "tree.h" 30#include "tm_p.h" 31#include "regs.h" 32#include "hard-reg-set.h" 33#include "real.h" 34#include "insn-config.h" 35#include "conditions.h" 36#include "output.h" 37#include "insn-codes.h" 38#include "insn-attr.h" 39#include "flags.h" 40#include "except.h" 41#include "function.h" 42#include "recog.h" 43#include "expr.h" 44#include "optabs.h" 45#include "toplev.h" 46#include "basic-block.h" 47#include "ggc.h" 48#include "target.h" 49#include "target-def.h" 50#include "langhooks.h" 51#include "cgraph.h" 52#include "tree-gimple.h" 53#include "dwarf2.h" 54#include "tm-constrs.h" 55 56#ifndef CHECK_STACK_LIMIT 57#define CHECK_STACK_LIMIT (-1) 58#endif 59 60/* Return index of given mode in mult and division cost tables. */ 61#define MODE_INDEX(mode) \ 62 ((mode) == QImode ? 0 \ 63 : (mode) == HImode ? 1 \ 64 : (mode) == SImode ? 2 \ 65 : (mode) == DImode ? 
3 \ 66 : 4) 67 68/* Processor costs (relative to an add) */ 69/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */ 70#define COSTS_N_BYTES(N) ((N) * 2) 71 72static const 73struct processor_costs size_cost = { /* costs for tuning for size */ 74 COSTS_N_BYTES (2), /* cost of an add instruction */ 75 COSTS_N_BYTES (3), /* cost of a lea instruction */ 76 COSTS_N_BYTES (2), /* variable shift costs */ 77 COSTS_N_BYTES (3), /* constant shift costs */ 78 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */ 79 COSTS_N_BYTES (3), /* HI */ 80 COSTS_N_BYTES (3), /* SI */ 81 COSTS_N_BYTES (3), /* DI */ 82 COSTS_N_BYTES (5)}, /* other */ 83 0, /* cost of multiply per each bit set */ 84 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */ 85 COSTS_N_BYTES (3), /* HI */ 86 COSTS_N_BYTES (3), /* SI */ 87 COSTS_N_BYTES (3), /* DI */ 88 COSTS_N_BYTES (5)}, /* other */ 89 COSTS_N_BYTES (3), /* cost of movsx */ 90 COSTS_N_BYTES (3), /* cost of movzx */ 91 0, /* "large" insn */ 92 2, /* MOVE_RATIO */ 93 2, /* cost for loading QImode using movzbl */ 94 {2, 2, 2}, /* cost of loading integer registers 95 in QImode, HImode and SImode. 96 Relative to reg-reg move (2). 
*/ 97 {2, 2, 2}, /* cost of storing integer registers */ 98 2, /* cost of reg,reg fld/fst */ 99 {2, 2, 2}, /* cost of loading fp registers 100 in SFmode, DFmode and XFmode */ 101 {2, 2, 2}, /* cost of storing fp registers 102 in SFmode, DFmode and XFmode */ 103 3, /* cost of moving MMX register */ 104 {3, 3}, /* cost of loading MMX registers 105 in SImode and DImode */ 106 {3, 3}, /* cost of storing MMX registers 107 in SImode and DImode */ 108 3, /* cost of moving SSE register */ 109 {3, 3, 3}, /* cost of loading SSE registers 110 in SImode, DImode and TImode */ 111 {3, 3, 3}, /* cost of storing SSE registers 112 in SImode, DImode and TImode */ 113 3, /* MMX or SSE register to integer */ 114 0, /* size of prefetch block */ 115 0, /* number of parallel prefetches */ 116 2, /* Branch cost */ 117 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */ 118 COSTS_N_BYTES (2), /* cost of FMUL instruction. */ 119 COSTS_N_BYTES (2), /* cost of FDIV instruction. */ 120 COSTS_N_BYTES (2), /* cost of FABS instruction. */ 121 COSTS_N_BYTES (2), /* cost of FCHS instruction. */ 122 COSTS_N_BYTES (2), /* cost of FSQRT instruction. 
*/ 123}; 124 125/* Processor costs (relative to an add) */ 126static const 127struct processor_costs i386_cost = { /* 386 specific costs */ 128 COSTS_N_INSNS (1), /* cost of an add instruction */ 129 COSTS_N_INSNS (1), /* cost of a lea instruction */ 130 COSTS_N_INSNS (3), /* variable shift costs */ 131 COSTS_N_INSNS (2), /* constant shift costs */ 132 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */ 133 COSTS_N_INSNS (6), /* HI */ 134 COSTS_N_INSNS (6), /* SI */ 135 COSTS_N_INSNS (6), /* DI */ 136 COSTS_N_INSNS (6)}, /* other */ 137 COSTS_N_INSNS (1), /* cost of multiply per each bit set */ 138 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */ 139 COSTS_N_INSNS (23), /* HI */ 140 COSTS_N_INSNS (23), /* SI */ 141 COSTS_N_INSNS (23), /* DI */ 142 COSTS_N_INSNS (23)}, /* other */ 143 COSTS_N_INSNS (3), /* cost of movsx */ 144 COSTS_N_INSNS (2), /* cost of movzx */ 145 15, /* "large" insn */ 146 3, /* MOVE_RATIO */ 147 4, /* cost for loading QImode using movzbl */ 148 {2, 4, 2}, /* cost of loading integer registers 149 in QImode, HImode and SImode. 150 Relative to reg-reg move (2). */ 151 {2, 4, 2}, /* cost of storing integer registers */ 152 2, /* cost of reg,reg fld/fst */ 153 {8, 8, 8}, /* cost of loading fp registers 154 in SFmode, DFmode and XFmode */ 155 {8, 8, 8}, /* cost of storing fp registers 156 in SFmode, DFmode and XFmode */ 157 2, /* cost of moving MMX register */ 158 {4, 8}, /* cost of loading MMX registers 159 in SImode and DImode */ 160 {4, 8}, /* cost of storing MMX registers 161 in SImode and DImode */ 162 2, /* cost of moving SSE register */ 163 {4, 8, 16}, /* cost of loading SSE registers 164 in SImode, DImode and TImode */ 165 {4, 8, 16}, /* cost of storing SSE registers 166 in SImode, DImode and TImode */ 167 3, /* MMX or SSE register to integer */ 168 0, /* size of prefetch block */ 169 0, /* number of parallel prefetches */ 170 1, /* Branch cost */ 171 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. 
*/ 172 COSTS_N_INSNS (27), /* cost of FMUL instruction. */ 173 COSTS_N_INSNS (88), /* cost of FDIV instruction. */ 174 COSTS_N_INSNS (22), /* cost of FABS instruction. */ 175 COSTS_N_INSNS (24), /* cost of FCHS instruction. */ 176 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */ 177}; 178 179static const 180struct processor_costs i486_cost = { /* 486 specific costs */ 181 COSTS_N_INSNS (1), /* cost of an add instruction */ 182 COSTS_N_INSNS (1), /* cost of a lea instruction */ 183 COSTS_N_INSNS (3), /* variable shift costs */ 184 COSTS_N_INSNS (2), /* constant shift costs */ 185 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */ 186 COSTS_N_INSNS (12), /* HI */ 187 COSTS_N_INSNS (12), /* SI */ 188 COSTS_N_INSNS (12), /* DI */ 189 COSTS_N_INSNS (12)}, /* other */ 190 1, /* cost of multiply per each bit set */ 191 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */ 192 COSTS_N_INSNS (40), /* HI */ 193 COSTS_N_INSNS (40), /* SI */ 194 COSTS_N_INSNS (40), /* DI */ 195 COSTS_N_INSNS (40)}, /* other */ 196 COSTS_N_INSNS (3), /* cost of movsx */ 197 COSTS_N_INSNS (2), /* cost of movzx */ 198 15, /* "large" insn */ 199 3, /* MOVE_RATIO */ 200 4, /* cost for loading QImode using movzbl */ 201 {2, 4, 2}, /* cost of loading integer registers 202 in QImode, HImode and SImode. 203 Relative to reg-reg move (2). 
*/ 204 {2, 4, 2}, /* cost of storing integer registers */ 205 2, /* cost of reg,reg fld/fst */ 206 {8, 8, 8}, /* cost of loading fp registers 207 in SFmode, DFmode and XFmode */ 208 {8, 8, 8}, /* cost of storing fp registers 209 in SFmode, DFmode and XFmode */ 210 2, /* cost of moving MMX register */ 211 {4, 8}, /* cost of loading MMX registers 212 in SImode and DImode */ 213 {4, 8}, /* cost of storing MMX registers 214 in SImode and DImode */ 215 2, /* cost of moving SSE register */ 216 {4, 8, 16}, /* cost of loading SSE registers 217 in SImode, DImode and TImode */ 218 {4, 8, 16}, /* cost of storing SSE registers 219 in SImode, DImode and TImode */ 220 3, /* MMX or SSE register to integer */ 221 0, /* size of prefetch block */ 222 0, /* number of parallel prefetches */ 223 1, /* Branch cost */ 224 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ 225 COSTS_N_INSNS (16), /* cost of FMUL instruction. */ 226 COSTS_N_INSNS (73), /* cost of FDIV instruction. */ 227 COSTS_N_INSNS (3), /* cost of FABS instruction. */ 228 COSTS_N_INSNS (3), /* cost of FCHS instruction. */ 229 COSTS_N_INSNS (83), /* cost of FSQRT instruction. 
*/ 230}; 231 232static const 233struct processor_costs pentium_cost = { 234 COSTS_N_INSNS (1), /* cost of an add instruction */ 235 COSTS_N_INSNS (1), /* cost of a lea instruction */ 236 COSTS_N_INSNS (4), /* variable shift costs */ 237 COSTS_N_INSNS (1), /* constant shift costs */ 238 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */ 239 COSTS_N_INSNS (11), /* HI */ 240 COSTS_N_INSNS (11), /* SI */ 241 COSTS_N_INSNS (11), /* DI */ 242 COSTS_N_INSNS (11)}, /* other */ 243 0, /* cost of multiply per each bit set */ 244 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */ 245 COSTS_N_INSNS (25), /* HI */ 246 COSTS_N_INSNS (25), /* SI */ 247 COSTS_N_INSNS (25), /* DI */ 248 COSTS_N_INSNS (25)}, /* other */ 249 COSTS_N_INSNS (3), /* cost of movsx */ 250 COSTS_N_INSNS (2), /* cost of movzx */ 251 8, /* "large" insn */ 252 6, /* MOVE_RATIO */ 253 6, /* cost for loading QImode using movzbl */ 254 {2, 4, 2}, /* cost of loading integer registers 255 in QImode, HImode and SImode. 256 Relative to reg-reg move (2). */ 257 {2, 4, 2}, /* cost of storing integer registers */ 258 2, /* cost of reg,reg fld/fst */ 259 {2, 2, 6}, /* cost of loading fp registers 260 in SFmode, DFmode and XFmode */ 261 {4, 4, 6}, /* cost of storing fp registers 262 in SFmode, DFmode and XFmode */ 263 8, /* cost of moving MMX register */ 264 {8, 8}, /* cost of loading MMX registers 265 in SImode and DImode */ 266 {8, 8}, /* cost of storing MMX registers 267 in SImode and DImode */ 268 2, /* cost of moving SSE register */ 269 {4, 8, 16}, /* cost of loading SSE registers 270 in SImode, DImode and TImode */ 271 {4, 8, 16}, /* cost of storing SSE registers 272 in SImode, DImode and TImode */ 273 3, /* MMX or SSE register to integer */ 274 0, /* size of prefetch block */ 275 0, /* number of parallel prefetches */ 276 2, /* Branch cost */ 277 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ 278 COSTS_N_INSNS (3), /* cost of FMUL instruction. 
*/ 279 COSTS_N_INSNS (39), /* cost of FDIV instruction. */ 280 COSTS_N_INSNS (1), /* cost of FABS instruction. */ 281 COSTS_N_INSNS (1), /* cost of FCHS instruction. */ 282 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ 283}; 284 285static const 286struct processor_costs pentiumpro_cost = { 287 COSTS_N_INSNS (1), /* cost of an add instruction */ 288 COSTS_N_INSNS (1), /* cost of a lea instruction */ 289 COSTS_N_INSNS (1), /* variable shift costs */ 290 COSTS_N_INSNS (1), /* constant shift costs */ 291 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ 292 COSTS_N_INSNS (4), /* HI */ 293 COSTS_N_INSNS (4), /* SI */ 294 COSTS_N_INSNS (4), /* DI */ 295 COSTS_N_INSNS (4)}, /* other */ 296 0, /* cost of multiply per each bit set */ 297 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */ 298 COSTS_N_INSNS (17), /* HI */ 299 COSTS_N_INSNS (17), /* SI */ 300 COSTS_N_INSNS (17), /* DI */ 301 COSTS_N_INSNS (17)}, /* other */ 302 COSTS_N_INSNS (1), /* cost of movsx */ 303 COSTS_N_INSNS (1), /* cost of movzx */ 304 8, /* "large" insn */ 305 6, /* MOVE_RATIO */ 306 2, /* cost for loading QImode using movzbl */ 307 {4, 4, 4}, /* cost of loading integer registers 308 in QImode, HImode and SImode. 309 Relative to reg-reg move (2). 
*/ 310 {2, 2, 2}, /* cost of storing integer registers */ 311 2, /* cost of reg,reg fld/fst */ 312 {2, 2, 6}, /* cost of loading fp registers 313 in SFmode, DFmode and XFmode */ 314 {4, 4, 6}, /* cost of storing fp registers 315 in SFmode, DFmode and XFmode */ 316 2, /* cost of moving MMX register */ 317 {2, 2}, /* cost of loading MMX registers 318 in SImode and DImode */ 319 {2, 2}, /* cost of storing MMX registers 320 in SImode and DImode */ 321 2, /* cost of moving SSE register */ 322 {2, 2, 8}, /* cost of loading SSE registers 323 in SImode, DImode and TImode */ 324 {2, 2, 8}, /* cost of storing SSE registers 325 in SImode, DImode and TImode */ 326 3, /* MMX or SSE register to integer */ 327 32, /* size of prefetch block */ 328 6, /* number of parallel prefetches */ 329 2, /* Branch cost */ 330 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ 331 COSTS_N_INSNS (5), /* cost of FMUL instruction. */ 332 COSTS_N_INSNS (56), /* cost of FDIV instruction. */ 333 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 334 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 335 COSTS_N_INSNS (56), /* cost of FSQRT instruction. 
*/ 336}; 337 338static const 339struct processor_costs k6_cost = { 340 COSTS_N_INSNS (1), /* cost of an add instruction */ 341 COSTS_N_INSNS (2), /* cost of a lea instruction */ 342 COSTS_N_INSNS (1), /* variable shift costs */ 343 COSTS_N_INSNS (1), /* constant shift costs */ 344 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 345 COSTS_N_INSNS (3), /* HI */ 346 COSTS_N_INSNS (3), /* SI */ 347 COSTS_N_INSNS (3), /* DI */ 348 COSTS_N_INSNS (3)}, /* other */ 349 0, /* cost of multiply per each bit set */ 350 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 351 COSTS_N_INSNS (18), /* HI */ 352 COSTS_N_INSNS (18), /* SI */ 353 COSTS_N_INSNS (18), /* DI */ 354 COSTS_N_INSNS (18)}, /* other */ 355 COSTS_N_INSNS (2), /* cost of movsx */ 356 COSTS_N_INSNS (2), /* cost of movzx */ 357 8, /* "large" insn */ 358 4, /* MOVE_RATIO */ 359 3, /* cost for loading QImode using movzbl */ 360 {4, 5, 4}, /* cost of loading integer registers 361 in QImode, HImode and SImode. 362 Relative to reg-reg move (2). */ 363 {2, 3, 2}, /* cost of storing integer registers */ 364 4, /* cost of reg,reg fld/fst */ 365 {6, 6, 6}, /* cost of loading fp registers 366 in SFmode, DFmode and XFmode */ 367 {4, 4, 4}, /* cost of storing fp registers 368 in SFmode, DFmode and XFmode */ 369 2, /* cost of moving MMX register */ 370 {2, 2}, /* cost of loading MMX registers 371 in SImode and DImode */ 372 {2, 2}, /* cost of storing MMX registers 373 in SImode and DImode */ 374 2, /* cost of moving SSE register */ 375 {2, 2, 8}, /* cost of loading SSE registers 376 in SImode, DImode and TImode */ 377 {2, 2, 8}, /* cost of storing SSE registers 378 in SImode, DImode and TImode */ 379 6, /* MMX or SSE register to integer */ 380 32, /* size of prefetch block */ 381 1, /* number of parallel prefetches */ 382 1, /* Branch cost */ 383 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */ 384 COSTS_N_INSNS (2), /* cost of FMUL instruction. */ 385 COSTS_N_INSNS (56), /* cost of FDIV instruction. 
*/ 386 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 387 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 388 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ 389}; 390 391static const 392struct processor_costs athlon_cost = { 393 COSTS_N_INSNS (1), /* cost of an add instruction */ 394 COSTS_N_INSNS (2), /* cost of a lea instruction */ 395 COSTS_N_INSNS (1), /* variable shift costs */ 396 COSTS_N_INSNS (1), /* constant shift costs */ 397 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */ 398 COSTS_N_INSNS (5), /* HI */ 399 COSTS_N_INSNS (5), /* SI */ 400 COSTS_N_INSNS (5), /* DI */ 401 COSTS_N_INSNS (5)}, /* other */ 402 0, /* cost of multiply per each bit set */ 403 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 404 COSTS_N_INSNS (26), /* HI */ 405 COSTS_N_INSNS (42), /* SI */ 406 COSTS_N_INSNS (74), /* DI */ 407 COSTS_N_INSNS (74)}, /* other */ 408 COSTS_N_INSNS (1), /* cost of movsx */ 409 COSTS_N_INSNS (1), /* cost of movzx */ 410 8, /* "large" insn */ 411 9, /* MOVE_RATIO */ 412 4, /* cost for loading QImode using movzbl */ 413 {3, 4, 3}, /* cost of loading integer registers 414 in QImode, HImode and SImode. 415 Relative to reg-reg move (2). 
*/ 416 {3, 4, 3}, /* cost of storing integer registers */ 417 4, /* cost of reg,reg fld/fst */ 418 {4, 4, 12}, /* cost of loading fp registers 419 in SFmode, DFmode and XFmode */ 420 {6, 6, 8}, /* cost of storing fp registers 421 in SFmode, DFmode and XFmode */ 422 2, /* cost of moving MMX register */ 423 {4, 4}, /* cost of loading MMX registers 424 in SImode and DImode */ 425 {4, 4}, /* cost of storing MMX registers 426 in SImode and DImode */ 427 2, /* cost of moving SSE register */ 428 {4, 4, 6}, /* cost of loading SSE registers 429 in SImode, DImode and TImode */ 430 {4, 4, 5}, /* cost of storing SSE registers 431 in SImode, DImode and TImode */ 432 5, /* MMX or SSE register to integer */ 433 64, /* size of prefetch block */ 434 6, /* number of parallel prefetches */ 435 5, /* Branch cost */ 436 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ 437 COSTS_N_INSNS (4), /* cost of FMUL instruction. */ 438 COSTS_N_INSNS (24), /* cost of FDIV instruction. */ 439 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 440 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 441 COSTS_N_INSNS (35), /* cost of FSQRT instruction. 
*/ 442}; 443 444static const 445struct processor_costs k8_cost = { 446 COSTS_N_INSNS (1), /* cost of an add instruction */ 447 COSTS_N_INSNS (2), /* cost of a lea instruction */ 448 COSTS_N_INSNS (1), /* variable shift costs */ 449 COSTS_N_INSNS (1), /* constant shift costs */ 450 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 451 COSTS_N_INSNS (4), /* HI */ 452 COSTS_N_INSNS (3), /* SI */ 453 COSTS_N_INSNS (4), /* DI */ 454 COSTS_N_INSNS (5)}, /* other */ 455 0, /* cost of multiply per each bit set */ 456 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 457 COSTS_N_INSNS (26), /* HI */ 458 COSTS_N_INSNS (42), /* SI */ 459 COSTS_N_INSNS (74), /* DI */ 460 COSTS_N_INSNS (74)}, /* other */ 461 COSTS_N_INSNS (1), /* cost of movsx */ 462 COSTS_N_INSNS (1), /* cost of movzx */ 463 8, /* "large" insn */ 464 9, /* MOVE_RATIO */ 465 4, /* cost for loading QImode using movzbl */ 466 {3, 4, 3}, /* cost of loading integer registers 467 in QImode, HImode and SImode. 468 Relative to reg-reg move (2). */ 469 {3, 4, 3}, /* cost of storing integer registers */ 470 4, /* cost of reg,reg fld/fst */ 471 {4, 4, 12}, /* cost of loading fp registers 472 in SFmode, DFmode and XFmode */ 473 {6, 6, 8}, /* cost of storing fp registers 474 in SFmode, DFmode and XFmode */ 475 2, /* cost of moving MMX register */ 476 {3, 3}, /* cost of loading MMX registers 477 in SImode and DImode */ 478 {4, 4}, /* cost of storing MMX registers 479 in SImode and DImode */ 480 2, /* cost of moving SSE register */ 481 {4, 3, 6}, /* cost of loading SSE registers 482 in SImode, DImode and TImode */ 483 {4, 4, 5}, /* cost of storing SSE registers 484 in SImode, DImode and TImode */ 485 5, /* MMX or SSE register to integer */ 486 64, /* size of prefetch block */ 487 6, /* number of parallel prefetches */ 488 5, /* Branch cost */ 489 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ 490 COSTS_N_INSNS (4), /* cost of FMUL instruction. */ 491 COSTS_N_INSNS (19), /* cost of FDIV instruction. 
*/ 492 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 493 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 494 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ 495}; 496 497static const 498struct processor_costs pentium4_cost = { 499 COSTS_N_INSNS (1), /* cost of an add instruction */ 500 COSTS_N_INSNS (3), /* cost of a lea instruction */ 501 COSTS_N_INSNS (4), /* variable shift costs */ 502 COSTS_N_INSNS (4), /* constant shift costs */ 503 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */ 504 COSTS_N_INSNS (15), /* HI */ 505 COSTS_N_INSNS (15), /* SI */ 506 COSTS_N_INSNS (15), /* DI */ 507 COSTS_N_INSNS (15)}, /* other */ 508 0, /* cost of multiply per each bit set */ 509 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */ 510 COSTS_N_INSNS (56), /* HI */ 511 COSTS_N_INSNS (56), /* SI */ 512 COSTS_N_INSNS (56), /* DI */ 513 COSTS_N_INSNS (56)}, /* other */ 514 COSTS_N_INSNS (1), /* cost of movsx */ 515 COSTS_N_INSNS (1), /* cost of movzx */ 516 16, /* "large" insn */ 517 6, /* MOVE_RATIO */ 518 2, /* cost for loading QImode using movzbl */ 519 {4, 5, 4}, /* cost of loading integer registers 520 in QImode, HImode and SImode. 521 Relative to reg-reg move (2). 
*/ 522 {2, 3, 2}, /* cost of storing integer registers */ 523 2, /* cost of reg,reg fld/fst */ 524 {2, 2, 6}, /* cost of loading fp registers 525 in SFmode, DFmode and XFmode */ 526 {4, 4, 6}, /* cost of storing fp registers 527 in SFmode, DFmode and XFmode */ 528 2, /* cost of moving MMX register */ 529 {2, 2}, /* cost of loading MMX registers 530 in SImode and DImode */ 531 {2, 2}, /* cost of storing MMX registers 532 in SImode and DImode */ 533 12, /* cost of moving SSE register */ 534 {12, 12, 12}, /* cost of loading SSE registers 535 in SImode, DImode and TImode */ 536 {2, 2, 8}, /* cost of storing SSE registers 537 in SImode, DImode and TImode */ 538 10, /* MMX or SSE register to integer */ 539 64, /* size of prefetch block */ 540 6, /* number of parallel prefetches */ 541 2, /* Branch cost */ 542 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ 543 COSTS_N_INSNS (7), /* cost of FMUL instruction. */ 544 COSTS_N_INSNS (43), /* cost of FDIV instruction. */ 545 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 546 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 547 COSTS_N_INSNS (43), /* cost of FSQRT instruction. 
*/ 548}; 549 550static const 551struct processor_costs nocona_cost = { 552 COSTS_N_INSNS (1), /* cost of an add instruction */ 553 COSTS_N_INSNS (1), /* cost of a lea instruction */ 554 COSTS_N_INSNS (1), /* variable shift costs */ 555 COSTS_N_INSNS (1), /* constant shift costs */ 556 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */ 557 COSTS_N_INSNS (10), /* HI */ 558 COSTS_N_INSNS (10), /* SI */ 559 COSTS_N_INSNS (10), /* DI */ 560 COSTS_N_INSNS (10)}, /* other */ 561 0, /* cost of multiply per each bit set */ 562 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */ 563 COSTS_N_INSNS (66), /* HI */ 564 COSTS_N_INSNS (66), /* SI */ 565 COSTS_N_INSNS (66), /* DI */ 566 COSTS_N_INSNS (66)}, /* other */ 567 COSTS_N_INSNS (1), /* cost of movsx */ 568 COSTS_N_INSNS (1), /* cost of movzx */ 569 16, /* "large" insn */ 570 17, /* MOVE_RATIO */ 571 4, /* cost for loading QImode using movzbl */ 572 {4, 4, 4}, /* cost of loading integer registers 573 in QImode, HImode and SImode. 574 Relative to reg-reg move (2). */ 575 {4, 4, 4}, /* cost of storing integer registers */ 576 3, /* cost of reg,reg fld/fst */ 577 {12, 12, 12}, /* cost of loading fp registers 578 in SFmode, DFmode and XFmode */ 579 {4, 4, 4}, /* cost of storing fp registers 580 in SFmode, DFmode and XFmode */ 581 6, /* cost of moving MMX register */ 582 {12, 12}, /* cost of loading MMX registers 583 in SImode and DImode */ 584 {12, 12}, /* cost of storing MMX registers 585 in SImode and DImode */ 586 6, /* cost of moving SSE register */ 587 {12, 12, 12}, /* cost of loading SSE registers 588 in SImode, DImode and TImode */ 589 {12, 12, 12}, /* cost of storing SSE registers 590 in SImode, DImode and TImode */ 591 8, /* MMX or SSE register to integer */ 592 128, /* size of prefetch block */ 593 8, /* number of parallel prefetches */ 594 1, /* Branch cost */ 595 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ 596 COSTS_N_INSNS (8), /* cost of FMUL instruction. 
*/ 597 COSTS_N_INSNS (40), /* cost of FDIV instruction. */ 598 COSTS_N_INSNS (3), /* cost of FABS instruction. */ 599 COSTS_N_INSNS (3), /* cost of FCHS instruction. */ 600 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */ 601}; 602 603/* Generic64 should produce code tuned for Nocona and K8. */ 604static const 605struct processor_costs generic64_cost = { 606 COSTS_N_INSNS (1), /* cost of an add instruction */ 607 /* On all chips taken into consideration lea is 2 cycles and more. With 608 this cost however our current implementation of synth_mult results in 609 use of unnecessary temporary registers causing regression on several 610 SPECfp benchmarks. */ 611 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ 612 COSTS_N_INSNS (1), /* variable shift costs */ 613 COSTS_N_INSNS (1), /* constant shift costs */ 614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 615 COSTS_N_INSNS (4), /* HI */ 616 COSTS_N_INSNS (3), /* SI */ 617 COSTS_N_INSNS (4), /* DI */ 618 COSTS_N_INSNS (2)}, /* other */ 619 0, /* cost of multiply per each bit set */ 620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 621 COSTS_N_INSNS (26), /* HI */ 622 COSTS_N_INSNS (42), /* SI */ 623 COSTS_N_INSNS (74), /* DI */ 624 COSTS_N_INSNS (74)}, /* other */ 625 COSTS_N_INSNS (1), /* cost of movsx */ 626 COSTS_N_INSNS (1), /* cost of movzx */ 627 8, /* "large" insn */ 628 17, /* MOVE_RATIO */ 629 4, /* cost for loading QImode using movzbl */ 630 {4, 4, 4}, /* cost of loading integer registers 631 in QImode, HImode and SImode. 632 Relative to reg-reg move (2). 
*/ 633 {4, 4, 4}, /* cost of storing integer registers */ 634 4, /* cost of reg,reg fld/fst */ 635 {12, 12, 12}, /* cost of loading fp registers 636 in SFmode, DFmode and XFmode */ 637 {6, 6, 8}, /* cost of storing fp registers 638 in SFmode, DFmode and XFmode */ 639 2, /* cost of moving MMX register */ 640 {8, 8}, /* cost of loading MMX registers 641 in SImode and DImode */ 642 {8, 8}, /* cost of storing MMX registers 643 in SImode and DImode */ 644 2, /* cost of moving SSE register */ 645 {8, 8, 8}, /* cost of loading SSE registers 646 in SImode, DImode and TImode */ 647 {8, 8, 8}, /* cost of storing SSE registers 648 in SImode, DImode and TImode */ 649 5, /* MMX or SSE register to integer */ 650 64, /* size of prefetch block */ 651 6, /* number of parallel prefetches */ 652 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value 653 is increased to perhaps more appropriate value of 5. */ 654 3, /* Branch cost */ 655 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ 656 COSTS_N_INSNS (8), /* cost of FMUL instruction. */ 657 COSTS_N_INSNS (20), /* cost of FDIV instruction. */ 658 COSTS_N_INSNS (8), /* cost of FABS instruction. */ 659 COSTS_N_INSNS (8), /* cost of FCHS instruction. */ 660 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ 661}; 662 663/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. 
*/ 664static const 665struct processor_costs generic32_cost = { 666 COSTS_N_INSNS (1), /* cost of an add instruction */ 667 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ 668 COSTS_N_INSNS (1), /* variable shift costs */ 669 COSTS_N_INSNS (1), /* constant shift costs */ 670 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 671 COSTS_N_INSNS (4), /* HI */ 672 COSTS_N_INSNS (3), /* SI */ 673 COSTS_N_INSNS (4), /* DI */ 674 COSTS_N_INSNS (2)}, /* other */ 675 0, /* cost of multiply per each bit set */ 676 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 677 COSTS_N_INSNS (26), /* HI */ 678 COSTS_N_INSNS (42), /* SI */ 679 COSTS_N_INSNS (74), /* DI */ 680 COSTS_N_INSNS (74)}, /* other */ 681 COSTS_N_INSNS (1), /* cost of movsx */ 682 COSTS_N_INSNS (1), /* cost of movzx */ 683 8, /* "large" insn */ 684 17, /* MOVE_RATIO */ 685 4, /* cost for loading QImode using movzbl */ 686 {4, 4, 4}, /* cost of loading integer registers 687 in QImode, HImode and SImode. 688 Relative to reg-reg move (2). */ 689 {4, 4, 4}, /* cost of storing integer registers */ 690 4, /* cost of reg,reg fld/fst */ 691 {12, 12, 12}, /* cost of loading fp registers 692 in SFmode, DFmode and XFmode */ 693 {6, 6, 8}, /* cost of storing fp registers 694 in SFmode, DFmode and XFmode */ 695 2, /* cost of moving MMX register */ 696 {8, 8}, /* cost of loading MMX registers 697 in SImode and DImode */ 698 {8, 8}, /* cost of storing MMX registers 699 in SImode and DImode */ 700 2, /* cost of moving SSE register */ 701 {8, 8, 8}, /* cost of loading SSE registers 702 in SImode, DImode and TImode */ 703 {8, 8, 8}, /* cost of storing SSE registers 704 in SImode, DImode and TImode */ 705 5, /* MMX or SSE register to integer */ 706 64, /* size of prefetch block */ 707 6, /* number of parallel prefetches */ 708 3, /* Branch cost */ 709 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ 710 COSTS_N_INSNS (8), /* cost of FMUL instruction. */ 711 COSTS_N_INSNS (20), /* cost of FDIV instruction. 
*/ 712 COSTS_N_INSNS (8), /* cost of FABS instruction. */ 713 COSTS_N_INSNS (8), /* cost of FCHS instruction. */ 714 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ 715}; 716 717const struct processor_costs *ix86_cost = &pentium_cost; 718 719/* Processor feature/optimization bitmasks. */ 720#define m_386 (1<<PROCESSOR_I386) 721#define m_486 (1<<PROCESSOR_I486) 722#define m_PENT (1<<PROCESSOR_PENTIUM) 723#define m_PPRO (1<<PROCESSOR_PENTIUMPRO) 724#define m_K6 (1<<PROCESSOR_K6) 725#define m_ATHLON (1<<PROCESSOR_ATHLON) 726#define m_PENT4 (1<<PROCESSOR_PENTIUM4) 727#define m_K8 (1<<PROCESSOR_K8) 728#define m_ATHLON_K8 (m_K8 | m_ATHLON) 729#define m_NOCONA (1<<PROCESSOR_NOCONA) 730#define m_GENERIC32 (1<<PROCESSOR_GENERIC32) 731#define m_GENERIC64 (1<<PROCESSOR_GENERIC64) 732#define m_GENERIC (m_GENERIC32 | m_GENERIC64) 733 734/* Generic instruction choice should be common subset of supported CPUs 735 (PPro/PENT4/NOCONA/Athlon/K8). */ 736 737/* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for 738 Generic64 seems like good code size tradeoff. We can't enable it for 32bit 739 generic because it is not working well with PPro base chips. */ 740const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64; 741const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC; 742const int x86_zero_extend_with_and = m_486 | m_PENT; 743const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */; 744const int x86_double_with_add = ~m_386; 745const int x86_use_bit_test = m_386; 746const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC; 747const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA; 748const int x86_3dnow_a = m_ATHLON_K8; 749const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC; 750/* Branch hints were put in P4 based on simulation result. 
But 751 after P4 was made, no performance benefit was observed with 752 branch hints. It also increases the code size. As the result, 753 icc never generates branch hints. */ 754const int x86_branch_hints = 0; 755const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */ 756/* We probably ought to watch for partial register stalls on Generic32 757 compilation setting as well. However in current implementation the 758 partial register stalls are not eliminated very well - they can 759 be introduced via subregs synthesized by combine and can happen 760 in caller/callee saving sequences. 761 Because this option pays back little on PPro based chips and is in conflict 762 with partial reg. dependencies used by Athlon/P4 based chips, it is better 763 to leave it off for generic32 for now. */ 764const int x86_partial_reg_stall = m_PPRO; 765const int x86_partial_flag_reg_stall = m_GENERIC; 766const int x86_use_himode_fiop = m_386 | m_486 | m_K6; 767const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC); 768const int x86_use_mov0 = m_K6; 769const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC); 770const int x86_read_modify_write = ~m_PENT; 771const int x86_read_modify = ~(m_PENT | m_PPRO); 772const int x86_split_long_moves = m_PPRO; 773const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */ 774const int x86_fast_prefix = ~(m_PENT | m_486 | m_386); 775const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA; 776const int x86_qimode_math = ~(0); 777const int x86_promote_qi_regs = 0; 778/* On PPro this flag is meant to avoid partial register stalls. Just like 779 the x86_partial_reg_stall this option might be considered for Generic32 780 if our scheme for avoiding partial stalls was more effective. 
*/ 781const int x86_himode_math = ~(m_PPRO); 782const int x86_promote_hi_regs = m_PPRO; 783const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC; 784const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC; 785const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC; 786const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC; 787const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC); 788const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC; 789const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC; 790const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC; 791const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC; 792const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC; 793const int x86_shift1 = ~m_486; 794const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC; 795/* In Generic model we have an conflict here in between PPro/Pentium4 based chips 796 that thread 128bit SSE registers as single units versus K8 based chips that 797 divide SSE registers to two 64bit halves. 798 x86_sse_partial_reg_dependency promote all store destinations to be 128bit 799 to allow register renaming on 128bit SSE units, but usually results in one 800 extra microop on 64bit SSE units. Experimental results shows that disabling 801 this option on P4 brings over 20% SPECfp regression, while enabling it on 802 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling 803 of moves. 
*/ 804const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC; 805/* Set for machines where the type and dependencies are resolved on SSE 806 register parts instead of whole registers, so we may maintain just 807 lower part of scalar values in proper format leaving the upper part 808 undefined. */ 809const int x86_sse_split_regs = m_ATHLON_K8; 810const int x86_sse_typeless_stores = m_ATHLON_K8; 811const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA; 812const int x86_use_ffreep = m_ATHLON_K8; 813const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6; 814const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC); 815 816/* ??? Allowing interunit moves makes it all too easy for the compiler to put 817 integer data in xmm registers. Which results in pretty abysmal code. */ 818const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */; 819 820const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32; 821/* Some CPU cores are not able to predict more than 4 branch instructions in 822 the 16 byte window. */ 823const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC; 824const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC; 825const int x86_use_bt = m_ATHLON_K8; 826/* Compare and exchange was added for 80486. */ 827const int x86_cmpxchg = ~m_386; 828/* Compare and exchange 8 bytes was added for pentium. */ 829const int x86_cmpxchg8b = ~(m_386 | m_486); 830/* Compare and exchange 16 bytes was added for nocona. */ 831const int x86_cmpxchg16b = m_NOCONA; 832/* Exchange and add was added for 80486. */ 833const int x86_xadd = ~m_386; 834const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC; 835 836/* In case the average insn count for single function invocation is 837 lower than this constant, emit fast (but longer) prologue and 838 epilogue code. 
*/ 839#define FAST_PROLOGUE_INSN_COUNT 20 840 841/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */ 842static const char *const qi_reg_name[] = QI_REGISTER_NAMES; 843static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; 844static const char *const hi_reg_name[] = HI_REGISTER_NAMES; 845 846/* Array of the smallest class containing reg number REGNO, indexed by 847 REGNO. Used by REGNO_REG_CLASS in i386.h. */ 848 849enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = 850{ 851 /* ax, dx, cx, bx */ 852 AREG, DREG, CREG, BREG, 853 /* si, di, bp, sp */ 854 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS, 855 /* FP registers */ 856 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS, 857 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, 858 /* arg pointer */ 859 NON_Q_REGS, 860 /* flags, fpsr, dirflag, frame */ 861 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS, 862 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 863 SSE_REGS, SSE_REGS, 864 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, 865 MMX_REGS, MMX_REGS, 866 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 867 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 868 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 869 SSE_REGS, SSE_REGS, 870}; 871 872/* The "default" register map used in 32bit mode. 
*/ 873 874int const dbx_register_map[FIRST_PSEUDO_REGISTER] = 875{ 876 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */ 877 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */ 878 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 879 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */ 880 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */ 881 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 882 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 883}; 884 885static int const x86_64_int_parameter_registers[6] = 886{ 887 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/, 888 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */ 889}; 890 891static int const x86_64_int_return_registers[4] = 892{ 893 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/ 894}; 895 896/* The "default" register map used in 64bit mode. */ 897int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = 898{ 899 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */ 900 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */ 901 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 902 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */ 903 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */ 904 8,9,10,11,12,13,14,15, /* extended integer registers */ 905 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */ 906}; 907 908/* Define the register numbers to be used in Dwarf debugging information. 909 The SVR4 reference port C compiler uses the following register numbers 910 in its Dwarf output code: 911 0 for %eax (gcc regno = 0) 912 1 for %ecx (gcc regno = 2) 913 2 for %edx (gcc regno = 1) 914 3 for %ebx (gcc regno = 3) 915 4 for %esp (gcc regno = 7) 916 5 for %ebp (gcc regno = 6) 917 6 for %esi (gcc regno = 4) 918 7 for %edi (gcc regno = 5) 919 The following three DWARF register numbers are never generated by 920 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 921 believes these numbers have these meanings. 
922 8 for %eip (no gcc equivalent) 923 9 for %eflags (gcc regno = 17) 924 10 for %trapno (no gcc equivalent) 925 It is not at all clear how we should number the FP stack registers 926 for the x86 architecture. If the version of SDB on x86/svr4 were 927 a bit less brain dead with respect to floating-point then we would 928 have a precedent to follow with respect to DWARF register numbers 929 for x86 FP registers, but the SDB on x86/svr4 is so completely 930 broken with respect to FP registers that it is hardly worth thinking 931 of it as something to strive for compatibility with. 932 The version of x86/svr4 SDB I have at the moment does (partially) 933 seem to believe that DWARF register number 11 is associated with 934 the x86 register %st(0), but that's about all. Higher DWARF 935 register numbers don't seem to be associated with anything in 936 particular, and even for DWARF regno 11, SDB only seems to under- 937 stand that it should say that a variable lives in %st(0) (when 938 asked via an `=' command) if we said it was in DWARF regno 11, 939 but SDB still prints garbage when asked for the value of the 940 variable in question (via a `/' command). 941 (Also note that the labels SDB prints for various FP stack regs 942 when doing an `x' command are all wrong.) 943 Note that these problems generally don't affect the native SVR4 944 C compiler because it doesn't allow the use of -O with -g and 945 because when it is *not* optimizing, it allocates a memory 946 location for each floating-point variable, and the memory 947 location is what gets described in the DWARF AT_location 948 attribute for the variable in question. 949 Regardless of the severe mental illness of the x86/svr4 SDB, we 950 do something sensible here and we use the following DWARF 951 register numbers. Note that these are all stack-top-relative 952 numbers. 
953 11 for %st(0) (gcc regno = 8) 954 12 for %st(1) (gcc regno = 9) 955 13 for %st(2) (gcc regno = 10) 956 14 for %st(3) (gcc regno = 11) 957 15 for %st(4) (gcc regno = 12) 958 16 for %st(5) (gcc regno = 13) 959 17 for %st(6) (gcc regno = 14) 960 18 for %st(7) (gcc regno = 15) 961*/ 962int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = 963{ 964 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */ 965 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */ 966 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 967 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */ 968 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */ 969 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 970 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 971}; 972 973/* Test and compare insns in i386.md store the information needed to 974 generate branch and scc insns here. */ 975 976rtx ix86_compare_op0 = NULL_RTX; 977rtx ix86_compare_op1 = NULL_RTX; 978rtx ix86_compare_emitted = NULL_RTX; 979 980/* Size of the register save area. */ 981#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16) 982 983/* Define the structure for the machine field in struct function. */ 984 985struct stack_local_entry GTY(()) 986{ 987 unsigned short mode; 988 unsigned short n; 989 rtx rtl; 990 struct stack_local_entry *next; 991}; 992 993/* Structure describing stack frame layout. 994 Stack grows downward: 995 996 [arguments] 997 <- ARG_POINTER 998 saved pc 999 1000 saved frame pointer if frame_pointer_needed 1001 <- HARD_FRAME_POINTER 1002 [saved regs] 1003 1004 [padding1] \ 1005 ) 1006 [va_arg registers] ( 1007 > to_allocate <- FRAME_POINTER 1008 [frame] ( 1009 ) 1010 [padding2] / 1011 */ 1012struct ix86_frame 1013{ 1014 int nregs; 1015 int padding1; 1016 int va_arg_size; 1017 HOST_WIDE_INT frame; 1018 int padding2; 1019 int outgoing_arguments_size; 1020 int red_zone_size; 1021 1022 HOST_WIDE_INT to_allocate; 1023 /* The offsets relative to ARG_POINTER. 
*/ 1024 HOST_WIDE_INT frame_pointer_offset; 1025 HOST_WIDE_INT hard_frame_pointer_offset; 1026 HOST_WIDE_INT stack_pointer_offset; 1027 1028 /* When save_regs_using_mov is set, emit prologue using 1029 move instead of push instructions. */ 1030 bool save_regs_using_mov; 1031}; 1032 1033/* Code model option. */ 1034enum cmodel ix86_cmodel; 1035/* Asm dialect. */ 1036enum asm_dialect ix86_asm_dialect = ASM_ATT; 1037/* TLS dialects. */ 1038enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU; 1039 1040/* Which unit we are generating floating point math for. */ 1041enum fpmath_unit ix86_fpmath; 1042 1043/* Which cpu are we scheduling for. */ 1044enum processor_type ix86_tune; 1045/* Which instruction set architecture to use. */ 1046enum processor_type ix86_arch; 1047 1048/* true if sse prefetch instruction is not NOOP. */ 1049int x86_prefetch_sse; 1050 1051/* ix86_regparm_string as a number */ 1052static int ix86_regparm; 1053 1054/* -mstackrealign option */ 1055extern int ix86_force_align_arg_pointer; 1056static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer"; 1057 1058/* Preferred alignment for stack boundary in bits. */ 1059unsigned int ix86_preferred_stack_boundary; 1060 1061/* Values 1-5: see jump.c */ 1062int ix86_branch_cost; 1063 1064/* Variables which are this size or smaller are put in the data/bss 1065 or ldata/lbss sections. */ 1066 1067int ix86_section_threshold = 65536; 1068 1069/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. 
*/ 1070char internal_label_prefix[16]; 1071int internal_label_prefix_len; 1072 1073static bool ix86_handle_option (size_t, const char *, int); 1074static void output_pic_addr_const (FILE *, rtx, int); 1075static void put_condition_code (enum rtx_code, enum machine_mode, 1076 int, int, FILE *); 1077static const char *get_some_local_dynamic_name (void); 1078static int get_some_local_dynamic_name_1 (rtx *, void *); 1079static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx); 1080static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *, 1081 rtx *); 1082static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *); 1083static enum machine_mode ix86_cc_modes_compatible (enum machine_mode, 1084 enum machine_mode); 1085static rtx get_thread_pointer (int); 1086static rtx legitimize_tls_address (rtx, enum tls_model, int); 1087static void get_pc_thunk_name (char [32], unsigned int); 1088static rtx gen_push (rtx); 1089static int ix86_flags_dependent (rtx, rtx, enum attr_type); 1090static int ix86_agi_dependent (rtx, rtx, enum attr_type); 1091static struct machine_function * ix86_init_machine_status (void); 1092static int ix86_split_to_parts (rtx, rtx *, enum machine_mode); 1093static int ix86_nsaved_regs (void); 1094static void ix86_emit_save_regs (void); 1095static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT); 1096static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int); 1097static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT); 1098static HOST_WIDE_INT ix86_GOT_alias_set (void); 1099static void ix86_adjust_counter (rtx, HOST_WIDE_INT); 1100static rtx ix86_expand_aligntest (rtx, int); 1101static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx); 1102static int ix86_issue_rate (void); 1103static int ix86_adjust_cost (rtx, rtx, rtx, int); 1104static int ia32_multipass_dfa_lookahead (void); 1105static void ix86_init_mmx_sse_builtins (void); 1106static rtx x86_this_parameter (tree); 1107static void 
x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, 1108 HOST_WIDE_INT, tree); 1109static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); 1110static void x86_file_start (void); 1111static void ix86_reorg (void); 1112static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*); 1113static tree ix86_build_builtin_va_list (void); 1114static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, 1115 tree, int *, int); 1116static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *); 1117static bool ix86_scalar_mode_supported_p (enum machine_mode); 1118static bool ix86_vector_mode_supported_p (enum machine_mode); 1119 1120static int ix86_address_cost (rtx); 1121static bool ix86_cannot_force_const_mem (rtx); 1122static rtx ix86_delegitimize_address (rtx); 1123 1124static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; 1125 1126struct builtin_description; 1127static rtx ix86_expand_sse_comi (const struct builtin_description *, 1128 tree, rtx); 1129static rtx ix86_expand_sse_compare (const struct builtin_description *, 1130 tree, rtx); 1131static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx); 1132static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int); 1133static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx); 1134static rtx ix86_expand_store_builtin (enum insn_code, tree); 1135static rtx safe_vector_operand (rtx, enum machine_mode); 1136static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *); 1137static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code); 1138static int ix86_fp_comparison_fcomi_cost (enum rtx_code code); 1139static int ix86_fp_comparison_sahf_cost (enum rtx_code code); 1140static int ix86_fp_comparison_cost (enum rtx_code code); 1141static unsigned int ix86_select_alt_pic_regnum (void); 1142static int ix86_save_reg (unsigned int, int); 1143static void ix86_compute_frame_layout (struct ix86_frame *); 1144static int 
ix86_comp_type_attributes (tree, tree); 1145static int ix86_function_regparm (tree, tree); 1146const struct attribute_spec ix86_attribute_table[]; 1147static bool ix86_function_ok_for_sibcall (tree, tree); 1148static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *); 1149static int ix86_value_regno (enum machine_mode, tree, tree); 1150static bool contains_128bit_aligned_vector_p (tree); 1151static rtx ix86_struct_value_rtx (tree, int); 1152static bool ix86_ms_bitfield_layout_p (tree); 1153static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *); 1154static int extended_reg_mentioned_1 (rtx *, void *); 1155static bool ix86_rtx_costs (rtx, int, int, int *); 1156static int min_insn_size (rtx); 1157static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers); 1158static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type); 1159static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, 1160 tree, bool); 1161static void ix86_init_builtins (void); 1162static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int); 1163static const char *ix86_mangle_fundamental_type (tree); 1164static tree ix86_stack_protect_fail (void); 1165static rtx ix86_internal_arg_pointer (void); 1166static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int); 1167 1168/* This function is only used on Solaris. */ 1169static void i386_solaris_elf_named_section (const char *, unsigned int, tree) 1170 ATTRIBUTE_UNUSED; 1171 1172/* Register class used for passing given 64bit part of the argument. 1173 These represent classes as documented by the PS ABI, with the exception 1174 of SSESF, SSEDF classes, that are basically SSE class, just gcc will 1175 use SF or DFmode move instead of DImode to avoid reformatting penalties. 1176 1177 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves 1178 whenever possible (upper half does contain padding). 
1179 */ 1180enum x86_64_reg_class 1181 { 1182 X86_64_NO_CLASS, 1183 X86_64_INTEGER_CLASS, 1184 X86_64_INTEGERSI_CLASS, 1185 X86_64_SSE_CLASS, 1186 X86_64_SSESF_CLASS, 1187 X86_64_SSEDF_CLASS, 1188 X86_64_SSEUP_CLASS, 1189 X86_64_X87_CLASS, 1190 X86_64_X87UP_CLASS, 1191 X86_64_COMPLEX_X87_CLASS, 1192 X86_64_MEMORY_CLASS 1193 }; 1194static const char * const x86_64_reg_class_name[] = { 1195 "no", "integer", "integerSI", "sse", "sseSF", "sseDF", 1196 "sseup", "x87", "x87up", "cplx87", "no" 1197}; 1198 1199#define MAX_CLASSES 4 1200 1201/* Table of constants used by fldpi, fldln2, etc.... */ 1202static REAL_VALUE_TYPE ext_80387_constants_table [5]; 1203static bool ext_80387_constants_init = 0; 1204static void init_ext_80387_constants (void); 1205static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED; 1206static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED; 1207static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED; 1208static section *x86_64_elf_select_section (tree decl, int reloc, 1209 unsigned HOST_WIDE_INT align) 1210 ATTRIBUTE_UNUSED; 1211 1212/* Initialize the GCC target structure. 
*/ 1213#undef TARGET_ATTRIBUTE_TABLE 1214#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table 1215#if TARGET_DLLIMPORT_DECL_ATTRIBUTES 1216# undef TARGET_MERGE_DECL_ATTRIBUTES 1217# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes 1218#endif 1219 1220#undef TARGET_COMP_TYPE_ATTRIBUTES 1221#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes 1222 1223#undef TARGET_INIT_BUILTINS 1224#define TARGET_INIT_BUILTINS ix86_init_builtins 1225#undef TARGET_EXPAND_BUILTIN 1226#define TARGET_EXPAND_BUILTIN ix86_expand_builtin 1227 1228#undef TARGET_ASM_FUNCTION_EPILOGUE 1229#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue 1230 1231#undef TARGET_ENCODE_SECTION_INFO 1232#ifndef SUBTARGET_ENCODE_SECTION_INFO 1233#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info 1234#else 1235#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO 1236#endif 1237 1238#undef TARGET_ASM_OPEN_PAREN 1239#define TARGET_ASM_OPEN_PAREN "" 1240#undef TARGET_ASM_CLOSE_PAREN 1241#define TARGET_ASM_CLOSE_PAREN "" 1242 1243#undef TARGET_ASM_ALIGNED_HI_OP 1244#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT 1245#undef TARGET_ASM_ALIGNED_SI_OP 1246#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG 1247#ifdef ASM_QUAD 1248#undef TARGET_ASM_ALIGNED_DI_OP 1249#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD 1250#endif 1251 1252#undef TARGET_ASM_UNALIGNED_HI_OP 1253#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP 1254#undef TARGET_ASM_UNALIGNED_SI_OP 1255#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP 1256#undef TARGET_ASM_UNALIGNED_DI_OP 1257#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP 1258 1259#undef TARGET_SCHED_ADJUST_COST 1260#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost 1261#undef TARGET_SCHED_ISSUE_RATE 1262#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate 1263#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD 1264#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ 1265 ia32_multipass_dfa_lookahead 1266 1267#undef 
TARGET_FUNCTION_OK_FOR_SIBCALL 1268#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall 1269 1270#ifdef HAVE_AS_TLS 1271#undef TARGET_HAVE_TLS 1272#define TARGET_HAVE_TLS true 1273#endif 1274#undef TARGET_CANNOT_FORCE_CONST_MEM 1275#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem 1276#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P 1277#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true 1278 1279#undef TARGET_DELEGITIMIZE_ADDRESS 1280#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address 1281 1282#undef TARGET_MS_BITFIELD_LAYOUT_P 1283#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p 1284 1285#if TARGET_MACHO 1286#undef TARGET_BINDS_LOCAL_P 1287#define TARGET_BINDS_LOCAL_P darwin_binds_local_p 1288#endif 1289 1290#undef TARGET_ASM_OUTPUT_MI_THUNK 1291#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk 1292#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 1293#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk 1294 1295#undef TARGET_ASM_FILE_START 1296#define TARGET_ASM_FILE_START x86_file_start 1297 1298#undef TARGET_DEFAULT_TARGET_FLAGS 1299#define TARGET_DEFAULT_TARGET_FLAGS \ 1300 (TARGET_DEFAULT \ 1301 | TARGET_64BIT_DEFAULT \ 1302 | TARGET_SUBTARGET_DEFAULT \ 1303 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT) 1304 1305#undef TARGET_HANDLE_OPTION 1306#define TARGET_HANDLE_OPTION ix86_handle_option 1307 1308#undef TARGET_RTX_COSTS 1309#define TARGET_RTX_COSTS ix86_rtx_costs 1310#undef TARGET_ADDRESS_COST 1311#define TARGET_ADDRESS_COST ix86_address_cost 1312 1313#undef TARGET_FIXED_CONDITION_CODE_REGS 1314#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs 1315#undef TARGET_CC_MODES_COMPATIBLE 1316#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible 1317 1318#undef TARGET_MACHINE_DEPENDENT_REORG 1319#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg 1320 1321#undef TARGET_BUILD_BUILTIN_VA_LIST 1322#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list 1323 1324#undef 
TARGET_MD_ASM_CLOBBERS 1325#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers 1326 1327#undef TARGET_PROMOTE_PROTOTYPES 1328#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true 1329#undef TARGET_STRUCT_VALUE_RTX 1330#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx 1331#undef TARGET_SETUP_INCOMING_VARARGS 1332#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs 1333#undef TARGET_MUST_PASS_IN_STACK 1334#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack 1335#undef TARGET_PASS_BY_REFERENCE 1336#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference 1337#undef TARGET_INTERNAL_ARG_POINTER 1338#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer 1339#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC 1340#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec 1341 1342#undef TARGET_GIMPLIFY_VA_ARG_EXPR 1343#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg 1344 1345#undef TARGET_SCALAR_MODE_SUPPORTED_P 1346#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p 1347 1348#undef TARGET_VECTOR_MODE_SUPPORTED_P 1349#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p 1350 1351#ifdef HAVE_AS_TLS 1352#undef TARGET_ASM_OUTPUT_DWARF_DTPREL 1353#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel 1354#endif 1355 1356#ifdef SUBTARGET_INSERT_ATTRIBUTES 1357#undef TARGET_INSERT_ATTRIBUTES 1358#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES 1359#endif 1360 1361#undef TARGET_MANGLE_FUNDAMENTAL_TYPE 1362#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type 1363 1364#undef TARGET_STACK_PROTECT_FAIL 1365#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail 1366 1367#undef TARGET_FUNCTION_VALUE 1368#define TARGET_FUNCTION_VALUE ix86_function_value 1369 1370struct gcc_target targetm = TARGET_INITIALIZER; 1371 1372 1373/* The svr4 ABI for the i386 says that records and unions are returned 1374 in memory. 
*/ 1375#ifndef DEFAULT_PCC_STRUCT_RETURN 1376#define DEFAULT_PCC_STRUCT_RETURN 1 1377#endif 1378 1379/* Implement TARGET_HANDLE_OPTION. */ 1380 1381static bool 1382ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) 1383{ 1384 switch (code) 1385 { 1386 case OPT_m3dnow: 1387 if (!value) 1388 { 1389 target_flags &= ~MASK_3DNOW_A; 1390 target_flags_explicit |= MASK_3DNOW_A; 1391 } 1392 return true; 1393 1394 case OPT_mmmx: 1395 if (!value) 1396 { 1397 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A); 1398 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A; 1399 } 1400 return true; 1401 1402 case OPT_msse: 1403 if (!value) 1404 { 1405 target_flags &= ~(MASK_SSE2 | MASK_SSE3); 1406 target_flags_explicit |= MASK_SSE2 | MASK_SSE3; 1407 } 1408 return true; 1409 1410 case OPT_msse2: 1411 if (!value) 1412 { 1413 target_flags &= ~MASK_SSE3; 1414 target_flags_explicit |= MASK_SSE3; 1415 } 1416 return true; 1417 1418 default: 1419 return true; 1420 } 1421} 1422 1423/* Sometimes certain combinations of command options do not make 1424 sense on a particular target machine. You can define a macro 1425 `OVERRIDE_OPTIONS' to take account of this. This macro, if 1426 defined, is executed once just after all the command options have 1427 been parsed. 1428 1429 Don't use this macro to turn on various extra optimizations for 1430 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ 1431 1432void 1433override_options (void) 1434{ 1435 int i; 1436 int ix86_tune_defaulted = 0; 1437 1438 /* Comes from final.c -- no real reason to change it. */ 1439#define MAX_CODE_ALIGN 16 1440 1441 static struct ptt 1442 { 1443 const struct processor_costs *cost; /* Processor costs */ 1444 const int target_enable; /* Target flags to enable. */ 1445 const int target_disable; /* Target flags to disable. */ 1446 const int align_loop; /* Default alignments. 
*/ 1447 const int align_loop_max_skip; 1448 const int align_jump; 1449 const int align_jump_max_skip; 1450 const int align_func; 1451 } 1452 const processor_target_table[PROCESSOR_max] = 1453 { 1454 {&i386_cost, 0, 0, 4, 3, 4, 3, 4}, 1455 {&i486_cost, 0, 0, 16, 15, 16, 15, 16}, 1456 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16}, 1457 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16}, 1458 {&k6_cost, 0, 0, 32, 7, 32, 7, 32}, 1459 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16}, 1460 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0}, 1461 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}, 1462 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}, 1463 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16}, 1464 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16} 1465 }; 1466 1467 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES; 1468 static struct pta 1469 { 1470 const char *const name; /* processor name or nickname. */ 1471 const enum processor_type processor; 1472 const enum pta_flags 1473 { 1474 PTA_SSE = 1, 1475 PTA_SSE2 = 2, 1476 PTA_SSE3 = 4, 1477 PTA_MMX = 8, 1478 PTA_PREFETCH_SSE = 16, 1479 PTA_3DNOW = 32, 1480 PTA_3DNOW_A = 64, 1481 PTA_64BIT = 128 1482 } flags; 1483 } 1484 const processor_alias_table[] = 1485 { 1486 {"i386", PROCESSOR_I386, 0}, 1487 {"i486", PROCESSOR_I486, 0}, 1488 {"i586", PROCESSOR_PENTIUM, 0}, 1489 {"pentium", PROCESSOR_PENTIUM, 0}, 1490 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX}, 1491 {"winchip-c6", PROCESSOR_I486, PTA_MMX}, 1492 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, 1493 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, 1494 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE}, 1495 {"i686", PROCESSOR_PENTIUMPRO, 0}, 1496 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0}, 1497 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX}, 1498 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, 1499 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, 1500 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2}, 1501 {"pentium4", 
PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 1502 | PTA_MMX | PTA_PREFETCH_SSE}, 1503 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 1504 | PTA_MMX | PTA_PREFETCH_SSE}, 1505 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 1506 | PTA_MMX | PTA_PREFETCH_SSE}, 1507 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT 1508 | PTA_MMX | PTA_PREFETCH_SSE}, 1509 {"k6", PROCESSOR_K6, PTA_MMX}, 1510 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 1511 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 1512 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1513 | PTA_3DNOW_A}, 1514 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE 1515 | PTA_3DNOW | PTA_3DNOW_A}, 1516 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1517 | PTA_3DNOW_A | PTA_SSE}, 1518 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1519 | PTA_3DNOW_A | PTA_SSE}, 1520 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1521 | PTA_3DNOW_A | PTA_SSE}, 1522 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT 1523 | PTA_SSE | PTA_SSE2 }, 1524 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1525 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1526 {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1527 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 1528 | PTA_SSE3 }, 1529 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1530 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1531 {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1532 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 1533 | PTA_SSE3 }, 1534 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1535 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1536 {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1537 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 1538 | PTA_SSE3 }, 1539 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | 
PTA_3DNOW | PTA_64BIT 1540 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1541 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ }, 1542 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ }, 1543 }; 1544 1545 int const pta_size = ARRAY_SIZE (processor_alias_table); 1546 1547#ifdef SUBTARGET_OVERRIDE_OPTIONS 1548 SUBTARGET_OVERRIDE_OPTIONS; 1549#endif 1550 1551#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS 1552 SUBSUBTARGET_OVERRIDE_OPTIONS; 1553#endif 1554 1555 /* -fPIC is the default for x86_64. */ 1556 if (TARGET_MACHO && TARGET_64BIT) 1557 flag_pic = 2; 1558 1559 /* Set the default values for switches whose default depends on TARGET_64BIT 1560 in case they weren't overwritten by command line options. */ 1561 if (TARGET_64BIT) 1562 { 1563 /* Mach-O doesn't support omitting the frame pointer for now. */ 1564 if (flag_omit_frame_pointer == 2) 1565 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1); 1566 if (flag_asynchronous_unwind_tables == 2) 1567 flag_asynchronous_unwind_tables = 1; 1568 if (flag_pcc_struct_return == 2) 1569 flag_pcc_struct_return = 0; 1570 } 1571 else 1572 { 1573 if (flag_omit_frame_pointer == 2) 1574 flag_omit_frame_pointer = 0; 1575 if (flag_asynchronous_unwind_tables == 2) 1576 flag_asynchronous_unwind_tables = 0; 1577 if (flag_pcc_struct_return == 2) 1578 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; 1579 } 1580 1581 /* Need to check -mtune=generic first. */ 1582 if (ix86_tune_string) 1583 { 1584 if (!strcmp (ix86_tune_string, "generic") 1585 || !strcmp (ix86_tune_string, "i686") 1586 /* As special support for cross compilers we read -mtune=native 1587 as -mtune=generic. With native compilers we won't see the 1588 -mtune=native, as it was changed by the driver. 
*/ 1589 || !strcmp (ix86_tune_string, "native")) 1590 { 1591 if (TARGET_64BIT) 1592 ix86_tune_string = "generic64"; 1593 else 1594 ix86_tune_string = "generic32"; 1595 } 1596 else if (!strncmp (ix86_tune_string, "generic", 7)) 1597 error ("bad value (%s) for -mtune= switch", ix86_tune_string); 1598 } 1599 else 1600 { 1601 if (ix86_arch_string) 1602 ix86_tune_string = ix86_arch_string; 1603 if (!ix86_tune_string) 1604 { 1605 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT]; 1606 ix86_tune_defaulted = 1; 1607 } 1608 1609 /* ix86_tune_string is set to ix86_arch_string or defaulted. We 1610 need to use a sensible tune option. */ 1611 if (!strcmp (ix86_tune_string, "generic") 1612 || !strcmp (ix86_tune_string, "x86-64") 1613 || !strcmp (ix86_tune_string, "i686")) 1614 { 1615 if (TARGET_64BIT) 1616 ix86_tune_string = "generic64"; 1617 else 1618 ix86_tune_string = "generic32"; 1619 } 1620 } 1621 if (!strcmp (ix86_tune_string, "x86-64")) 1622 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or " 1623 "-mtune=generic instead as appropriate."); 1624 1625 if (!ix86_arch_string) 1626 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486"; 1627 if (!strcmp (ix86_arch_string, "generic")) 1628 error ("generic CPU can be used only for -mtune= switch"); 1629 if (!strncmp (ix86_arch_string, "generic", 7)) 1630 error ("bad value (%s) for -march= switch", ix86_arch_string); 1631 1632 if (ix86_cmodel_string != 0) 1633 { 1634 if (!strcmp (ix86_cmodel_string, "small")) 1635 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 1636 else if (!strcmp (ix86_cmodel_string, "medium")) 1637 ix86_cmodel = flag_pic ? 
CM_MEDIUM_PIC : CM_MEDIUM; 1638 else if (flag_pic) 1639 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string); 1640 else if (!strcmp (ix86_cmodel_string, "32")) 1641 ix86_cmodel = CM_32; 1642 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic) 1643 ix86_cmodel = CM_KERNEL; 1644 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic) 1645 ix86_cmodel = CM_LARGE; 1646 else 1647 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string); 1648 } 1649 else 1650 { 1651 ix86_cmodel = CM_32; 1652 if (TARGET_64BIT) 1653 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 1654 } 1655 if (ix86_asm_string != 0) 1656 { 1657 if (! TARGET_MACHO 1658 && !strcmp (ix86_asm_string, "intel")) 1659 ix86_asm_dialect = ASM_INTEL; 1660 else if (!strcmp (ix86_asm_string, "att")) 1661 ix86_asm_dialect = ASM_ATT; 1662 else 1663 error ("bad value (%s) for -masm= switch", ix86_asm_string); 1664 } 1665 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32)) 1666 error ("code model %qs not supported in the %s bit mode", 1667 ix86_cmodel_string, TARGET_64BIT ? "64" : "32"); 1668 if (ix86_cmodel == CM_LARGE) 1669 sorry ("code model %<large%> not supported yet"); 1670 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0)) 1671 sorry ("%i-bit mode not compiled in", 1672 (target_flags & MASK_64BIT) ? 64 : 32); 1673 1674 for (i = 0; i < pta_size; i++) 1675 if (! strcmp (ix86_arch_string, processor_alias_table[i].name)) 1676 { 1677 ix86_arch = processor_alias_table[i].processor; 1678 /* Default cpu tuning to the architecture. 
*/ 1679 ix86_tune = ix86_arch; 1680 if (processor_alias_table[i].flags & PTA_MMX 1681 && !(target_flags_explicit & MASK_MMX)) 1682 target_flags |= MASK_MMX; 1683 if (processor_alias_table[i].flags & PTA_3DNOW 1684 && !(target_flags_explicit & MASK_3DNOW)) 1685 target_flags |= MASK_3DNOW; 1686 if (processor_alias_table[i].flags & PTA_3DNOW_A 1687 && !(target_flags_explicit & MASK_3DNOW_A)) 1688 target_flags |= MASK_3DNOW_A; 1689 if (processor_alias_table[i].flags & PTA_SSE 1690 && !(target_flags_explicit & MASK_SSE)) 1691 target_flags |= MASK_SSE; 1692 if (processor_alias_table[i].flags & PTA_SSE2 1693 && !(target_flags_explicit & MASK_SSE2)) 1694 target_flags |= MASK_SSE2; 1695 if (processor_alias_table[i].flags & PTA_SSE3 1696 && !(target_flags_explicit & MASK_SSE3)) 1697 target_flags |= MASK_SSE3; 1698 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) 1699 x86_prefetch_sse = true; 1700 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) 1701 error ("CPU you selected does not support x86-64 " 1702 "instruction set"); 1703 break; 1704 } 1705 1706 if (i == pta_size) 1707 error ("bad value (%s) for -march= switch", ix86_arch_string); 1708 1709 for (i = 0; i < pta_size; i++) 1710 if (! strcmp (ix86_tune_string, processor_alias_table[i].name)) 1711 { 1712 ix86_tune = processor_alias_table[i].processor; 1713 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) 1714 { 1715 if (ix86_tune_defaulted) 1716 { 1717 ix86_tune_string = "x86-64"; 1718 for (i = 0; i < pta_size; i++) 1719 if (! strcmp (ix86_tune_string, 1720 processor_alias_table[i].name)) 1721 break; 1722 ix86_tune = processor_alias_table[i].processor; 1723 } 1724 else 1725 error ("CPU you selected does not support x86-64 " 1726 "instruction set"); 1727 } 1728 /* Intel CPUs have always interpreted SSE prefetch instructions as 1729 NOPs; so, we can enable SSE prefetch instructions even when 1730 -mtune (rather than -march) points us to a processor that has them. 
1731 However, the VIA C3 gives a SIGILL, so we only do that for i686 and 1732 higher processors. */ 1733 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE)) 1734 x86_prefetch_sse = true; 1735 break; 1736 } 1737 if (i == pta_size) 1738 error ("bad value (%s) for -mtune= switch", ix86_tune_string); 1739 1740 if (optimize_size) 1741 ix86_cost = &size_cost; 1742 else 1743 ix86_cost = processor_target_table[ix86_tune].cost; 1744 target_flags |= processor_target_table[ix86_tune].target_enable; 1745 target_flags &= ~processor_target_table[ix86_tune].target_disable; 1746 1747 /* Arrange to set up i386_stack_locals for all functions. */ 1748 init_machine_status = ix86_init_machine_status; 1749 1750 /* Validate -mregparm= value. */ 1751 if (ix86_regparm_string) 1752 { 1753 i = atoi (ix86_regparm_string); 1754 if (i < 0 || i > REGPARM_MAX) 1755 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX); 1756 else 1757 ix86_regparm = i; 1758 } 1759 else 1760 if (TARGET_64BIT) 1761 ix86_regparm = REGPARM_MAX; 1762 1763 /* If the user has provided any of the -malign-* options, 1764 warn and use that value only if -falign-* is not set. 1765 Remove this code in GCC 3.2 or later. 
*/ 1766 if (ix86_align_loops_string) 1767 { 1768 warning (0, "-malign-loops is obsolete, use -falign-loops"); 1769 if (align_loops == 0) 1770 { 1771 i = atoi (ix86_align_loops_string); 1772 if (i < 0 || i > MAX_CODE_ALIGN) 1773 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1774 else 1775 align_loops = 1 << i; 1776 } 1777 } 1778 1779 if (ix86_align_jumps_string) 1780 { 1781 warning (0, "-malign-jumps is obsolete, use -falign-jumps"); 1782 if (align_jumps == 0) 1783 { 1784 i = atoi (ix86_align_jumps_string); 1785 if (i < 0 || i > MAX_CODE_ALIGN) 1786 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1787 else 1788 align_jumps = 1 << i; 1789 } 1790 } 1791 1792 if (ix86_align_funcs_string) 1793 { 1794 warning (0, "-malign-functions is obsolete, use -falign-functions"); 1795 if (align_functions == 0) 1796 { 1797 i = atoi (ix86_align_funcs_string); 1798 if (i < 0 || i > MAX_CODE_ALIGN) 1799 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1800 else 1801 align_functions = 1 << i; 1802 } 1803 } 1804 1805 /* Default align_* from the processor table. */ 1806 if (align_loops == 0) 1807 { 1808 align_loops = processor_target_table[ix86_tune].align_loop; 1809 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip; 1810 } 1811 if (align_jumps == 0) 1812 { 1813 align_jumps = processor_target_table[ix86_tune].align_jump; 1814 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip; 1815 } 1816 if (align_functions == 0) 1817 { 1818 align_functions = processor_target_table[ix86_tune].align_func; 1819 } 1820 1821 /* Validate -mbranch-cost= value, or provide default. 
*/ 1822 ix86_branch_cost = ix86_cost->branch_cost; 1823 if (ix86_branch_cost_string) 1824 { 1825 i = atoi (ix86_branch_cost_string); 1826 if (i < 0 || i > 5) 1827 error ("-mbranch-cost=%d is not between 0 and 5", i); 1828 else 1829 ix86_branch_cost = i; 1830 } 1831 if (ix86_section_threshold_string) 1832 { 1833 i = atoi (ix86_section_threshold_string); 1834 if (i < 0) 1835 error ("-mlarge-data-threshold=%d is negative", i); 1836 else 1837 ix86_section_threshold = i; 1838 } 1839 1840 if (ix86_tls_dialect_string) 1841 { 1842 if (strcmp (ix86_tls_dialect_string, "gnu") == 0) 1843 ix86_tls_dialect = TLS_DIALECT_GNU; 1844 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0) 1845 ix86_tls_dialect = TLS_DIALECT_GNU2; 1846 else if (strcmp (ix86_tls_dialect_string, "sun") == 0) 1847 ix86_tls_dialect = TLS_DIALECT_SUN; 1848 else 1849 error ("bad value (%s) for -mtls-dialect= switch", 1850 ix86_tls_dialect_string); 1851 } 1852 1853 /* Keep nonleaf frame pointers. */ 1854 if (flag_omit_frame_pointer) 1855 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; 1856 else if (TARGET_OMIT_LEAF_FRAME_POINTER) 1857 flag_omit_frame_pointer = 1; 1858 1859 /* If we're doing fast math, we don't care about comparison order 1860 wrt NaNs. This lets us use a shorter comparison sequence. */ 1861 if (flag_finite_math_only) 1862 target_flags &= ~MASK_IEEE_FP; 1863 1864 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, 1865 since the insns won't need emulation. */ 1866 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch)) 1867 target_flags &= ~MASK_NO_FANCY_MATH_387; 1868 1869 /* Likewise, if the target doesn't have a 387, or we've specified 1870 software floating point, don't use 387 inline intrinsics. */ 1871 if (!TARGET_80387) 1872 target_flags |= MASK_NO_FANCY_MATH_387; 1873 1874 /* Turn on SSE2 builtins for -msse3. */ 1875 if (TARGET_SSE3) 1876 target_flags |= MASK_SSE2; 1877 1878 /* Turn on SSE builtins for -msse2. 
*/ 1879 if (TARGET_SSE2) 1880 target_flags |= MASK_SSE; 1881 1882 /* Turn on MMX builtins for -msse. */ 1883 if (TARGET_SSE) 1884 { 1885 target_flags |= MASK_MMX & ~target_flags_explicit; 1886 x86_prefetch_sse = true; 1887 } 1888 1889 /* Turn on MMX builtins for 3Dnow. */ 1890 if (TARGET_3DNOW) 1891 target_flags |= MASK_MMX; 1892 1893 if (TARGET_64BIT) 1894 { 1895 if (TARGET_ALIGN_DOUBLE) 1896 error ("-malign-double makes no sense in the 64bit mode"); 1897 if (TARGET_RTD) 1898 error ("-mrtd calling convention not supported in the 64bit mode"); 1899 1900 /* Enable by default the SSE and MMX builtins. Do allow the user to 1901 explicitly disable any of these. In particular, disabling SSE and 1902 MMX for kernel code is extremely useful. */ 1903 target_flags 1904 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE) 1905 & ~target_flags_explicit); 1906 } 1907 else 1908 { 1909 /* i386 ABI does not specify red zone. It still makes sense to use it 1910 when programmer takes care to stack from being destroyed. */ 1911 if (!(target_flags_explicit & MASK_NO_RED_ZONE)) 1912 target_flags |= MASK_NO_RED_ZONE; 1913 } 1914 1915 /* Validate -mpreferred-stack-boundary= value, or provide default. 1916 The default of 128 bits is for Pentium III's SSE __m128. We can't 1917 change it because of optimize_size. Otherwise, we can't mix object 1918 files compiled with -Os and -On. */ 1919 ix86_preferred_stack_boundary = 128; 1920 if (ix86_preferred_stack_boundary_string) 1921 { 1922 i = atoi (ix86_preferred_stack_boundary_string); 1923 if (i < (TARGET_64BIT ? 4 : 2) || i > 12) 1924 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i, 1925 TARGET_64BIT ? 4 : 2); 1926 else 1927 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT; 1928 } 1929 1930 /* Accept -msseregparm only if at least SSE support is enabled. */ 1931 if (TARGET_SSEREGPARM 1932 && ! 
TARGET_SSE) 1933 error ("-msseregparm used without SSE enabled"); 1934 1935 ix86_fpmath = TARGET_FPMATH_DEFAULT; 1936 1937 if (ix86_fpmath_string != 0) 1938 { 1939 if (! strcmp (ix86_fpmath_string, "387")) 1940 ix86_fpmath = FPMATH_387; 1941 else if (! strcmp (ix86_fpmath_string, "sse")) 1942 { 1943 if (!TARGET_SSE) 1944 { 1945 warning (0, "SSE instruction set disabled, using 387 arithmetics"); 1946 ix86_fpmath = FPMATH_387; 1947 } 1948 else 1949 ix86_fpmath = FPMATH_SSE; 1950 } 1951 else if (! strcmp (ix86_fpmath_string, "387,sse") 1952 || ! strcmp (ix86_fpmath_string, "sse,387")) 1953 { 1954 if (!TARGET_SSE) 1955 { 1956 warning (0, "SSE instruction set disabled, using 387 arithmetics"); 1957 ix86_fpmath = FPMATH_387; 1958 } 1959 else if (!TARGET_80387) 1960 { 1961 warning (0, "387 instruction set disabled, using SSE arithmetics"); 1962 ix86_fpmath = FPMATH_SSE; 1963 } 1964 else 1965 ix86_fpmath = FPMATH_SSE | FPMATH_387; 1966 } 1967 else 1968 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string); 1969 } 1970 1971 /* If the i387 is disabled, then do not return values in it. */ 1972 if (!TARGET_80387) 1973 target_flags &= ~MASK_FLOAT_RETURNS; 1974 1975 if ((x86_accumulate_outgoing_args & TUNEMASK) 1976 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 1977 && !optimize_size) 1978 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 1979 1980 /* ??? Unwind info is not correct around the CFG unless either a frame 1981 pointer is present or M_A_O_A is set. Fixing this requires rewriting 1982 unwind info generation to be aware of the CFG and propagating states 1983 around edges. 
*/ 1984 if ((flag_unwind_tables || flag_asynchronous_unwind_tables 1985 || flag_exceptions || flag_non_call_exceptions) 1986 && flag_omit_frame_pointer 1987 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) 1988 { 1989 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 1990 warning (0, "unwind tables currently require either a frame pointer " 1991 "or -maccumulate-outgoing-args for correctness"); 1992 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 1993 } 1994 1995 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ 1996 { 1997 char *p; 1998 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); 1999 p = strchr (internal_label_prefix, 'X'); 2000 internal_label_prefix_len = p - internal_label_prefix; 2001 *p = '\0'; 2002 } 2003 2004 /* When scheduling description is not available, disable scheduler pass 2005 so it won't slow down the compilation and make x87 code slower. */ 2006 if (!TARGET_SCHEDULE) 2007 flag_schedule_insns_after_reload = flag_schedule_insns = 0; 2008} 2009 2010/* switch to the appropriate section for output of DECL. 2011 DECL is either a `VAR_DECL' node or a constant of some sort. 2012 RELOC indicates whether forming the initial value of DECL requires 2013 link-time relocations. 
*/ 2014 2015static section * 2016x86_64_elf_select_section (tree decl, int reloc, 2017 unsigned HOST_WIDE_INT align) 2018{ 2019 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) 2020 && ix86_in_large_data_p (decl)) 2021 { 2022 const char *sname = NULL; 2023 unsigned int flags = SECTION_WRITE; 2024 switch (categorize_decl_for_section (decl, reloc)) 2025 { 2026 case SECCAT_DATA: 2027 sname = ".ldata"; 2028 break; 2029 case SECCAT_DATA_REL: 2030 sname = ".ldata.rel"; 2031 break; 2032 case SECCAT_DATA_REL_LOCAL: 2033 sname = ".ldata.rel.local"; 2034 break; 2035 case SECCAT_DATA_REL_RO: 2036 sname = ".ldata.rel.ro"; 2037 break; 2038 case SECCAT_DATA_REL_RO_LOCAL: 2039 sname = ".ldata.rel.ro.local"; 2040 break; 2041 case SECCAT_BSS: 2042 sname = ".lbss"; 2043 flags |= SECTION_BSS; 2044 break; 2045 case SECCAT_RODATA: 2046 case SECCAT_RODATA_MERGE_STR: 2047 case SECCAT_RODATA_MERGE_STR_INIT: 2048 case SECCAT_RODATA_MERGE_CONST: 2049 sname = ".lrodata"; 2050 flags = 0; 2051 break; 2052 case SECCAT_SRODATA: 2053 case SECCAT_SDATA: 2054 case SECCAT_SBSS: 2055 gcc_unreachable (); 2056 case SECCAT_TEXT: 2057 case SECCAT_TDATA: 2058 case SECCAT_TBSS: 2059 /* We don't split these for medium model. Place them into 2060 default sections and hope for best. */ 2061 break; 2062 } 2063 if (sname) 2064 { 2065 /* We might get called with string constants, but get_named_section 2066 doesn't like them as they are not DECLs. Also, we need to set 2067 flags in that case. */ 2068 if (!DECL_P (decl)) 2069 return get_section (sname, flags, NULL); 2070 return get_named_section (decl, sname, reloc); 2071 } 2072 } 2073 return default_elf_select_section (decl, reloc, align); 2074} 2075 2076/* Build up a unique section name, expressed as a 2077 STRING_CST node, and assign it to DECL_SECTION_NAME (decl). 2078 RELOC indicates whether the initial value of EXP requires 2079 link-time relocations. 
*/ 2080 2081static void 2082x86_64_elf_unique_section (tree decl, int reloc) 2083{ 2084 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) 2085 && ix86_in_large_data_p (decl)) 2086 { 2087 const char *prefix = NULL; 2088 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */ 2089 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP; 2090 2091 switch (categorize_decl_for_section (decl, reloc)) 2092 { 2093 case SECCAT_DATA: 2094 case SECCAT_DATA_REL: 2095 case SECCAT_DATA_REL_LOCAL: 2096 case SECCAT_DATA_REL_RO: 2097 case SECCAT_DATA_REL_RO_LOCAL: 2098 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata."; 2099 break; 2100 case SECCAT_BSS: 2101 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss."; 2102 break; 2103 case SECCAT_RODATA: 2104 case SECCAT_RODATA_MERGE_STR: 2105 case SECCAT_RODATA_MERGE_STR_INIT: 2106 case SECCAT_RODATA_MERGE_CONST: 2107 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata."; 2108 break; 2109 case SECCAT_SRODATA: 2110 case SECCAT_SDATA: 2111 case SECCAT_SBSS: 2112 gcc_unreachable (); 2113 case SECCAT_TEXT: 2114 case SECCAT_TDATA: 2115 case SECCAT_TBSS: 2116 /* We don't split these for medium model. Place them into 2117 default sections and hope for best. */ 2118 break; 2119 } 2120 if (prefix) 2121 { 2122 const char *name; 2123 size_t nlen, plen; 2124 char *string; 2125 plen = strlen (prefix); 2126 2127 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); 2128 name = targetm.strip_name_encoding (name); 2129 nlen = strlen (name); 2130 2131 string = alloca (nlen + plen + 1); 2132 memcpy (string, prefix, plen); 2133 memcpy (string + plen, name, nlen + 1); 2134 2135 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string); 2136 return; 2137 } 2138 } 2139 default_unique_section (decl, reloc); 2140} 2141 2142#ifdef COMMON_ASM_OP 2143/* This says how to output assembler code to declare an 2144 uninitialized external linkage data object. 
   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  /* Objects bigger than -mlarge-data-threshold under the medium code
     models are announced with .largecomm instead of COMMON_ASM_OP.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fprintf (file, ".largecomm\t");
  else
    fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  /* ALIGN arrives in bits; the directive expects bytes.  */
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	   size, align / BITS_PER_UNIT);
}

/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  Switches to .lbss for medium-model large
   objects (size above -mlarge-data-threshold), otherwise to the
   ordinary bss section, then emits the label and reserves SIZE bytes.
   ALIGN is in bits.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  /* Reserve at least one byte so a zero-sized object still gets a
     distinct address.  */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
#endif

/* Adjust flag defaults for optimization level LEVEL (SIZE is the
   -Os flag, unused here).  Runs before override_options; flags whose
   defaults depend on TARGET_64BIT are parked at the sentinel value 2
   and resolved there.  */

void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  if (TARGET_MACHO)
    /* The Darwin libraries never set errno, so we might as well
       avoid calling them when that's the only reason we would.  */
    flag_errno_math = 0;

  /* The default values of these switches depend on the TARGET_64BIT
     that is not known at this moment.  Mark these values with 2 and
     let the user override these.  In case there is no command line option
     specifying them, we will set the defaults in override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
  SUBTARGET_OPTIMIZATION_OPTIONS;
#endif
}

/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
  /* force_align_arg_pointer says this function realigns the stack at entry.
     The attribute name is read from a variable so subtargets can rename
     it; hence the pointer-to-string cast instead of a literal.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true, true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL, 0, 0, false, false, false, NULL }
};

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call (NULL for an
   indirect call) and EXP is the CALL_EXPR representing the call.
   Returns true when a tail call is safe.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree func;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  */
  if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* For an indirect call, dig the function type out of the callee
     pointer expression.  */
  if (decl)
    func = decl;
  else
    {
      func = TREE_TYPE (TREE_OPERAND (exp, 0));
      if (POINTER_TYPE_P (func))
	func = TREE_TYPE (func);
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), func, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  /* If this call is indirect, we'll need to be able to use a call-clobbered
     register for the address of the target function.  Make sure that all
     such registers are not used for passing parameters.  */
  if (!decl && !TARGET_64BIT)
    {
      tree type;

      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = TREE_OPERAND (exp, 0);	/* pointer expression */
      type = TREE_TYPE (type);		/* pointer type */
      type = TREE_TYPE (type);		/* function type */

      if (ix86_function_regparm (type, NULL) >= 3)
	{
	  /* ??? Need to count the actual number of registers to be used,
	     not the possible number of registers.  Fix later.  */
	  return false;
	}
    }

#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* Dllimport'd functions are also called indirectly.  */
  if (decl && DECL_DLLIMPORT_P (decl)
      && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
    return false;
#endif

  /* If we forced aligned the stack, then sibcalling would unalign the
     stack, which may break the called function.  */
  if (cfun->machine->force_align_arg_pointer)
    return false;

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}

/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
   calling convention attributes;
   arguments as in struct attribute_spec.handler.
*/

static tree
ix86_handle_cconv_attribute (tree *node, tree name,
			     tree args,
			     int flags ATTRIBUTE_UNUSED,
			     bool *no_add_attrs)
{
  /* Calling convention attributes only make sense on function types
     (or declarations that carry one).  */
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qs attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.
     Note that this branch returns before the TARGET_64BIT check below,
     so regparm is processed (not warned about) in 64-bit mode.  */
  if (is_attribute_p ("regparm", name))
    {
      tree cst;

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qs attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}

      /* NOTE(review): compare_tree_int is used as a plain truth test
	 here, so this errors for ANY regparm value different from
	 REGPARM_MAX-1, not only larger ones; also CST may not be an
	 INTEGER_CST at this point (the warning above does not return).
	 Presumably "> 0" was intended -- confirm against upstream.  */
      if (!TARGET_64BIT
	  && lookup_attribute (ix86_force_align_arg_pointer_string,
			       TYPE_ATTRIBUTES (*node))
	  && compare_tree_int (cst, REGPARM_MAX-1))
	{
	  error ("%s functions limited to %d register parameters",
		 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
	}

      return NULL_TREE;
    }

  /* The remaining attributes only affect the 32-bit ABI.  */
  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qs attribute ignored",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
  if (is_attribute_p ("fastcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and stdcall attributes are not compatible");
	}
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}
    }

  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and fastcall attributes are not compatible");
	}
    }

  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and cdecl attributes are not compatible");
	}
    }

  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  Non-function types always compare
   compatible.  */

static int
ix86_comp_type_attributes (tree type1, tree type2)
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched fastcall/regparm types.  */
  if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
       != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
      || (ix86_function_regparm (type1, NULL)
	  != ix86_function_regparm (type2, NULL)))
    return 0;

  /* Check for mismatched sseregparm types.  */
  if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
      != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
    return 0;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;

  return 1;
}

/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  The result is ix86_regparm (the -mregparm
   setting) unless overridden by a regparm/fastcall attribute or bumped
   up for provably-local functions.  */

static int
ix86_function_regparm (tree type, tree decl)
{
  tree attr;
  int regparm = ix86_regparm;
  bool user_convention = false;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  user_convention = true;
	}

      /* fastcall fixes the convention at two register arguments.  */
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
	{
	  regparm = 2;
	  user_convention = true;
	}

      /* Use register calling convention for local functions when possible.
	 (The inner !TARGET_64BIT test is redundant inside this branch.)  */
      if (!TARGET_64BIT && !user_convention && decl
	  && flag_unit_at_a_time && !profile_flag)
	{
	  struct cgraph_local_info *i = cgraph_local_info (decl);
	  if (i && i->local)
	    {
	      int local_regparm, globals = 0, regno;

	      /* Make sure no regparm register is taken by a global register
		 variable.  */
	      for (local_regparm = 0; local_regparm < 3; local_regparm++)
		if (global_regs[local_regparm])
		  break;
	      /* We can't use regparm(3) for nested functions as these use
		 static chain pointer in third argument.  */
	      if (local_regparm == 3
		  && decl_function_context (decl)
		  && !DECL_NO_STATIC_CHAIN (decl))
		local_regparm = 2;
	      /* If the function realigns its stackpointer, the
		 prologue will clobber %ecx.  If we've already
		 generated code for the callee, the callee
		 DECL_STRUCT_FUNCTION is gone, so we fall back to
		 scanning the attributes for the self-realigning
		 property.  */
	      if ((DECL_STRUCT_FUNCTION (decl)
		   && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
		  || (!DECL_STRUCT_FUNCTION (decl)
		      && lookup_attribute (ix86_force_align_arg_pointer_string,
					   TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
		local_regparm = 2;
	      /* Each global register variable increases register pressure,
		 so the more global reg vars there are, the smaller regparm
		 optimization use, unless requested by the user explicitly.  */
	      for (regno = 0; regno < 6; regno++)
		if (global_regs[regno])
		  globals++;
	      local_regparm
		= globals < local_regparm ? local_regparm - globals : 0;

	      if (local_regparm > regparm)
		regparm = local_regparm;
	    }
	}
    }
  return regparm;
}

/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (tree type, tree decl)
{
  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type
	  && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      /* The request is an error without SSE support -- the values could
	 not be passed in SSE registers at all.  */
      if (!TARGET_SSE)
	{
	  if (decl)
	    error ("Calling %qD with attribute sseregparm without "
		   "SSE/SSE2 enabled", decl);
	  else
	    error ("Calling %qT with attribute sseregparm without "
		   "SSE/SSE2 enabled", type);
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers,
     even for 32-bit targets.  */
  if (!TARGET_64BIT && decl
      && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
    {
      struct cgraph_local_info *i = cgraph_local_info (decl);
      if (i && i->local)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}

/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
}

/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  /* -mrtd applies only to real functions, not libcalls (which are
     represented by a bare IDENTIFIER_NODE).  */
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall and fastcall functions will pop the stack if not
       variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
	|| lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    /* Only pop when the argument list is fixed, i.e. it is either
       empty (unprototyped) or ends in void_type_node.  */
    if (rtd
	&& (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !TARGET_64BIT
      && !KEEP_AGGREGATE_RETURN_POINTER)
    {
      int nregs = ix86_function_regparm (funtype, fundecl);

      /* The hidden return pointer goes on the stack only when no
	 argument registers are in use.  */
      if (!nregs)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}

/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.
 */
bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  if (!TARGET_64BIT)
    {
      /* 32-bit: integer regparm registers, plus MMX/SSE argument
	 registers when the corresponding ISA is enabled.  */
      if (TARGET_MACHO)
        return (regno < REGPARM_MAX
                || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
        return (regno < REGPARM_MAX
	        || (TARGET_MMX && MMX_REGNO_P (regno)
		    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
	        || (TARGET_SSE && SSE_REGNO_P (regno)
		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  /* 64-bit cases below.  */
  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
        return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
	  && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
	return true;
    }
  /* RAX is used as hidden argument to va_arg functions.  */
  if (!regno)
    return true;
  for (i = 0; i < REGPARM_MAX; i++)
    if (regno == x86_64_int_parameter_registers[i])
      return true;
  return false;
}

/* Return true if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.
 */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl)
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (TARGET_SSE)
    cum->sse_nregs = SSE_REGPARM_MAX;
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_sse = true;
  cum->warn_mmx = true;
  cum->maybe_vaarg = false;

  /* Use ecx and edx registers if function has fastcall attribute,
     else look for regparm information.  */
  if (fntype && !TARGET_64BIT)
    {
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
	{
	  cum->nregs = 2;
	  cum->fastcall = 1;
	}
      else
	cum->nregs = ix86_function_regparm (fntype, fndecl);
    }

  /* Set up the number of SSE registers used for passing SFmode
     and DFmode arguments.  Warn for mismatching ABI.  */
  cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers in 32-bit mode.  */

  if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    {
	      if (!TARGET_64BIT)
		{
		  cum->nregs = 0;
		  cum->sse_nregs = 0;
		  cum->mmx_nregs = 0;
		  cum->warn_sse = 0;
		  cum->warn_mmx = 0;
		  cum->fastcall = 0;
		  cum->float_in_sse = 0;
		}
	      cum->maybe_vaarg = true;
	    }
	}
    }
  /* Libcalls with no type info, and unprototyped functions, may also
     be variadic.  */
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = true;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}

/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.  */

static enum machine_mode
type_natural_mode (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      return mode;

	  gcc_unreachable ();
	}
    }

  return mode;
}

/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      /* Wrap the register in a one-entry PARALLEL at offset 0.  */
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}

/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}

/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies the
   offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.
*/

static int
classify_argument (enum machine_mode mode, tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
      if (bytes > 16)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal memory class, so handle it as special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* For classes first merge in the field of the subclasses.  */
	  if (TYPE_BINFO (type))
	    {
	      tree binfo, base_binfo;
	      int basenum;

	      for (binfo = TYPE_BINFO (type), basenum = 0;
		   BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
		{
		   int num;
		   int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
		   tree type = BINFO_TYPE (base_binfo);

		   num = classify_argument (TYPE_MODE (type),
					    type, subclasses,
					    (offset + bit_offset) % 256);
		   if (!num)
		     return 0;
		   for (i = 0; i < num; i++)
		     {
		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
		       classes[i + pos] =
			 merge_classes (subclasses[i], classes[i + pos]);
		     }
		}
	    }
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					       TREE_TYPE (field), subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      for (i = 0; i < num; i++)
			{
			  int pos =
			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
			  classes[i + pos] =
			    merge_classes (subclasses[i], classes[i + pos]);
			}
		    }
		}
	    }
	  break;

	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), subclasses, bit_offset);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    /* Replicate the element classification across all words.  */
	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];

	    break;
	  }
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.  */

	  /* Unions are not derived.  */
	  gcc_assert (!TYPE_BINFO (type)
		      || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
	    classes[i] = X86_64_SSE_CLASS;

	  /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
	    classes[i] = X86_64_SSE_CLASS;
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode)
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case SDmode:
    case DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      return 1;
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case CTImode:
      return 0;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;
    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}

/* Examine the argument and return set number of register required in each
   class.  Return 0 iff parameter should be passed in memory.  */
static int
examine_argument (enum machine_mode mode, tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class class[MAX_CLASSES];
  int n = classify_argument (mode, type, class, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (class[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	/* x87 classes are only valid for return values.  */
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }
  return 1;
}

/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.
 */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
		     tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
	fprintf (stderr, "Memory class\n");
      else
	{
	  fprintf (stderr, "Classes:");
	  for (i = 0; i < n; i++)
	    {
	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
	    }
	   fprintf (stderr, "\n");
	}
    }
  /* NULL means "pass in memory".  */
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
			 &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_80387 && in_return)
    for (i = 0; i < n; i++)
      if (class[i] == X86_64_X87_CLASS
	  || class[i] == X86_64_X87UP_CLASS
	  || class[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
        {
	  case X86_64_NO_CLASS:
	    break;
	  case X86_64_INTEGER_CLASS:
	  case X86_64_INTEGERSI_CLASS:
	    /* Merge TImodes on aligned occasions here too.  */
	    if (i * 8 + 8 > bytes)
	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	    else if (class[i] == X86_64_INTEGERSI_CLASS)
	      tmpmode = SImode;
	    else
	      tmpmode = DImode;
	    /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
	    if (tmpmode == BLKmode)
	      tmpmode = DImode;
	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					       gen_rtx_REG (tmpmode, *intreg),
					       GEN_INT (i*8));
	    intreg++;
	    break;
	  case X86_64_SSESF_CLASS:
	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					       gen_rtx_REG (SFmode,
							    SSE_REGNO (sse_regno)),
					       GEN_INT (i*8));
	    sse_regno++;
	    break;
	  case X86_64_SSEDF_CLASS:
	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					       gen_rtx_REG (DFmode,
							    SSE_REGNO (sse_regno)),
					       GEN_INT (i*8));
	    sse_regno++;
	    break;
	  case X86_64_SSE_CLASS:
	    if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
	      tmpmode = TImode;
	    else
	      tmpmode = DImode;
	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					       gen_rtx_REG (tmpmode,
							    SSE_REGNO (sse_regno)),
					       GEN_INT (i*8));
	    if (tmpmode == TImode)
	      i++;
	    sse_regno++;
	    break;
	  default:
	    gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
		      tree type, int named)
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type);

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
	     "mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, cum->sse_nregs,
	     GET_MODE_NAME (mode), named);

  if (TARGET_64BIT)
    {
      int int_nregs, sse_nregs;
      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
	cum->words += words;
      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
	{
	  cum->nregs -= int_nregs;
	  cum->sse_nregs -= sse_nregs;
	  cum->regno += int_nregs;
	  cum->sse_regno += sse_nregs;
	}
      else
	cum->words += words;
    }
  else
    {
      switch (mode)
	{
	default:
	  break;

	case BLKmode:
	  if (bytes < 0)
	    break;
	  /* FALLTHRU */

	case DImode:
	case SImode:
	case HImode:
	case QImode:
	  cum->words += words;
	  cum->nregs -= words;
	  cum->regno += words;

	  if (cum->nregs <= 0)
	    {
	      cum->nregs = 0;
	      cum->regno = 0;
	    }
	  break;

	case DFmode:
	  if (cum->float_in_sse < 2)
	    break;
	  /* FALLTHRU */
	case SFmode:
	  if (cum->float_in_sse < 1)
	    break;
	  /* FALLTHRU */

	case TImode:
	case V16QImode:
	case V8HImode:
	case V4SImode:
	case V2DImode:
	case V4SFmode:
	case V2DFmode:
	  if (!type || !AGGREGATE_TYPE_P (type))
	    {
	      cum->sse_words += words;
	      cum->sse_nregs -= 1;
	      cum->sse_regno += 1;
	      if (cum->sse_nregs <= 0)
		{
		  cum->sse_nregs = 0;
		  cum->sse_regno = 0;
		}
	    }
	  break;

	case V8QImode:
	case V4HImode:
	case V2SImode:
	case V2SFmode:
	  if (!type || !AGGREGATE_TYPE_P (type))
	    {
	      cum->mmx_words += words;
	      cum->mmx_nregs -= 1;
	      cum->mmx_regno += 1;
	      if (cum->mmx_nregs <= 0)
		{
		  cum->mmx_nregs = 0;
		  cum->mmx_regno = 0;
		}
	    }
	  break;
	}
    }
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
	      tree type, int named)
{
  enum machine_mode mode = orig_mode;
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  static bool warnedsse, warnedmmx;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type);

  /* Handle a hidden AL argument containing number of registers for varargs
     x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
     any AL settings.  */
  if (mode == VOIDmode)
    {
      if (TARGET_64BIT)
	return GEN_INT (cum->maybe_vaarg
			? (cum->sse_nregs < 0
			   ? SSE_REGPARM_MAX
			   : cum->sse_regno)
			: -1);
      else
	return constm1_rtx;
    }
  if (TARGET_64BIT)
    ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
			       cum->sse_nregs,
			       &x86_64_int_parameter_registers [cum->regno],
			       cum->sse_regno);
  else
    switch (mode)
      {
	/* For now, pass fp/complex values on the stack.  */
      default:
	break;

      case BLKmode:
	if (bytes < 0)
	  break;
	/* FALLTHRU */
      case DImode:
      case SImode:
      case HImode:
      case QImode:
	if (words <= cum->nregs)
	  {
	    int regno = cum->regno;

	    /* Fastcall allocates the first two DWORD (SImode) or
	       smaller arguments to ECX and EDX.  */
	    if (cum->fastcall)
	      {
		if (mode == BLKmode || mode == DImode)
		  break;

		/* ECX not EAX is the first allocated register.  */
		if (regno == 0)
		  regno = 2;
	      }
	    ret = gen_rtx_REG (mode, regno);
	  }
	break;
      case DFmode:
	if (cum->float_in_sse < 2)
	  break;
	/* FALLTHRU */
      case SFmode:
	if (cum->float_in_sse < 1)
	  break;
	/* FALLTHRU */
      case TImode:
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
      case V4SFmode:
      case V2DFmode:
	if (!type || !AGGREGATE_TYPE_P (type))
	  {
	    if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	      {
		warnedsse = true;
		warning (0, "SSE vector argument without SSE enabled "
			 "changes the ABI");
	      }
	    if (cum->sse_nregs)
	      ret = gen_reg_or_parallel (mode, orig_mode,
					 cum->sse_regno + FIRST_SSE_REG);
	  }
	break;
      case V8QImode:
      case V4HImode:
      case V2SImode:
      case V2SFmode:
	if (!type || !AGGREGATE_TYPE_P (type))
	  {
	    if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	      {
		warnedmmx = true;
		warning (0, "MMX vector argument without MMX enabled "
			 "changes the ABI");
	      }
	    if (cum->mmx_nregs)
	      ret = gen_reg_or_parallel (mode, orig_mode,
					 cum->mmx_regno + FIRST_MMX_REG);
	  }
	break;
      }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	print_simple_rtl (stderr, ret);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}

/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			tree type, bool named ATTRIBUTE_UNUSED)
{
  if (!TARGET_64BIT)
    return 0;

  /* Variable sized types are passed by reference on x86-64.  */
  if (type && int_size_in_bytes (type) == -1)
    {
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference\n");
      return 1;
    }

  return 0;
}

/* Return true when TYPE should be 128bit aligned for 32bit argument passing
   ABI.  Only called if TARGET_SSE.  */
static bool
contains_128bit_aligned_vector_p (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (SSE_REG_MODE_P (mode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    if (TYPE_BINFO (type))
	      {
		tree binfo, base_binfo;
		int i;

		for (binfo = TYPE_BINFO (type), i = 0;
		     BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
		  if (contains_128bit_aligned_vector_p
		      (BINFO_TYPE (base_binfo)))
		    return true;
	      }
	    /* And now check the fields of structure.  */
	    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages pass arrays by value.  */
	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}

/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

int
ix86_function_arg_boundary (enum machine_mode mode, tree type)
{
  int align;
  if (type)
    align = TYPE_ALIGN (type);
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  if (!TARGET_64BIT)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!TARGET_SSE)
	align = PARM_BOUNDARY;
      else if (!type)
	{
	  if (!SSE_REG_MODE_P (mode))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!contains_128bit_aligned_vector_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > 128)
    align = 128;
  return align;
}

/* Return true if N is a possible register number of function value.
*/ 3838bool 3839ix86_function_value_regno_p (int regno) 3840{ 3841 if (TARGET_MACHO) 3842 { 3843 if (!TARGET_64BIT) 3844 { 3845 return ((regno) == 0 3846 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387) 3847 || ((regno) == FIRST_SSE_REG && TARGET_SSE)); 3848 } 3849 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG 3850 || ((regno) == FIRST_SSE_REG && TARGET_SSE) 3851 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)); 3852 } 3853 else 3854 { 3855 if (regno == 0 3856 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387) 3857 || (regno == FIRST_SSE_REG && TARGET_SSE)) 3858 return true; 3859 3860 if (!TARGET_64BIT 3861 && (regno == FIRST_MMX_REG && TARGET_MMX)) 3862 return true; 3863 3864 return false; 3865 } 3866} 3867 3868/* Define how to find the value returned by a function. 3869 VALTYPE is the data type of the value (as a tree). 3870 If the precise function being called is known, FUNC is its FUNCTION_DECL; 3871 otherwise, FUNC is 0. */ 3872rtx 3873ix86_function_value (tree valtype, tree fntype_or_decl, 3874 bool outgoing ATTRIBUTE_UNUSED) 3875{ 3876 enum machine_mode natmode = type_natural_mode (valtype); 3877 3878 if (TARGET_64BIT) 3879 { 3880 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype, 3881 1, REGPARM_MAX, SSE_REGPARM_MAX, 3882 x86_64_int_return_registers, 0); 3883 /* For zero sized structures, construct_container return NULL, but we 3884 need to keep rest of compiler happy by returning meaningful value. */ 3885 if (!ret) 3886 ret = gen_rtx_REG (TYPE_MODE (valtype), 0); 3887 return ret; 3888 } 3889 else 3890 { 3891 tree fn = NULL_TREE, fntype; 3892 if (fntype_or_decl 3893 && DECL_P (fntype_or_decl)) 3894 fn = fntype_or_decl; 3895 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl; 3896 return gen_rtx_REG (TYPE_MODE (valtype), 3897 ix86_value_regno (natmode, fn, fntype)); 3898 } 3899} 3900 3901/* Return true iff type is returned in memory. 
*/ 3902int 3903ix86_return_in_memory (tree type) 3904{ 3905 int needed_intregs, needed_sseregs, size; 3906 enum machine_mode mode = type_natural_mode (type); 3907 3908 if (TARGET_64BIT) 3909 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs); 3910 3911 if (mode == BLKmode) 3912 return 1; 3913 3914 size = int_size_in_bytes (type); 3915 3916 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8) 3917 return 0; 3918 3919 if (VECTOR_MODE_P (mode) || mode == TImode) 3920 { 3921 /* User-created vectors small enough to fit in EAX. */ 3922 if (size < 8) 3923 return 0; 3924 3925 /* MMX/3dNow values are returned in MM0, 3926 except when it doesn't exits. */ 3927 if (size == 8) 3928 return (TARGET_MMX ? 0 : 1); 3929 3930 /* SSE values are returned in XMM0, except when it doesn't exist. */ 3931 if (size == 16) 3932 return (TARGET_SSE ? 0 : 1); 3933 } 3934 3935 if (mode == XFmode) 3936 return 0; 3937 3938 if (mode == TDmode) 3939 return 1; 3940 3941 if (size > 12) 3942 return 1; 3943 return 0; 3944} 3945 3946/* When returning SSE vector types, we have a choice of either 3947 (1) being abi incompatible with a -march switch, or 3948 (2) generating an error. 3949 Given no good solution, I think the safest thing is one warning. 3950 The user won't be able to use -Werror, but.... 3951 3952 Choose the STRUCT_VALUE_RTX hook because that's (at present) only 3953 called in response to actually generating a caller or callee that 3954 uses such a type. As opposed to RETURN_IN_MEMORY, which is called 3955 via aggregate_value_p for general type probing from tree-ssa. */ 3956 3957static rtx 3958ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED) 3959{ 3960 static bool warnedsse, warnedmmx; 3961 3962 if (type) 3963 { 3964 /* Look at the return type of the function, not the function type. 
*/ 3965 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type)); 3966 3967 if (!TARGET_SSE && !warnedsse) 3968 { 3969 if (mode == TImode 3970 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) 3971 { 3972 warnedsse = true; 3973 warning (0, "SSE vector return without SSE enabled " 3974 "changes the ABI"); 3975 } 3976 } 3977 3978 if (!TARGET_MMX && !warnedmmx) 3979 { 3980 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8) 3981 { 3982 warnedmmx = true; 3983 warning (0, "MMX vector return without MMX enabled " 3984 "changes the ABI"); 3985 } 3986 } 3987 } 3988 3989 return NULL; 3990} 3991 3992/* Define how to find the value returned by a library function 3993 assuming the value has mode MODE. */ 3994rtx 3995ix86_libcall_value (enum machine_mode mode) 3996{ 3997 if (TARGET_64BIT) 3998 { 3999 switch (mode) 4000 { 4001 case SFmode: 4002 case SCmode: 4003 case DFmode: 4004 case DCmode: 4005 case TFmode: 4006 case SDmode: 4007 case DDmode: 4008 case TDmode: 4009 return gen_rtx_REG (mode, FIRST_SSE_REG); 4010 case XFmode: 4011 case XCmode: 4012 return gen_rtx_REG (mode, FIRST_FLOAT_REG); 4013 case TCmode: 4014 return NULL; 4015 default: 4016 return gen_rtx_REG (mode, 0); 4017 } 4018 } 4019 else 4020 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL)); 4021} 4022 4023/* Given a mode, return the register to use for a return value. */ 4024 4025static int 4026ix86_value_regno (enum machine_mode mode, tree func, tree fntype) 4027{ 4028 gcc_assert (!TARGET_64BIT); 4029 4030 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where 4031 we normally prevent this case when mmx is not available. However 4032 some ABIs may require the result to be returned like DImode. */ 4033 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8) 4034 return TARGET_MMX ? FIRST_MMX_REG : 0; 4035 4036 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where 4037 we prevent this case when sse is not available. 
However some ABIs 4038 may require the result to be returned like integer TImode. */ 4039 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) 4040 return TARGET_SSE ? FIRST_SSE_REG : 0; 4041 4042 /* Decimal floating point values can go in %eax, unlike other float modes. */ 4043 if (DECIMAL_FLOAT_MODE_P (mode)) 4044 return 0; 4045 4046 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */ 4047 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387) 4048 return 0; 4049 4050 /* Floating point return values in %st(0), except for local functions when 4051 SSE math is enabled or for functions with sseregparm attribute. */ 4052 if ((func || fntype) 4053 && (mode == SFmode || mode == DFmode)) 4054 { 4055 int sse_level = ix86_function_sseregparm (fntype, func); 4056 if ((sse_level >= 1 && mode == SFmode) 4057 || (sse_level == 2 && mode == DFmode)) 4058 return FIRST_SSE_REG; 4059 } 4060 4061 return FIRST_FLOAT_REG; 4062} 4063 4064/* Create the va_list data type. */ 4065 4066static tree 4067ix86_build_builtin_va_list (void) 4068{ 4069 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; 4070 4071 /* For i386 we use plain pointer to argument area. 
*/ 4072 if (!TARGET_64BIT) 4073 return build_pointer_type (char_type_node); 4074 4075 record = (*lang_hooks.types.make_type) (RECORD_TYPE); 4076 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record); 4077 4078 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"), 4079 unsigned_type_node); 4080 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"), 4081 unsigned_type_node); 4082 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"), 4083 ptr_type_node); 4084 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"), 4085 ptr_type_node); 4086 4087 va_list_gpr_counter_field = f_gpr; 4088 va_list_fpr_counter_field = f_fpr; 4089 4090 DECL_FIELD_CONTEXT (f_gpr) = record; 4091 DECL_FIELD_CONTEXT (f_fpr) = record; 4092 DECL_FIELD_CONTEXT (f_ovf) = record; 4093 DECL_FIELD_CONTEXT (f_sav) = record; 4094 4095 TREE_CHAIN (record) = type_decl; 4096 TYPE_NAME (record) = type_decl; 4097 TYPE_FIELDS (record) = f_gpr; 4098 TREE_CHAIN (f_gpr) = f_fpr; 4099 TREE_CHAIN (f_fpr) = f_ovf; 4100 TREE_CHAIN (f_ovf) = f_sav; 4101 4102 layout_type (record); 4103 4104 /* The correct type is an array type of one element. */ 4105 return build_array_type (record, build_index_type (size_zero_node)); 4106} 4107 4108/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */ 4109 4110static void 4111ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode, 4112 tree type, int *pretend_size ATTRIBUTE_UNUSED, 4113 int no_rtl) 4114{ 4115 CUMULATIVE_ARGS next_cum; 4116 rtx save_area = NULL_RTX, mem; 4117 rtx label; 4118 rtx label_ref; 4119 rtx tmp_reg; 4120 rtx nsse_reg; 4121 int set; 4122 tree fntype; 4123 int stdarg_p; 4124 int i; 4125 4126 if (!TARGET_64BIT) 4127 return; 4128 4129 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size) 4130 return; 4131 4132 /* Indicate to allocate space on the stack for varargs save area. 
*/ 4133 ix86_save_varrargs_registers = 1; 4134 4135 cfun->stack_alignment_needed = 128; 4136 4137 fntype = TREE_TYPE (current_function_decl); 4138 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0 4139 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype))) 4140 != void_type_node)); 4141 4142 /* For varargs, we do not want to skip the dummy va_dcl argument. 4143 For stdargs, we do want to skip the last named argument. */ 4144 next_cum = *cum; 4145 if (stdarg_p) 4146 function_arg_advance (&next_cum, mode, type, 1); 4147 4148 if (!no_rtl) 4149 save_area = frame_pointer_rtx; 4150 4151 set = get_varargs_alias_set (); 4152 4153 for (i = next_cum.regno; 4154 i < ix86_regparm 4155 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD; 4156 i++) 4157 { 4158 mem = gen_rtx_MEM (Pmode, 4159 plus_constant (save_area, i * UNITS_PER_WORD)); 4160 MEM_NOTRAP_P (mem) = 1; 4161 set_mem_alias_set (mem, set); 4162 emit_move_insn (mem, gen_rtx_REG (Pmode, 4163 x86_64_int_parameter_registers[i])); 4164 } 4165 4166 if (next_cum.sse_nregs && cfun->va_list_fpr_size) 4167 { 4168 /* Now emit code to save SSE registers. The AX parameter contains number 4169 of SSE parameter registers used to call this function. We use 4170 sse_prologue_save insn template that produces computed jump across 4171 SSE saves. We need some preparation work to get this working. 
*/ 4172 4173 label = gen_label_rtx (); 4174 label_ref = gen_rtx_LABEL_REF (Pmode, label); 4175 4176 /* Compute address to jump to : 4177 label - 5*eax + nnamed_sse_arguments*5 */ 4178 tmp_reg = gen_reg_rtx (Pmode); 4179 nsse_reg = gen_reg_rtx (Pmode); 4180 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0))); 4181 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, 4182 gen_rtx_MULT (Pmode, nsse_reg, 4183 GEN_INT (4)))); 4184 if (next_cum.sse_regno) 4185 emit_move_insn 4186 (nsse_reg, 4187 gen_rtx_CONST (DImode, 4188 gen_rtx_PLUS (DImode, 4189 label_ref, 4190 GEN_INT (next_cum.sse_regno * 4)))); 4191 else 4192 emit_move_insn (nsse_reg, label_ref); 4193 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg)); 4194 4195 /* Compute address of memory block we save into. We always use pointer 4196 pointing 127 bytes after first byte to store - this is needed to keep 4197 instruction size limited by 4 bytes. */ 4198 tmp_reg = gen_reg_rtx (Pmode); 4199 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, 4200 plus_constant (save_area, 4201 8 * REGPARM_MAX + 127))); 4202 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127)); 4203 MEM_NOTRAP_P (mem) = 1; 4204 set_mem_alias_set (mem, set); 4205 set_mem_align (mem, BITS_PER_WORD); 4206 4207 /* And finally do the dirty job! */ 4208 emit_insn (gen_sse_prologue_save (mem, nsse_reg, 4209 GEN_INT (next_cum.sse_regno), label)); 4210 } 4211 4212} 4213 4214/* Implement va_start. */ 4215 4216void 4217ix86_va_start (tree valist, rtx nextarg) 4218{ 4219 HOST_WIDE_INT words, n_gpr, n_fpr; 4220 tree f_gpr, f_fpr, f_ovf, f_sav; 4221 tree gpr, fpr, ovf, sav, t; 4222 tree type; 4223 4224 /* Only 64bit target needs something special. 
*/ 4225 if (!TARGET_64BIT) 4226 { 4227 std_expand_builtin_va_start (valist, nextarg); 4228 return; 4229 } 4230 4231 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); 4232 f_fpr = TREE_CHAIN (f_gpr); 4233 f_ovf = TREE_CHAIN (f_fpr); 4234 f_sav = TREE_CHAIN (f_ovf); 4235 4236 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist); 4237 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE); 4238 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); 4239 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); 4240 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); 4241 4242 /* Count number of gp and fp argument registers used. */ 4243 words = current_function_args_info.words; 4244 n_gpr = current_function_args_info.regno; 4245 n_fpr = current_function_args_info.sse_regno; 4246 4247 if (TARGET_DEBUG_ARG) 4248 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n", 4249 (int) words, (int) n_gpr, (int) n_fpr); 4250 4251 if (cfun->va_list_gpr_size) 4252 { 4253 type = TREE_TYPE (gpr); 4254 t = build2 (MODIFY_EXPR, type, gpr, 4255 build_int_cst (type, n_gpr * 8)); 4256 TREE_SIDE_EFFECTS (t) = 1; 4257 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 4258 } 4259 4260 if (cfun->va_list_fpr_size) 4261 { 4262 type = TREE_TYPE (fpr); 4263 t = build2 (MODIFY_EXPR, type, fpr, 4264 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX)); 4265 TREE_SIDE_EFFECTS (t) = 1; 4266 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 4267 } 4268 4269 /* Find the overflow area. 
*/ 4270 type = TREE_TYPE (ovf); 4271 t = make_tree (type, virtual_incoming_args_rtx); 4272 if (words != 0) 4273 t = build2 (PLUS_EXPR, type, t, 4274 build_int_cst (type, words * UNITS_PER_WORD)); 4275 t = build2 (MODIFY_EXPR, type, ovf, t); 4276 TREE_SIDE_EFFECTS (t) = 1; 4277 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 4278 4279 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size) 4280 { 4281 /* Find the register save area. 4282 Prologue of the function save it right above stack frame. */ 4283 type = TREE_TYPE (sav); 4284 t = make_tree (type, frame_pointer_rtx); 4285 t = build2 (MODIFY_EXPR, type, sav, t); 4286 TREE_SIDE_EFFECTS (t) = 1; 4287 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 4288 } 4289} 4290 4291/* Implement va_arg. */ 4292 4293tree 4294ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p) 4295{ 4296 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 }; 4297 tree f_gpr, f_fpr, f_ovf, f_sav; 4298 tree gpr, fpr, ovf, sav, t; 4299 int size, rsize; 4300 tree lab_false, lab_over = NULL_TREE; 4301 tree addr, t2; 4302 rtx container; 4303 int indirect_p = 0; 4304 tree ptrtype; 4305 enum machine_mode nat_mode; 4306 4307 /* Only 64bit target needs something special. 
*/ 4308 if (!TARGET_64BIT) 4309 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); 4310 4311 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); 4312 f_fpr = TREE_CHAIN (f_gpr); 4313 f_ovf = TREE_CHAIN (f_fpr); 4314 f_sav = TREE_CHAIN (f_ovf); 4315 4316 valist = build_va_arg_indirect_ref (valist); 4317 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE); 4318 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); 4319 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); 4320 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); 4321 4322 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); 4323 if (indirect_p) 4324 type = build_pointer_type (type); 4325 size = int_size_in_bytes (type); 4326 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 4327 4328 nat_mode = type_natural_mode (type); 4329 container = construct_container (nat_mode, TYPE_MODE (type), type, 0, 4330 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0); 4331 4332 /* Pull the value out of the saved registers. */ 4333 4334 addr = create_tmp_var (ptr_type_node, "addr"); 4335 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set (); 4336 4337 if (container) 4338 { 4339 int needed_intregs, needed_sseregs; 4340 bool need_temp; 4341 tree int_addr, sse_addr; 4342 4343 lab_false = create_artificial_label (); 4344 lab_over = create_artificial_label (); 4345 4346 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs); 4347 4348 need_temp = (!REG_P (container) 4349 && ((needed_intregs && TYPE_ALIGN (type) > 64) 4350 || TYPE_ALIGN (type) > 128)); 4351 4352 /* In case we are passing structure, verify that it is consecutive block 4353 on the register save area. If not we need to do moves. 
*/ 4354 if (!need_temp && !REG_P (container)) 4355 { 4356 /* Verify that all registers are strictly consecutive */ 4357 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0)))) 4358 { 4359 int i; 4360 4361 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) 4362 { 4363 rtx slot = XVECEXP (container, 0, i); 4364 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i 4365 || INTVAL (XEXP (slot, 1)) != i * 16) 4366 need_temp = 1; 4367 } 4368 } 4369 else 4370 { 4371 int i; 4372 4373 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) 4374 { 4375 rtx slot = XVECEXP (container, 0, i); 4376 if (REGNO (XEXP (slot, 0)) != (unsigned int) i 4377 || INTVAL (XEXP (slot, 1)) != i * 8) 4378 need_temp = 1; 4379 } 4380 } 4381 } 4382 if (!need_temp) 4383 { 4384 int_addr = addr; 4385 sse_addr = addr; 4386 } 4387 else 4388 { 4389 int_addr = create_tmp_var (ptr_type_node, "int_addr"); 4390 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set (); 4391 sse_addr = create_tmp_var (ptr_type_node, "sse_addr"); 4392 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set (); 4393 } 4394 4395 /* First ensure that we fit completely in registers. */ 4396 if (needed_intregs) 4397 { 4398 t = build_int_cst (TREE_TYPE (gpr), 4399 (REGPARM_MAX - needed_intregs + 1) * 8); 4400 t = build2 (GE_EXPR, boolean_type_node, gpr, t); 4401 t2 = build1 (GOTO_EXPR, void_type_node, lab_false); 4402 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); 4403 gimplify_and_add (t, pre_p); 4404 } 4405 if (needed_sseregs) 4406 { 4407 t = build_int_cst (TREE_TYPE (fpr), 4408 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16 4409 + REGPARM_MAX * 8); 4410 t = build2 (GE_EXPR, boolean_type_node, fpr, t); 4411 t2 = build1 (GOTO_EXPR, void_type_node, lab_false); 4412 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); 4413 gimplify_and_add (t, pre_p); 4414 } 4415 4416 /* Compute index to start of area used for integer regs. 
*/ 4417 if (needed_intregs) 4418 { 4419 /* int_addr = gpr + sav; */ 4420 t = fold_convert (ptr_type_node, gpr); 4421 t = build2 (PLUS_EXPR, ptr_type_node, sav, t); 4422 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t); 4423 gimplify_and_add (t, pre_p); 4424 } 4425 if (needed_sseregs) 4426 { 4427 /* sse_addr = fpr + sav; */ 4428 t = fold_convert (ptr_type_node, fpr); 4429 t = build2 (PLUS_EXPR, ptr_type_node, sav, t); 4430 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t); 4431 gimplify_and_add (t, pre_p); 4432 } 4433 if (need_temp) 4434 { 4435 int i; 4436 tree temp = create_tmp_var (type, "va_arg_tmp"); 4437 4438 /* addr = &temp; */ 4439 t = build1 (ADDR_EXPR, build_pointer_type (type), temp); 4440 t = build2 (MODIFY_EXPR, void_type_node, addr, t); 4441 gimplify_and_add (t, pre_p); 4442 4443 for (i = 0; i < XVECLEN (container, 0); i++) 4444 { 4445 rtx slot = XVECEXP (container, 0, i); 4446 rtx reg = XEXP (slot, 0); 4447 enum machine_mode mode = GET_MODE (reg); 4448 tree piece_type = lang_hooks.types.type_for_mode (mode, 1); 4449 tree addr_type = build_pointer_type (piece_type); 4450 tree src_addr, src; 4451 int src_offset; 4452 tree dest_addr, dest; 4453 4454 if (SSE_REGNO_P (REGNO (reg))) 4455 { 4456 src_addr = sse_addr; 4457 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16; 4458 } 4459 else 4460 { 4461 src_addr = int_addr; 4462 src_offset = REGNO (reg) * 8; 4463 } 4464 src_addr = fold_convert (addr_type, src_addr); 4465 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr, 4466 size_int (src_offset))); 4467 src = build_va_arg_indirect_ref (src_addr); 4468 4469 dest_addr = fold_convert (addr_type, addr); 4470 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr, 4471 size_int (INTVAL (XEXP (slot, 1))))); 4472 dest = build_va_arg_indirect_ref (dest_addr); 4473 4474 t = build2 (MODIFY_EXPR, void_type_node, dest, src); 4475 gimplify_and_add (t, pre_p); 4476 } 4477 } 4478 4479 if (needed_intregs) 4480 { 4481 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr, 
4482 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8)); 4483 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t); 4484 gimplify_and_add (t, pre_p); 4485 } 4486 if (needed_sseregs) 4487 { 4488 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr, 4489 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16)); 4490 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t); 4491 gimplify_and_add (t, pre_p); 4492 } 4493 4494 t = build1 (GOTO_EXPR, void_type_node, lab_over); 4495 gimplify_and_add (t, pre_p); 4496 4497 t = build1 (LABEL_EXPR, void_type_node, lab_false); 4498 append_to_statement_list (t, pre_p); 4499 } 4500 4501 /* ... otherwise out of the overflow area. */ 4502 4503 /* Care for on-stack alignment if needed. */ 4504 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64 4505 || integer_zerop (TYPE_SIZE (type))) 4506 t = ovf; 4507 else 4508 { 4509 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8; 4510 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf, 4511 build_int_cst (TREE_TYPE (ovf), align - 1)); 4512 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, 4513 build_int_cst (TREE_TYPE (t), -align)); 4514 } 4515 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); 4516 4517 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t); 4518 gimplify_and_add (t2, pre_p); 4519 4520 t = build2 (PLUS_EXPR, TREE_TYPE (t), t, 4521 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD)); 4522 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t); 4523 gimplify_and_add (t, pre_p); 4524 4525 if (container) 4526 { 4527 t = build1 (LABEL_EXPR, void_type_node, lab_over); 4528 append_to_statement_list (t, pre_p); 4529 } 4530 4531 ptrtype = build_pointer_type (type); 4532 addr = fold_convert (ptrtype, addr); 4533 4534 if (indirect_p) 4535 addr = build_va_arg_indirect_ref (addr); 4536 return build_va_arg_indirect_ref (addr); 4537} 4538 4539/* Return nonzero if OPNUM's MEM should be matched 4540 in movabs* patterns. 
*/ 4541 4542int 4543ix86_check_movabs (rtx insn, int opnum) 4544{ 4545 rtx set, mem; 4546 4547 set = PATTERN (insn); 4548 if (GET_CODE (set) == PARALLEL) 4549 set = XVECEXP (set, 0, 0); 4550 gcc_assert (GET_CODE (set) == SET); 4551 mem = XEXP (set, opnum); 4552 while (GET_CODE (mem) == SUBREG) 4553 mem = SUBREG_REG (mem); 4554 gcc_assert (GET_CODE (mem) == MEM); 4555 return (volatile_ok || !MEM_VOLATILE_P (mem)); 4556} 4557 4558/* Initialize the table of extra 80387 mathematical constants. */ 4559 4560static void 4561init_ext_80387_constants (void) 4562{ 4563 static const char * cst[5] = 4564 { 4565 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */ 4566 "0.6931471805599453094286904741849753009", /* 1: fldln2 */ 4567 "1.4426950408889634073876517827983434472", /* 2: fldl2e */ 4568 "3.3219280948873623478083405569094566090", /* 3: fldl2t */ 4569 "3.1415926535897932385128089594061862044", /* 4: fldpi */ 4570 }; 4571 int i; 4572 4573 for (i = 0; i < 5; i++) 4574 { 4575 real_from_string (&ext_80387_constants_table[i], cst[i]); 4576 /* Ensure each constant is rounded to XFmode precision. */ 4577 real_convert (&ext_80387_constants_table[i], 4578 XFmode, &ext_80387_constants_table[i]); 4579 } 4580 4581 ext_80387_constants_init = 1; 4582} 4583 4584/* Return true if the constant is something that can be loaded with 4585 a special instruction. */ 4586 4587int 4588standard_80387_constant_p (rtx x) 4589{ 4590 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x))) 4591 return -1; 4592 4593 if (x == CONST0_RTX (GET_MODE (x))) 4594 return 1; 4595 if (x == CONST1_RTX (GET_MODE (x))) 4596 return 2; 4597 4598 /* For XFmode constants, try to find a special 80387 instruction when 4599 optimizing for size or on those CPUs that benefit from them. */ 4600 if (GET_MODE (x) == XFmode 4601 && (optimize_size || x86_ext_80387_constants & TUNEMASK)) 4602 { 4603 REAL_VALUE_TYPE r; 4604 int i; 4605 4606 if (! 
ext_80387_constants_init) 4607 init_ext_80387_constants (); 4608 4609 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 4610 for (i = 0; i < 5; i++) 4611 if (real_identical (&r, &ext_80387_constants_table[i])) 4612 return i + 3; 4613 } 4614 4615 return 0; 4616} 4617 4618/* Return the opcode of the special instruction to be used to load 4619 the constant X. */ 4620 4621const char * 4622standard_80387_constant_opcode (rtx x) 4623{ 4624 switch (standard_80387_constant_p (x)) 4625 { 4626 case 1: 4627 return "fldz"; 4628 case 2: 4629 return "fld1"; 4630 case 3: 4631 return "fldlg2"; 4632 case 4: 4633 return "fldln2"; 4634 case 5: 4635 return "fldl2e"; 4636 case 6: 4637 return "fldl2t"; 4638 case 7: 4639 return "fldpi"; 4640 default: 4641 gcc_unreachable (); 4642 } 4643} 4644 4645/* Return the CONST_DOUBLE representing the 80387 constant that is 4646 loaded by the specified special instruction. The argument IDX 4647 matches the return value from standard_80387_constant_p. */ 4648 4649rtx 4650standard_80387_constant_rtx (int idx) 4651{ 4652 int i; 4653 4654 if (! ext_80387_constants_init) 4655 init_ext_80387_constants (); 4656 4657 switch (idx) 4658 { 4659 case 3: 4660 case 4: 4661 case 5: 4662 case 6: 4663 case 7: 4664 i = idx - 3; 4665 break; 4666 4667 default: 4668 gcc_unreachable (); 4669 } 4670 4671 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i], 4672 XFmode); 4673} 4674 4675/* Return 1 if mode is a valid mode for sse. */ 4676static int 4677standard_sse_mode_p (enum machine_mode mode) 4678{ 4679 switch (mode) 4680 { 4681 case V16QImode: 4682 case V8HImode: 4683 case V4SImode: 4684 case V2DImode: 4685 case V4SFmode: 4686 case V2DFmode: 4687 return 1; 4688 4689 default: 4690 return 0; 4691 } 4692} 4693 4694/* Return 1 if X is FP constant we can load to SSE register w/o using memory. 
4695 */ 4696int 4697standard_sse_constant_p (rtx x) 4698{ 4699 enum machine_mode mode = GET_MODE (x); 4700 4701 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x))) 4702 return 1; 4703 if (vector_all_ones_operand (x, mode) 4704 && standard_sse_mode_p (mode)) 4705 return TARGET_SSE2 ? 2 : -1; 4706 4707 return 0; 4708} 4709 4710/* Return the opcode of the special instruction to be used to load 4711 the constant X. */ 4712 4713const char * 4714standard_sse_constant_opcode (rtx insn, rtx x) 4715{ 4716 switch (standard_sse_constant_p (x)) 4717 { 4718 case 1: 4719 if (get_attr_mode (insn) == MODE_V4SF) 4720 return "xorps\t%0, %0"; 4721 else if (get_attr_mode (insn) == MODE_V2DF) 4722 return "xorpd\t%0, %0"; 4723 else 4724 return "pxor\t%0, %0"; 4725 case 2: 4726 return "pcmpeqd\t%0, %0"; 4727 } 4728 gcc_unreachable (); 4729} 4730 4731/* Returns 1 if OP contains a symbol reference */ 4732 4733int 4734symbolic_reference_mentioned_p (rtx op) 4735{ 4736 const char *fmt; 4737 int i; 4738 4739 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) 4740 return 1; 4741 4742 fmt = GET_RTX_FORMAT (GET_CODE (op)); 4743 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) 4744 { 4745 if (fmt[i] == 'E') 4746 { 4747 int j; 4748 4749 for (j = XVECLEN (op, i) - 1; j >= 0; j--) 4750 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) 4751 return 1; 4752 } 4753 4754 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) 4755 return 1; 4756 } 4757 4758 return 0; 4759} 4760 4761/* Return 1 if it is appropriate to emit `ret' instructions in the 4762 body of a function. Do this only if the epilogue is simple, needing a 4763 couple of insns. Prior to reloading, we can't tell how many registers 4764 must be saved, so return 0 then. Return 0 if there is no frame 4765 marker to de-allocate. */ 4766 4767int 4768ix86_can_use_return_insn_p (void) 4769{ 4770 struct ix86_frame frame; 4771 4772 if (! 
reload_completed || frame_pointer_needed) 4773 return 0; 4774 4775 /* Don't allow more than 32 pop, since that's all we can do 4776 with one instruction. */ 4777 if (current_function_pops_args 4778 && current_function_args_size >= 32768) 4779 return 0; 4780 4781 ix86_compute_frame_layout (&frame); 4782 return frame.to_allocate == 0 && frame.nregs == 0; 4783} 4784 4785/* Value should be nonzero if functions must have frame pointers. 4786 Zero means the frame pointer need not be set up (and parms may 4787 be accessed via the stack pointer) in functions that seem suitable. */ 4788 4789int 4790ix86_frame_pointer_required (void) 4791{ 4792 /* If we accessed previous frames, then the generated code expects 4793 to be able to access the saved ebp value in our frame. */ 4794 if (cfun->machine->accesses_prev_frame) 4795 return 1; 4796 4797 /* Several x86 os'es need a frame pointer for other reasons, 4798 usually pertaining to setjmp. */ 4799 if (SUBTARGET_FRAME_POINTER_REQUIRED) 4800 return 1; 4801 4802 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off 4803 the frame pointer by default. Turn it back on now if we've not 4804 got a leaf function. */ 4805 if (TARGET_OMIT_LEAF_FRAME_POINTER 4806 && (!current_function_is_leaf 4807 || ix86_current_function_calls_tls_descriptor)) 4808 return 1; 4809 4810 if (current_function_profile) 4811 return 1; 4812 4813 return 0; 4814} 4815 4816/* Record that the current function accesses previous call frames. */ 4817 4818void 4819ix86_setup_frame_addresses (void) 4820{ 4821 cfun->machine->accesses_prev_frame = 1; 4822} 4823 4824#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO 4825# define USE_HIDDEN_LINKONCE 1 4826#else 4827# define USE_HIDDEN_LINKONCE 0 4828#endif 4829 4830static int pic_labels_used; 4831 4832/* Fills in the label name that should be used for a pc thunk for 4833 the given register. 
*/ 4834 4835static void 4836get_pc_thunk_name (char name[32], unsigned int regno) 4837{ 4838 gcc_assert (!TARGET_64BIT); 4839 4840 if (USE_HIDDEN_LINKONCE) 4841 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]); 4842 else 4843 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); 4844} 4845 4846 4847/* This function generates code for -fpic that loads %ebx with 4848 the return address of the caller and then returns. */ 4849 4850void 4851ix86_file_end (void) 4852{ 4853 rtx xops[2]; 4854 int regno; 4855 4856 for (regno = 0; regno < 8; ++regno) 4857 { 4858 char name[32]; 4859 4860 if (! ((pic_labels_used >> regno) & 1)) 4861 continue; 4862 4863 get_pc_thunk_name (name, regno); 4864 4865#if TARGET_MACHO 4866 if (TARGET_MACHO) 4867 { 4868 switch_to_section (darwin_sections[text_coal_section]); 4869 fputs ("\t.weak_definition\t", asm_out_file); 4870 assemble_name (asm_out_file, name); 4871 fputs ("\n\t.private_extern\t", asm_out_file); 4872 assemble_name (asm_out_file, name); 4873 fputs ("\n", asm_out_file); 4874 ASM_OUTPUT_LABEL (asm_out_file, name); 4875 } 4876 else 4877#endif 4878 if (USE_HIDDEN_LINKONCE) 4879 { 4880 tree decl; 4881 4882 decl = build_decl (FUNCTION_DECL, get_identifier (name), 4883 error_mark_node); 4884 TREE_PUBLIC (decl) = 1; 4885 TREE_STATIC (decl) = 1; 4886 DECL_ONE_ONLY (decl) = 1; 4887 4888 (*targetm.asm_out.unique_section) (decl, 0); 4889 switch_to_section (get_named_section (decl, NULL, 0)); 4890 4891 (*targetm.asm_out.globalize_label) (asm_out_file, name); 4892 fputs ("\t.hidden\t", asm_out_file); 4893 assemble_name (asm_out_file, name); 4894 fputc ('\n', asm_out_file); 4895 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); 4896 } 4897 else 4898 { 4899 switch_to_section (text_section); 4900 ASM_OUTPUT_LABEL (asm_out_file, name); 4901 } 4902 4903 xops[0] = gen_rtx_REG (SImode, regno); 4904 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx); 4905 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops); 4906 output_asm_insn ("ret", xops); 4907 
} 4908 4909 if (NEED_INDICATE_EXEC_STACK) 4910 file_end_indicate_exec_stack (); 4911} 4912 4913/* Emit code for the SET_GOT patterns. */ 4914 4915const char * 4916output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) 4917{ 4918 rtx xops[3]; 4919 4920 xops[0] = dest; 4921 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); 4922 4923 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic) 4924 { 4925 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ()); 4926 4927 if (!flag_pic) 4928 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops); 4929 else 4930 output_asm_insn ("call\t%a2", xops); 4931 4932#if TARGET_MACHO 4933 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This 4934 is what will be referenced by the Mach-O PIC subsystem. */ 4935 if (!label) 4936 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ()); 4937#endif 4938 4939 (*targetm.asm_out.internal_label) (asm_out_file, "L", 4940 CODE_LABEL_NUMBER (XEXP (xops[2], 0))); 4941 4942 if (flag_pic) 4943 output_asm_insn ("pop{l}\t%0", xops); 4944 } 4945 else 4946 { 4947 char name[32]; 4948 get_pc_thunk_name (name, REGNO (dest)); 4949 pic_labels_used |= 1 << REGNO (dest); 4950 4951 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); 4952 xops[2] = gen_rtx_MEM (QImode, xops[2]); 4953 output_asm_insn ("call\t%X2", xops); 4954 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This 4955 is what will be referenced by the Mach-O PIC subsystem. 
*/ 4956#if TARGET_MACHO 4957 if (!label) 4958 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ()); 4959 else 4960 targetm.asm_out.internal_label (asm_out_file, "L", 4961 CODE_LABEL_NUMBER (label)); 4962#endif 4963 } 4964 4965 if (TARGET_MACHO) 4966 return ""; 4967 4968 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION) 4969 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops); 4970 else 4971 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops); 4972 4973 return ""; 4974} 4975 4976/* Generate an "push" pattern for input ARG. */ 4977 4978static rtx 4979gen_push (rtx arg) 4980{ 4981 return gen_rtx_SET (VOIDmode, 4982 gen_rtx_MEM (Pmode, 4983 gen_rtx_PRE_DEC (Pmode, 4984 stack_pointer_rtx)), 4985 arg); 4986} 4987 4988/* Return >= 0 if there is an unused call-clobbered register available 4989 for the entire function. */ 4990 4991static unsigned int 4992ix86_select_alt_pic_regnum (void) 4993{ 4994 if (current_function_is_leaf && !current_function_profile 4995 && !ix86_current_function_calls_tls_descriptor) 4996 { 4997 int i; 4998 for (i = 2; i >= 0; --i) 4999 if (!regs_ever_live[i]) 5000 return i; 5001 } 5002 5003 return INVALID_REGNUM; 5004} 5005 5006/* Return 1 if we need to save REGNO. 
*/ 5007static int 5008ix86_save_reg (unsigned int regno, int maybe_eh_return) 5009{ 5010 if (pic_offset_table_rtx 5011 && regno == REAL_PIC_OFFSET_TABLE_REGNUM 5012 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM] 5013 || current_function_profile 5014 || current_function_calls_eh_return 5015 || current_function_uses_const_pool)) 5016 { 5017 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM) 5018 return 0; 5019 return 1; 5020 } 5021 5022 if (current_function_calls_eh_return && maybe_eh_return) 5023 { 5024 unsigned i; 5025 for (i = 0; ; i++) 5026 { 5027 unsigned test = EH_RETURN_DATA_REGNO (i); 5028 if (test == INVALID_REGNUM) 5029 break; 5030 if (test == regno) 5031 return 1; 5032 } 5033 } 5034 5035 if (cfun->machine->force_align_arg_pointer 5036 && regno == REGNO (cfun->machine->force_align_arg_pointer)) 5037 return 1; 5038 5039 return (regs_ever_live[regno] 5040 && !call_used_regs[regno] 5041 && !fixed_regs[regno] 5042 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); 5043} 5044 5045/* Return number of registers to be saved on the stack. */ 5046 5047static int 5048ix86_nsaved_regs (void) 5049{ 5050 int nregs = 0; 5051 int regno; 5052 5053 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) 5054 if (ix86_save_reg (regno, true)) 5055 nregs++; 5056 return nregs; 5057} 5058 5059/* Return the offset between two registers, one to be eliminated, and the other 5060 its replacement, at the start of a routine. 
*/ 5061 5062HOST_WIDE_INT 5063ix86_initial_elimination_offset (int from, int to) 5064{ 5065 struct ix86_frame frame; 5066 ix86_compute_frame_layout (&frame); 5067 5068 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 5069 return frame.hard_frame_pointer_offset; 5070 else if (from == FRAME_POINTER_REGNUM 5071 && to == HARD_FRAME_POINTER_REGNUM) 5072 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; 5073 else 5074 { 5075 gcc_assert (to == STACK_POINTER_REGNUM); 5076 5077 if (from == ARG_POINTER_REGNUM) 5078 return frame.stack_pointer_offset; 5079 5080 gcc_assert (from == FRAME_POINTER_REGNUM); 5081 return frame.stack_pointer_offset - frame.frame_pointer_offset; 5082 } 5083} 5084 5085/* Fill structure ix86_frame about frame of currently computed function. */ 5086 5087static void 5088ix86_compute_frame_layout (struct ix86_frame *frame) 5089{ 5090 HOST_WIDE_INT total_size; 5091 unsigned int stack_alignment_needed; 5092 HOST_WIDE_INT offset; 5093 unsigned int preferred_alignment; 5094 HOST_WIDE_INT size = get_frame_size (); 5095 5096 frame->nregs = ix86_nsaved_regs (); 5097 total_size = size; 5098 5099 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT; 5100 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT; 5101 5102 /* During reload iteration the amount of registers saved can change. 5103 Recompute the value as needed. Do not recompute when amount of registers 5104 didn't change as reload does multiple calls to the function and does not 5105 expect the decision to change within single iteration. */ 5106 if (!optimize_size 5107 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs) 5108 { 5109 int count = frame->nregs; 5110 5111 cfun->machine->use_fast_prologue_epilogue_nregs = count; 5112 /* The fast prologue uses move instead of push to save registers. 
This 5113 is significantly longer, but also executes faster as modern hardware 5114 can execute the moves in parallel, but can't do that for push/pop. 5115 5116 Be careful about choosing what prologue to emit: When function takes 5117 many instructions to execute we may use slow version as well as in 5118 case function is known to be outside hot spot (this is known with 5119 feedback only). Weight the size of function by number of registers 5120 to save as it is cheap to use one or two push instructions but very 5121 slow to use many of them. */ 5122 if (count) 5123 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; 5124 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL 5125 || (flag_branch_probabilities 5126 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT)) 5127 cfun->machine->use_fast_prologue_epilogue = false; 5128 else 5129 cfun->machine->use_fast_prologue_epilogue 5130 = !expensive_function_p (count); 5131 } 5132 if (TARGET_PROLOGUE_USING_MOVE 5133 && cfun->machine->use_fast_prologue_epilogue) 5134 frame->save_regs_using_mov = true; 5135 else 5136 frame->save_regs_using_mov = false; 5137 5138 5139 /* Skip return address and saved base pointer. */ 5140 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD; 5141 5142 frame->hard_frame_pointer_offset = offset; 5143 5144 /* Do some sanity checking of stack_alignment_needed and 5145 preferred_alignment, since i386 port is the only using those features 5146 that may break easily. 
*/ 5147 5148 gcc_assert (!size || stack_alignment_needed); 5149 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); 5150 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT); 5151 gcc_assert (stack_alignment_needed 5152 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT); 5153 5154 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT) 5155 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT; 5156 5157 /* Register save area */ 5158 offset += frame->nregs * UNITS_PER_WORD; 5159 5160 /* Va-arg area */ 5161 if (ix86_save_varrargs_registers) 5162 { 5163 offset += X86_64_VARARGS_SIZE; 5164 frame->va_arg_size = X86_64_VARARGS_SIZE; 5165 } 5166 else 5167 frame->va_arg_size = 0; 5168 5169 /* Align start of frame for local function. */ 5170 frame->padding1 = ((offset + stack_alignment_needed - 1) 5171 & -stack_alignment_needed) - offset; 5172 5173 offset += frame->padding1; 5174 5175 /* Frame pointer points here. */ 5176 frame->frame_pointer_offset = offset; 5177 5178 offset += size; 5179 5180 /* Add outgoing arguments area. Can be skipped if we eliminated 5181 all the function calls as dead code. 5182 Skipping is however impossible when function calls alloca. Alloca 5183 expander assumes that last current_function_outgoing_args_size 5184 of stack frame are unused. */ 5185 if (ACCUMULATE_OUTGOING_ARGS 5186 && (!current_function_is_leaf || current_function_calls_alloca 5187 || ix86_current_function_calls_tls_descriptor)) 5188 { 5189 offset += current_function_outgoing_args_size; 5190 frame->outgoing_arguments_size = current_function_outgoing_args_size; 5191 } 5192 else 5193 frame->outgoing_arguments_size = 0; 5194 5195 /* Align stack boundary. Only needed if we're calling another function 5196 or using alloca. 
*/ 5197 if (!current_function_is_leaf || current_function_calls_alloca 5198 || ix86_current_function_calls_tls_descriptor) 5199 frame->padding2 = ((offset + preferred_alignment - 1) 5200 & -preferred_alignment) - offset; 5201 else 5202 frame->padding2 = 0; 5203 5204 offset += frame->padding2; 5205 5206 /* We've reached end of stack frame. */ 5207 frame->stack_pointer_offset = offset; 5208 5209 /* Size prologue needs to allocate. */ 5210 frame->to_allocate = 5211 (size + frame->padding1 + frame->padding2 5212 + frame->outgoing_arguments_size + frame->va_arg_size); 5213 5214 if ((!frame->to_allocate && frame->nregs <= 1) 5215 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000)) 5216 frame->save_regs_using_mov = false; 5217 5218 if (TARGET_RED_ZONE && current_function_sp_is_unchanging 5219 && current_function_is_leaf 5220 && !ix86_current_function_calls_tls_descriptor) 5221 { 5222 frame->red_zone_size = frame->to_allocate; 5223 if (frame->save_regs_using_mov) 5224 frame->red_zone_size += frame->nregs * UNITS_PER_WORD; 5225 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) 5226 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; 5227 } 5228 else 5229 frame->red_zone_size = 0; 5230 frame->to_allocate -= frame->red_zone_size; 5231 frame->stack_pointer_offset -= frame->red_zone_size; 5232#if 0 5233 fprintf (stderr, "nregs: %i\n", frame->nregs); 5234 fprintf (stderr, "size: %i\n", size); 5235 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed); 5236 fprintf (stderr, "padding1: %i\n", frame->padding1); 5237 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size); 5238 fprintf (stderr, "padding2: %i\n", frame->padding2); 5239 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate); 5240 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size); 5241 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset); 5242 fprintf (stderr, "hard_frame_pointer_offset: %i\n", 5243 frame->hard_frame_pointer_offset); 5244 
fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset); 5245#endif 5246} 5247 5248/* Emit code to save registers in the prologue. */ 5249 5250static void 5251ix86_emit_save_regs (void) 5252{ 5253 unsigned int regno; 5254 rtx insn; 5255 5256 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; ) 5257 if (ix86_save_reg (regno, true)) 5258 { 5259 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno))); 5260 RTX_FRAME_RELATED_P (insn) = 1; 5261 } 5262} 5263 5264/* Emit code to save registers using MOV insns. First register 5265 is restored from POINTER + OFFSET. */ 5266static void 5267ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset) 5268{ 5269 unsigned int regno; 5270 rtx insn; 5271 5272 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 5273 if (ix86_save_reg (regno, true)) 5274 { 5275 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer), 5276 Pmode, offset), 5277 gen_rtx_REG (Pmode, regno)); 5278 RTX_FRAME_RELATED_P (insn) = 1; 5279 offset += UNITS_PER_WORD; 5280 } 5281} 5282 5283/* Expand prologue or epilogue stack adjustment. 5284 The pattern exist to put a dependency on all ebp-based memory accesses. 5285 STYLE should be negative if instructions should be marked as frame related, 5286 zero if %r11 register is live and cannot be freely used and positive 5287 otherwise. */ 5288 5289static void 5290pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style) 5291{ 5292 rtx insn; 5293 5294 if (! TARGET_64BIT) 5295 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset)); 5296 else if (x86_64_immediate_operand (offset, DImode)) 5297 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset)); 5298 else 5299 { 5300 rtx r11; 5301 /* r11 is used by indirect sibcall return as well, set before the 5302 epilogue and used after the epilogue. ATM indirect sibcall 5303 shouldn't be used together with huge frame sizes in one 5304 function because of the frame_size check in sibcall.c. 
*/ 5305 gcc_assert (style); 5306 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */); 5307 insn = emit_insn (gen_rtx_SET (DImode, r11, offset)); 5308 if (style < 0) 5309 RTX_FRAME_RELATED_P (insn) = 1; 5310 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11, 5311 offset)); 5312 } 5313 if (style < 0) 5314 RTX_FRAME_RELATED_P (insn) = 1; 5315} 5316 5317/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */ 5318 5319static rtx 5320ix86_internal_arg_pointer (void) 5321{ 5322 bool has_force_align_arg_pointer = 5323 (0 != lookup_attribute (ix86_force_align_arg_pointer_string, 5324 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))); 5325 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN 5326 && DECL_NAME (current_function_decl) 5327 && MAIN_NAME_P (DECL_NAME (current_function_decl)) 5328 && DECL_FILE_SCOPE_P (current_function_decl)) 5329 || ix86_force_align_arg_pointer 5330 || has_force_align_arg_pointer) 5331 { 5332 /* Nested functions can't realign the stack due to a register 5333 conflict. */ 5334 if (DECL_CONTEXT (current_function_decl) 5335 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL) 5336 { 5337 if (ix86_force_align_arg_pointer) 5338 warning (0, "-mstackrealign ignored for nested functions"); 5339 if (has_force_align_arg_pointer) 5340 error ("%s not supported for nested functions", 5341 ix86_force_align_arg_pointer_string); 5342 return virtual_incoming_args_rtx; 5343 } 5344 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2); 5345 return copy_to_reg (cfun->machine->force_align_arg_pointer); 5346 } 5347 else 5348 return virtual_incoming_args_rtx; 5349} 5350 5351/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook. 5352 This is called from dwarf2out.c to emit call frame instructions 5353 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. 
*/ 5354static void 5355ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index) 5356{ 5357 rtx unspec = SET_SRC (pattern); 5358 gcc_assert (GET_CODE (unspec) == UNSPEC); 5359 5360 switch (index) 5361 { 5362 case UNSPEC_REG_SAVE: 5363 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0), 5364 SET_DEST (pattern)); 5365 break; 5366 case UNSPEC_DEF_CFA: 5367 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)), 5368 INTVAL (XVECEXP (unspec, 0, 0))); 5369 break; 5370 default: 5371 gcc_unreachable (); 5372 } 5373} 5374 5375/* Expand the prologue into a bunch of separate insns. */ 5376 5377void 5378ix86_expand_prologue (void) 5379{ 5380 rtx insn; 5381 bool pic_reg_used; 5382 struct ix86_frame frame; 5383 HOST_WIDE_INT allocate; 5384 5385 ix86_compute_frame_layout (&frame); 5386 5387 if (cfun->machine->force_align_arg_pointer) 5388 { 5389 rtx x, y; 5390 5391 /* Grab the argument pointer. */ 5392 x = plus_constant (stack_pointer_rtx, 4); 5393 y = cfun->machine->force_align_arg_pointer; 5394 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x)); 5395 RTX_FRAME_RELATED_P (insn) = 1; 5396 5397 /* The unwind info consists of two parts: install the fafp as the cfa, 5398 and record the fafp as the "save register" of the stack pointer. 5399 The later is there in order that the unwinder can see where it 5400 should restore the stack pointer across the and insn. */ 5401 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA); 5402 x = gen_rtx_SET (VOIDmode, y, x); 5403 RTX_FRAME_RELATED_P (x) = 1; 5404 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx), 5405 UNSPEC_REG_SAVE); 5406 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y); 5407 RTX_FRAME_RELATED_P (y) = 1; 5408 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)); 5409 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL); 5410 REG_NOTES (insn) = x; 5411 5412 /* Align the stack. 
*/ 5413 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx, 5414 GEN_INT (-16))); 5415 5416 /* And here we cheat like madmen with the unwind info. We force the 5417 cfa register back to sp+4, which is exactly what it was at the 5418 start of the function. Re-pushing the return address results in 5419 the return at the same spot relative to the cfa, and thus is 5420 correct wrt the unwind info. */ 5421 x = cfun->machine->force_align_arg_pointer; 5422 x = gen_frame_mem (Pmode, plus_constant (x, -4)); 5423 insn = emit_insn (gen_push (x)); 5424 RTX_FRAME_RELATED_P (insn) = 1; 5425 5426 x = GEN_INT (4); 5427 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA); 5428 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x); 5429 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL); 5430 REG_NOTES (insn) = x; 5431 } 5432 5433 /* Note: AT&T enter does NOT have reversed args. Enter is probably 5434 slower on all targets. Also sdb doesn't like it. */ 5435 5436 if (frame_pointer_needed) 5437 { 5438 insn = emit_insn (gen_push (hard_frame_pointer_rtx)); 5439 RTX_FRAME_RELATED_P (insn) = 1; 5440 5441 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); 5442 RTX_FRAME_RELATED_P (insn) = 1; 5443 } 5444 5445 allocate = frame.to_allocate; 5446 5447 if (!frame.save_regs_using_mov) 5448 ix86_emit_save_regs (); 5449 else 5450 allocate += frame.nregs * UNITS_PER_WORD; 5451 5452 /* When using red zone we may start register saving before allocating 5453 the stack frame saving one cycle of the prologue. */ 5454 if (TARGET_RED_ZONE && frame.save_regs_using_mov) 5455 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx 5456 : stack_pointer_rtx, 5457 -frame.nregs * UNITS_PER_WORD); 5458 5459 if (allocate == 0) 5460 ; 5461 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT) 5462 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 5463 GEN_INT (-allocate), -1); 5464 else 5465 { 5466 /* Only valid for Win32. 
*/ 5467 rtx eax = gen_rtx_REG (SImode, 0); 5468 bool eax_live = ix86_eax_live_at_start_p (); 5469 rtx t; 5470 5471 gcc_assert (!TARGET_64BIT); 5472 5473 if (eax_live) 5474 { 5475 emit_insn (gen_push (eax)); 5476 allocate -= 4; 5477 } 5478 5479 emit_move_insn (eax, GEN_INT (allocate)); 5480 5481 insn = emit_insn (gen_allocate_stack_worker (eax)); 5482 RTX_FRAME_RELATED_P (insn) = 1; 5483 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate)); 5484 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t); 5485 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, 5486 t, REG_NOTES (insn)); 5487 5488 if (eax_live) 5489 { 5490 if (frame_pointer_needed) 5491 t = plus_constant (hard_frame_pointer_rtx, 5492 allocate 5493 - frame.to_allocate 5494 - frame.nregs * UNITS_PER_WORD); 5495 else 5496 t = plus_constant (stack_pointer_rtx, allocate); 5497 emit_move_insn (eax, gen_rtx_MEM (SImode, t)); 5498 } 5499 } 5500 5501 if (frame.save_regs_using_mov && !TARGET_RED_ZONE) 5502 { 5503 if (!frame_pointer_needed || !frame.to_allocate) 5504 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate); 5505 else 5506 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx, 5507 -frame.nregs * UNITS_PER_WORD); 5508 } 5509 5510 pic_reg_used = false; 5511 if (pic_offset_table_rtx 5512 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM] 5513 || current_function_profile)) 5514 { 5515 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum (); 5516 5517 if (alt_pic_reg_used != INVALID_REGNUM) 5518 REGNO (pic_offset_table_rtx) = alt_pic_reg_used; 5519 5520 pic_reg_used = true; 5521 } 5522 5523 if (pic_reg_used) 5524 { 5525 if (TARGET_64BIT) 5526 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); 5527 else 5528 insn = emit_insn (gen_set_got (pic_offset_table_rtx)); 5529 5530 /* Even with accurate pre-reload life analysis, we can wind up 5531 deleting all references to the pic register after reload. 
5532 Consider if cross-jumping unifies two sides of a branch 5533 controlled by a comparison vs the only read from a global. 5534 In which case, allow the set_got to be deleted, though we're 5535 too late to do anything about the ebx save in the prologue. */ 5536 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL); 5537 } 5538 5539 /* Prevent function calls from be scheduled before the call to mcount. 5540 In the pic_reg_used case, make sure that the got load isn't deleted. */ 5541 if (current_function_profile) 5542 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx)); 5543} 5544 5545/* Emit code to restore saved registers using MOV insns. First register 5546 is restored from POINTER + OFFSET. */ 5547static void 5548ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, 5549 int maybe_eh_return) 5550{ 5551 int regno; 5552 rtx base_address = gen_rtx_MEM (Pmode, pointer); 5553 5554 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 5555 if (ix86_save_reg (regno, maybe_eh_return)) 5556 { 5557 /* Ensure that adjust_address won't be forced to produce pointer 5558 out of range allowed by x86-64 instruction set. */ 5559 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode)) 5560 { 5561 rtx r11; 5562 5563 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */); 5564 emit_move_insn (r11, GEN_INT (offset)); 5565 emit_insn (gen_adddi3 (r11, r11, pointer)); 5566 base_address = gen_rtx_MEM (Pmode, r11); 5567 offset = 0; 5568 } 5569 emit_move_insn (gen_rtx_REG (Pmode, regno), 5570 adjust_address (base_address, Pmode, offset)); 5571 offset += UNITS_PER_WORD; 5572 } 5573} 5574 5575/* Restore function stack, frame, and registers. 
*/ 5576 5577void 5578ix86_expand_epilogue (int style) 5579{ 5580 int regno; 5581 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging; 5582 struct ix86_frame frame; 5583 HOST_WIDE_INT offset; 5584 5585 ix86_compute_frame_layout (&frame); 5586 5587 /* Calculate start of saved registers relative to ebp. Special care 5588 must be taken for the normal return case of a function using 5589 eh_return: the eax and edx registers are marked as saved, but not 5590 restored along this path. */ 5591 offset = frame.nregs; 5592 if (current_function_calls_eh_return && style != 2) 5593 offset -= 2; 5594 offset *= -UNITS_PER_WORD; 5595 5596 /* If we're only restoring one register and sp is not valid then 5597 using a move instruction to restore the register since it's 5598 less work than reloading sp and popping the register. 5599 5600 The default code result in stack adjustment using add/lea instruction, 5601 while this code results in LEAVE instruction (or discrete equivalent), 5602 so it is profitable in some other cases as well. Especially when there 5603 are no registers to restore. We also use this code when TARGET_USE_LEAVE 5604 and there is exactly one register to pop. This heuristic may need some 5605 tuning in future. */ 5606 if ((!sp_valid && frame.nregs <= 1) 5607 || (TARGET_EPILOGUE_USING_MOVE 5608 && cfun->machine->use_fast_prologue_epilogue 5609 && (frame.nregs > 1 || frame.to_allocate)) 5610 || (frame_pointer_needed && !frame.nregs && frame.to_allocate) 5611 || (frame_pointer_needed && TARGET_USE_LEAVE 5612 && cfun->machine->use_fast_prologue_epilogue 5613 && frame.nregs == 1) 5614 || current_function_calls_eh_return) 5615 { 5616 /* Restore registers. We can use ebp or esp to address the memory 5617 locations. If both are available, default to ebp, since offsets 5618 are known to be small. Only exception is esp pointing directly to the 5619 end of block of saved registers, where we may simplify addressing 5620 mode. 
*/ 5621 5622 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate)) 5623 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, 5624 frame.to_allocate, style == 2); 5625 else 5626 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, 5627 offset, style == 2); 5628 5629 /* eh_return epilogues need %ecx added to the stack pointer. */ 5630 if (style == 2) 5631 { 5632 rtx tmp, sa = EH_RETURN_STACKADJ_RTX; 5633 5634 if (frame_pointer_needed) 5635 { 5636 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); 5637 tmp = plus_constant (tmp, UNITS_PER_WORD); 5638 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp)); 5639 5640 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx); 5641 emit_move_insn (hard_frame_pointer_rtx, tmp); 5642 5643 pro_epilogue_adjust_stack (stack_pointer_rtx, sa, 5644 const0_rtx, style); 5645 } 5646 else 5647 { 5648 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); 5649 tmp = plus_constant (tmp, (frame.to_allocate 5650 + frame.nregs * UNITS_PER_WORD)); 5651 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp)); 5652 } 5653 } 5654 else if (!frame_pointer_needed) 5655 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 5656 GEN_INT (frame.to_allocate 5657 + frame.nregs * UNITS_PER_WORD), 5658 style); 5659 /* If not an i386, mov & pop is faster than "leave". */ 5660 else if (TARGET_USE_LEAVE || optimize_size 5661 || !cfun->machine->use_fast_prologue_epilogue) 5662 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ()); 5663 else 5664 { 5665 pro_epilogue_adjust_stack (stack_pointer_rtx, 5666 hard_frame_pointer_rtx, 5667 const0_rtx, style); 5668 if (TARGET_64BIT) 5669 emit_insn (gen_popdi1 (hard_frame_pointer_rtx)); 5670 else 5671 emit_insn (gen_popsi1 (hard_frame_pointer_rtx)); 5672 } 5673 } 5674 else 5675 { 5676 /* First step is to deallocate the stack frame so that we can 5677 pop the registers. 
*/ 5678 if (!sp_valid) 5679 { 5680 gcc_assert (frame_pointer_needed); 5681 pro_epilogue_adjust_stack (stack_pointer_rtx, 5682 hard_frame_pointer_rtx, 5683 GEN_INT (offset), style); 5684 } 5685 else if (frame.to_allocate) 5686 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 5687 GEN_INT (frame.to_allocate), style); 5688 5689 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 5690 if (ix86_save_reg (regno, false)) 5691 { 5692 if (TARGET_64BIT) 5693 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno))); 5694 else 5695 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno))); 5696 } 5697 if (frame_pointer_needed) 5698 { 5699 /* Leave results in shorter dependency chains on CPUs that are 5700 able to grok it fast. */ 5701 if (TARGET_USE_LEAVE) 5702 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ()); 5703 else if (TARGET_64BIT) 5704 emit_insn (gen_popdi1 (hard_frame_pointer_rtx)); 5705 else 5706 emit_insn (gen_popsi1 (hard_frame_pointer_rtx)); 5707 } 5708 } 5709 5710 if (cfun->machine->force_align_arg_pointer) 5711 { 5712 emit_insn (gen_addsi3 (stack_pointer_rtx, 5713 cfun->machine->force_align_arg_pointer, 5714 GEN_INT (-4))); 5715 } 5716 5717 /* Sibcall epilogues don't want a return instruction. */ 5718 if (style == 0) 5719 return; 5720 5721 if (current_function_pops_args && current_function_args_size) 5722 { 5723 rtx popc = GEN_INT (current_function_pops_args); 5724 5725 /* i386 can only pop 64K bytes. If asked to pop more, pop 5726 return address, do explicit add, and jump indirectly to the 5727 caller. */ 5728 5729 if (current_function_pops_args >= 65536) 5730 { 5731 rtx ecx = gen_rtx_REG (SImode, 2); 5732 5733 /* There is no "pascal" calling convention in 64bit ABI. 
*/ 5734 gcc_assert (!TARGET_64BIT); 5735 5736 emit_insn (gen_popsi1 (ecx)); 5737 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc)); 5738 emit_jump_insn (gen_return_indirect_internal (ecx)); 5739 } 5740 else 5741 emit_jump_insn (gen_return_pop_internal (popc)); 5742 } 5743 else 5744 emit_jump_insn (gen_return_internal ()); 5745} 5746 5747/* Reset from the function's potential modifications. */ 5748 5749static void 5750ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, 5751 HOST_WIDE_INT size ATTRIBUTE_UNUSED) 5752{ 5753 if (pic_offset_table_rtx) 5754 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM; 5755#if TARGET_MACHO 5756 /* Mach-O doesn't support labels at the end of objects, so if 5757 it looks like we might want one, insert a NOP. */ 5758 { 5759 rtx insn = get_last_insn (); 5760 while (insn 5761 && NOTE_P (insn) 5762 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL) 5763 insn = PREV_INSN (insn); 5764 if (insn 5765 && (LABEL_P (insn) 5766 || (NOTE_P (insn) 5767 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL))) 5768 fputs ("\tnop\n", file); 5769 } 5770#endif 5771 5772} 5773 5774/* Extract the parts of an RTL expression that is a valid memory address 5775 for an instruction. Return 0 if the structure of the address is 5776 grossly off. Return -1 if the address contains ASHIFT, so it is not 5777 strictly valid, but still used for computing length of lea instruction. 
*/

int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  /* The four components of an x86 effective address:
     base + index*scale + disp, plus an optional segment override.  */
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      /* Flatten a left-leaning chain of PLUSes into at most four
	 addends (base, index*scale, disp, segment unspec).  */
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      /* Classify each addend into one of the address components;
	 any duplicate or unrecognized component makes the address
	 invalid.  */
      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case UNSPEC:
	      /* A thread-pointer unspec selects the TLS segment
		 register (%fs on 64-bit, %gs on 32-bit).  */
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
	      else
		return 0;
	      break;

	    case REG:
	    case SUBREG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      /* Not strictly valid as a memory address; signal that to the
	 caller per the contract in the function header comment.  */
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Allow arg pointer and stack pointer as index if there is no scaling;
     swap them into the base slot since %esp cannot be an index.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
	  || index_reg == frame_pointer_rtx
	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      rtx tmp;
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base_reg == hard_frame_pointer_rtx
       || base_reg == frame_pointer_rtx
       || base_reg == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base_reg && !index_reg && !disp
      && REG_P (base_reg)
      && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}

/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;
  if (parts.seg != SEG_DEFAULT)
    cost--;

  /* Attempt to minimize number of registers in the address.
     A component that is not a hard register (i.e. is a pseudo or a
     non-REG expression) will tie up a register at allocation time.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}

/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      /* 64-bit: peel (const (plus (unspec GOTPCREL) offset)) down to the
	 SYMBOL_REF/LABEL_REF inside the unspec, if that is what X is.  */
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || XINT (term, 1) != UNSPEC_GOTPCREL)
	return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
	  && GET_CODE (term) != LABEL_REF)
	return x;

      return term;
    }

  /* 32-bit: let the generic delegitimizer strip the PIC wrapping.  */
  term = ix86_delegitimize_address (x);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}

/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   use.
*/

static bool
darwin_local_data_pic (rtx disp)
{
  /* Recognize (minus {label,symbol} "<pic base>") — the Mach-O way of
     addressing local data relative to the picbase label.  */
  if (GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
	  || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
	  {
	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
	    if (! strcmp (sym_name, "<pic base>"))
	      return true;
	  }
    }

  return false;
}

/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

bool
legitimate_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      /* Strip a trailing integer offset, e.g. (const (plus sym N)).  */
      if (GET_CODE (x) == PLUS)
	{
	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
	return true;
      if (GET_CODE (x) != SYMBOL_REF)
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;
      break;

    case CONST_DOUBLE:
      /* Non-zero TImode constants cannot be materialized on 32-bit.  */
      if (GET_MODE (x) == TImode
	  && x != CONST0_RTX (TImode)
	  && !TARGET_64BIT)
	return false;
      break;

    case CONST_VECTOR:
      /* Only the all-zeros vector is a legitimate constant.  */
      if (x == CONST0_RTX (GET_MODE (x)))
	return true;
      return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}

/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !legitimate_constant_p (x);
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
}

/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      /* Strip a trailing integer offset before classifying.  */
      if (GET_CODE (inner) == PLUS
	  && GET_CODE (XEXP (inner, 1)) == CONST_INT)
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    x = XVECEXP (inner, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}

/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
	{
	case LABEL_REF:
	  return true;

	case CONST:
	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
	    break;
	  op0 = XEXP (XEXP (disp, 0), 0);
	  op1 = XEXP (XEXP (disp, 0), 1);
	  /* The offset must fit in the +/-16MB range usable with
	     RIP-relative / small-model addressing.  */
	  if (GET_CODE (op1) != CONST_INT
	      || INTVAL (op1) >= 16*1024*1024
	      || INTVAL (op1) < -16*1024*1024)
	    break;
	  if (GET_CODE (op0) == LABEL_REF)
	    return true;
	  if (GET_CODE (op0) != SYMBOL_REF)
	    break;
	  /* FALLTHRU */

	case SYMBOL_REF:
	  /* TLS references should always be enclosed in UNSPEC.  */
	  if (SYMBOL_REF_TLS_MODEL (op0))
	    return false;
	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
	    return true;
	  break;

	default:
	  break;
	}
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* We are unsafe to allow PLUS expressions.  This limit allowed distance
	 of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
	      && XINT (disp, 1) != UNSPEC_GOTOFF))
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  /* 32-bit PIC: allow (unspec ...) optionally preceded by a constant
     offset, i.e. (const (plus (unspec ...) N)).  */
  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return 1;

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While ABI specify also 32bit relocation but we don't produce it in
	 small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	  && !TARGET_64BIT)
	return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return 0;
}

/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.
The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  /* On failure, REASON/REASON_RTX describe the offending component for
     the -d debug dump emitted at report_error below.  */
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  /* Note: <= 0 also rejects the -1 "lea only" result of
     ix86_decompose_address.  */
  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's that span more than a word here.  It can lead to spill
     failures when the base is one word out of a two word structure, which is
     represented internally as a DImode int.  */

  if (base)
    {
      rtx reg;
      reason_rtx = base;

      if (REG_P (base))
	reg = base;
      else if (GET_CODE (base) == SUBREG
	       && REG_P (SUBREG_REG (base))
	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
		  <= UNITS_PER_WORD)
	reg = SUBREG_REG (base);
      else
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's that span more than a word here -- same as above.  */

  if (index)
    {
      rtx reg;
      reason_rtx = index;

      if (REG_P (index))
	reg = index;
      else if (GET_CODE (index) == SUBREG
	       && REG_P (SUBREG_REG (index))
	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
		  <= UNITS_PER_WORD)
	reg = SUBREG_REG (index);
      else
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      /* The SIB byte only encodes scales of 1, 2, 4 and 8.  */
      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
	     used.  While ABI specify also 32bit relocations, we don't produce
	     them at all and use IP relative instead.  */
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    if (!TARGET_64BIT)
	      goto is_legitimate_pic;
	    reason = "64bit address unspec";
	    goto report_error;

	  case UNSPEC_GOTPCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  default:
	    reason = "invalid address unspec";
	    goto report_error;
	  }

      else if (SYMBOLIC_CONST (disp)
	       && (flag_pic
		   || (TARGET_MACHO
#if TARGET_MACHO
		       && MACHOPIC_INDIRECT
		       && !machopic_operand_p (disp)
#endif
	       )))
	{

	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		{
		  reason = "non-constant pic memory reference";
		  goto report_error;
		}
	    }
	  else if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && GET_CODE (disp) != CONST_INT
	       && (GET_CODE (disp) != CONST
		   || !legitimate_constant_p (disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !legitimate_constant_p (disp)))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	{
	  reason = "displacement is out of range";
	  goto report_error;
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}

/* Return a unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set (void)
{
  /* Allocated lazily on first use; -1 means "not yet assigned".  */
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.
*/

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new = addr;
  else if (TARGET_64BIT
	   && ix86_cmodel != CM_SMALL_PIC
	   && local_symbolic_operand (addr, Pmode))
    {
      rtx tmpreg;
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      /* Mark the PIC register live; reload will not do it for us.  */
      if (reload_in_progress)
	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
	}
      else
	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      if (!reg)
	tmpreg = gen_reg_rtx (Pmode);
      else
	tmpreg = reg;
      emit_move_insn (tmpreg, new);

      if (reg != 0)
	{
	  /* Caller supplied a destination: add the PIC base in and
	     leave the result in REG.  */
	  new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
				     tmpreg, 1, OPTAB_DIRECT);
	  new = reg;
	}
      else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
	}
      else
	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
    {
      if (TARGET_64BIT)
	{
	  /* 64-bit: load the address through a RIP-relative GOT slot.  */
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_const_mem (Pmode, new);
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use directly gen_movsi, otherwise the address is loaded
	     into register for CSE.  We don't want to CSE this addresses,
	     instead we CSE addresses from the GOT table, so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_const_mem (Pmode, new);
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST_INT
	  && !x86_64_immediate_operand (addr, VOIDmode))
	{
	  /* A constant too wide for an immediate: force it into a reg.  */
	  if (reg)
	    {
	      emit_move_insn (reg, addr);
	      new = reg;
	    }
	  else
	    new = force_reg (Pmode, addr);
	}
      else if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  gcc_assert (GET_CODE (addr) == PLUS);
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					UNSPEC_GOTOFF);
		  new = gen_rtx_PLUS (Pmode, new, op1);
		  new = gen_rtx_CONST (Pmode, new);
		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new);
		      new = reg;
		    }
		}
	      else
		{
		  /* 64-bit: offsets outside +/-16MB cannot be folded into
		     the relocation; materialize the addition instead.  */
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);
		      new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
		    }
		}
	    }
	  else
	    {
	      /* Legitimize each operand separately and re-associate so
		 any constant term ends up outermost.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new  = legitimize_pic_address (XEXP (addr, 1),
					     base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}

/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (int to_reg)
{
  rtx tp, reg, insn;

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  if (!to_reg)
    return tp;

  reg = gen_reg_rtx (Pmode);
  insn = gen_rtx_SET (VOIDmode, reg, tp);
  insn = emit_insn (insn);

  return reg;
}

/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.
*/

static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
  rtx dest, base, off, pic, tp;
  int type;

  /* Emit the access sequence appropriate for the TLS access MODEL of
     symbol X and return an rtx for the resulting address/value.  */
  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);
      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;

      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
	{
	  /* Classic GD on 64-bit: call __tls_get_addr, result in %rax.  */
	  rtx rax = gen_rtx_REG (Pmode, 0), insns;

	  start_sequence ();
	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
	  insns = get_insns ();
	  end_sequence ();

	  emit_libcall_block (insns, dest, rax, x);
	}
      else if (TARGET_64BIT && TARGET_GNU2_TLS)
	emit_insn (gen_tls_global_dynamic_64 (dest, x));
      else
	emit_insn (gen_tls_global_dynamic_32 (dest, x));

      if (TARGET_GNU2_TLS)
	{
	  /* GNU2 (TLSDESC) yields an offset; add the thread pointer.  */
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
	}
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);
      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;

      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
	{
	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

	  start_sequence ();
	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
	  insns = get_insns ();
	  end_sequence ();

	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
	  emit_libcall_block (insns, base, rax, note);
	}
      else if (TARGET_64BIT && TARGET_GNU2_TLS)
	emit_insn (gen_tls_local_dynamic_base_64 (base));
      else
	emit_insn (gen_tls_local_dynamic_base_32 (base));

      if (TARGET_GNU2_TLS)
	{
	  rtx x = ix86_tls_module_base ();

	  set_unique_reg_note (get_last_insn (), REG_EQUIV,
			       gen_rtx_MINUS (Pmode, x, tp));
	}

      /* Add the per-symbol @DTPOFF offset to the module base.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
	}

      break;

    case TLS_MODEL_INITIAL_EXEC:
      /* Pick the GOT-relative relocation flavor and, on 32-bit,
	 the register used to reach the GOT.  */
      if (TARGET_64BIT)
	{
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
	off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_const_mem (Pmode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (Pmode, off);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  /* Sun-style TLS: offset is subtracted from the thread pointer.  */
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  /* TLS symbols need a model-specific access sequence; hand them off
     before any generic canonicalization.  */
  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Force a MULT term into a register; it cannot stand alone in an
	 x86 address.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* As a last resort, move the non-register half of the sum into a
	 fresh register so the address becomes reg+reg.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}

/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (! TARGET_MACHO || TARGET_64BIT)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      /* Print the wrapped operand followed by the relocation suffix that
	 the UNSPEC kind stands for.  */
      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs ("@GOTPCREL(%rip)", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@GOTTPOFF", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@TPOFF", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@TPOFF", file);
	  else
	    fputs ("@NTPOFF", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@DTPOFF", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@GOTTPOFF(%rip)", file);
	  else
	    fputs ("@GOTNTPOFF", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@INDNTPOFF", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}

/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  SIZE is 4 or 8; for 8 the
   value is padded with a zero upper word.  */

static void
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@DTPOFF", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
    }
}

/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.  */

static rtx
ix86_delegitimize_address (rtx orig_x)
{
  rtx x = orig_x;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.
*/
  rtx result = NULL_RTX;

  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      /* 64-bit only uses GOTPCREL-wrapped memory loads for PIC.  */
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	  || GET_CODE (orig_x) != MEM)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (GET_CODE (XEXP (reg_addend, 0)) == REG
	  && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
	reg_addend = XEXP (reg_addend, 1);
      else if (GET_CODE (XEXP (reg_addend, 1)) == REG
	       && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
	reg_addend = XEXP (reg_addend, 0);
      else
	return orig_x;
      if (GET_CODE (reg_addend) != REG
	  && GET_CODE (reg_addend) != MULT
	  && GET_CODE (reg_addend) != ASHIFT)
	return orig_x;
    }
  else
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT)
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    result = XVECEXP (x, 0, 0);

  if (TARGET_MACHO && darwin_local_data_pic (x)
      && GET_CODE (orig_x) != MEM)
    result = XEXP (x, 0);

  if (! result)
    return orig_x;

  /* Re-attach any stripped addends around the bare symbol.  */
  if (const_addend)
    result = gen_rtx_PLUS (Pmode, result, const_addend);
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  return result;
}

/* Emit to FILE the instruction-suffix letters for comparison CODE in
   condition-code mode MODE.  REVERSE reverses the condition first; FP
   selects the alternate spellings used for fcmov (see the ??? notes
   below).  */

static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
		    int fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      /* Reduce an FP comparison to the equivalent integer condition.  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      gcc_assert (mode == CCmode);
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      switch (mode)
	{
	case CCNOmode:
	case CCGOCmode:
	  suffix = "s";
	  break;

	case CCmode:
	case CCGCmode:
	  suffix = "l";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case LTU:
      gcc_assert (mode == CCmode);
      suffix = "b";
      break;
    case GE:
      switch (mode)
	{
	case CCNOmode:
	case CCGOCmode:
	  suffix = "ns";
	  break;

	case CCmode:
	case CCGCmode:
	  suffix = "ge";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case GEU:
      /* ??? As above.  */
      gcc_assert (mode == CCmode);
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      gcc_assert (mode == CCmode);
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}

/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */

void
print_reg (rtx x, int code, FILE *file)
{
  gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
	      && REGNO (x) != FRAME_POINTER_REGNUM
	      && REGNO (x) != FLAGS_REG
	      && REGNO (x) != FPSR_REG);

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  /* Reduce CODE to the operand width in bytes (0 meaning the high byte,
     3 meaning an FP stack register).  */
  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.
*/
  if (REX_INT_REG_P (x))
    {
      gcc_assert (TARGET_64BIT);
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      gcc_unreachable ();
    }
}

/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  /* Return the cached name if an earlier scan already found one.  */
  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  gcc_unreachable ();
}

/* for_each_rtx callback for get_some_local_dynamic_name: cache the
   first local-dynamic TLS SYMBOL_REF seen and stop the walk.  */

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
 */

void
print_operand (FILE *file, rtx x, int code)
{
  /* Operand-modifier letters are handled first; a zero CODE falls
     through to the generic operand printing below.  */
  if (code)
    {
      switch (code)
	{
	case '*':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  return;

	case '&':
	  assemble_name (file, get_some_local_dynamic_name ());
	  return;

	case 'A':
	  switch (ASSEMBLER_DIALECT)
	    {
	    case ASM_ATT:
	      putc ('*', file);
	      break;

	    case ASM_INTEL:
	      /* Intel syntax. For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (GET_CODE (x) != REG)
		{
		  putc ('[', file);
		  PRINT_OPERAND (file, x, 0);
		  putc (']', file);
		  return;
		}
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  PRINT_OPERAND (file, x, 0);
	  return;


	case 'L':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('t', file);
	  return;

	case 'z':
	  /* 387 opcodes don't get size suffixes if the operands are
	     registers.  */
	  if (STACK_REG_P (x))
	    return;

	  /* Likewise if using Intel opcodes.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    return;

	  /* This is the size of op from size of operand.  */
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
#ifdef HAVE_GAS_FILDS_FISTS
	      putc ('s', file);
#endif
	      return;

	    case 4:
	      if (GET_MODE (x) == SFmode)
		{
		  putc ('s', file);
		  return;
		}
	      else
		putc ('l', file);
	      return;

	    case 12:
	    case 16:
	      putc ('t', file);
	      return;

	    case 8:
	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
		{
#ifdef GAS_MNEMONICS
		  putc ('q', file);
#else
		  putc ('l', file);
		  putc ('l', file);
#endif
		}
	      else
		putc ('l', file);
	      return;

	    default:
	      gcc_unreachable ();
	    }

	case 'b':
	case 'w':
	case 'k':
	case 'q':
	case 'h':
	case 'y':
	case 'X':
	case 'P':
	  /* These are consumed by print_reg / the MEM and constant
	     printing below.  */
	  break;

	case 's':
	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      PRINT_OPERAND (file, x, 0);
	      putc (',', file);
	    }
	  return;

	case 'D':
	  /* Little bit of braindamage here.  The SSE compare instructions
	     does use completely different names for the comparisons that the
	     fp conditional moves.  */
	  switch (GET_CODE (x))
	    {
	    case EQ:
	    case UNEQ:
	      fputs ("eq", file);
	      break;
	    case LT:
	    case UNLT:
	      fputs ("lt", file);
	      break;
	    case LE:
	    case UNLE:
	      fputs ("le", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case NE:
	    case LTGT:
	      fputs ("neq", file);
	      break;
	    case UNGE:
	    case GE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	    case GT:
	      fputs ("nle", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  return;
	case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    {
	      switch (GET_MODE (x))
		{
		case HImode: putc ('w', file); break;
		case SImode:
		case SFmode: putc ('l', file); break;
		case DImode:
		case DFmode: putc ('q', file); break;
		default: gcc_unreachable ();
		}
	      putc ('.', file);
	    }
#endif
	  return;
	case 'C':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	  return;
	case 'F':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	  return;

	  /* Like above, but reverse condition */
	case 'c':
	  /* Check to see if argument to %c is really a constant
	     and not a condition code which needs to be reversed.  */
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	  return;
	case 'f':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	  return;

	case 'H':
	  /* It doesn't actually matter what mode we use here, as we're
	     only going to use this for printing.  */
	  x = adjust_address_nv (x, DImode, 8);
	  break;

	case '+':
	  {
	    rtx x;

	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = INTVAL (XEXP (x, 0));

		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    int taken = pred_val > REG_BR_PROB_BASE / 2;
		    int cputaken = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the case default branch prediction
		       heuristics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs ("ds ; ", file);
			else
			  fputs ("cs ; ", file);
		      }
		  }
	      }
	    return;
	  }
	default:
	  output_operand_lossage ("invalid operand code '%c'", code);
	}
    }

  if (GET_CODE (x) == REG)
    print_reg (x, code, file);

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
	{
	  const char * size;
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    case 16: size = "XMMWORD"; break;
	    default:
	      gcc_unreachable ();
	    }

	  /* Check for explicit size override (codes 'b', 'w' and 'k') */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
	  && GET_CODE (x) != CONST_INT)
	output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      fprintf (file, "0x%08lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
	   && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
	 In 64-bit mode, we should probably support all 8-byte vectors,
	 since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
	{
	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
	  x = const0_rtx;
	}

      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}

/* Print a memory operand whose address is ADDR.  */

void
print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Emit any segment override first.  */
  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (USER_LABEL_PREFIX[0] == 0)
	putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      gcc_unreachable ();
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    {
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);

      /* Use one byte shorter RIP relative addressing for 64bit mode.  */
      if (TARGET_64BIT)
	{
	  if (GET_CODE (disp) == CONST
	      && GET_CODE (XEXP (disp, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
	    disp = XEXP (XEXP (disp, 0), 0);
	  if (GET_CODE (disp) == LABEL_REF
	      || (GET_CODE (disp) == SYMBOL_REF
		  && SYMBOL_REF_TLS_MODEL (disp) == 0))
	    fputs ("(%rip)", file);
	}
    }
  else
    {
      /* AT&T syntax: disp(base,index,scale).  */
      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  /* Intel syntax: [base+index*scale+offset].  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, 0, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}

/* Target hook helper: print the TLS relocation suffix for UNSPEC X that
   output_addr_const cannot handle itself.  Returns false if X is not an
   UNSPEC we recognize.  */

bool
output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.
*/ 8223 fputs ("@GOTTPOFF", file); 8224 break; 8225 case UNSPEC_TPOFF: 8226 output_addr_const (file, op); 8227 fputs ("@TPOFF", file); 8228 break; 8229 case UNSPEC_NTPOFF: 8230 output_addr_const (file, op); 8231 if (TARGET_64BIT) 8232 fputs ("@TPOFF", file); 8233 else 8234 fputs ("@NTPOFF", file); 8235 break; 8236 case UNSPEC_DTPOFF: 8237 output_addr_const (file, op); 8238 fputs ("@DTPOFF", file); 8239 break; 8240 case UNSPEC_GOTNTPOFF: 8241 output_addr_const (file, op); 8242 if (TARGET_64BIT) 8243 fputs ("@GOTTPOFF(%rip)", file); 8244 else 8245 fputs ("@GOTNTPOFF", file); 8246 break; 8247 case UNSPEC_INDNTPOFF: 8248 output_addr_const (file, op); 8249 fputs ("@INDNTPOFF", file); 8250 break; 8251 8252 default: 8253 return false; 8254 } 8255 8256 return true; 8257} 8258 8259/* Split one or more DImode RTL references into pairs of SImode 8260 references. The RTL can be REG, offsettable MEM, integer constant, or 8261 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to 8262 split and "num" is its length. lo_half and hi_half are output arrays 8263 that parallel "operands". */ 8264 8265void 8266split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) 8267{ 8268 while (num--) 8269 { 8270 rtx op = operands[num]; 8271 8272 /* simplify_subreg refuse to split volatile memory addresses, 8273 but we still have to handle it. */ 8274 if (GET_CODE (op) == MEM) 8275 { 8276 lo_half[num] = adjust_address (op, SImode, 0); 8277 hi_half[num] = adjust_address (op, SImode, 4); 8278 } 8279 else 8280 { 8281 lo_half[num] = simplify_gen_subreg (SImode, op, 8282 GET_MODE (op) == VOIDmode 8283 ? DImode : GET_MODE (op), 0); 8284 hi_half[num] = simplify_gen_subreg (SImode, op, 8285 GET_MODE (op) == VOIDmode 8286 ? DImode : GET_MODE (op), 4); 8287 } 8288 } 8289} 8290/* Split one or more TImode RTL references into pairs of DImode 8291 references. The RTL can be REG, offsettable MEM, integer constant, or 8292 CONST_DOUBLE. 
   "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuse to split volatile memory addresses, but we
	 still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, DImode, 0);
	  hi_half[num] = adjust_address (op, DImode, 8);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
	}
    }
}

/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  /* NOTE: the returned template lives in this static buffer and is
     only valid until the next call.  */
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    gcc_assert (is_sse);
#endif

  /* Pick the x87 mnemonic root (fi* for integer memory operands) and
     the SSE mnemonic root for this operation.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "div";
      break;

    default:
      gcc_unreachable ();
    }

  if (is_sse)
    {
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
	strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative: put a matching register operand first.  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative: may need the reversed (fsubr/fdivr) forms.  */
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}

/* Return needed mode for entity in optimize_mode_switching pass.  */

int
ix86_mode_needed (int entity, rtx insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specify that function
     has no requirements on the control word and make no changes in the
     bits we are interested in.
     */

  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
	  && (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  /* Only report the mode the insn asks for when it matches ENTITY;
     otherwise the insn places no requirement on this entity.  */
  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
	return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
	return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
	return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
	return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}

/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */

void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  int slot;

  rtx reg = gen_reg_rtx (HImode);

  /* Save the current control word, then modify a copy of it.  */
  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, stored_mode);

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
    {
      /* Set the rounding-control bits (0x0c00) with plain HImode
	 and/or operations.  */
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      /* Avoid partial-register stalls: insert the rounding-control
	 nibble with movsi_insv_1 instead of HImode and/or.  */
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}

/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx insn, rtx *operands, int fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.
     */
  /* Duplicate st(0) so the popping store below leaves the value live.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (GET_CODE (operands[0]) == MEM);

  if (fisttp)
      output_asm_insn ("fisttp%z0\t%0", operands);
  else
    {
      /* Switch to the requested rounding mode around the store, then
	 restore the caller's control word (%2/%3 are the cw slots).  */
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
	output_asm_insn ("fistp%z0\t%0", operands);
      else
	output_asm_insn ("fist%z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}

/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#if HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    /* Assembler lacks ffreep mnemonic: emit the raw opcode bytes
       (0xdf 0xc0+i) for each stack register.  */
    switch (REGNO (operands[opno]))
      {
      case FIRST_STACK_REG + 0: return ".word\t0xc0df";
      case FIRST_STACK_REG + 1: return ".word\t0xc1df";
      case FIRST_STACK_REG + 2: return ".word\t0xc2df";
      case FIRST_STACK_REG + 3: return ".word\t0xc3df";
      case FIRST_STACK_REG + 4: return ".word\t0xc4df";
      case FIRST_STACK_REG + 5: return ".word\t0xc5df";
      case FIRST_STACK_REG + 6: return ".word\t0xc6df";
      case FIRST_STACK_REG + 7: return ".word\t0xc7df";
      }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}


/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "ucomiss\t{%1, %0|%0, %1}";
	else
	  return "comiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "ucomisd\t{%1, %0|%0, %1}";
	else
	  return "comisd\t{%1, %0|%0, %1}";
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  /* Comparison against zero can use ftst directly.  */
  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
	{
	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
	  return output_387_ffreep (operands, 1);
	}
      else
	return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return output_387_ffreep (operands, 0);
	}
      else
	{
	  if (unordered_p)
	    return "fucompp\n\tfnstsw\t%0";
	  else
	    return "fcompp\n\tfnstsw\t%0";
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}

/* Output one element of a jump-table address vector: a pointer-sized
   reference to local label VALUE.  */

void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

/* Output one element of a PIC jump-table: label VALUE expressed
   relative to label REL (64-bit) or to the GOT/function base.  */

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-%s%d\n",
	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
      machopic_output_function_base_name (file);
      fprintf(file, "\n");
    }
#endif
  else
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}

/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      /* xor clobbers the flags; attach the CC clobber to the pattern.  */
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}

/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}

/* Expand a scalar move of MODE from operands[1] to operands[0],
   legitimizing TLS and PIC symbolic addresses and forcing operands
   into registers where the move patterns require it.  */

void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  int strict = (reload_in_progress || reload_completed);
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
	{
	  op1 = legitimize_tls_address (op1, model, true);
	  op1 = force_operand (op1, op0);
	  /* force_operand may have emitted the move already.  */
	  if (op1 == op0)
	    return;
	}
    }
  else if (GET_CODE (op1) == CONST
	   && GET_CODE (XEXP (op1, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      /* (const (plus SYM N)): legitimize SYM, then re-add the addend.  */
      model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
      if (model)
	{
	  rtx addend = XEXP (XEXP (op1, 0), 1);
	  op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
	  op1 = force_operand (op1, NULL);
	  op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
				     op0, 1, OPTAB_DIRECT);
	  if (op1 == op0)
	    return;
	}
    }

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
	{
#if TARGET_MACHO
	  if (MACHOPIC_PURE)
	    {
	      rtx temp = ((reload_in_progress
			   || ((op0 && GET_CODE (op0) == REG)
			       && mode == Pmode))
			  ? op0 : gen_reg_rtx (Pmode));
	      op1 = machopic_indirect_data_reference (op1, temp);
	      op1 = machopic_legitimize_pic_address (op1, mode,
						     temp == op1 ? 0 : temp);
	    }
	  else if (MACHOPIC_INDIRECT)
	    op1 = machopic_indirect_data_reference (op1, 0);
	  if (op0 == op1)
	    return;
#endif
	}
      else
	{
	  if (GET_CODE (op0) == MEM)
	    op1 = force_reg (Pmode, op1);
	  else
	    op1 = legitimize_address (op1, op1, Pmode);
	}
    }
  else
    {
      /* mem-to-mem moves (other than pushes) need an intermediate reg.  */
      if (GET_CODE (op0) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && GET_CODE (op1) == MEM)
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (op1, mode)
	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
	  && !register_operand (op0, mode)
	  && optimize && !reload_completed && !reload_in_progress)
	op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (op1) == CONST_DOUBLE)
	    {
	      op1 = validize_mem (force_const_mem (mode, op1));
	      if (!register_operand (op0, mode))
		{
		  rtx temp = gen_reg_rtx (mode);
		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
		  emit_move_insn (op0, temp);
		  return;
		}
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}

/* Expand a vector move of MODE from operands[1] to operands[0].  */

void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];

  /* Force constants other than zero into memory.
     We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (op0, mode)
      && CONSTANT_P (op1)
      && standard_sse_constant_p (op1) <= 0)
    op1 = validize_mem (force_const_mem (mode, op1));

  /* Make operand1 a register if it isn't already.  */
  if (!no_new_pseudos
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}

/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (MEM_P (op1))
    {
      /* Unaligned load from memory.  */

      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  rtx zero;

	  /* When SSE registers are split into halves, we can avoid
	     writing to the top half twice.  */
	  if (TARGET_SSE_SPLIT_REGS)
	    {
	      emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
	      zero = op0;
	    }
	  else
	    {
	      /* ??? Not sure about the best option for the Intel chips.
		 The following would seem to satisfy; the register is
		 entirely cleared, breaking the dependency chain.  We
		 then store to the upper half, with a dependency depth
		 of one.  A rumor has it that Intel recommends two movsd
		 followed by an unpacklpd, but this is unconfirmed.  And
		 given that the dependency depth of the unpacklpd would
		 still be one, I'm not sure why this would be better.  */
	      zero = CONST0_RTX (V2DFmode);
	    }

	  m = adjust_address (op1, DFmode, 0);
	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
	  m = adjust_address (op1, DFmode, 8);
	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
	}
      else
	{
	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
	    emit_move_insn (op0, CONST0_RTX (mode));
	  else
	    emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));

	  if (mode != V4SFmode)
	    op0 = gen_lowpart (V4SFmode, op0);
	  m = adjust_address (op1, V2SFmode, 0);
	  emit_insn (gen_sse_loadlps (op0, op0, m));
	  m = adjust_address (op1, V2SFmode, 8);
	  emit_insn (gen_sse_loadhps (op0, op0, m));
	}
    }
  else if (MEM_P (op0))
    {
      /* Unaligned store to memory.  */

      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? Similar to above, only less clear because of quote
	 typeless stores unquote.  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  m = adjust_address (op0, DFmode, 0);
	  emit_insn (gen_sse2_storelpd (m, op1));
	  m = adjust_address (op0, DFmode, 8);
	  emit_insn (gen_sse2_storehpd (m, op1));
	}
      else
	{
	  if (mode != V4SFmode)
	    op1 = gen_lowpart (V4SFmode, op1);
	  m = adjust_address (op0, V2SFmode, 0);
	  emit_insn (gen_sse_storelps (m, op1));
	  m = adjust_address (op0, V2SFmode, 8);
	  emit_insn (gen_sse_storehps (m, op1));
	}
    }
  else
    gcc_unreachable ();
}

/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  /* Adjust the stack pointer by hand, then store X at the new top.  */
  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
			     GEN_INT (-GET_MODE_SIZE (mode)),
			     stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
  emit_move_insn (tmp, x);
}

/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.
 */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src1, src2, dst;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      /* Keep the source that matches the memory destination.  */
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    src1 = force_reg (mode, src1);

  src1 = operands[1] = src1;
  src2 = operands[2] = src2;
  return dst;
}

/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}

/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (enum rtx_code code,
			 enum machine_mode mode ATTRIBUTE_UNUSED,
			 rtx operands[3])
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
	    || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
		&& rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and the source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}

/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.
*/ 9404 if (dst != operands[0]) 9405 emit_move_insn (operands[0], dst); 9406} 9407 9408/* Return TRUE or FALSE depending on whether the unary operator meets the 9409 appropriate constraints. */ 9410 9411int 9412ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED, 9413 enum machine_mode mode ATTRIBUTE_UNUSED, 9414 rtx operands[2] ATTRIBUTE_UNUSED) 9415{ 9416 /* If one of operands is memory, source and destination must match. */ 9417 if ((GET_CODE (operands[0]) == MEM 9418 || GET_CODE (operands[1]) == MEM) 9419 && ! rtx_equal_p (operands[0], operands[1])) 9420 return FALSE; 9421 return TRUE; 9422} 9423 9424/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders. 9425 Create a mask for the sign bit in MODE for an SSE register. If VECT is 9426 true, then replicate the mask for all elements of the vector register. 9427 If INVERT is true, then create a mask excluding the sign bit. */ 9428 9429rtx 9430ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert) 9431{ 9432 enum machine_mode vec_mode; 9433 HOST_WIDE_INT hi, lo; 9434 int shift = 63; 9435 rtvec v; 9436 rtx mask; 9437 9438 /* Find the sign bit, sign extended to 2*HWI. */ 9439 if (mode == SFmode) 9440 lo = 0x80000000, hi = lo < 0; 9441 else if (HOST_BITS_PER_WIDE_INT >= 64) 9442 lo = (HOST_WIDE_INT)1 << shift, hi = -1; 9443 else 9444 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT); 9445 9446 if (invert) 9447 lo = ~lo, hi = ~hi; 9448 9449 /* Force this value into the low part of a fp vector constant. */ 9450 mask = immed_double_const (lo, hi, mode == SFmode ? 
SImode : DImode);
  mask = gen_lowpart (mode, mask);

  if (mode == SFmode)
    {
      if (vect)
        v = gen_rtvec (4, mask, mask, mask, mask);
      else
        v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
                       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
      vec_mode = V4SFmode;
    }
  else
    {
      if (vect)
        v = gen_rtvec (2, mask, mask);
      else
        v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
      vec_mode = V2DFmode;
    }

  return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
}

/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
                                rtx operands[])
{
  rtx mask, set, use, clob, dst, src;
  bool matching_memory;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode elt_mode = mode;

  if (vector_mode)
    {
      elt_mode = GET_MODE_INNER (mode);
      use_sse = true;
    }
  else if (TARGET_SSE_MATH)
    use_sse = SSE_FLOAT_MODE_P (mode);

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we don't have matching source
     operands or we're using the x87, do things in registers.  */
  matching_memory = false;
  if (MEM_P (dst))
    {
      if (use_sse && rtx_equal_p (dst, src))
        matching_memory = true;
      else
        dst = gen_reg_rtx (mode);
    }
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  if (vector_mode)
    {
      /* Vector NEG is an XOR with the sign-bit mask; vector ABS is an AND
         with the inverted mask (mask was built with INVERT = (code==ABS)).  */
      set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
      set = gen_rtx_SET (VOIDmode, dst, set);
      emit_insn (set);
    }
  else
    {
      set = gen_rtx_fmt_e (code, mode, src);
      set = gen_rtx_SET (VOIDmode, dst, set);
      if (mask)
        {
          /* Scalar SSE case: keep the mask alive via USE and note the
             flags clobber in the pattern.  */
          use = gen_rtx_USE (VOIDmode, mask);
          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
          emit_insn (gen_rtx_PARALLEL (VOIDmode,
                                       gen_rtvec (3, set, use, clob)));
        }
      else
        emit_insn (set);
    }

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);
  vmode = mode == SFmode ? V4SFmode : V2DFmode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtvec v;

      /* Only |op0| matters; the sign comes from op1.  */
      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
        op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (op0 == CONST0_RTX (mode))
        op0 = CONST0_RTX (vmode);
      else
        {
          if (mode == SFmode)
            v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
                           CONST0_RTX (SFmode), CONST0_RTX (SFmode));
          else
            v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
          op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
        }

      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
        emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
      else
        emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
    }
  else
    {
      /* Variable op0: need both the sign-bit mask and its complement.  */
      nmask = ix86_build_signbit_mask (mode, 0, 1);
      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
        emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
      else
        emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, x;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  /* dest = (op1 & signbit-mask) | |op0|.  */
  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
         we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  /* The alternatives below correspond to the constraint alternatives of
     the copysign*3_var insn patterns, keyed on which hard registers the
     RA chose to overlap.  */
  if (REG_P (mask) && REGNO (dest) == REGNO (mask))     /* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))               /* alternative 1,3 */
        {
          x = gen_rtx_AND (vmode, scratch, mask);
        }
      else                                              /* alternative 2,4 */
        {
          gcc_assert (REGNO (mask) == REGNO (scratch));
          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
          x = gen_rtx_AND (vmode, scratch, op1);
        }
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))                  /* alternative 1,2 */
        {
          dest = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, nmask);
        }
      else                                              /* alternative 3,4 */
        {
          gcc_assert (REGNO (nmask) == REGNO (dest));
          dest = nmask;
          op0 = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, op0);
        }
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  /* Combine the kept magnitude bits with the extracted sign bit.  */
  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.
*/

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
        return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
        return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

/* Return the CC mode needed to represent a comparison of OP0 and OP1
   with code CODE, i.e. the least constrained flags mode that still
   captures everything the comparison needs.  */

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:                    /* ZF=0 */
    case NE:                    /* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:                   /* CF=0 */
    case GTU:                   /* CF=0 & ZF=0 */
    case LTU:                   /* CF=1 */
    case LEU:                   /* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:                    /* SF=OF   or   SF=0 */
    case LT:                    /* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
        return CCGOCmode;
      else
        /* For other cases Carry flag is not required.  */
        return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:                    /* ZF=0 & SF=OF */
    case LE:                    /* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
        return CCNOmode;
      else
        return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
         mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}

/* Return the fixed registers used for condition codes.
*/

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCZmode:
      switch (m2)
        {
        default:
          return VOIDmode;

        case CCmode:
        case CCGCmode:
        case CCGOCmode:
        case CCNOmode:
        case CCZmode:
          /* Any two of the integer flag modes unify to full CCmode.  */
          return CCmode;
        }

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
         checked above.  */
      return VOIDmode;
    }
}

/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
{
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
          || (ix86_fp_comparison_cost (swapped_code)
              == ix86_fp_comparison_fcomi_cost (swapped_code)));
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
          || (op_mode == XFmode
              && ! (standard_80387_constant_p (op0) == 1
                    || standard_80387_constant_p (op1) == 1)
              && GET_CODE (op1) != FLOAT)
          || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (GET_CODE (op0) == MEM
              && ! (standard_80387_constant_p (op1) == 0
                    || GET_CODE (op1) == MEM)))
        {
          rtx tmp;
          tmp = op0, op0 = op1, op1 = tmp;
          code = swap_condition (code);
        }

      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
        {
          /* standard_80387_constant_p: 0 = not loadable by fld constant,
             1 = fldz, other = some other fld constant.  */
          int tmp = standard_80387_constant_p (op1);
          if (tmp == 0)
            op1 = validize_mem (force_const_mem (op_mode, op1));
          else if (tmp == 1)
            {
              if (TARGET_CMOVE)
                op1 = force_reg (op_mode, op1);
            }
          else
            op1 = force_reg (op_mode, op1);
        }
    }

  /* Try to rearrange the comparison to make it cheaper.
*/
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
      break;
    case UNEQ:
      return EQ;
      break;
    case UNLT:
      return LTU;
      break;
    case UNLE:
      return LEU;
      break;
    case LTGT:
      return NE;
      break;
    default:
      return UNKNOWN;
    }
}

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If some of branches
   is not required, set value to UNKNOWN.
   We never require more than two branches.  */

void
ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
                          enum rtx_code *first_code,
                          enum rtx_code *second_code)
{
  *first_code = code;
  *bypass_code = UNKNOWN;
  *second_code = UNKNOWN;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
    case GT:                    /* GTU - CF=0 & ZF=0 */
    case GE:                    /* GEU - CF=0 */
    case ORDERED:               /* PF=0 */
    case UNORDERED:             /* PF=1 */
    case UNEQ:                  /* EQ - ZF=1 */
    case UNLT:                  /* LTU - CF=1 */
    case UNLE:                  /* LEU - CF=1 | ZF=1 */
    case LTGT:                  /* EQ - ZF=0 */
      break;
    case LT:                    /* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:                    /* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:                    /* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:                    /* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:                  /* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:                  /* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      gcc_unreachable ();
    }
  if (!TARGET_IEEE_FP)
    {
      /* Without IEEE conformance, unordered inputs need not be handled,
         so the single FIRST_CODE branch suffices.  */
      *second_code = UNKNOWN;
      *bypass_code = UNKNOWN;
    }
}

/* Return cost of comparison done fcom + arithmetics operations on AX.
   All following functions do use number of instructions as a cost metrics.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */
static int
ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
{
  if (!TARGET_IEEE_FP)
    return 4;
  /* The cost of code output by ix86_expand_fp_compare.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      return 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      return 5;
      break;
    case LE:
    case UNGT:
      return 6;
      break;
    default:
      gcc_unreachable ();
    }
}

/* Return cost of comparison done using fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_fcomi_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not supported - this
     prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
}

/* Return cost of comparison done using sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_sahf_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not preferred - this
     avoids gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
}

/* Compute cost of the comparison done using any method.
   See ix86_fp_comparison_arithmetics_cost for the metrics.
*/
static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
  int min;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
    min = sahf_cost;
  if (min > fcomi_cost)
    min = fcomi_cost;
  return min;
}

/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
                        rtx *second_test, rtx *bypass_test)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  */
  if ((bypass_code == UNKNOWN || bypass_test)
      && (second_code == UNKNOWN || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
        {
          /* fcomi: compare straight into the flags register.  */
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                             tmp);
          emit_insn (tmp);
        }
      else
        {
          /* fnstsw + sahf: copy the FP status word into AH, then into
             the flags.  */
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
          if (!scratch)
            scratch = gen_reg_rtx (HImode);
          emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
          emit_insn (gen_x86_sahf_1 (scratch));
        }

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != UNKNOWN)
        *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
      if (second_code != UNKNOWN)
        *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT:
        case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT:
        case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
              code = NE;
            }
          break;
        case GE:
        case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x01)));
              code = NE;
            }
          break;
        case LE:
        case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ:
        case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
              break;
            }
          break;
        case NE:
        case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;

        case UNORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = EQ;
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}

/* Emit a comparison of the globals ix86_compare_op0/ix86_compare_op1
   (or reuse an already-emitted flags result in ix86_compare_emitted)
   and return the rtx test to be placed in the flags user.  */

rtx
ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (ix86_compare_emitted)
    {
      ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
      ix86_compare_emitted = NULL_RTX;
    }
  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
                                  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if the CODE will result in nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != UNKNOWN || second_code != UNKNOWN;
}

void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  /* If we have emitted a compare insn, go straight to simple.
     ix86_expand_compare won't emit anything if ix86_compare_emitted
     is non NULL.
*/
  if (ix86_compare_emitted)
    goto simple;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
      {
        rtvec vec;
        int use_fcomi;
        enum rtx_code bypass_code, first_code, second_code;

        code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
                                             &ix86_compare_op1);

        ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

        /* Check whether we will use the natural sequence with one jump.  If
           so, we can expand jump early.  Otherwise delay expansion by
           creating compound insn to not confuse optimizers.  */
        if (bypass_code == UNKNOWN && second_code == UNKNOWN
            && TARGET_CMOVE)
          {
            ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx, NULL_RTX, NULL_RTX);
          }
        else
          {
            tmp = gen_rtx_fmt_ee (code, VOIDmode,
                                  ix86_compare_op0, ix86_compare_op1);
            tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                        gen_rtx_LABEL_REF (VOIDmode, label),
                                        pc_rtx);
            tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

            use_fcomi = ix86_use_fcomi_compare (code);
            vec = rtvec_alloc (3 + !use_fcomi);
            RTVEC_ELT (vec, 0) = tmp;
            /* NOTE(review): 17 and 18 appear to be the FLAGS/FPSR hard
               register numbers -- confirm against the register list in
               i386.h.  */
            RTVEC_ELT (vec, 1)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
            RTVEC_ELT (vec, 2)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
            if (! use_fcomi)
              RTVEC_ELT (vec, 3)
                = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

            emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
          }
        return;
      }

    case DImode:
      if (TARGET_64BIT)
        goto simple;
      /* FALLTHRU */
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;
        enum machine_mode submode;

        if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
          {
            tmp = ix86_compare_op0;
            ix86_compare_op0 = ix86_compare_op1;
            ix86_compare_op1 = tmp;
            code = swap_condition (code);
          }
        if (GET_MODE (ix86_compare_op0) == DImode)
          {
            split_di (&ix86_compare_op0, 1, lo+0, hi+0);
            split_di (&ix86_compare_op1, 1, lo+1, hi+1);
            submode = SImode;
          }
        else
          {
            split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
            split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
            submode = DImode;
          }

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_size
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (submode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_compare_op0 = tmp;
            ix86_compare_op1 = const0_rtx;
            ix86_expand_branch (code, label);
            return;
          }

        /* Otherwise, if we are doing less-than or greater-or-equal-than,
           op1 is a constant and the low word is zero, then we can just
           examine the high word.  */

        if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              ix86_compare_op0 = hi[0];
              ix86_compare_op1 = hi[1];
              ix86_expand_branch (code, label);
              return;
            default:
              break;
            }

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = UNKNOWN; code2 = NE;  break;
          case NE:   code2 = UNKNOWN; break;

          default:
            gcc_unreachable ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        ix86_compare_op0 = hi[0];
        ix86_compare_op1 = hi[1];

        if (code1 != UNKNOWN)
          ix86_expand_branch (code1, label);
        if (code2 != UNKNOWN)
          ix86_expand_branch (code2, label2);

        /* Note: code3 is the unsigned form of CODE, since only the high
           word carries the sign of the full-width comparison.  */
        ix86_compare_op0 = lo[0];
        ix86_compare_op1 = lo[1];
        ix86_expand_branch (code3, label);

        if (code2 != UNKNOWN)
          emit_label (label2);
        return;
      }

    default:
      gcc_unreachable ();
    }
}

/* Split branch based on floating point condition.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
                      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  /* Canonicalize so the branch target is TARGET1; if the fall-through
     was the label, reverse the condition.  */
  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp, &second, &bypass);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
         Assume the BYPASS and SECOND to be always test
         for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
         to be updated.  Later we may run some experiments and see
         if unordered values are more frequent in practice.  */
      if (bypass)
        bypass_probability = 1;
      if (second)
        second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode,
                                                 bypass,
                                                 gen_rtx_LABEL_REF (VOIDmode,
                                                                    label),
                                                 pc_rtx)));
      if (bypass_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (bypass_probability),
                               REG_NOTES (i));
    }
  i = emit_jump_insn (gen_rtx_SET
                      (VOIDmode, pc_rtx,
                       gen_rtx_IF_THEN_ELSE (VOIDmode,
                                             condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
                           GEN_INT (probability),
                           REG_NOTES (i));
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
                                                 target2)));
      if (second_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (second_probability),
                               REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}

int
ix86_expand_setcc (enum rtx_code code, rtx dest)
{
  rtx ret, tmp, tmpreg, equiv;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ?
TImode : DImode)) 10673 return 0; /* FAIL */ 10674 10675 gcc_assert (GET_MODE (dest) == QImode); 10676 10677 ret = ix86_expand_compare (code, &second_test, &bypass_test); 10678 PUT_MODE (ret, QImode); 10679 10680 tmp = dest; 10681 tmpreg = dest; 10682 10683 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret)); 10684 if (bypass_test || second_test) 10685 { 10686 rtx test = second_test; 10687 int bypass = 0; 10688 rtx tmp2 = gen_reg_rtx (QImode); 10689 if (bypass_test) 10690 { 10691 gcc_assert (!second_test); 10692 test = bypass_test; 10693 bypass = 1; 10694 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test))); 10695 } 10696 PUT_MODE (test, QImode); 10697 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test)); 10698 10699 if (bypass) 10700 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2)); 10701 else 10702 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2)); 10703 } 10704 10705 /* Attach a REG_EQUAL note describing the comparison result. */ 10706 if (ix86_compare_op0 && ix86_compare_op1) 10707 { 10708 equiv = simplify_gen_relational (code, QImode, 10709 GET_MODE (ix86_compare_op0), 10710 ix86_compare_op0, ix86_compare_op1); 10711 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv); 10712 } 10713 10714 return 1; /* DONE */ 10715} 10716 10717/* Expand comparison setting or clearing carry flag. Return true when 10718 successful and set pop for the operation. */ 10719static bool 10720ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) 10721{ 10722 enum machine_mode mode = 10723 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); 10724 10725 /* Do not handle DImode compares that go through special path. Also we can't 10726 deal with FP compares yet. This is possible to add. */ 10727 if (mode == (TARGET_64BIT ? 
TImode : DImode)) 10728 return false; 10729 if (FLOAT_MODE_P (mode)) 10730 { 10731 rtx second_test = NULL, bypass_test = NULL; 10732 rtx compare_op, compare_seq; 10733 10734 /* Shortcut: following common codes never translate into carry flag compares. */ 10735 if (code == EQ || code == NE || code == UNEQ || code == LTGT 10736 || code == ORDERED || code == UNORDERED) 10737 return false; 10738 10739 /* These comparisons require zero flag; swap operands so they won't. */ 10740 if ((code == GT || code == UNLE || code == LE || code == UNGT) 10741 && !TARGET_IEEE_FP) 10742 { 10743 rtx tmp = op0; 10744 op0 = op1; 10745 op1 = tmp; 10746 code = swap_condition (code); 10747 } 10748 10749 /* Try to expand the comparison and verify that we end up with carry flag 10750 based comparison. This is fails to be true only when we decide to expand 10751 comparison using arithmetic that is not too common scenario. */ 10752 start_sequence (); 10753 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX, 10754 &second_test, &bypass_test); 10755 compare_seq = get_insns (); 10756 end_sequence (); 10757 10758 if (second_test || bypass_test) 10759 return false; 10760 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode 10761 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) 10762 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op)); 10763 else 10764 code = GET_CODE (compare_op); 10765 if (code != LTU && code != GEU) 10766 return false; 10767 emit_insn (compare_seq); 10768 *pop = compare_op; 10769 return true; 10770 } 10771 if (!INTEGRAL_MODE_P (mode)) 10772 return false; 10773 switch (code) 10774 { 10775 case LTU: 10776 case GEU: 10777 break; 10778 10779 /* Convert a==0 into (unsigned)a<1. */ 10780 case EQ: 10781 case NE: 10782 if (op1 != const0_rtx) 10783 return false; 10784 op1 = const1_rtx; 10785 code = (code == EQ ? LTU : GEU); 10786 break; 10787 10788 /* Convert a>b into b<a or a>=b-1. 
*/ 10789 case GTU: 10790 case LEU: 10791 if (GET_CODE (op1) == CONST_INT) 10792 { 10793 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0)); 10794 /* Bail out on overflow. We still can swap operands but that 10795 would force loading of the constant into register. */ 10796 if (op1 == const0_rtx 10797 || !x86_64_immediate_operand (op1, GET_MODE (op1))) 10798 return false; 10799 code = (code == GTU ? GEU : LTU); 10800 } 10801 else 10802 { 10803 rtx tmp = op1; 10804 op1 = op0; 10805 op0 = tmp; 10806 code = (code == GTU ? LTU : GEU); 10807 } 10808 break; 10809 10810 /* Convert a>=0 into (unsigned)a<0x80000000. */ 10811 case LT: 10812 case GE: 10813 if (mode == DImode || op1 != const0_rtx) 10814 return false; 10815 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); 10816 code = (code == LT ? GEU : LTU); 10817 break; 10818 case LE: 10819 case GT: 10820 if (mode == DImode || op1 != constm1_rtx) 10821 return false; 10822 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); 10823 code = (code == LE ? GEU : LTU); 10824 break; 10825 10826 default: 10827 return false; 10828 } 10829 /* Swapping operands may cause constant to appear as first operand. 
*/ 10830 if (!nonimmediate_operand (op0, VOIDmode)) 10831 { 10832 if (no_new_pseudos) 10833 return false; 10834 op0 = force_reg (mode, op0); 10835 } 10836 ix86_compare_op0 = op0; 10837 ix86_compare_op1 = op1; 10838 *pop = ix86_expand_compare (code, NULL, NULL); 10839 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU); 10840 return true; 10841} 10842 10843int 10844ix86_expand_int_movcc (rtx operands[]) 10845{ 10846 enum rtx_code code = GET_CODE (operands[1]), compare_code; 10847 rtx compare_seq, compare_op; 10848 rtx second_test, bypass_test; 10849 enum machine_mode mode = GET_MODE (operands[0]); 10850 bool sign_bit_compare_p = false;; 10851 10852 start_sequence (); 10853 compare_op = ix86_expand_compare (code, &second_test, &bypass_test); 10854 compare_seq = get_insns (); 10855 end_sequence (); 10856 10857 compare_code = GET_CODE (compare_op); 10858 10859 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT)) 10860 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE))) 10861 sign_bit_compare_p = true; 10862 10863 /* Don't attempt mode expansion here -- if we had to expand 5 or 6 10864 HImode insns, we'd be swallowed in word prefix ops. */ 10865 10866 if ((mode != HImode || TARGET_FAST_PREFIX) 10867 && (mode != (TARGET_64BIT ? TImode : DImode)) 10868 && GET_CODE (operands[2]) == CONST_INT 10869 && GET_CODE (operands[3]) == CONST_INT) 10870 { 10871 rtx out = operands[0]; 10872 HOST_WIDE_INT ct = INTVAL (operands[2]); 10873 HOST_WIDE_INT cf = INTVAL (operands[3]); 10874 HOST_WIDE_INT diff; 10875 10876 diff = ct - cf; 10877 /* Sign bit compares are better done using shifts than we do by using 10878 sbb. */ 10879 if (sign_bit_compare_p 10880 || ix86_expand_carry_flag_compare (code, ix86_compare_op0, 10881 ix86_compare_op1, &compare_op)) 10882 { 10883 /* Detect overlap between destination and compare sources. 
*/ 10884 rtx tmp = out; 10885 10886 if (!sign_bit_compare_p) 10887 { 10888 bool fpcmp = false; 10889 10890 compare_code = GET_CODE (compare_op); 10891 10892 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode 10893 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) 10894 { 10895 fpcmp = true; 10896 compare_code = ix86_fp_compare_code_to_integer (compare_code); 10897 } 10898 10899 /* To simplify rest of code, restrict to the GEU case. */ 10900 if (compare_code == LTU) 10901 { 10902 HOST_WIDE_INT tmp = ct; 10903 ct = cf; 10904 cf = tmp; 10905 compare_code = reverse_condition (compare_code); 10906 code = reverse_condition (code); 10907 } 10908 else 10909 { 10910 if (fpcmp) 10911 PUT_CODE (compare_op, 10912 reverse_condition_maybe_unordered 10913 (GET_CODE (compare_op))); 10914 else 10915 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); 10916 } 10917 diff = ct - cf; 10918 10919 if (reg_overlap_mentioned_p (out, ix86_compare_op0) 10920 || reg_overlap_mentioned_p (out, ix86_compare_op1)) 10921 tmp = gen_reg_rtx (mode); 10922 10923 if (mode == DImode) 10924 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op)); 10925 else 10926 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op)); 10927 } 10928 else 10929 { 10930 if (code == GT || code == GE) 10931 code = reverse_condition (code); 10932 else 10933 { 10934 HOST_WIDE_INT tmp = ct; 10935 ct = cf; 10936 cf = tmp; 10937 diff = ct - cf; 10938 } 10939 tmp = emit_store_flag (tmp, code, ix86_compare_op0, 10940 ix86_compare_op1, VOIDmode, 0, -1); 10941 } 10942 10943 if (diff == 1) 10944 { 10945 /* 10946 * cmpl op0,op1 10947 * sbbl dest,dest 10948 * [addl dest, ct] 10949 * 10950 * Size 5 - 8. 10951 */ 10952 if (ct) 10953 tmp = expand_simple_binop (mode, PLUS, 10954 tmp, GEN_INT (ct), 10955 copy_rtx (tmp), 1, OPTAB_DIRECT); 10956 } 10957 else if (cf == -1) 10958 { 10959 /* 10960 * cmpl op0,op1 10961 * sbbl dest,dest 10962 * orl $ct, dest 10963 * 10964 * Size 8. 
10965 */ 10966 tmp = expand_simple_binop (mode, IOR, 10967 tmp, GEN_INT (ct), 10968 copy_rtx (tmp), 1, OPTAB_DIRECT); 10969 } 10970 else if (diff == -1 && ct) 10971 { 10972 /* 10973 * cmpl op0,op1 10974 * sbbl dest,dest 10975 * notl dest 10976 * [addl dest, cf] 10977 * 10978 * Size 8 - 11. 10979 */ 10980 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); 10981 if (cf) 10982 tmp = expand_simple_binop (mode, PLUS, 10983 copy_rtx (tmp), GEN_INT (cf), 10984 copy_rtx (tmp), 1, OPTAB_DIRECT); 10985 } 10986 else 10987 { 10988 /* 10989 * cmpl op0,op1 10990 * sbbl dest,dest 10991 * [notl dest] 10992 * andl cf - ct, dest 10993 * [addl dest, ct] 10994 * 10995 * Size 8 - 11. 10996 */ 10997 10998 if (cf == 0) 10999 { 11000 cf = ct; 11001 ct = 0; 11002 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); 11003 } 11004 11005 tmp = expand_simple_binop (mode, AND, 11006 copy_rtx (tmp), 11007 gen_int_mode (cf - ct, mode), 11008 copy_rtx (tmp), 1, OPTAB_DIRECT); 11009 if (ct) 11010 tmp = expand_simple_binop (mode, PLUS, 11011 copy_rtx (tmp), GEN_INT (ct), 11012 copy_rtx (tmp), 1, OPTAB_DIRECT); 11013 } 11014 11015 if (!rtx_equal_p (tmp, out)) 11016 emit_move_insn (copy_rtx (out), copy_rtx (tmp)); 11017 11018 return 1; /* DONE */ 11019 } 11020 11021 if (diff < 0) 11022 { 11023 HOST_WIDE_INT tmp; 11024 tmp = ct, ct = cf, cf = tmp; 11025 diff = -diff; 11026 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0))) 11027 { 11028 /* We may be reversing unordered compare to normal compare, that 11029 is not valid in general (we may convert non-trapping condition 11030 to trapping one), however on i386 we currently emit all 11031 comparisons unordered. 
*/ 11032 compare_code = reverse_condition_maybe_unordered (compare_code); 11033 code = reverse_condition_maybe_unordered (code); 11034 } 11035 else 11036 { 11037 compare_code = reverse_condition (compare_code); 11038 code = reverse_condition (code); 11039 } 11040 } 11041 11042 compare_code = UNKNOWN; 11043 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT 11044 && GET_CODE (ix86_compare_op1) == CONST_INT) 11045 { 11046 if (ix86_compare_op1 == const0_rtx 11047 && (code == LT || code == GE)) 11048 compare_code = code; 11049 else if (ix86_compare_op1 == constm1_rtx) 11050 { 11051 if (code == LE) 11052 compare_code = LT; 11053 else if (code == GT) 11054 compare_code = GE; 11055 } 11056 } 11057 11058 /* Optimize dest = (op0 < 0) ? -1 : cf. */ 11059 if (compare_code != UNKNOWN 11060 && GET_MODE (ix86_compare_op0) == GET_MODE (out) 11061 && (cf == -1 || ct == -1)) 11062 { 11063 /* If lea code below could be used, only optimize 11064 if it results in a 2 insn sequence. */ 11065 11066 if (! 
(diff == 1 || diff == 2 || diff == 4 || diff == 8 11067 || diff == 3 || diff == 5 || diff == 9) 11068 || (compare_code == LT && ct == -1) 11069 || (compare_code == GE && cf == -1)) 11070 { 11071 /* 11072 * notl op1 (if necessary) 11073 * sarl $31, op1 11074 * orl cf, op1 11075 */ 11076 if (ct != -1) 11077 { 11078 cf = ct; 11079 ct = -1; 11080 code = reverse_condition (code); 11081 } 11082 11083 out = emit_store_flag (out, code, ix86_compare_op0, 11084 ix86_compare_op1, VOIDmode, 0, -1); 11085 11086 out = expand_simple_binop (mode, IOR, 11087 out, GEN_INT (cf), 11088 out, 1, OPTAB_DIRECT); 11089 if (out != operands[0]) 11090 emit_move_insn (operands[0], out); 11091 11092 return 1; /* DONE */ 11093 } 11094 } 11095 11096 11097 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8 11098 || diff == 3 || diff == 5 || diff == 9) 11099 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL) 11100 && (mode != DImode 11101 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode))) 11102 { 11103 /* 11104 * xorl dest,dest 11105 * cmpl op1,op2 11106 * setcc dest 11107 * lea cf(dest*(ct-cf)),dest 11108 * 11109 * Size 14. 11110 * 11111 * This also catches the degenerate setcc-only case. 11112 */ 11113 11114 rtx tmp; 11115 int nops; 11116 11117 out = emit_store_flag (out, code, ix86_compare_op0, 11118 ix86_compare_op1, VOIDmode, 0, 1); 11119 11120 nops = 0; 11121 /* On x86_64 the lea instruction operates on Pmode, so we need 11122 to get arithmetics done in proper mode to match. 
*/ 11123 if (diff == 1) 11124 tmp = copy_rtx (out); 11125 else 11126 { 11127 rtx out1; 11128 out1 = copy_rtx (out); 11129 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); 11130 nops++; 11131 if (diff & 1) 11132 { 11133 tmp = gen_rtx_PLUS (mode, tmp, out1); 11134 nops++; 11135 } 11136 } 11137 if (cf != 0) 11138 { 11139 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf)); 11140 nops++; 11141 } 11142 if (!rtx_equal_p (tmp, out)) 11143 { 11144 if (nops == 1) 11145 out = force_operand (tmp, copy_rtx (out)); 11146 else 11147 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp))); 11148 } 11149 if (!rtx_equal_p (out, operands[0])) 11150 emit_move_insn (operands[0], copy_rtx (out)); 11151 11152 return 1; /* DONE */ 11153 } 11154 11155 /* 11156 * General case: Jumpful: 11157 * xorl dest,dest cmpl op1, op2 11158 * cmpl op1, op2 movl ct, dest 11159 * setcc dest jcc 1f 11160 * decl dest movl cf, dest 11161 * andl (cf-ct),dest 1: 11162 * addl ct,dest 11163 * 11164 * Size 20. Size 14. 11165 * 11166 * This is reasonably steep, but branch mispredict costs are 11167 * high on modern cpus, so consider failing only if optimizing 11168 * for space. 11169 */ 11170 11171 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) 11172 && BRANCH_COST >= 2) 11173 { 11174 if (cf == 0) 11175 { 11176 cf = ct; 11177 ct = 0; 11178 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0))) 11179 /* We may be reversing unordered compare to normal compare, 11180 that is not valid in general (we may convert non-trapping 11181 condition to trapping one), however on i386 we currently 11182 emit all comparisons unordered. 
*/ 11183 code = reverse_condition_maybe_unordered (code); 11184 else 11185 { 11186 code = reverse_condition (code); 11187 if (compare_code != UNKNOWN) 11188 compare_code = reverse_condition (compare_code); 11189 } 11190 } 11191 11192 if (compare_code != UNKNOWN) 11193 { 11194 /* notl op1 (if needed) 11195 sarl $31, op1 11196 andl (cf-ct), op1 11197 addl ct, op1 11198 11199 For x < 0 (resp. x <= -1) there will be no notl, 11200 so if possible swap the constants to get rid of the 11201 complement. 11202 True/false will be -1/0 while code below (store flag 11203 followed by decrement) is 0/-1, so the constants need 11204 to be exchanged once more. */ 11205 11206 if (compare_code == GE || !cf) 11207 { 11208 code = reverse_condition (code); 11209 compare_code = LT; 11210 } 11211 else 11212 { 11213 HOST_WIDE_INT tmp = cf; 11214 cf = ct; 11215 ct = tmp; 11216 } 11217 11218 out = emit_store_flag (out, code, ix86_compare_op0, 11219 ix86_compare_op1, VOIDmode, 0, -1); 11220 } 11221 else 11222 { 11223 out = emit_store_flag (out, code, ix86_compare_op0, 11224 ix86_compare_op1, VOIDmode, 0, 1); 11225 11226 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx, 11227 copy_rtx (out), 1, OPTAB_DIRECT); 11228 } 11229 11230 out = expand_simple_binop (mode, AND, copy_rtx (out), 11231 gen_int_mode (cf - ct, mode), 11232 copy_rtx (out), 1, OPTAB_DIRECT); 11233 if (ct) 11234 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct), 11235 copy_rtx (out), 1, OPTAB_DIRECT); 11236 if (!rtx_equal_p (out, operands[0])) 11237 emit_move_insn (operands[0], copy_rtx (out)); 11238 11239 return 1; /* DONE */ 11240 } 11241 } 11242 11243 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) 11244 { 11245 /* Try a few things more with specific constants and a variable. 
*/ 11246 11247 optab op; 11248 rtx var, orig_out, out, tmp; 11249 11250 if (BRANCH_COST <= 2) 11251 return 0; /* FAIL */ 11252 11253 /* If one of the two operands is an interesting constant, load a 11254 constant with the above and mask it in with a logical operation. */ 11255 11256 if (GET_CODE (operands[2]) == CONST_INT) 11257 { 11258 var = operands[3]; 11259 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx) 11260 operands[3] = constm1_rtx, op = and_optab; 11261 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx) 11262 operands[3] = const0_rtx, op = ior_optab; 11263 else 11264 return 0; /* FAIL */ 11265 } 11266 else if (GET_CODE (operands[3]) == CONST_INT) 11267 { 11268 var = operands[2]; 11269 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx) 11270 operands[2] = constm1_rtx, op = and_optab; 11271 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx) 11272 operands[2] = const0_rtx, op = ior_optab; 11273 else 11274 return 0; /* FAIL */ 11275 } 11276 else 11277 return 0; /* FAIL */ 11278 11279 orig_out = operands[0]; 11280 tmp = gen_reg_rtx (mode); 11281 operands[0] = tmp; 11282 11283 /* Recurse to get the constant loaded. */ 11284 if (ix86_expand_int_movcc (operands) == 0) 11285 return 0; /* FAIL */ 11286 11287 /* Mask in the interesting variable. */ 11288 out = expand_binop (mode, op, var, tmp, orig_out, 0, 11289 OPTAB_WIDEN); 11290 if (!rtx_equal_p (out, orig_out)) 11291 emit_move_insn (copy_rtx (orig_out), copy_rtx (out)); 11292 11293 return 1; /* DONE */ 11294 } 11295 11296 /* 11297 * For comparison with above, 11298 * 11299 * movl cf,dest 11300 * movl ct,tmp 11301 * cmpl op1,op2 11302 * cmovcc tmp,dest 11303 * 11304 * Size 15. 11305 */ 11306 11307 if (! nonimmediate_operand (operands[2], mode)) 11308 operands[2] = force_reg (mode, operands[2]); 11309 if (! 
nonimmediate_operand (operands[3], mode)) 11310 operands[3] = force_reg (mode, operands[3]); 11311 11312 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) 11313 { 11314 rtx tmp = gen_reg_rtx (mode); 11315 emit_move_insn (tmp, operands[3]); 11316 operands[3] = tmp; 11317 } 11318 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) 11319 { 11320 rtx tmp = gen_reg_rtx (mode); 11321 emit_move_insn (tmp, operands[2]); 11322 operands[2] = tmp; 11323 } 11324 11325 if (! register_operand (operands[2], VOIDmode) 11326 && (mode == QImode 11327 || ! register_operand (operands[3], VOIDmode))) 11328 operands[2] = force_reg (mode, operands[2]); 11329 11330 if (mode == QImode 11331 && ! register_operand (operands[3], VOIDmode)) 11332 operands[3] = force_reg (mode, operands[3]); 11333 11334 emit_insn (compare_seq); 11335 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 11336 gen_rtx_IF_THEN_ELSE (mode, 11337 compare_op, operands[2], 11338 operands[3]))); 11339 if (bypass_test) 11340 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), 11341 gen_rtx_IF_THEN_ELSE (mode, 11342 bypass_test, 11343 copy_rtx (operands[3]), 11344 copy_rtx (operands[0])))); 11345 if (second_test) 11346 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), 11347 gen_rtx_IF_THEN_ELSE (mode, 11348 second_test, 11349 copy_rtx (operands[2]), 11350 copy_rtx (operands[0])))); 11351 11352 return 1; /* DONE */ 11353} 11354 11355/* Swap, force into registers, or otherwise massage the two operands 11356 to an sse comparison with a mask result. Thus we differ a bit from 11357 ix86_prepare_fp_compare_args which expects to produce a flags result. 11358 11359 The DEST operand exists to help determine whether to commute commutative 11360 operators. The POP0/POP1 operands are updated in place. The new 11361 comparison code is returned, or UNKNOWN if not implementable. 
*/ 11362 11363static enum rtx_code 11364ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, 11365 rtx *pop0, rtx *pop1) 11366{ 11367 rtx tmp; 11368 11369 switch (code) 11370 { 11371 case LTGT: 11372 case UNEQ: 11373 /* We have no LTGT as an operator. We could implement it with 11374 NE & ORDERED, but this requires an extra temporary. It's 11375 not clear that it's worth it. */ 11376 return UNKNOWN; 11377 11378 case LT: 11379 case LE: 11380 case UNGT: 11381 case UNGE: 11382 /* These are supported directly. */ 11383 break; 11384 11385 case EQ: 11386 case NE: 11387 case UNORDERED: 11388 case ORDERED: 11389 /* For commutative operators, try to canonicalize the destination 11390 operand to be first in the comparison - this helps reload to 11391 avoid extra moves. */ 11392 if (!dest || !rtx_equal_p (dest, *pop1)) 11393 break; 11394 /* FALLTHRU */ 11395 11396 case GE: 11397 case GT: 11398 case UNLE: 11399 case UNLT: 11400 /* These are not supported directly. Swap the comparison operands 11401 to transform into something that is supported. */ 11402 tmp = *pop0; 11403 *pop0 = *pop1; 11404 *pop1 = tmp; 11405 code = swap_condition (code); 11406 break; 11407 11408 default: 11409 gcc_unreachable (); 11410 } 11411 11412 return code; 11413} 11414 11415/* Detect conditional moves that exactly match min/max operational 11416 semantics. Note that this is IEEE safe, as long as we don't 11417 interchange the operands. 11418 11419 Returns FALSE if this conditional move doesn't match a MIN/MAX, 11420 and TRUE if the operation is successful and instructions are emitted. 
*/

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
                           rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  /* Only LT (directly) and UNGE (with the arms swapped) can encode a
     min/max here.  */
  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}

/* Expand an sse vector comparison.  Return the register with the result.
*/ 11472 11473static rtx 11474ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, 11475 rtx op_true, rtx op_false) 11476{ 11477 enum machine_mode mode = GET_MODE (dest); 11478 rtx x; 11479 11480 cmp_op0 = force_reg (mode, cmp_op0); 11481 if (!nonimmediate_operand (cmp_op1, mode)) 11482 cmp_op1 = force_reg (mode, cmp_op1); 11483 11484 if (optimize 11485 || reg_overlap_mentioned_p (dest, op_true) 11486 || reg_overlap_mentioned_p (dest, op_false)) 11487 dest = gen_reg_rtx (mode); 11488 11489 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1); 11490 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 11491 11492 return dest; 11493} 11494 11495/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical 11496 operations. This is used for both scalar and vector conditional moves. */ 11497 11498static void 11499ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) 11500{ 11501 enum machine_mode mode = GET_MODE (dest); 11502 rtx t2, t3, x; 11503 11504 if (op_false == CONST0_RTX (mode)) 11505 { 11506 op_true = force_reg (mode, op_true); 11507 x = gen_rtx_AND (mode, cmp, op_true); 11508 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 11509 } 11510 else if (op_true == CONST0_RTX (mode)) 11511 { 11512 op_false = force_reg (mode, op_false); 11513 x = gen_rtx_NOT (mode, cmp); 11514 x = gen_rtx_AND (mode, x, op_false); 11515 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 11516 } 11517 else 11518 { 11519 op_true = force_reg (mode, op_true); 11520 op_false = force_reg (mode, op_false); 11521 11522 t2 = gen_reg_rtx (mode); 11523 if (optimize) 11524 t3 = gen_reg_rtx (mode); 11525 else 11526 t3 = dest; 11527 11528 x = gen_rtx_AND (mode, op_true, cmp); 11529 emit_insn (gen_rtx_SET (VOIDmode, t2, x)); 11530 11531 x = gen_rtx_NOT (mode, cmp); 11532 x = gen_rtx_AND (mode, x, op_false); 11533 emit_insn (gen_rtx_SET (VOIDmode, t3, x)); 11534 11535 x = gen_rtx_IOR (mode, t3, t2); 11536 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 11537 } 11538} 11539 
11540/* Expand a floating-point conditional move. Return true if successful. */ 11541 11542int 11543ix86_expand_fp_movcc (rtx operands[]) 11544{ 11545 enum machine_mode mode = GET_MODE (operands[0]); 11546 enum rtx_code code = GET_CODE (operands[1]); 11547 rtx tmp, compare_op, second_test, bypass_test; 11548 11549 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) 11550 { 11551 enum machine_mode cmode; 11552 11553 /* Since we've no cmove for sse registers, don't force bad register 11554 allocation just to gain access to it. Deny movcc when the 11555 comparison mode doesn't match the move mode. */ 11556 cmode = GET_MODE (ix86_compare_op0); 11557 if (cmode == VOIDmode) 11558 cmode = GET_MODE (ix86_compare_op1); 11559 if (cmode != mode) 11560 return 0; 11561 11562 code = ix86_prepare_sse_fp_compare_args (operands[0], code, 11563 &ix86_compare_op0, 11564 &ix86_compare_op1); 11565 if (code == UNKNOWN) 11566 return 0; 11567 11568 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0, 11569 ix86_compare_op1, operands[2], 11570 operands[3])) 11571 return 1; 11572 11573 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0, 11574 ix86_compare_op1, operands[2], operands[3]); 11575 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]); 11576 return 1; 11577 } 11578 11579 /* The floating point conditional move instructions don't directly 11580 support conditions resulting from a signed integer comparison. */ 11581 11582 compare_op = ix86_expand_compare (code, &second_test, &bypass_test); 11583 11584 /* The floating point conditional move instructions don't directly 11585 support signed integer comparisons. 
*/ 11586 11587 if (!fcmov_comparison_operator (compare_op, VOIDmode)) 11588 { 11589 gcc_assert (!second_test && !bypass_test); 11590 tmp = gen_reg_rtx (QImode); 11591 ix86_expand_setcc (code, tmp); 11592 code = NE; 11593 ix86_compare_op0 = tmp; 11594 ix86_compare_op1 = const0_rtx; 11595 compare_op = ix86_expand_compare (code, &second_test, &bypass_test); 11596 } 11597 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) 11598 { 11599 tmp = gen_reg_rtx (mode); 11600 emit_move_insn (tmp, operands[3]); 11601 operands[3] = tmp; 11602 } 11603 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) 11604 { 11605 tmp = gen_reg_rtx (mode); 11606 emit_move_insn (tmp, operands[2]); 11607 operands[2] = tmp; 11608 } 11609 11610 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 11611 gen_rtx_IF_THEN_ELSE (mode, compare_op, 11612 operands[2], operands[3]))); 11613 if (bypass_test) 11614 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 11615 gen_rtx_IF_THEN_ELSE (mode, bypass_test, 11616 operands[3], operands[0]))); 11617 if (second_test) 11618 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 11619 gen_rtx_IF_THEN_ELSE (mode, second_test, 11620 operands[2], operands[0]))); 11621 11622 return 1; 11623} 11624 11625/* Expand a floating-point vector conditional move; a vcond operation 11626 rather than a movcc operation. 
*/ 11627 11628bool 11629ix86_expand_fp_vcond (rtx operands[]) 11630{ 11631 enum rtx_code code = GET_CODE (operands[3]); 11632 rtx cmp; 11633 11634 code = ix86_prepare_sse_fp_compare_args (operands[0], code, 11635 &operands[4], &operands[5]); 11636 if (code == UNKNOWN) 11637 return false; 11638 11639 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4], 11640 operands[5], operands[1], operands[2])) 11641 return true; 11642 11643 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5], 11644 operands[1], operands[2]); 11645 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); 11646 return true; 11647} 11648 11649/* Expand a signed integral vector conditional move. */ 11650 11651bool 11652ix86_expand_int_vcond (rtx operands[]) 11653{ 11654 enum machine_mode mode = GET_MODE (operands[0]); 11655 enum rtx_code code = GET_CODE (operands[3]); 11656 bool negate = false; 11657 rtx x, cop0, cop1; 11658 11659 cop0 = operands[4]; 11660 cop1 = operands[5]; 11661 11662 /* Canonicalize the comparison to EQ, GT, GTU. */ 11663 switch (code) 11664 { 11665 case EQ: 11666 case GT: 11667 case GTU: 11668 break; 11669 11670 case NE: 11671 case LE: 11672 case LEU: 11673 code = reverse_condition (code); 11674 negate = true; 11675 break; 11676 11677 case GE: 11678 case GEU: 11679 code = reverse_condition (code); 11680 negate = true; 11681 /* FALLTHRU */ 11682 11683 case LT: 11684 case LTU: 11685 code = swap_condition (code); 11686 x = cop0, cop0 = cop1, cop1 = x; 11687 break; 11688 11689 default: 11690 gcc_unreachable (); 11691 } 11692 11693 /* Unsigned parallel compare is not supported by the hardware. Play some 11694 tricks to turn this into a signed comparison against 0. */ 11695 if (code == GTU) 11696 { 11697 cop0 = force_reg (mode, cop0); 11698 11699 switch (mode) 11700 { 11701 case V4SImode: 11702 { 11703 rtx t1, t2, mask; 11704 11705 /* Perform a parallel modulo subtraction. 
*/ 11706 t1 = gen_reg_rtx (mode); 11707 emit_insn (gen_subv4si3 (t1, cop0, cop1)); 11708 11709 /* Extract the original sign bit of op0. */ 11710 mask = GEN_INT (-0x80000000); 11711 mask = gen_rtx_CONST_VECTOR (mode, 11712 gen_rtvec (4, mask, mask, mask, mask)); 11713 mask = force_reg (mode, mask); 11714 t2 = gen_reg_rtx (mode); 11715 emit_insn (gen_andv4si3 (t2, cop0, mask)); 11716 11717 /* XOR it back into the result of the subtraction. This results 11718 in the sign bit set iff we saw unsigned underflow. */ 11719 x = gen_reg_rtx (mode); 11720 emit_insn (gen_xorv4si3 (x, t1, t2)); 11721 11722 code = GT; 11723 } 11724 break; 11725 11726 case V16QImode: 11727 case V8HImode: 11728 /* Perform a parallel unsigned saturating subtraction. */ 11729 x = gen_reg_rtx (mode); 11730 emit_insn (gen_rtx_SET (VOIDmode, x, 11731 gen_rtx_US_MINUS (mode, cop0, cop1))); 11732 11733 code = EQ; 11734 negate = !negate; 11735 break; 11736 11737 default: 11738 gcc_unreachable (); 11739 } 11740 11741 cop0 = x; 11742 cop1 = CONST0_RTX (mode); 11743 } 11744 11745 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1, 11746 operands[1+negate], operands[2-negate]); 11747 11748 ix86_expand_sse_movcc (operands[0], x, operands[1+negate], 11749 operands[2-negate]); 11750 return true; 11751} 11752 11753/* Expand conditional increment or decrement using adb/sbb instructions. 11754 The default case using setcc followed by the conditional move can be 11755 done by generic code. 
*/ 11756int 11757ix86_expand_int_addcc (rtx operands[]) 11758{ 11759 enum rtx_code code = GET_CODE (operands[1]); 11760 rtx compare_op; 11761 rtx val = const0_rtx; 11762 bool fpcmp = false; 11763 enum machine_mode mode = GET_MODE (operands[0]); 11764 11765 if (operands[3] != const1_rtx 11766 && operands[3] != constm1_rtx) 11767 return 0; 11768 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0, 11769 ix86_compare_op1, &compare_op)) 11770 return 0; 11771 code = GET_CODE (compare_op); 11772 11773 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode 11774 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) 11775 { 11776 fpcmp = true; 11777 code = ix86_fp_compare_code_to_integer (code); 11778 } 11779 11780 if (code != LTU) 11781 { 11782 val = constm1_rtx; 11783 if (fpcmp) 11784 PUT_CODE (compare_op, 11785 reverse_condition_maybe_unordered 11786 (GET_CODE (compare_op))); 11787 else 11788 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); 11789 } 11790 PUT_MODE (compare_op, mode); 11791 11792 /* Construct either adc or sbb insn. 
*/ 11793 if ((code == LTU) == (operands[3] == constm1_rtx)) 11794 { 11795 switch (GET_MODE (operands[0])) 11796 { 11797 case QImode: 11798 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op)); 11799 break; 11800 case HImode: 11801 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op)); 11802 break; 11803 case SImode: 11804 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op)); 11805 break; 11806 case DImode: 11807 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op)); 11808 break; 11809 default: 11810 gcc_unreachable (); 11811 } 11812 } 11813 else 11814 { 11815 switch (GET_MODE (operands[0])) 11816 { 11817 case QImode: 11818 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op)); 11819 break; 11820 case HImode: 11821 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op)); 11822 break; 11823 case SImode: 11824 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op)); 11825 break; 11826 case DImode: 11827 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op)); 11828 break; 11829 default: 11830 gcc_unreachable (); 11831 } 11832 } 11833 return 1; /* DONE */ 11834} 11835 11836 11837/* Split operands 0 and 1 into SImode parts. Similar to split_di, but 11838 works for floating pointer parameters and nonoffsetable memories. 11839 For pushes, it returns just stack offsets; the values will be saved 11840 in the right order. Maximally three parts are generated. */ 11841 11842static int 11843ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode) 11844{ 11845 int size; 11846 11847 if (!TARGET_64BIT) 11848 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4; 11849 else 11850 size = (GET_MODE_SIZE (mode) + 4) / 8; 11851 11852 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand))); 11853 gcc_assert (size >= 2 && size <= 3); 11854 11855 /* Optimize constant pool reference to immediates. 
This is used by fp 11856 moves, that force all constants to memory to allow combining. */ 11857 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand)) 11858 { 11859 rtx tmp = maybe_get_pool_constant (operand); 11860 if (tmp) 11861 operand = tmp; 11862 } 11863 11864 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand)) 11865 { 11866 /* The only non-offsetable memories we handle are pushes. */ 11867 int ok = push_operand (operand, VOIDmode); 11868 11869 gcc_assert (ok); 11870 11871 operand = copy_rtx (operand); 11872 PUT_MODE (operand, Pmode); 11873 parts[0] = parts[1] = parts[2] = operand; 11874 return size; 11875 } 11876 11877 if (GET_CODE (operand) == CONST_VECTOR) 11878 { 11879 enum machine_mode imode = int_mode_for_mode (mode); 11880 /* Caution: if we looked through a constant pool memory above, 11881 the operand may actually have a different mode now. That's 11882 ok, since we want to pun this all the way back to an integer. */ 11883 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0); 11884 gcc_assert (operand != NULL); 11885 mode = imode; 11886 } 11887 11888 if (!TARGET_64BIT) 11889 { 11890 if (mode == DImode) 11891 split_di (&operand, 1, &parts[0], &parts[1]); 11892 else 11893 { 11894 if (REG_P (operand)) 11895 { 11896 gcc_assert (reload_completed); 11897 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0); 11898 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1); 11899 if (size == 3) 11900 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2); 11901 } 11902 else if (offsettable_memref_p (operand)) 11903 { 11904 operand = adjust_address (operand, SImode, 0); 11905 parts[0] = operand; 11906 parts[1] = adjust_address (operand, SImode, 4); 11907 if (size == 3) 11908 parts[2] = adjust_address (operand, SImode, 8); 11909 } 11910 else if (GET_CODE (operand) == CONST_DOUBLE) 11911 { 11912 REAL_VALUE_TYPE r; 11913 long l[4]; 11914 11915 REAL_VALUE_FROM_CONST_DOUBLE (r, operand); 11916 switch (mode) 11917 { 11918 case XFmode: 11919 
REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l); 11920 parts[2] = gen_int_mode (l[2], SImode); 11921 break; 11922 case DFmode: 11923 REAL_VALUE_TO_TARGET_DOUBLE (r, l); 11924 break; 11925 default: 11926 gcc_unreachable (); 11927 } 11928 parts[1] = gen_int_mode (l[1], SImode); 11929 parts[0] = gen_int_mode (l[0], SImode); 11930 } 11931 else 11932 gcc_unreachable (); 11933 } 11934 } 11935 else 11936 { 11937 if (mode == TImode) 11938 split_ti (&operand, 1, &parts[0], &parts[1]); 11939 if (mode == XFmode || mode == TFmode) 11940 { 11941 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode; 11942 if (REG_P (operand)) 11943 { 11944 gcc_assert (reload_completed); 11945 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0); 11946 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1); 11947 } 11948 else if (offsettable_memref_p (operand)) 11949 { 11950 operand = adjust_address (operand, DImode, 0); 11951 parts[0] = operand; 11952 parts[1] = adjust_address (operand, upper_mode, 8); 11953 } 11954 else if (GET_CODE (operand) == CONST_DOUBLE) 11955 { 11956 REAL_VALUE_TYPE r; 11957 long l[4]; 11958 11959 REAL_VALUE_FROM_CONST_DOUBLE (r, operand); 11960 real_to_target (l, &r, mode); 11961 11962 /* Do not use shift by 32 to avoid warning on 32bit systems. 
*/ 11963 if (HOST_BITS_PER_WIDE_INT >= 64) 11964 parts[0] 11965 = gen_int_mode 11966 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1)) 11967 + ((((HOST_WIDE_INT) l[1]) << 31) << 1), 11968 DImode); 11969 else 11970 parts[0] = immed_double_const (l[0], l[1], DImode); 11971 11972 if (upper_mode == SImode) 11973 parts[1] = gen_int_mode (l[2], SImode); 11974 else if (HOST_BITS_PER_WIDE_INT >= 64) 11975 parts[1] 11976 = gen_int_mode 11977 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1)) 11978 + ((((HOST_WIDE_INT) l[3]) << 31) << 1), 11979 DImode); 11980 else 11981 parts[1] = immed_double_const (l[2], l[3], DImode); 11982 } 11983 else 11984 gcc_unreachable (); 11985 } 11986 } 11987 11988 return size; 11989} 11990 11991/* Emit insns to perform a move or push of DI, DF, and XF values. 11992 Return false when normal moves are needed; true when all required 11993 insns have been emitted. Operands 2-4 contain the input values 11994 int the correct order; operands 5-7 contain the output values. */ 11995 11996void 11997ix86_split_long_move (rtx operands[]) 11998{ 11999 rtx part[2][3]; 12000 int nparts; 12001 int push = 0; 12002 int collisions = 0; 12003 enum machine_mode mode = GET_MODE (operands[0]); 12004 12005 /* The DFmode expanders may ask us to move double. 12006 For 64bit target this is single move. By hiding the fact 12007 here we simplify i386.md splitters. */ 12008 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT) 12009 { 12010 /* Optimize constant pool reference to immediates. This is used by 12011 fp moves, that force all constants to memory to allow combining. 
*/ 12012 12013 if (GET_CODE (operands[1]) == MEM 12014 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF 12015 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))) 12016 operands[1] = get_pool_constant (XEXP (operands[1], 0)); 12017 if (push_operand (operands[0], VOIDmode)) 12018 { 12019 operands[0] = copy_rtx (operands[0]); 12020 PUT_MODE (operands[0], Pmode); 12021 } 12022 else 12023 operands[0] = gen_lowpart (DImode, operands[0]); 12024 operands[1] = gen_lowpart (DImode, operands[1]); 12025 emit_move_insn (operands[0], operands[1]); 12026 return; 12027 } 12028 12029 /* The only non-offsettable memory we handle is push. */ 12030 if (push_operand (operands[0], VOIDmode)) 12031 push = 1; 12032 else 12033 gcc_assert (GET_CODE (operands[0]) != MEM 12034 || offsettable_memref_p (operands[0])); 12035 12036 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0])); 12037 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0])); 12038 12039 /* When emitting push, take care for source operands on the stack. */ 12040 if (push && GET_CODE (operands[1]) == MEM 12041 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1])) 12042 { 12043 if (nparts == 3) 12044 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]), 12045 XEXP (part[1][2], 0)); 12046 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]), 12047 XEXP (part[1][1], 0)); 12048 } 12049 12050 /* We need to do copy in the right order in case an address register 12051 of the source overlaps the destination. */ 12052 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM) 12053 { 12054 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))) 12055 collisions++; 12056 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0))) 12057 collisions++; 12058 if (nparts == 3 12059 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0))) 12060 collisions++; 12061 12062 /* Collision in the middle part can be handled by reordering. 
*/ 12063 if (collisions == 1 && nparts == 3 12064 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0))) 12065 { 12066 rtx tmp; 12067 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp; 12068 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp; 12069 } 12070 12071 /* If there are more collisions, we can't handle it by reordering. 12072 Do an lea to the last part and use only one colliding move. */ 12073 else if (collisions > 1) 12074 { 12075 rtx base; 12076 12077 collisions = 1; 12078 12079 base = part[0][nparts - 1]; 12080 12081 /* Handle the case when the last part isn't valid for lea. 12082 Happens in 64-bit mode storing the 12-byte XFmode. */ 12083 if (GET_MODE (base) != Pmode) 12084 base = gen_rtx_REG (Pmode, REGNO (base)); 12085 12086 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0))); 12087 part[1][0] = replace_equiv_address (part[1][0], base); 12088 part[1][1] = replace_equiv_address (part[1][1], 12089 plus_constant (base, UNITS_PER_WORD)); 12090 if (nparts == 3) 12091 part[1][2] = replace_equiv_address (part[1][2], 12092 plus_constant (base, 8)); 12093 } 12094 } 12095 12096 if (push) 12097 { 12098 if (!TARGET_64BIT) 12099 { 12100 if (nparts == 3) 12101 { 12102 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode) 12103 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4))); 12104 emit_move_insn (part[0][2], part[1][2]); 12105 } 12106 } 12107 else 12108 { 12109 /* In 64bit mode we don't have 32bit push available. In case this is 12110 register, it is OK - we will just use larger counterpart. We also 12111 retype memory - these comes from attempt to avoid REX prefix on 12112 moving of second half of TFmode value. 
*/ 12113 if (GET_MODE (part[1][1]) == SImode) 12114 { 12115 switch (GET_CODE (part[1][1])) 12116 { 12117 case MEM: 12118 part[1][1] = adjust_address (part[1][1], DImode, 0); 12119 break; 12120 12121 case REG: 12122 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1])); 12123 break; 12124 12125 default: 12126 gcc_unreachable (); 12127 } 12128 12129 if (GET_MODE (part[1][0]) == SImode) 12130 part[1][0] = part[1][1]; 12131 } 12132 } 12133 emit_move_insn (part[0][1], part[1][1]); 12134 emit_move_insn (part[0][0], part[1][0]); 12135 return; 12136 } 12137 12138 /* Choose correct order to not overwrite the source before it is copied. */ 12139 if ((REG_P (part[0][0]) 12140 && REG_P (part[1][1]) 12141 && (REGNO (part[0][0]) == REGNO (part[1][1]) 12142 || (nparts == 3 12143 && REGNO (part[0][0]) == REGNO (part[1][2])))) 12144 || (collisions > 0 12145 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))) 12146 { 12147 if (nparts == 3) 12148 { 12149 operands[2] = part[0][2]; 12150 operands[3] = part[0][1]; 12151 operands[4] = part[0][0]; 12152 operands[5] = part[1][2]; 12153 operands[6] = part[1][1]; 12154 operands[7] = part[1][0]; 12155 } 12156 else 12157 { 12158 operands[2] = part[0][1]; 12159 operands[3] = part[0][0]; 12160 operands[5] = part[1][1]; 12161 operands[6] = part[1][0]; 12162 } 12163 } 12164 else 12165 { 12166 if (nparts == 3) 12167 { 12168 operands[2] = part[0][0]; 12169 operands[3] = part[0][1]; 12170 operands[4] = part[0][2]; 12171 operands[5] = part[1][0]; 12172 operands[6] = part[1][1]; 12173 operands[7] = part[1][2]; 12174 } 12175 else 12176 { 12177 operands[2] = part[0][0]; 12178 operands[3] = part[0][1]; 12179 operands[5] = part[1][0]; 12180 operands[6] = part[1][1]; 12181 } 12182 } 12183 12184 /* If optimizing for size, attempt to locally unCSE nonzero constants. 
*/ 12185 if (optimize_size) 12186 { 12187 if (GET_CODE (operands[5]) == CONST_INT 12188 && operands[5] != const0_rtx 12189 && REG_P (operands[2])) 12190 { 12191 if (GET_CODE (operands[6]) == CONST_INT 12192 && INTVAL (operands[6]) == INTVAL (operands[5])) 12193 operands[6] = operands[2]; 12194 12195 if (nparts == 3 12196 && GET_CODE (operands[7]) == CONST_INT 12197 && INTVAL (operands[7]) == INTVAL (operands[5])) 12198 operands[7] = operands[2]; 12199 } 12200 12201 if (nparts == 3 12202 && GET_CODE (operands[6]) == CONST_INT 12203 && operands[6] != const0_rtx 12204 && REG_P (operands[3]) 12205 && GET_CODE (operands[7]) == CONST_INT 12206 && INTVAL (operands[7]) == INTVAL (operands[6])) 12207 operands[7] = operands[3]; 12208 } 12209 12210 emit_move_insn (operands[2], operands[5]); 12211 emit_move_insn (operands[3], operands[6]); 12212 if (nparts == 3) 12213 emit_move_insn (operands[4], operands[7]); 12214 12215 return; 12216} 12217 12218/* Helper function of ix86_split_ashl used to generate an SImode/DImode 12219 left shift by a constant, either using a single shift or 12220 a sequence of add instructions. */ 12221 12222static void 12223ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode) 12224{ 12225 if (count == 1) 12226 { 12227 emit_insn ((mode == DImode 12228 ? gen_addsi3 12229 : gen_adddi3) (operand, operand, operand)); 12230 } 12231 else if (!optimize_size 12232 && count * ix86_cost->add <= ix86_cost->shift_const) 12233 { 12234 int i; 12235 for (i=0; i<count; i++) 12236 { 12237 emit_insn ((mode == DImode 12238 ? gen_addsi3 12239 : gen_adddi3) (operand, operand, operand)); 12240 } 12241 } 12242 else 12243 emit_insn ((mode == DImode 12244 ? gen_ashlsi3 12245 : gen_ashldi3) (operand, operand, GEN_INT (count))); 12246} 12247 12248void 12249ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode) 12250{ 12251 rtx low[2], high[2]; 12252 int count; 12253 const int single_width = mode == DImode ? 
32 : 64; 12254 12255 if (GET_CODE (operands[2]) == CONST_INT) 12256 { 12257 (mode == DImode ? split_di : split_ti) (operands, 2, low, high); 12258 count = INTVAL (operands[2]) & (single_width * 2 - 1); 12259 12260 if (count >= single_width) 12261 { 12262 emit_move_insn (high[0], low[1]); 12263 emit_move_insn (low[0], const0_rtx); 12264 12265 if (count > single_width) 12266 ix86_expand_ashl_const (high[0], count - single_width, mode); 12267 } 12268 else 12269 { 12270 if (!rtx_equal_p (operands[0], operands[1])) 12271 emit_move_insn (operands[0], operands[1]); 12272 emit_insn ((mode == DImode 12273 ? gen_x86_shld_1 12274 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count))); 12275 ix86_expand_ashl_const (low[0], count, mode); 12276 } 12277 return; 12278 } 12279 12280 (mode == DImode ? split_di : split_ti) (operands, 1, low, high); 12281 12282 if (operands[1] == const1_rtx) 12283 { 12284 /* Assuming we've chosen a QImode capable registers, then 1 << N 12285 can be done with two 32/64-bit shifts, no branches, no cmoves. */ 12286 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0])) 12287 { 12288 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG); 12289 12290 ix86_expand_clear (low[0]); 12291 ix86_expand_clear (high[0]); 12292 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width))); 12293 12294 d = gen_lowpart (QImode, low[0]); 12295 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); 12296 s = gen_rtx_EQ (QImode, flags, const0_rtx); 12297 emit_insn (gen_rtx_SET (VOIDmode, d, s)); 12298 12299 d = gen_lowpart (QImode, high[0]); 12300 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); 12301 s = gen_rtx_NE (QImode, flags, const0_rtx); 12302 emit_insn (gen_rtx_SET (VOIDmode, d, s)); 12303 } 12304 12305 /* Otherwise, we can get the same results by manually performing 12306 a bit extract operation on bit 5/6, and then performing the two 12307 shifts. The two methods of getting 0/1 into low/high are exactly 12308 the same size. 
Avoiding the shift in the bit extract case helps 12309 pentium4 a bit; no one else seems to care much either way. */ 12310 else 12311 { 12312 rtx x; 12313 12314 if (TARGET_PARTIAL_REG_STALL && !optimize_size) 12315 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]); 12316 else 12317 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]); 12318 emit_insn (gen_rtx_SET (VOIDmode, high[0], x)); 12319 12320 emit_insn ((mode == DImode 12321 ? gen_lshrsi3 12322 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6))); 12323 emit_insn ((mode == DImode 12324 ? gen_andsi3 12325 : gen_anddi3) (high[0], high[0], GEN_INT (1))); 12326 emit_move_insn (low[0], high[0]); 12327 emit_insn ((mode == DImode 12328 ? gen_xorsi3 12329 : gen_xordi3) (low[0], low[0], GEN_INT (1))); 12330 } 12331 12332 emit_insn ((mode == DImode 12333 ? gen_ashlsi3 12334 : gen_ashldi3) (low[0], low[0], operands[2])); 12335 emit_insn ((mode == DImode 12336 ? gen_ashlsi3 12337 : gen_ashldi3) (high[0], high[0], operands[2])); 12338 return; 12339 } 12340 12341 if (operands[1] == constm1_rtx) 12342 { 12343 /* For -1 << N, we can avoid the shld instruction, because we 12344 know that we're shifting 0...31/63 ones into a -1. */ 12345 emit_move_insn (low[0], constm1_rtx); 12346 if (optimize_size) 12347 emit_move_insn (high[0], low[0]); 12348 else 12349 emit_move_insn (high[0], constm1_rtx); 12350 } 12351 else 12352 { 12353 if (!rtx_equal_p (operands[0], operands[1])) 12354 emit_move_insn (operands[0], operands[1]); 12355 12356 (mode == DImode ? split_di : split_ti) (operands, 1, low, high); 12357 emit_insn ((mode == DImode 12358 ? gen_x86_shld_1 12359 : gen_x86_64_shld) (high[0], low[0], operands[2])); 12360 } 12361 12362 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2])); 12363 12364 if (TARGET_CMOVE && scratch) 12365 { 12366 ix86_expand_clear (scratch); 12367 emit_insn ((mode == DImode 12368 ? 
gen_x86_shift_adj_1 12369 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch)); 12370 } 12371 else 12372 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2])); 12373} 12374 12375void 12376ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode) 12377{ 12378 rtx low[2], high[2]; 12379 int count; 12380 const int single_width = mode == DImode ? 32 : 64; 12381 12382 if (GET_CODE (operands[2]) == CONST_INT) 12383 { 12384 (mode == DImode ? split_di : split_ti) (operands, 2, low, high); 12385 count = INTVAL (operands[2]) & (single_width * 2 - 1); 12386 12387 if (count == single_width * 2 - 1) 12388 { 12389 emit_move_insn (high[0], high[1]); 12390 emit_insn ((mode == DImode 12391 ? gen_ashrsi3 12392 : gen_ashrdi3) (high[0], high[0], 12393 GEN_INT (single_width - 1))); 12394 emit_move_insn (low[0], high[0]); 12395 12396 } 12397 else if (count >= single_width) 12398 { 12399 emit_move_insn (low[0], high[1]); 12400 emit_move_insn (high[0], low[0]); 12401 emit_insn ((mode == DImode 12402 ? gen_ashrsi3 12403 : gen_ashrdi3) (high[0], high[0], 12404 GEN_INT (single_width - 1))); 12405 if (count > single_width) 12406 emit_insn ((mode == DImode 12407 ? gen_ashrsi3 12408 : gen_ashrdi3) (low[0], low[0], 12409 GEN_INT (count - single_width))); 12410 } 12411 else 12412 { 12413 if (!rtx_equal_p (operands[0], operands[1])) 12414 emit_move_insn (operands[0], operands[1]); 12415 emit_insn ((mode == DImode 12416 ? gen_x86_shrd_1 12417 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count))); 12418 emit_insn ((mode == DImode 12419 ? gen_ashrsi3 12420 : gen_ashrdi3) (high[0], high[0], GEN_INT (count))); 12421 } 12422 } 12423 else 12424 { 12425 if (!rtx_equal_p (operands[0], operands[1])) 12426 emit_move_insn (operands[0], operands[1]); 12427 12428 (mode == DImode ? split_di : split_ti) (operands, 1, low, high); 12429 12430 emit_insn ((mode == DImode 12431 ? 
gen_x86_shrd_1 12432 : gen_x86_64_shrd) (low[0], high[0], operands[2])); 12433 emit_insn ((mode == DImode 12434 ? gen_ashrsi3 12435 : gen_ashrdi3) (high[0], high[0], operands[2])); 12436 12437 if (TARGET_CMOVE && scratch) 12438 { 12439 emit_move_insn (scratch, high[0]); 12440 emit_insn ((mode == DImode 12441 ? gen_ashrsi3 12442 : gen_ashrdi3) (scratch, scratch, 12443 GEN_INT (single_width - 1))); 12444 emit_insn ((mode == DImode 12445 ? gen_x86_shift_adj_1 12446 : gen_x86_64_shift_adj) (low[0], high[0], operands[2], 12447 scratch)); 12448 } 12449 else 12450 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2])); 12451 } 12452} 12453 12454void 12455ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode) 12456{ 12457 rtx low[2], high[2]; 12458 int count; 12459 const int single_width = mode == DImode ? 32 : 64; 12460 12461 if (GET_CODE (operands[2]) == CONST_INT) 12462 { 12463 (mode == DImode ? split_di : split_ti) (operands, 2, low, high); 12464 count = INTVAL (operands[2]) & (single_width * 2 - 1); 12465 12466 if (count >= single_width) 12467 { 12468 emit_move_insn (low[0], high[1]); 12469 ix86_expand_clear (high[0]); 12470 12471 if (count > single_width) 12472 emit_insn ((mode == DImode 12473 ? gen_lshrsi3 12474 : gen_lshrdi3) (low[0], low[0], 12475 GEN_INT (count - single_width))); 12476 } 12477 else 12478 { 12479 if (!rtx_equal_p (operands[0], operands[1])) 12480 emit_move_insn (operands[0], operands[1]); 12481 emit_insn ((mode == DImode 12482 ? gen_x86_shrd_1 12483 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count))); 12484 emit_insn ((mode == DImode 12485 ? gen_lshrsi3 12486 : gen_lshrdi3) (high[0], high[0], GEN_INT (count))); 12487 } 12488 } 12489 else 12490 { 12491 if (!rtx_equal_p (operands[0], operands[1])) 12492 emit_move_insn (operands[0], operands[1]); 12493 12494 (mode == DImode ? split_di : split_ti) (operands, 1, low, high); 12495 12496 emit_insn ((mode == DImode 12497 ? 
gen_x86_shrd_1 12498 : gen_x86_64_shrd) (low[0], high[0], operands[2])); 12499 emit_insn ((mode == DImode 12500 ? gen_lshrsi3 12501 : gen_lshrdi3) (high[0], high[0], operands[2])); 12502 12503 /* Heh. By reversing the arguments, we can reuse this pattern. */ 12504 if (TARGET_CMOVE && scratch) 12505 { 12506 ix86_expand_clear (scratch); 12507 emit_insn ((mode == DImode 12508 ? gen_x86_shift_adj_1 12509 : gen_x86_64_shift_adj) (low[0], high[0], operands[2], 12510 scratch)); 12511 } 12512 else 12513 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2])); 12514 } 12515} 12516 12517/* Helper function for the string operations below. Dest VARIABLE whether 12518 it is aligned to VALUE bytes. If true, jump to the label. */ 12519static rtx 12520ix86_expand_aligntest (rtx variable, int value) 12521{ 12522 rtx label = gen_label_rtx (); 12523 rtx tmpcount = gen_reg_rtx (GET_MODE (variable)); 12524 if (GET_MODE (variable) == DImode) 12525 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value))); 12526 else 12527 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value))); 12528 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable), 12529 1, label); 12530 return label; 12531} 12532 12533/* Adjust COUNTER by the VALUE. */ 12534static void 12535ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value) 12536{ 12537 if (GET_MODE (countreg) == DImode) 12538 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value))); 12539 else 12540 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value))); 12541} 12542 12543/* Zero extend possibly SImode EXP to Pmode register. */ 12544rtx 12545ix86_zero_extend_to_Pmode (rtx exp) 12546{ 12547 rtx r; 12548 if (GET_MODE (exp) == VOIDmode) 12549 return force_reg (Pmode, exp); 12550 if (GET_MODE (exp) == Pmode) 12551 return copy_to_mode_reg (Pmode, exp); 12552 r = gen_reg_rtx (Pmode); 12553 emit_insn (gen_zero_extendsidi2 (r, exp)); 12554 return r; 12555} 12556 12557/* Expand string move (memcpy) operation. 
Use i386 string operations when 12558 profitable. expand_clrmem contains similar code. */ 12559int 12560ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) 12561{ 12562 rtx srcreg, destreg, countreg, srcexp, destexp; 12563 enum machine_mode counter_mode; 12564 HOST_WIDE_INT align = 0; 12565 unsigned HOST_WIDE_INT count = 0; 12566 12567 if (GET_CODE (align_exp) == CONST_INT) 12568 align = INTVAL (align_exp); 12569 12570 /* Can't use any of this if the user has appropriated esi or edi. */ 12571 if (global_regs[4] || global_regs[5]) 12572 return 0; 12573 12574 /* This simple hack avoids all inlining code and simplifies code below. */ 12575 if (!TARGET_ALIGN_STRINGOPS) 12576 align = 64; 12577 12578 if (GET_CODE (count_exp) == CONST_INT) 12579 { 12580 count = INTVAL (count_exp); 12581 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64) 12582 return 0; 12583 } 12584 12585 /* Figure out proper mode for counter. For 32bits it is always SImode, 12586 for 64bits use SImode when possible, otherwise DImode. 12587 Set count to number of bytes copied when known at compile time. */ 12588 if (!TARGET_64BIT 12589 || GET_MODE (count_exp) == SImode 12590 || x86_64_zext_immediate_operand (count_exp, VOIDmode)) 12591 counter_mode = SImode; 12592 else 12593 counter_mode = DImode; 12594 12595 gcc_assert (counter_mode == SImode || counter_mode == DImode); 12596 12597 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); 12598 if (destreg != XEXP (dst, 0)) 12599 dst = replace_equiv_address_nv (dst, destreg); 12600 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0)); 12601 if (srcreg != XEXP (src, 0)) 12602 src = replace_equiv_address_nv (src, srcreg); 12603 12604 /* When optimizing for size emit simple rep ; movsb instruction for 12605 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)? 12606 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb. 12607 Sice of (movsl;)*(movsw;)?(movsb;)? 
sequence is 12608 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes, 12609 but we don't know whether upper 24 (resp. 56) bits of %ecx will be 12610 known to be zero or not. The rep; movsb sequence causes higher 12611 register pressure though, so take that into account. */ 12612 12613 if ((!optimize || optimize_size) 12614 && (count == 0 12615 || ((count & 0x03) 12616 && (!optimize_size 12617 || count > 5 * 4 12618 || (count & 3) + count / 4 > 6)))) 12619 { 12620 emit_insn (gen_cld ()); 12621 countreg = ix86_zero_extend_to_Pmode (count_exp); 12622 destexp = gen_rtx_PLUS (Pmode, destreg, countreg); 12623 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg); 12624 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg, 12625 destexp, srcexp)); 12626 } 12627 12628 /* For constant aligned (or small unaligned) copies use rep movsl 12629 followed by code copying the rest. For PentiumPro ensure 8 byte 12630 alignment to allow rep movsl acceleration. */ 12631 12632 else if (count != 0 12633 && (align >= 8 12634 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4) 12635 || optimize_size || count < (unsigned int) 64)) 12636 { 12637 unsigned HOST_WIDE_INT offset = 0; 12638 int size = TARGET_64BIT && !optimize_size ? 8 : 4; 12639 rtx srcmem, dstmem; 12640 12641 emit_insn (gen_cld ()); 12642 if (count & ~(size - 1)) 12643 { 12644 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4) 12645 { 12646 enum machine_mode movs_mode = size == 4 ? SImode : DImode; 12647 12648 while (offset < (count & ~(size - 1))) 12649 { 12650 srcmem = adjust_automodify_address_nv (src, movs_mode, 12651 srcreg, offset); 12652 dstmem = adjust_automodify_address_nv (dst, movs_mode, 12653 destreg, offset); 12654 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12655 offset += size; 12656 } 12657 } 12658 else 12659 { 12660 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3)) 12661 & (TARGET_64BIT ? 
-1 : 0x3fffffff)); 12662 countreg = copy_to_mode_reg (counter_mode, countreg); 12663 countreg = ix86_zero_extend_to_Pmode (countreg); 12664 12665 destexp = gen_rtx_ASHIFT (Pmode, countreg, 12666 GEN_INT (size == 4 ? 2 : 3)); 12667 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg); 12668 destexp = gen_rtx_PLUS (Pmode, destexp, destreg); 12669 12670 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, 12671 countreg, destexp, srcexp)); 12672 offset = count & ~(size - 1); 12673 } 12674 } 12675 if (size == 8 && (count & 0x04)) 12676 { 12677 srcmem = adjust_automodify_address_nv (src, SImode, srcreg, 12678 offset); 12679 dstmem = adjust_automodify_address_nv (dst, SImode, destreg, 12680 offset); 12681 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12682 offset += 4; 12683 } 12684 if (count & 0x02) 12685 { 12686 srcmem = adjust_automodify_address_nv (src, HImode, srcreg, 12687 offset); 12688 dstmem = adjust_automodify_address_nv (dst, HImode, destreg, 12689 offset); 12690 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12691 offset += 2; 12692 } 12693 if (count & 0x01) 12694 { 12695 srcmem = adjust_automodify_address_nv (src, QImode, srcreg, 12696 offset); 12697 dstmem = adjust_automodify_address_nv (dst, QImode, destreg, 12698 offset); 12699 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12700 } 12701 } 12702 /* The generic code based on the glibc implementation: 12703 - align destination to 4 bytes (8 byte alignment is used for PentiumPro 12704 allowing accelerated copying there) 12705 - copy the data using rep movsl 12706 - copy the rest. */ 12707 else 12708 { 12709 rtx countreg2; 12710 rtx label = NULL; 12711 rtx srcmem, dstmem; 12712 int desired_alignment = (TARGET_PENTIUMPRO 12713 && (count == 0 || count >= (unsigned int) 260) 12714 ? 8 : UNITS_PER_WORD); 12715 /* Get rid of MEM_OFFSETs, they won't be accurate. 
*/ 12716 dst = change_address (dst, BLKmode, destreg); 12717 src = change_address (src, BLKmode, srcreg); 12718 12719 /* In case we don't know anything about the alignment, default to 12720 library version, since it is usually equally fast and result in 12721 shorter code. 12722 12723 Also emit call when we know that the count is large and call overhead 12724 will not be important. */ 12725 if (!TARGET_INLINE_ALL_STRINGOPS 12726 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL)) 12727 return 0; 12728 12729 if (TARGET_SINGLE_STRINGOP) 12730 emit_insn (gen_cld ()); 12731 12732 countreg2 = gen_reg_rtx (Pmode); 12733 countreg = copy_to_mode_reg (counter_mode, count_exp); 12734 12735 /* We don't use loops to align destination and to copy parts smaller 12736 than 4 bytes, because gcc is able to optimize such code better (in 12737 the case the destination or the count really is aligned, gcc is often 12738 able to predict the branches) and also it is friendlier to the 12739 hardware branch prediction. 12740 12741 Using loops is beneficial for generic case, because we can 12742 handle small counts using the loops. Many CPUs (such as Athlon) 12743 have large REP prefix setup costs. 12744 12745 This is quite costly. Maybe we can revisit this decision later or 12746 add some customizability to this code. 
*/ 12747 12748 if (count == 0 && align < desired_alignment) 12749 { 12750 label = gen_label_rtx (); 12751 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1), 12752 LEU, 0, counter_mode, 1, label); 12753 } 12754 if (align <= 1) 12755 { 12756 rtx label = ix86_expand_aligntest (destreg, 1); 12757 srcmem = change_address (src, QImode, srcreg); 12758 dstmem = change_address (dst, QImode, destreg); 12759 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12760 ix86_adjust_counter (countreg, 1); 12761 emit_label (label); 12762 LABEL_NUSES (label) = 1; 12763 } 12764 if (align <= 2) 12765 { 12766 rtx label = ix86_expand_aligntest (destreg, 2); 12767 srcmem = change_address (src, HImode, srcreg); 12768 dstmem = change_address (dst, HImode, destreg); 12769 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12770 ix86_adjust_counter (countreg, 2); 12771 emit_label (label); 12772 LABEL_NUSES (label) = 1; 12773 } 12774 if (align <= 4 && desired_alignment > 4) 12775 { 12776 rtx label = ix86_expand_aligntest (destreg, 4); 12777 srcmem = change_address (src, SImode, srcreg); 12778 dstmem = change_address (dst, SImode, destreg); 12779 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12780 ix86_adjust_counter (countreg, 4); 12781 emit_label (label); 12782 LABEL_NUSES (label) = 1; 12783 } 12784 12785 if (label && desired_alignment > 4 && !TARGET_64BIT) 12786 { 12787 emit_label (label); 12788 LABEL_NUSES (label) = 1; 12789 label = NULL_RTX; 12790 } 12791 if (!TARGET_SINGLE_STRINGOP) 12792 emit_insn (gen_cld ()); 12793 if (TARGET_64BIT) 12794 { 12795 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg), 12796 GEN_INT (3))); 12797 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3)); 12798 } 12799 else 12800 { 12801 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx)); 12802 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx); 12803 } 12804 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg); 12805 destexp = gen_rtx_PLUS 
(Pmode, destexp, destreg); 12806 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, 12807 countreg2, destexp, srcexp)); 12808 12809 if (label) 12810 { 12811 emit_label (label); 12812 LABEL_NUSES (label) = 1; 12813 } 12814 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4)) 12815 { 12816 srcmem = change_address (src, SImode, srcreg); 12817 dstmem = change_address (dst, SImode, destreg); 12818 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12819 } 12820 if ((align <= 4 || count == 0) && TARGET_64BIT) 12821 { 12822 rtx label = ix86_expand_aligntest (countreg, 4); 12823 srcmem = change_address (src, SImode, srcreg); 12824 dstmem = change_address (dst, SImode, destreg); 12825 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12826 emit_label (label); 12827 LABEL_NUSES (label) = 1; 12828 } 12829 if (align > 2 && count != 0 && (count & 2)) 12830 { 12831 srcmem = change_address (src, HImode, srcreg); 12832 dstmem = change_address (dst, HImode, destreg); 12833 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12834 } 12835 if (align <= 2 || count == 0) 12836 { 12837 rtx label = ix86_expand_aligntest (countreg, 2); 12838 srcmem = change_address (src, HImode, srcreg); 12839 dstmem = change_address (dst, HImode, destreg); 12840 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12841 emit_label (label); 12842 LABEL_NUSES (label) = 1; 12843 } 12844 if (align > 1 && count != 0 && (count & 1)) 12845 { 12846 srcmem = change_address (src, QImode, srcreg); 12847 dstmem = change_address (dst, QImode, destreg); 12848 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12849 } 12850 if (align <= 1 || count == 0) 12851 { 12852 rtx label = ix86_expand_aligntest (countreg, 1); 12853 srcmem = change_address (src, QImode, srcreg); 12854 dstmem = change_address (dst, QImode, destreg); 12855 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12856 emit_label (label); 12857 LABEL_NUSES (label) = 1; 12858 } 12859 } 12860 12861 return 1; 
12862} 12863 12864/* Expand string clear operation (bzero). Use i386 string operations when 12865 profitable. expand_movmem contains similar code. */ 12866int 12867ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp) 12868{ 12869 rtx destreg, zeroreg, countreg, destexp; 12870 enum machine_mode counter_mode; 12871 HOST_WIDE_INT align = 0; 12872 unsigned HOST_WIDE_INT count = 0; 12873 12874 if (GET_CODE (align_exp) == CONST_INT) 12875 align = INTVAL (align_exp); 12876 12877 /* Can't use any of this if the user has appropriated esi. */ 12878 if (global_regs[4]) 12879 return 0; 12880 12881 /* This simple hack avoids all inlining code and simplifies code below. */ 12882 if (!TARGET_ALIGN_STRINGOPS) 12883 align = 32; 12884 12885 if (GET_CODE (count_exp) == CONST_INT) 12886 { 12887 count = INTVAL (count_exp); 12888 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64) 12889 return 0; 12890 } 12891 /* Figure out proper mode for counter. For 32bits it is always SImode, 12892 for 64bits use SImode when possible, otherwise DImode. 12893 Set count to number of bytes copied when known at compile time. */ 12894 if (!TARGET_64BIT 12895 || GET_MODE (count_exp) == SImode 12896 || x86_64_zext_immediate_operand (count_exp, VOIDmode)) 12897 counter_mode = SImode; 12898 else 12899 counter_mode = DImode; 12900 12901 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); 12902 if (destreg != XEXP (dst, 0)) 12903 dst = replace_equiv_address_nv (dst, destreg); 12904 12905 12906 /* When optimizing for size emit simple rep ; movsb instruction for 12907 counts not divisible by 4. The movl $N, %ecx; rep; stosb 12908 sequence is 7 bytes long, so if optimizing for size and count is 12909 small enough that some stosl, stosw and stosb instructions without 12910 rep are shorter, fall back into the next if. 
*/ 12911 12912 if ((!optimize || optimize_size) 12913 && (count == 0 12914 || ((count & 0x03) 12915 && (!optimize_size || (count & 0x03) + (count >> 2) > 7)))) 12916 { 12917 emit_insn (gen_cld ()); 12918 12919 countreg = ix86_zero_extend_to_Pmode (count_exp); 12920 zeroreg = copy_to_mode_reg (QImode, const0_rtx); 12921 destexp = gen_rtx_PLUS (Pmode, destreg, countreg); 12922 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp)); 12923 } 12924 else if (count != 0 12925 && (align >= 8 12926 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4) 12927 || optimize_size || count < (unsigned int) 64)) 12928 { 12929 int size = TARGET_64BIT && !optimize_size ? 8 : 4; 12930 unsigned HOST_WIDE_INT offset = 0; 12931 12932 emit_insn (gen_cld ()); 12933 12934 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx); 12935 if (count & ~(size - 1)) 12936 { 12937 unsigned HOST_WIDE_INT repcount; 12938 unsigned int max_nonrep; 12939 12940 repcount = count >> (size == 4 ? 2 : 3); 12941 if (!TARGET_64BIT) 12942 repcount &= 0x3fffffff; 12943 12944 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes. 12945 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN 12946 bytes. In both cases the latter seems to be faster for small 12947 values of N. */ 12948 max_nonrep = size == 4 ? 
7 : 4; 12949 if (!optimize_size) 12950 switch (ix86_tune) 12951 { 12952 case PROCESSOR_PENTIUM4: 12953 case PROCESSOR_NOCONA: 12954 max_nonrep = 3; 12955 break; 12956 default: 12957 break; 12958 } 12959 12960 if (repcount <= max_nonrep) 12961 while (repcount-- > 0) 12962 { 12963 rtx mem = adjust_automodify_address_nv (dst, 12964 GET_MODE (zeroreg), 12965 destreg, offset); 12966 emit_insn (gen_strset (destreg, mem, zeroreg)); 12967 offset += size; 12968 } 12969 else 12970 { 12971 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount)); 12972 countreg = ix86_zero_extend_to_Pmode (countreg); 12973 destexp = gen_rtx_ASHIFT (Pmode, countreg, 12974 GEN_INT (size == 4 ? 2 : 3)); 12975 destexp = gen_rtx_PLUS (Pmode, destexp, destreg); 12976 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, 12977 destexp)); 12978 offset = count & ~(size - 1); 12979 } 12980 } 12981 if (size == 8 && (count & 0x04)) 12982 { 12983 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg, 12984 offset); 12985 emit_insn (gen_strset (destreg, mem, 12986 gen_rtx_SUBREG (SImode, zeroreg, 0))); 12987 offset += 4; 12988 } 12989 if (count & 0x02) 12990 { 12991 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg, 12992 offset); 12993 emit_insn (gen_strset (destreg, mem, 12994 gen_rtx_SUBREG (HImode, zeroreg, 0))); 12995 offset += 2; 12996 } 12997 if (count & 0x01) 12998 { 12999 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg, 13000 offset); 13001 emit_insn (gen_strset (destreg, mem, 13002 gen_rtx_SUBREG (QImode, zeroreg, 0))); 13003 } 13004 } 13005 else 13006 { 13007 rtx countreg2; 13008 rtx label = NULL; 13009 /* Compute desired alignment of the string operation. */ 13010 int desired_alignment = (TARGET_PENTIUMPRO 13011 && (count == 0 || count >= (unsigned int) 260) 13012 ? 8 : UNITS_PER_WORD); 13013 13014 /* In case we don't know anything about the alignment, default to 13015 library version, since it is usually equally fast and result in 13016 shorter code. 
13017 13018 Also emit call when we know that the count is large and call overhead 13019 will not be important. */ 13020 if (!TARGET_INLINE_ALL_STRINGOPS 13021 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL)) 13022 return 0; 13023 13024 if (TARGET_SINGLE_STRINGOP) 13025 emit_insn (gen_cld ()); 13026 13027 countreg2 = gen_reg_rtx (Pmode); 13028 countreg = copy_to_mode_reg (counter_mode, count_exp); 13029 zeroreg = copy_to_mode_reg (Pmode, const0_rtx); 13030 /* Get rid of MEM_OFFSET, it won't be accurate. */ 13031 dst = change_address (dst, BLKmode, destreg); 13032 13033 if (count == 0 && align < desired_alignment) 13034 { 13035 label = gen_label_rtx (); 13036 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1), 13037 LEU, 0, counter_mode, 1, label); 13038 } 13039 if (align <= 1) 13040 { 13041 rtx label = ix86_expand_aligntest (destreg, 1); 13042 emit_insn (gen_strset (destreg, dst, 13043 gen_rtx_SUBREG (QImode, zeroreg, 0))); 13044 ix86_adjust_counter (countreg, 1); 13045 emit_label (label); 13046 LABEL_NUSES (label) = 1; 13047 } 13048 if (align <= 2) 13049 { 13050 rtx label = ix86_expand_aligntest (destreg, 2); 13051 emit_insn (gen_strset (destreg, dst, 13052 gen_rtx_SUBREG (HImode, zeroreg, 0))); 13053 ix86_adjust_counter (countreg, 2); 13054 emit_label (label); 13055 LABEL_NUSES (label) = 1; 13056 } 13057 if (align <= 4 && desired_alignment > 4) 13058 { 13059 rtx label = ix86_expand_aligntest (destreg, 4); 13060 emit_insn (gen_strset (destreg, dst, 13061 (TARGET_64BIT 13062 ? 
gen_rtx_SUBREG (SImode, zeroreg, 0) 13063 : zeroreg))); 13064 ix86_adjust_counter (countreg, 4); 13065 emit_label (label); 13066 LABEL_NUSES (label) = 1; 13067 } 13068 13069 if (label && desired_alignment > 4 && !TARGET_64BIT) 13070 { 13071 emit_label (label); 13072 LABEL_NUSES (label) = 1; 13073 label = NULL_RTX; 13074 } 13075 13076 if (!TARGET_SINGLE_STRINGOP) 13077 emit_insn (gen_cld ()); 13078 if (TARGET_64BIT) 13079 { 13080 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg), 13081 GEN_INT (3))); 13082 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3)); 13083 } 13084 else 13085 { 13086 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx)); 13087 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx); 13088 } 13089 destexp = gen_rtx_PLUS (Pmode, destexp, destreg); 13090 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp)); 13091 13092 if (label) 13093 { 13094 emit_label (label); 13095 LABEL_NUSES (label) = 1; 13096 } 13097 13098 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4)) 13099 emit_insn (gen_strset (destreg, dst, 13100 gen_rtx_SUBREG (SImode, zeroreg, 0))); 13101 if (TARGET_64BIT && (align <= 4 || count == 0)) 13102 { 13103 rtx label = ix86_expand_aligntest (countreg, 4); 13104 emit_insn (gen_strset (destreg, dst, 13105 gen_rtx_SUBREG (SImode, zeroreg, 0))); 13106 emit_label (label); 13107 LABEL_NUSES (label) = 1; 13108 } 13109 if (align > 2 && count != 0 && (count & 2)) 13110 emit_insn (gen_strset (destreg, dst, 13111 gen_rtx_SUBREG (HImode, zeroreg, 0))); 13112 if (align <= 2 || count == 0) 13113 { 13114 rtx label = ix86_expand_aligntest (countreg, 2); 13115 emit_insn (gen_strset (destreg, dst, 13116 gen_rtx_SUBREG (HImode, zeroreg, 0))); 13117 emit_label (label); 13118 LABEL_NUSES (label) = 1; 13119 } 13120 if (align > 1 && count != 0 && (count & 1)) 13121 emit_insn (gen_strset (destreg, dst, 13122 gen_rtx_SUBREG (QImode, zeroreg, 0))); 13123 if (align <= 1 || count == 0) 13124 { 13125 rtx label 
= ix86_expand_aligntest (countreg, 1); 13126 emit_insn (gen_strset (destreg, dst, 13127 gen_rtx_SUBREG (QImode, zeroreg, 0))); 13128 emit_label (label); 13129 LABEL_NUSES (label) = 1; 13130 } 13131 } 13132 return 1; 13133} 13134 13135/* Expand strlen. */ 13136int 13137ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align) 13138{ 13139 rtx addr, scratch1, scratch2, scratch3, scratch4; 13140 13141 /* The generic case of strlen expander is long. Avoid it's 13142 expanding unless TARGET_INLINE_ALL_STRINGOPS. */ 13143 13144 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 13145 && !TARGET_INLINE_ALL_STRINGOPS 13146 && !optimize_size 13147 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4)) 13148 return 0; 13149 13150 addr = force_reg (Pmode, XEXP (src, 0)); 13151 scratch1 = gen_reg_rtx (Pmode); 13152 13153 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 13154 && !optimize_size) 13155 { 13156 /* Well it seems that some optimizer does not combine a call like 13157 foo(strlen(bar), strlen(bar)); 13158 when the move and the subtraction is done here. It does calculate 13159 the length just once when these instructions are done inside of 13160 output_strlen_unroll(). But I think since &bar[strlen(bar)] is 13161 often used and I use one fewer register for the lifetime of 13162 output_strlen_unroll() this is better. */ 13163 13164 emit_move_insn (out, addr); 13165 13166 ix86_expand_strlensi_unroll_1 (out, src, align); 13167 13168 /* strlensi_unroll_1 returns the address of the zero at the end of 13169 the string, like memchr(), so compute the length by subtracting 13170 the start address. 
*/ 13171 if (TARGET_64BIT) 13172 emit_insn (gen_subdi3 (out, out, addr)); 13173 else 13174 emit_insn (gen_subsi3 (out, out, addr)); 13175 } 13176 else 13177 { 13178 rtx unspec; 13179 scratch2 = gen_reg_rtx (Pmode); 13180 scratch3 = gen_reg_rtx (Pmode); 13181 scratch4 = force_reg (Pmode, constm1_rtx); 13182 13183 emit_move_insn (scratch3, addr); 13184 eoschar = force_reg (QImode, eoschar); 13185 13186 emit_insn (gen_cld ()); 13187 src = replace_equiv_address_nv (src, scratch3); 13188 13189 /* If .md starts supporting :P, this can be done in .md. */ 13190 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align, 13191 scratch4), UNSPEC_SCAS); 13192 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec)); 13193 if (TARGET_64BIT) 13194 { 13195 emit_insn (gen_one_cmpldi2 (scratch2, scratch1)); 13196 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx)); 13197 } 13198 else 13199 { 13200 emit_insn (gen_one_cmplsi2 (scratch2, scratch1)); 13201 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx)); 13202 } 13203 } 13204 return 1; 13205} 13206 13207/* Expand the appropriate insns for doing strlen if not just doing 13208 repnz; scasb 13209 13210 out = result, initialized with the start address 13211 align_rtx = alignment of the address. 13212 scratch = scratch register, initialized with the startaddress when 13213 not aligned, otherwise undefined 13214 13215 This is just the body. It needs the initializations mentioned above and 13216 some address computing at the end. These things are done in i386.md. 
*/ 13217 13218static void 13219ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx) 13220{ 13221 int align; 13222 rtx tmp; 13223 rtx align_2_label = NULL_RTX; 13224 rtx align_3_label = NULL_RTX; 13225 rtx align_4_label = gen_label_rtx (); 13226 rtx end_0_label = gen_label_rtx (); 13227 rtx mem; 13228 rtx tmpreg = gen_reg_rtx (SImode); 13229 rtx scratch = gen_reg_rtx (SImode); 13230 rtx cmp; 13231 13232 align = 0; 13233 if (GET_CODE (align_rtx) == CONST_INT) 13234 align = INTVAL (align_rtx); 13235 13236 /* Loop to check 1..3 bytes for null to get an aligned pointer. */ 13237 13238 /* Is there a known alignment and is it less than 4? */ 13239 if (align < 4) 13240 { 13241 rtx scratch1 = gen_reg_rtx (Pmode); 13242 emit_move_insn (scratch1, out); 13243 /* Is there a known alignment and is it not 2? */ 13244 if (align != 2) 13245 { 13246 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */ 13247 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */ 13248 13249 /* Leave just the 3 lower bits. */ 13250 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3), 13251 NULL_RTX, 0, OPTAB_WIDEN); 13252 13253 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, 13254 Pmode, 1, align_4_label); 13255 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL, 13256 Pmode, 1, align_2_label); 13257 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL, 13258 Pmode, 1, align_3_label); 13259 } 13260 else 13261 { 13262 /* Since the alignment is 2, we have to check 2 or 0 bytes; 13263 check if is aligned to 4 - byte. */ 13264 13265 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx, 13266 NULL_RTX, 0, OPTAB_WIDEN); 13267 13268 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, 13269 Pmode, 1, align_4_label); 13270 } 13271 13272 mem = change_address (src, QImode, out); 13273 13274 /* Now compare the bytes. */ 13275 13276 /* Compare the first n unaligned byte on a byte per byte basis. 
*/ 13277 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, 13278 QImode, 1, end_0_label); 13279 13280 /* Increment the address. */ 13281 if (TARGET_64BIT) 13282 emit_insn (gen_adddi3 (out, out, const1_rtx)); 13283 else 13284 emit_insn (gen_addsi3 (out, out, const1_rtx)); 13285 13286 /* Not needed with an alignment of 2 */ 13287 if (align != 2) 13288 { 13289 emit_label (align_2_label); 13290 13291 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, 13292 end_0_label); 13293 13294 if (TARGET_64BIT) 13295 emit_insn (gen_adddi3 (out, out, const1_rtx)); 13296 else 13297 emit_insn (gen_addsi3 (out, out, const1_rtx)); 13298 13299 emit_label (align_3_label); 13300 } 13301 13302 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, 13303 end_0_label); 13304 13305 if (TARGET_64BIT) 13306 emit_insn (gen_adddi3 (out, out, const1_rtx)); 13307 else 13308 emit_insn (gen_addsi3 (out, out, const1_rtx)); 13309 } 13310 13311 /* Generate loop to check 4 bytes at a time. It is not a good idea to 13312 align this loop. It gives only huge programs, but does not help to 13313 speed up. */ 13314 emit_label (align_4_label); 13315 13316 mem = change_address (src, SImode, out); 13317 emit_move_insn (scratch, mem); 13318 if (TARGET_64BIT) 13319 emit_insn (gen_adddi3 (out, out, GEN_INT (4))); 13320 else 13321 emit_insn (gen_addsi3 (out, out, GEN_INT (4))); 13322 13323 /* This formula yields a nonzero result iff one of the bytes is zero. 13324 This saves three branches inside loop and many cycles. 
*/ 13325 13326 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101))); 13327 emit_insn (gen_one_cmplsi2 (scratch, scratch)); 13328 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch)); 13329 emit_insn (gen_andsi3 (tmpreg, tmpreg, 13330 gen_int_mode (0x80808080, SImode))); 13331 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 13332 align_4_label); 13333 13334 if (TARGET_CMOVE) 13335 { 13336 rtx reg = gen_reg_rtx (SImode); 13337 rtx reg2 = gen_reg_rtx (Pmode); 13338 emit_move_insn (reg, tmpreg); 13339 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16))); 13340 13341 /* If zero is not in the first two bytes, move two bytes forward. */ 13342 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); 13343 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 13344 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); 13345 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, 13346 gen_rtx_IF_THEN_ELSE (SImode, tmp, 13347 reg, 13348 tmpreg))); 13349 /* Emit lea manually to avoid clobbering of flags. */ 13350 emit_insn (gen_rtx_SET (SImode, reg2, 13351 gen_rtx_PLUS (Pmode, out, const2_rtx))); 13352 13353 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 13354 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); 13355 emit_insn (gen_rtx_SET (VOIDmode, out, 13356 gen_rtx_IF_THEN_ELSE (Pmode, tmp, 13357 reg2, 13358 out))); 13359 13360 } 13361 else 13362 { 13363 rtx end_2_label = gen_label_rtx (); 13364 /* Is zero in the first two bytes? */ 13365 13366 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); 13367 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 13368 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx); 13369 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 13370 gen_rtx_LABEL_REF (VOIDmode, end_2_label), 13371 pc_rtx); 13372 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); 13373 JUMP_LABEL (tmp) = end_2_label; 13374 13375 /* Not in the first two. Move two bytes forward. 
*/ 13376 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16))); 13377 if (TARGET_64BIT) 13378 emit_insn (gen_adddi3 (out, out, const2_rtx)); 13379 else 13380 emit_insn (gen_addsi3 (out, out, const2_rtx)); 13381 13382 emit_label (end_2_label); 13383 13384 } 13385 13386 /* Avoid branch in fixing the byte. */ 13387 tmpreg = gen_lowpart (QImode, tmpreg); 13388 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg)); 13389 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx); 13390 if (TARGET_64BIT) 13391 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp)); 13392 else 13393 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp)); 13394 13395 emit_label (end_0_label); 13396} 13397 13398void 13399ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, 13400 rtx callarg2 ATTRIBUTE_UNUSED, 13401 rtx pop, int sibcall) 13402{ 13403 rtx use = NULL, call; 13404 13405 if (pop == const0_rtx) 13406 pop = NULL; 13407 gcc_assert (!TARGET_64BIT || !pop); 13408 13409 if (TARGET_MACHO && !TARGET_64BIT) 13410 { 13411#if TARGET_MACHO 13412 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) 13413 fnaddr = machopic_indirect_call_target (fnaddr); 13414#endif 13415 } 13416 else 13417 { 13418 /* Static functions and indirect calls don't need the pic register. */ 13419 if (! TARGET_64BIT && flag_pic 13420 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF 13421 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))) 13422 use_reg (&use, pic_offset_table_rtx); 13423 } 13424 13425 if (TARGET_64BIT && INTVAL (callarg2) >= 0) 13426 { 13427 rtx al = gen_rtx_REG (QImode, 0); 13428 emit_move_insn (al, callarg2); 13429 use_reg (&use, al); 13430 } 13431 13432 if (! 
call_insn_operand (XEXP (fnaddr, 0), Pmode)) 13433 { 13434 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0)); 13435 fnaddr = gen_rtx_MEM (QImode, fnaddr); 13436 } 13437 if (sibcall && TARGET_64BIT 13438 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode)) 13439 { 13440 rtx addr; 13441 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0)); 13442 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */); 13443 emit_move_insn (fnaddr, addr); 13444 fnaddr = gen_rtx_MEM (QImode, fnaddr); 13445 } 13446 13447 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); 13448 if (retval) 13449 call = gen_rtx_SET (VOIDmode, retval, call); 13450 if (pop) 13451 { 13452 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop); 13453 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop); 13454 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop)); 13455 } 13456 13457 call = emit_call_insn (call); 13458 if (use) 13459 CALL_INSN_FUNCTION_USAGE (call) = use; 13460} 13461 13462 13463/* Clear stack slot assignments remembered from previous functions. 13464 This is called from INIT_EXPANDERS once before RTL is emitted for each 13465 function. */ 13466 13467static struct machine_function * 13468ix86_init_machine_status (void) 13469{ 13470 struct machine_function *f; 13471 13472 f = ggc_alloc_cleared (sizeof (struct machine_function)); 13473 f->use_fast_prologue_epilogue_nregs = -1; 13474 f->tls_descriptor_call_expanded_p = 0; 13475 13476 return f; 13477} 13478 13479/* Return a MEM corresponding to a stack slot with mode MODE. 13480 Allocate a new slot if necessary. 13481 13482 The RTL for a function can have several slots available: N is 13483 which slot to use. */ 13484 13485rtx 13486assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n) 13487{ 13488 struct stack_local_entry *s; 13489 13490 gcc_assert (n < MAX_386_STACK_LOCALS); 13491 13492 /* Virtual slot is valid only before vregs are instantiated. 
*/ 13493 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated); 13494 13495 for (s = ix86_stack_locals; s; s = s->next) 13496 if (s->mode == mode && s->n == n) 13497 return s->rtl; 13498 13499 s = (struct stack_local_entry *) 13500 ggc_alloc (sizeof (struct stack_local_entry)); 13501 s->n = n; 13502 s->mode = mode; 13503 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); 13504 13505 s->next = ix86_stack_locals; 13506 ix86_stack_locals = s; 13507 return s->rtl; 13508} 13509 13510/* Construct the SYMBOL_REF for the tls_get_addr function. */ 13511 13512static GTY(()) rtx ix86_tls_symbol; 13513rtx 13514ix86_tls_get_addr (void) 13515{ 13516 13517 if (!ix86_tls_symbol) 13518 { 13519 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, 13520 (TARGET_ANY_GNU_TLS 13521 && !TARGET_64BIT) 13522 ? "___tls_get_addr" 13523 : "__tls_get_addr"); 13524 } 13525 13526 return ix86_tls_symbol; 13527} 13528 13529/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */ 13530 13531static GTY(()) rtx ix86_tls_module_base_symbol; 13532rtx 13533ix86_tls_module_base (void) 13534{ 13535 13536 if (!ix86_tls_module_base_symbol) 13537 { 13538 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode, 13539 "_TLS_MODULE_BASE_"); 13540 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol) 13541 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; 13542 } 13543 13544 return ix86_tls_module_base_symbol; 13545} 13546 13547/* Calculate the length of the memory address in the instruction 13548 encoding. Does not include the one-byte modrm, opcode, or prefix. 
*/ 13549 13550int 13551memory_address_length (rtx addr) 13552{ 13553 struct ix86_address parts; 13554 rtx base, index, disp; 13555 int len; 13556 int ok; 13557 13558 if (GET_CODE (addr) == PRE_DEC 13559 || GET_CODE (addr) == POST_INC 13560 || GET_CODE (addr) == PRE_MODIFY 13561 || GET_CODE (addr) == POST_MODIFY) 13562 return 0; 13563 13564 ok = ix86_decompose_address (addr, &parts); 13565 gcc_assert (ok); 13566 13567 if (parts.base && GET_CODE (parts.base) == SUBREG) 13568 parts.base = SUBREG_REG (parts.base); 13569 if (parts.index && GET_CODE (parts.index) == SUBREG) 13570 parts.index = SUBREG_REG (parts.index); 13571 13572 base = parts.base; 13573 index = parts.index; 13574 disp = parts.disp; 13575 len = 0; 13576 13577 /* Rule of thumb: 13578 - esp as the base always wants an index, 13579 - ebp as the base always wants a displacement. */ 13580 13581 /* Register Indirect. */ 13582 if (base && !index && !disp) 13583 { 13584 /* esp (for its index) and ebp (for its displacement) need 13585 the two-byte modrm form. */ 13586 if (addr == stack_pointer_rtx 13587 || addr == arg_pointer_rtx 13588 || addr == frame_pointer_rtx 13589 || addr == hard_frame_pointer_rtx) 13590 len = 1; 13591 } 13592 13593 /* Direct Addressing. */ 13594 else if (disp && !base && !index) 13595 len = 4; 13596 13597 else 13598 { 13599 /* Find the length of the displacement constant. */ 13600 if (disp) 13601 { 13602 if (base && satisfies_constraint_K (disp)) 13603 len = 1; 13604 else 13605 len = 4; 13606 } 13607 /* ebp always wants a displacement. */ 13608 else if (base == hard_frame_pointer_rtx) 13609 len = 1; 13610 13611 /* An index requires the two-byte modrm form.... */ 13612 if (index 13613 /* ...like esp, which always wants an index. */ 13614 || base == stack_pointer_rtx 13615 || base == arg_pointer_rtx 13616 || base == frame_pointer_rtx) 13617 len += 1; 13618 } 13619 13620 return len; 13621} 13622 13623/* Compute default value for "length_immediate" attribute. 
When SHORTFORM 13624 is set, expect that insn have 8bit immediate alternative. */ 13625int 13626ix86_attr_length_immediate_default (rtx insn, int shortform) 13627{ 13628 int len = 0; 13629 int i; 13630 extract_insn_cached (insn); 13631 for (i = recog_data.n_operands - 1; i >= 0; --i) 13632 if (CONSTANT_P (recog_data.operand[i])) 13633 { 13634 gcc_assert (!len); 13635 if (shortform && satisfies_constraint_K (recog_data.operand[i])) 13636 len = 1; 13637 else 13638 { 13639 switch (get_attr_mode (insn)) 13640 { 13641 case MODE_QI: 13642 len+=1; 13643 break; 13644 case MODE_HI: 13645 len+=2; 13646 break; 13647 case MODE_SI: 13648 len+=4; 13649 break; 13650 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */ 13651 case MODE_DI: 13652 len+=4; 13653 break; 13654 default: 13655 fatal_insn ("unknown insn mode", insn); 13656 } 13657 } 13658 } 13659 return len; 13660} 13661/* Compute default value for "length_address" attribute. */ 13662int 13663ix86_attr_length_address_default (rtx insn) 13664{ 13665 int i; 13666 13667 if (get_attr_type (insn) == TYPE_LEA) 13668 { 13669 rtx set = PATTERN (insn); 13670 13671 if (GET_CODE (set) == PARALLEL) 13672 set = XVECEXP (set, 0, 0); 13673 13674 gcc_assert (GET_CODE (set) == SET); 13675 13676 return memory_address_length (SET_SRC (set)); 13677 } 13678 13679 extract_insn_cached (insn); 13680 for (i = recog_data.n_operands - 1; i >= 0; --i) 13681 if (GET_CODE (recog_data.operand[i]) == MEM) 13682 { 13683 return memory_address_length (XEXP (recog_data.operand[i], 0)); 13684 break; 13685 } 13686 return 0; 13687} 13688 13689/* Return the maximum number of instructions a cpu can issue. 
*/ 13690 13691static int 13692ix86_issue_rate (void) 13693{ 13694 switch (ix86_tune) 13695 { 13696 case PROCESSOR_PENTIUM: 13697 case PROCESSOR_K6: 13698 return 2; 13699 13700 case PROCESSOR_PENTIUMPRO: 13701 case PROCESSOR_PENTIUM4: 13702 case PROCESSOR_ATHLON: 13703 case PROCESSOR_K8: 13704 case PROCESSOR_NOCONA: 13705 case PROCESSOR_GENERIC32: 13706 case PROCESSOR_GENERIC64: 13707 return 3; 13708 13709 default: 13710 return 1; 13711 } 13712} 13713 13714/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set 13715 by DEP_INSN and nothing set by DEP_INSN. */ 13716 13717static int 13718ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) 13719{ 13720 rtx set, set2; 13721 13722 /* Simplify the test for uninteresting insns. */ 13723 if (insn_type != TYPE_SETCC 13724 && insn_type != TYPE_ICMOV 13725 && insn_type != TYPE_FCMOV 13726 && insn_type != TYPE_IBR) 13727 return 0; 13728 13729 if ((set = single_set (dep_insn)) != 0) 13730 { 13731 set = SET_DEST (set); 13732 set2 = NULL_RTX; 13733 } 13734 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL 13735 && XVECLEN (PATTERN (dep_insn), 0) == 2 13736 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET 13737 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET) 13738 { 13739 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 13740 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 13741 } 13742 else 13743 return 0; 13744 13745 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG) 13746 return 0; 13747 13748 /* This test is true if the dependent insn reads the flags but 13749 not any other potentially set register. */ 13750 if (!reg_overlap_mentioned_p (set, PATTERN (insn))) 13751 return 0; 13752 13753 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn))) 13754 return 0; 13755 13756 return 1; 13757} 13758 13759/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory 13760 address with operands set by DEP_INSN. 
*/ 13761 13762static int 13763ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) 13764{ 13765 rtx addr; 13766 13767 if (insn_type == TYPE_LEA 13768 && TARGET_PENTIUM) 13769 { 13770 addr = PATTERN (insn); 13771 13772 if (GET_CODE (addr) == PARALLEL) 13773 addr = XVECEXP (addr, 0, 0); 13774 13775 gcc_assert (GET_CODE (addr) == SET); 13776 13777 addr = SET_SRC (addr); 13778 } 13779 else 13780 { 13781 int i; 13782 extract_insn_cached (insn); 13783 for (i = recog_data.n_operands - 1; i >= 0; --i) 13784 if (GET_CODE (recog_data.operand[i]) == MEM) 13785 { 13786 addr = XEXP (recog_data.operand[i], 0); 13787 goto found; 13788 } 13789 return 0; 13790 found:; 13791 } 13792 13793 return modified_in_p (addr, dep_insn); 13794} 13795 13796static int 13797ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) 13798{ 13799 enum attr_type insn_type, dep_insn_type; 13800 enum attr_memory memory; 13801 rtx set, set2; 13802 int dep_insn_code_number; 13803 13804 /* Anti and output dependencies have zero cost on all CPUs. */ 13805 if (REG_NOTE_KIND (link) != 0) 13806 return 0; 13807 13808 dep_insn_code_number = recog_memoized (dep_insn); 13809 13810 /* If we can't recognize the insns, we can't really do anything. */ 13811 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0) 13812 return cost; 13813 13814 insn_type = get_attr_type (insn); 13815 dep_insn_type = get_attr_type (dep_insn); 13816 13817 switch (ix86_tune) 13818 { 13819 case PROCESSOR_PENTIUM: 13820 /* Address Generation Interlock adds a cycle of latency. */ 13821 if (ix86_agi_dependent (insn, dep_insn, insn_type)) 13822 cost += 1; 13823 13824 /* ??? Compares pair with jump/setcc. */ 13825 if (ix86_flags_dependent (insn, dep_insn, insn_type)) 13826 cost = 0; 13827 13828 /* Floating point stores require value to be ready one cycle earlier. 
*/ 13829 if (insn_type == TYPE_FMOV 13830 && get_attr_memory (insn) == MEMORY_STORE 13831 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 13832 cost += 1; 13833 break; 13834 13835 case PROCESSOR_PENTIUMPRO: 13836 memory = get_attr_memory (insn); 13837 13838 /* INT->FP conversion is expensive. */ 13839 if (get_attr_fp_int_src (dep_insn)) 13840 cost += 5; 13841 13842 /* There is one cycle extra latency between an FP op and a store. */ 13843 if (insn_type == TYPE_FMOV 13844 && (set = single_set (dep_insn)) != NULL_RTX 13845 && (set2 = single_set (insn)) != NULL_RTX 13846 && rtx_equal_p (SET_DEST (set), SET_SRC (set2)) 13847 && GET_CODE (SET_DEST (set2)) == MEM) 13848 cost += 1; 13849 13850 /* Show ability of reorder buffer to hide latency of load by executing 13851 in parallel with previous instruction in case 13852 previous instruction is not needed to compute the address. */ 13853 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 13854 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 13855 { 13856 /* Claim moves to take one cycle, as core can issue one load 13857 at time and the next load can start cycle later. */ 13858 if (dep_insn_type == TYPE_IMOV 13859 || dep_insn_type == TYPE_FMOV) 13860 cost = 1; 13861 else if (cost > 1) 13862 cost--; 13863 } 13864 break; 13865 13866 case PROCESSOR_K6: 13867 memory = get_attr_memory (insn); 13868 13869 /* The esp dependency is resolved before the instruction is really 13870 finished. */ 13871 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) 13872 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) 13873 return 1; 13874 13875 /* INT->FP conversion is expensive. */ 13876 if (get_attr_fp_int_src (dep_insn)) 13877 cost += 5; 13878 13879 /* Show ability of reorder buffer to hide latency of load by executing 13880 in parallel with previous instruction in case 13881 previous instruction is not needed to compute the address. 
*/ 13882 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 13883 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 13884 { 13885 /* Claim moves to take one cycle, as core can issue one load 13886 at time and the next load can start cycle later. */ 13887 if (dep_insn_type == TYPE_IMOV 13888 || dep_insn_type == TYPE_FMOV) 13889 cost = 1; 13890 else if (cost > 2) 13891 cost -= 2; 13892 else 13893 cost = 1; 13894 } 13895 break; 13896 13897 case PROCESSOR_ATHLON: 13898 case PROCESSOR_K8: 13899 case PROCESSOR_GENERIC32: 13900 case PROCESSOR_GENERIC64: 13901 memory = get_attr_memory (insn); 13902 13903 /* Show ability of reorder buffer to hide latency of load by executing 13904 in parallel with previous instruction in case 13905 previous instruction is not needed to compute the address. */ 13906 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 13907 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 13908 { 13909 enum attr_unit unit = get_attr_unit (insn); 13910 int loadcost = 3; 13911 13912 /* Because of the difference between the length of integer and 13913 floating unit pipeline preparation stages, the memory operands 13914 for floating point are cheaper. 13915 13916 ??? For Athlon it the difference is most probably 2. */ 13917 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN) 13918 loadcost = 3; 13919 else 13920 loadcost = TARGET_ATHLON ? 2 : 0; 13921 13922 if (cost >= loadcost) 13923 cost -= loadcost; 13924 else 13925 cost = 0; 13926 } 13927 13928 default: 13929 break; 13930 } 13931 13932 return cost; 13933} 13934 13935/* How many alternative schedules to try. This should be as wide as the 13936 scheduling freedom in the DFA, but no wider. Making this value too 13937 large results extra work for the scheduler. 
   */

static int
ia32_multipass_dfa_lookahead (void)
{
  /* Pentium pairs two pipes; look one insn further ahead.  */
  if (ix86_tune == PROCESSOR_PENTIUM)
    return 2;

  if (ix86_tune == PROCESSOR_PENTIUMPRO
      || ix86_tune == PROCESSOR_K6)
    return 1;

  else
    return 0;
}


/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      /* DFmode constants want 64-bit alignment; wider FP modes 128-bit.  */
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  /* Word-align longer string constants (>= 31 bytes) when optimizing for
     speed, unless disabled by -mno-align-long-strings (FreeBSD-local
     TARGET_NO_ALIGN_LONG_STRINGS flag).  */
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && !TARGET_NO_ALIGN_LONG_STRINGS
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  int max_align = optimize_size ? BITS_PER_WORD : 256;

  /* Large aggregates get boosted to MAX_ALIGN.  TYPE_SIZE is in bits;
     a nonzero high word of the size constant means the size is huge,
     hence certainly above the threshold.  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  (128 here is bits, i.e. 16 bytes.)  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  /* Per-kind boosts: double-sized element/field/scalar -> 64,
     128-bit modes -> 128.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is examined here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (tree type, int align)
{
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.
   */
  if (TARGET_64BIT)
    {
      /* NOTE(review): TYPE_SIZE is in bits, so this threshold (16) fires
	 for aggregates of >= 16 *bits*, while the static-variable variant
	 ix86_data_alignment uses 128 (= 16 bytes).  Looks inconsistent
	 with the comment above -- confirm intent before changing.  */
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  /* Per-kind boosts, mirroring ix86_data_alignment.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is examined here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      /* 0xb9 is "movl $imm32, %ecx": load the static chain.  */
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      /* 0xe9 is "jmp rel32": jump to the target function.  */
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but kernel does not use
	 trampolines at the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  /* Bytes 41 bb: "movl $imm32, %r11d" (HImode constant is
	     little-endian, hence 0xbb41).  */
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* Bytes 49 bb: "movabs $imm64, %r11".  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10.
	 */
      /* Bytes 49 ba: "movabs $imm64, %r10".  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to the r11: bytes 49 ff e3, "jmp *%r11".  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      gcc_assert (offset <= TRAMPOLINE_SIZE);
    }

#ifdef ENABLE_EXECUTE_STACK
  /* Platforms with non-executable stacks must be told the trampoline
     lives there.  */
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}

/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  /* SSE arithmetic.  */
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  /* SSE comparisons.  */
  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  /* MMX.  */
  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_EMMS,
  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPNEPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,
  IX86_BUILTIN_CMPNESD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  /* SSE2 MMX */
  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,
  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,

  /* Prescott New Instructions.  */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  /* Generic vector init/extract/set builtins.  */
  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,

  /* Must be last; used to size dispatch tables.  */
  IX86_BUILTIN_MAX
};

/* Register builtin NAME with type TYPE and code CODE when the target
   flags in MASK are enabled (and, for 64-bit-only builtins, when
   actually compiling for 64-bit).  */
#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if ((MASK) & target_flags						\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
				 NULL, NULL_TREE);			\
} while (0)

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.
   */
#define BUILTIN_DESC_SWAP_OPERANDS 1

/* One table entry describing an ia32 builtin: the target-flag mask that
   enables it, the insn pattern to emit, the user-visible name, the
   ix86_builtins code, the comparison code (for compare builtins), and
   flag bits such as BUILTIN_DESC_SWAP_OPERANDS.  */
struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};

/* (U)COMISS/(U)COMISD ordered/unordered scalar compares.  */
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi,
"__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 }, 14637 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 }, 14638 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 }, 14639 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 }, 14640 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 }, 14641 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 }, 14642 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 }, 14643 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 }, 14644}; 14645 14646static const struct builtin_description bdesc_2arg[] = 14647{ 14648 /* SSE */ 14649 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }, 14650 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, 14651 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, 14652 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, 14653 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, 14654 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, 14655 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, 14656 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, 14657 14658 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, 14659 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, 14660 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, 14661 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", 
IX86_BUILTIN_CMPGTPS, LT, 14662 BUILTIN_DESC_SWAP_OPERANDS }, 14663 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 14664 BUILTIN_DESC_SWAP_OPERANDS }, 14665 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, 14666 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 }, 14667 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 }, 14668 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 }, 14669 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, 14670 BUILTIN_DESC_SWAP_OPERANDS }, 14671 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, 14672 BUILTIN_DESC_SWAP_OPERANDS }, 14673 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 }, 14674 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, 14675 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, 14676 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, 14677 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, 14678 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 }, 14679 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 }, 14680 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 }, 14681 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, 14682 BUILTIN_DESC_SWAP_OPERANDS }, 14683 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, 
UNGT, 14684 BUILTIN_DESC_SWAP_OPERANDS }, 14685 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 }, 14686 14687 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, 14688 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, 14689 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, 14690 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, 14691 14692 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, 14693 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, 14694 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, 14695 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, 14696 14697 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, 14698 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, 14699 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, 14700 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, 14701 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, 14702 14703 /* MMX */ 14704 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, 14705 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 }, 14706 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 }, 14707 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 }, 14708 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 }, 14709 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 }, 14710 { MASK_MMX, 
CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 }, 14711 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 }, 14712 14713 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 }, 14714 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 }, 14715 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 }, 14716 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 }, 14717 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 }, 14718 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 }, 14719 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 }, 14720 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 }, 14721 14722 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, 14723 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, 14724 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, 14725 14726 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 }, 14727 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, 14728 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, 14729 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 }, 14730 14731 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, 14732 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, 14733 14734 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, 14735 { MASK_MMX, 
CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, 14736 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 }, 14737 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 }, 14738 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, 14739 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, 14740 14741 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, 14742 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, 14743 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, 14744 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, 14745 14746 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 }, 14747 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 }, 14748 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 }, 14749 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 }, 14750 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 }, 14751 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 }, 14752 14753 /* Special. 
*/ 14754 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 }, 14755 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 }, 14756 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 }, 14757 14758 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, 14759 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, 14760 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, 14761 14762 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, 14763 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, 14764 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 }, 14765 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 }, 14766 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 }, 14767 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 }, 14768 14769 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 }, 14770 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 }, 14771 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 }, 14772 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 }, 14773 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 }, 14774 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 }, 14775 14776 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 }, 14777 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 }, 14778 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, 14779 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, 14780 14781 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, 14782 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }, 14783 14784 /* SSE2 */ 14785 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 }, 14786 { MASK_SSE2, CODE_FOR_subv2df3, 
"__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 }, 14787 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 }, 14788 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 }, 14789 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 }, 14790 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 }, 14791 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 }, 14792 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 }, 14793 14794 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 }, 14795 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 }, 14796 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 }, 14797 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 14798 BUILTIN_DESC_SWAP_OPERANDS }, 14799 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 14800 BUILTIN_DESC_SWAP_OPERANDS }, 14801 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 }, 14802 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 }, 14803 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 }, 14804 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 }, 14805 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, 14806 BUILTIN_DESC_SWAP_OPERANDS }, 14807 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, 14808 BUILTIN_DESC_SWAP_OPERANDS }, 14809 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", 
IX86_BUILTIN_CMPORDPD, ORDERED, 0 }, 14810 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 }, 14811 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 }, 14812 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 }, 14813 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 }, 14814 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 }, 14815 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 }, 14816 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 }, 14817 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 }, 14818 14819 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 }, 14820 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 }, 14821 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 }, 14822 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 }, 14823 14824 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 }, 14825 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 }, 14826 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 }, 14827 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 }, 14828 14829 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 }, 14830 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 }, 14831 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 }, 14832 14833 /* SSE2 MMX */ 
14834 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 }, 14835 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 }, 14836 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 }, 14837 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 }, 14838 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 }, 14839 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 }, 14840 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 }, 14841 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 }, 14842 14843 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 }, 14844 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 }, 14845 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 }, 14846 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 }, 14847 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 }, 14848 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 }, 14849 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 }, 14850 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 }, 14851 14852 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 }, 14853 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 }, 14854 14855 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 }, 14856 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, 
"__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 }, 14857 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 }, 14858 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 }, 14859 14860 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 }, 14861 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 }, 14862 14863 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 }, 14864 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 }, 14865 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 }, 14866 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 }, 14867 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 }, 14868 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 }, 14869 14870 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 }, 14871 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 }, 14872 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 }, 14873 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 }, 14874 14875 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 }, 14876 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 }, 14877 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 }, 14878 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 }, 14879 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, 
"__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 }, 14880 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 }, 14881 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 }, 14882 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 }, 14883 14884 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 }, 14885 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 }, 14886 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 }, 14887 14888 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 }, 14889 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 }, 14890 14891 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 }, 14892 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 }, 14893 14894 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 }, 14895 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 }, 14896 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 }, 14897 14898 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 }, 14899 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 }, 14900 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 }, 14901 14902 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 }, 14903 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 }, 14904 14905 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 }, 14906 14907 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, 14908 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, 14909 { MASK_SSE2, 
CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },

  /* SSE3 MMX */
  { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
};

/* Builtins that take a single argument.  Field order follows the
   initializers used throughout these tables: target mask, insn code
   (CODE_FOR_*), builtin name (0 here means the builtin has no
   table-driven name -- NOTE(review): presumably such entries are
   registered by special-case def_builtin calls, as done for the
   2-operand table's unnamed entries; the 1-arg registration loop is
   outside this chunk, confirm there), builtin enum code, and two
   trailing fields (comparison code / flags) that are all zero in
   this table.  */
static const struct builtin_description bdesc_1arg[] =
{
  /* SSE (MASK_SSE, some also requiring 3DNow!-A per the mask).  */
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  /* 64-bit-only variants additionally carry MASK_64BIT.  */
  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  /* SSE2 */
  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  /* SSE3 */
  { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
};

/* Target hook entry point for builtin registration.  Everything is
   delegated to ix86_init_mmx_sse_builtins; when the target does not
   enable MMX (TARGET_MMX is false), no builtins are registered at
   all.  */
static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.
*/ 14977static void 14978ix86_init_mmx_sse_builtins (void) 14979{ 14980 const struct builtin_description * d; 14981 size_t i; 14982 14983 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode); 14984 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); 14985 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode); 14986 tree V2DI_type_node 14987 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode); 14988 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode); 14989 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode); 14990 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode); 14991 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); 14992 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode); 14993 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode); 14994 14995 tree pchar_type_node = build_pointer_type (char_type_node); 14996 tree pcchar_type_node = build_pointer_type ( 14997 build_type_variant (char_type_node, 1, 0)); 14998 tree pfloat_type_node = build_pointer_type (float_type_node); 14999 tree pcfloat_type_node = build_pointer_type ( 15000 build_type_variant (float_type_node, 1, 0)); 15001 tree pv2si_type_node = build_pointer_type (V2SI_type_node); 15002 tree pv2di_type_node = build_pointer_type (V2DI_type_node); 15003 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node); 15004 15005 /* Comparisons. */ 15006 tree int_ftype_v4sf_v4sf 15007 = build_function_type_list (integer_type_node, 15008 V4SF_type_node, V4SF_type_node, NULL_TREE); 15009 tree v4si_ftype_v4sf_v4sf 15010 = build_function_type_list (V4SI_type_node, 15011 V4SF_type_node, V4SF_type_node, NULL_TREE); 15012 /* MMX/SSE/integer conversions. 
*/ 15013 tree int_ftype_v4sf 15014 = build_function_type_list (integer_type_node, 15015 V4SF_type_node, NULL_TREE); 15016 tree int64_ftype_v4sf 15017 = build_function_type_list (long_long_integer_type_node, 15018 V4SF_type_node, NULL_TREE); 15019 tree int_ftype_v8qi 15020 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE); 15021 tree v4sf_ftype_v4sf_int 15022 = build_function_type_list (V4SF_type_node, 15023 V4SF_type_node, integer_type_node, NULL_TREE); 15024 tree v4sf_ftype_v4sf_int64 15025 = build_function_type_list (V4SF_type_node, 15026 V4SF_type_node, long_long_integer_type_node, 15027 NULL_TREE); 15028 tree v4sf_ftype_v4sf_v2si 15029 = build_function_type_list (V4SF_type_node, 15030 V4SF_type_node, V2SI_type_node, NULL_TREE); 15031 15032 /* Miscellaneous. */ 15033 tree v8qi_ftype_v4hi_v4hi 15034 = build_function_type_list (V8QI_type_node, 15035 V4HI_type_node, V4HI_type_node, NULL_TREE); 15036 tree v4hi_ftype_v2si_v2si 15037 = build_function_type_list (V4HI_type_node, 15038 V2SI_type_node, V2SI_type_node, NULL_TREE); 15039 tree v4sf_ftype_v4sf_v4sf_int 15040 = build_function_type_list (V4SF_type_node, 15041 V4SF_type_node, V4SF_type_node, 15042 integer_type_node, NULL_TREE); 15043 tree v2si_ftype_v4hi_v4hi 15044 = build_function_type_list (V2SI_type_node, 15045 V4HI_type_node, V4HI_type_node, NULL_TREE); 15046 tree v4hi_ftype_v4hi_int 15047 = build_function_type_list (V4HI_type_node, 15048 V4HI_type_node, integer_type_node, NULL_TREE); 15049 tree v4hi_ftype_v4hi_di 15050 = build_function_type_list (V4HI_type_node, 15051 V4HI_type_node, long_long_unsigned_type_node, 15052 NULL_TREE); 15053 tree v2si_ftype_v2si_di 15054 = build_function_type_list (V2SI_type_node, 15055 V2SI_type_node, long_long_unsigned_type_node, 15056 NULL_TREE); 15057 tree void_ftype_void 15058 = build_function_type (void_type_node, void_list_node); 15059 tree void_ftype_unsigned 15060 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); 15061 
tree void_ftype_unsigned_unsigned 15062 = build_function_type_list (void_type_node, unsigned_type_node, 15063 unsigned_type_node, NULL_TREE); 15064 tree void_ftype_pcvoid_unsigned_unsigned 15065 = build_function_type_list (void_type_node, const_ptr_type_node, 15066 unsigned_type_node, unsigned_type_node, 15067 NULL_TREE); 15068 tree unsigned_ftype_void 15069 = build_function_type (unsigned_type_node, void_list_node); 15070 tree v2si_ftype_v4sf 15071 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE); 15072 /* Loads/stores. */ 15073 tree void_ftype_v8qi_v8qi_pchar 15074 = build_function_type_list (void_type_node, 15075 V8QI_type_node, V8QI_type_node, 15076 pchar_type_node, NULL_TREE); 15077 tree v4sf_ftype_pcfloat 15078 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); 15079 /* @@@ the type is bogus */ 15080 tree v4sf_ftype_v4sf_pv2si 15081 = build_function_type_list (V4SF_type_node, 15082 V4SF_type_node, pv2si_type_node, NULL_TREE); 15083 tree void_ftype_pv2si_v4sf 15084 = build_function_type_list (void_type_node, 15085 pv2si_type_node, V4SF_type_node, NULL_TREE); 15086 tree void_ftype_pfloat_v4sf 15087 = build_function_type_list (void_type_node, 15088 pfloat_type_node, V4SF_type_node, NULL_TREE); 15089 tree void_ftype_pdi_di 15090 = build_function_type_list (void_type_node, 15091 pdi_type_node, long_long_unsigned_type_node, 15092 NULL_TREE); 15093 tree void_ftype_pv2di_v2di 15094 = build_function_type_list (void_type_node, 15095 pv2di_type_node, V2DI_type_node, NULL_TREE); 15096 /* Normal vector unops. */ 15097 tree v4sf_ftype_v4sf 15098 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); 15099 15100 /* Normal vector binops. 
*/ 15101 tree v4sf_ftype_v4sf_v4sf 15102 = build_function_type_list (V4SF_type_node, 15103 V4SF_type_node, V4SF_type_node, NULL_TREE); 15104 tree v8qi_ftype_v8qi_v8qi 15105 = build_function_type_list (V8QI_type_node, 15106 V8QI_type_node, V8QI_type_node, NULL_TREE); 15107 tree v4hi_ftype_v4hi_v4hi 15108 = build_function_type_list (V4HI_type_node, 15109 V4HI_type_node, V4HI_type_node, NULL_TREE); 15110 tree v2si_ftype_v2si_v2si 15111 = build_function_type_list (V2SI_type_node, 15112 V2SI_type_node, V2SI_type_node, NULL_TREE); 15113 tree di_ftype_di_di 15114 = build_function_type_list (long_long_unsigned_type_node, 15115 long_long_unsigned_type_node, 15116 long_long_unsigned_type_node, NULL_TREE); 15117 15118 tree v2si_ftype_v2sf 15119 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE); 15120 tree v2sf_ftype_v2si 15121 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE); 15122 tree v2si_ftype_v2si 15123 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE); 15124 tree v2sf_ftype_v2sf 15125 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE); 15126 tree v2sf_ftype_v2sf_v2sf 15127 = build_function_type_list (V2SF_type_node, 15128 V2SF_type_node, V2SF_type_node, NULL_TREE); 15129 tree v2si_ftype_v2sf_v2sf 15130 = build_function_type_list (V2SI_type_node, 15131 V2SF_type_node, V2SF_type_node, NULL_TREE); 15132 tree pint_type_node = build_pointer_type (integer_type_node); 15133 tree pdouble_type_node = build_pointer_type (double_type_node); 15134 tree pcdouble_type_node = build_pointer_type ( 15135 build_type_variant (double_type_node, 1, 0)); 15136 tree int_ftype_v2df_v2df 15137 = build_function_type_list (integer_type_node, 15138 V2DF_type_node, V2DF_type_node, NULL_TREE); 15139 15140 tree void_ftype_pcvoid 15141 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE); 15142 tree v4sf_ftype_v4si 15143 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE); 
15144 tree v4si_ftype_v4sf 15145 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE); 15146 tree v2df_ftype_v4si 15147 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE); 15148 tree v4si_ftype_v2df 15149 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE); 15150 tree v2si_ftype_v2df 15151 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE); 15152 tree v4sf_ftype_v2df 15153 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE); 15154 tree v2df_ftype_v2si 15155 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE); 15156 tree v2df_ftype_v4sf 15157 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE); 15158 tree int_ftype_v2df 15159 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE); 15160 tree int64_ftype_v2df 15161 = build_function_type_list (long_long_integer_type_node, 15162 V2DF_type_node, NULL_TREE); 15163 tree v2df_ftype_v2df_int 15164 = build_function_type_list (V2DF_type_node, 15165 V2DF_type_node, integer_type_node, NULL_TREE); 15166 tree v2df_ftype_v2df_int64 15167 = build_function_type_list (V2DF_type_node, 15168 V2DF_type_node, long_long_integer_type_node, 15169 NULL_TREE); 15170 tree v4sf_ftype_v4sf_v2df 15171 = build_function_type_list (V4SF_type_node, 15172 V4SF_type_node, V2DF_type_node, NULL_TREE); 15173 tree v2df_ftype_v2df_v4sf 15174 = build_function_type_list (V2DF_type_node, 15175 V2DF_type_node, V4SF_type_node, NULL_TREE); 15176 tree v2df_ftype_v2df_v2df_int 15177 = build_function_type_list (V2DF_type_node, 15178 V2DF_type_node, V2DF_type_node, 15179 integer_type_node, 15180 NULL_TREE); 15181 tree v2df_ftype_v2df_pcdouble 15182 = build_function_type_list (V2DF_type_node, 15183 V2DF_type_node, pcdouble_type_node, NULL_TREE); 15184 tree void_ftype_pdouble_v2df 15185 = build_function_type_list (void_type_node, 15186 pdouble_type_node, V2DF_type_node, NULL_TREE); 15187 tree void_ftype_pint_int 15188 
= build_function_type_list (void_type_node, 15189 pint_type_node, integer_type_node, NULL_TREE); 15190 tree void_ftype_v16qi_v16qi_pchar 15191 = build_function_type_list (void_type_node, 15192 V16QI_type_node, V16QI_type_node, 15193 pchar_type_node, NULL_TREE); 15194 tree v2df_ftype_pcdouble 15195 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE); 15196 tree v2df_ftype_v2df_v2df 15197 = build_function_type_list (V2DF_type_node, 15198 V2DF_type_node, V2DF_type_node, NULL_TREE); 15199 tree v16qi_ftype_v16qi_v16qi 15200 = build_function_type_list (V16QI_type_node, 15201 V16QI_type_node, V16QI_type_node, NULL_TREE); 15202 tree v8hi_ftype_v8hi_v8hi 15203 = build_function_type_list (V8HI_type_node, 15204 V8HI_type_node, V8HI_type_node, NULL_TREE); 15205 tree v4si_ftype_v4si_v4si 15206 = build_function_type_list (V4SI_type_node, 15207 V4SI_type_node, V4SI_type_node, NULL_TREE); 15208 tree v2di_ftype_v2di_v2di 15209 = build_function_type_list (V2DI_type_node, 15210 V2DI_type_node, V2DI_type_node, NULL_TREE); 15211 tree v2di_ftype_v2df_v2df 15212 = build_function_type_list (V2DI_type_node, 15213 V2DF_type_node, V2DF_type_node, NULL_TREE); 15214 tree v2df_ftype_v2df 15215 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); 15216 tree v2di_ftype_v2di_int 15217 = build_function_type_list (V2DI_type_node, 15218 V2DI_type_node, integer_type_node, NULL_TREE); 15219 tree v4si_ftype_v4si_int 15220 = build_function_type_list (V4SI_type_node, 15221 V4SI_type_node, integer_type_node, NULL_TREE); 15222 tree v8hi_ftype_v8hi_int 15223 = build_function_type_list (V8HI_type_node, 15224 V8HI_type_node, integer_type_node, NULL_TREE); 15225 tree v4si_ftype_v8hi_v8hi 15226 = build_function_type_list (V4SI_type_node, 15227 V8HI_type_node, V8HI_type_node, NULL_TREE); 15228 tree di_ftype_v8qi_v8qi 15229 = build_function_type_list (long_long_unsigned_type_node, 15230 V8QI_type_node, V8QI_type_node, NULL_TREE); 15231 tree di_ftype_v2si_v2si 15232 = 
build_function_type_list (long_long_unsigned_type_node, 15233 V2SI_type_node, V2SI_type_node, NULL_TREE); 15234 tree v2di_ftype_v16qi_v16qi 15235 = build_function_type_list (V2DI_type_node, 15236 V16QI_type_node, V16QI_type_node, NULL_TREE); 15237 tree v2di_ftype_v4si_v4si 15238 = build_function_type_list (V2DI_type_node, 15239 V4SI_type_node, V4SI_type_node, NULL_TREE); 15240 tree int_ftype_v16qi 15241 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE); 15242 tree v16qi_ftype_pcchar 15243 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE); 15244 tree void_ftype_pchar_v16qi 15245 = build_function_type_list (void_type_node, 15246 pchar_type_node, V16QI_type_node, NULL_TREE); 15247 15248 tree float80_type; 15249 tree float128_type; 15250 tree ftype; 15251 15252 /* The __float80 type. */ 15253 if (TYPE_MODE (long_double_type_node) == XFmode) 15254 (*lang_hooks.types.register_builtin_type) (long_double_type_node, 15255 "__float80"); 15256 else 15257 { 15258 /* The __float80 type. */ 15259 float80_type = make_node (REAL_TYPE); 15260 TYPE_PRECISION (float80_type) = 80; 15261 layout_type (float80_type); 15262 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80"); 15263 } 15264 15265 if (TARGET_64BIT) 15266 { 15267 float128_type = make_node (REAL_TYPE); 15268 TYPE_PRECISION (float128_type) = 128; 15269 layout_type (float128_type); 15270 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128"); 15271 } 15272 15273 /* Add all builtins that are more or less simple operations on two 15274 operands. */ 15275 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) 15276 { 15277 /* Use one of the operands; the target can have a different mode for 15278 mask-generating compares. 
*/ 15279 enum machine_mode mode; 15280 tree type; 15281 15282 if (d->name == 0) 15283 continue; 15284 mode = insn_data[d->icode].operand[1].mode; 15285 15286 switch (mode) 15287 { 15288 case V16QImode: 15289 type = v16qi_ftype_v16qi_v16qi; 15290 break; 15291 case V8HImode: 15292 type = v8hi_ftype_v8hi_v8hi; 15293 break; 15294 case V4SImode: 15295 type = v4si_ftype_v4si_v4si; 15296 break; 15297 case V2DImode: 15298 type = v2di_ftype_v2di_v2di; 15299 break; 15300 case V2DFmode: 15301 type = v2df_ftype_v2df_v2df; 15302 break; 15303 case V4SFmode: 15304 type = v4sf_ftype_v4sf_v4sf; 15305 break; 15306 case V8QImode: 15307 type = v8qi_ftype_v8qi_v8qi; 15308 break; 15309 case V4HImode: 15310 type = v4hi_ftype_v4hi_v4hi; 15311 break; 15312 case V2SImode: 15313 type = v2si_ftype_v2si_v2si; 15314 break; 15315 case DImode: 15316 type = di_ftype_di_di; 15317 break; 15318 15319 default: 15320 gcc_unreachable (); 15321 } 15322 15323 /* Override for comparisons. */ 15324 if (d->icode == CODE_FOR_sse_maskcmpv4sf3 15325 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3) 15326 type = v4si_ftype_v4sf_v4sf; 15327 15328 if (d->icode == CODE_FOR_sse2_maskcmpv2df3 15329 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3) 15330 type = v2di_ftype_v2df_v2df; 15331 15332 def_builtin (d->mask, d->name, type, d->code); 15333 } 15334 15335 /* Add the remaining MMX insns with somewhat more complicated types. 
*/ 15336 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); 15337 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW); 15338 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD); 15339 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ); 15340 15341 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW); 15342 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD); 15343 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ); 15344 15345 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW); 15346 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD); 15347 15348 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW); 15349 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD); 15350 15351 /* comi/ucomi insns. 
*/ 15352 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) 15353 if (d->mask == MASK_SSE2) 15354 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code); 15355 else 15356 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); 15357 15358 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB); 15359 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW); 15360 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB); 15361 15362 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); 15363 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); 15364 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); 15365 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); 15366 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); 15367 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS); 15368 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); 15369 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64); 15370 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); 15371 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); 15372 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64); 15373 15374 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); 15375 15376 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS); 15377 def_builtin (MASK_SSE, 
"__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); 15378 15379 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); 15380 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); 15381 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); 15382 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); 15383 15384 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); 15385 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); 15386 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); 15387 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); 15388 15389 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); 15390 15391 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); 15392 15393 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); 15394 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); 15395 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); 15396 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); 15397 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); 15398 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); 15399 15400 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); 15401 15402 /* Original 3DNow! 
*/ 15403 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS); 15404 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB); 15405 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID); 15406 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC); 15407 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD); 15408 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ); 15409 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE); 15410 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT); 15411 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX); 15412 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN); 15413 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL); 15414 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP); 15415 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1); 15416 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2); 15417 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT); 15418 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1); 15419 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB); 15420 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR); 15421 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD); 15422 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW); 15423 15424 /* 
3DNow! extension as used in the Athlon CPU. */ 15425 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW); 15426 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC); 15427 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC); 15428 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW); 15429 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF); 15430 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI); 15431 15432 /* SSE2 */ 15433 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU); 15434 15435 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD); 15436 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD); 15437 15438 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD); 15439 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD); 15440 15441 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD); 15442 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128); 15443 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI); 15444 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD); 15445 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ); 15446 15447 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD); 15448 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW); 15449 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", 
v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW); 15450 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128); 15451 15452 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD); 15453 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD); 15454 15455 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD); 15456 15457 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD); 15458 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS); 15459 15460 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ); 15461 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI); 15462 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS); 15463 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ); 15464 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI); 15465 15466 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD); 15467 15468 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI); 15469 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI); 15470 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64); 15471 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64); 15472 15473 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ); 15474 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD); 15475 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ); 
15476 15477 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD); 15478 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD); 15479 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS); 15480 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD); 15481 15482 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH); 15483 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE); 15484 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); 15485 15486 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU); 15487 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU); 15488 15489 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ); 15490 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128); 15491 15492 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128); 15493 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128); 15494 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128); 15495 15496 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128); 15497 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128); 15498 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128); 15499 15500 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128); 15501 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128); 15502 
15503 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128); 15504 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128); 15505 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128); 15506 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128); 15507 15508 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128); 15509 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128); 15510 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128); 15511 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128); 15512 15513 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128); 15514 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128); 15515 15516 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128); 15517 15518 /* Prescott New Instructions. */ 15519 def_builtin (MASK_SSE3, "__builtin_ia32_monitor", 15520 void_ftype_pcvoid_unsigned_unsigned, 15521 IX86_BUILTIN_MONITOR); 15522 def_builtin (MASK_SSE3, "__builtin_ia32_mwait", 15523 void_ftype_unsigned_unsigned, 15524 IX86_BUILTIN_MWAIT); 15525 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup", 15526 v4sf_ftype_v4sf, 15527 IX86_BUILTIN_MOVSHDUP); 15528 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup", 15529 v4sf_ftype_v4sf, 15530 IX86_BUILTIN_MOVSLDUP); 15531 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu", 15532 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU); 15533 15534 /* Access to the vec_init patterns. 
*/ 15535 ftype = build_function_type_list (V2SI_type_node, integer_type_node, 15536 integer_type_node, NULL_TREE); 15537 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si", 15538 ftype, IX86_BUILTIN_VEC_INIT_V2SI); 15539 15540 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node, 15541 short_integer_type_node, 15542 short_integer_type_node, 15543 short_integer_type_node, NULL_TREE); 15544 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi", 15545 ftype, IX86_BUILTIN_VEC_INIT_V4HI); 15546 15547 ftype = build_function_type_list (V8QI_type_node, char_type_node, 15548 char_type_node, char_type_node, 15549 char_type_node, char_type_node, 15550 char_type_node, char_type_node, 15551 char_type_node, NULL_TREE); 15552 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi", 15553 ftype, IX86_BUILTIN_VEC_INIT_V8QI); 15554 15555 /* Access to the vec_extract patterns. */ 15556 ftype = build_function_type_list (double_type_node, V2DF_type_node, 15557 integer_type_node, NULL_TREE); 15558 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df", 15559 ftype, IX86_BUILTIN_VEC_EXT_V2DF); 15560 15561 ftype = build_function_type_list (long_long_integer_type_node, 15562 V2DI_type_node, integer_type_node, 15563 NULL_TREE); 15564 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di", 15565 ftype, IX86_BUILTIN_VEC_EXT_V2DI); 15566 15567 ftype = build_function_type_list (float_type_node, V4SF_type_node, 15568 integer_type_node, NULL_TREE); 15569 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf", 15570 ftype, IX86_BUILTIN_VEC_EXT_V4SF); 15571 15572 ftype = build_function_type_list (intSI_type_node, V4SI_type_node, 15573 integer_type_node, NULL_TREE); 15574 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si", 15575 ftype, IX86_BUILTIN_VEC_EXT_V4SI); 15576 15577 ftype = build_function_type_list (intHI_type_node, V8HI_type_node, 15578 integer_type_node, NULL_TREE); 15579 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi", 15580 ftype, IX86_BUILTIN_VEC_EXT_V8HI); 
  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
	       ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
	       ftype, IX86_BUILTIN_VEC_EXT_V2SI);

  ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
	       ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.

   X is the operand to sanitize; MODE is the vector mode the caller
   expects.  If X is the scalar const0_rtx, substitute the zero vector
   constant of MODE; otherwise return X unchanged.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.
 */

/* ICODE is the code of a two-input, one-output insn pattern.  ARGLIST
   holds the two argument trees; TARGET is a suggested place for the
   result, reused only when it has the right mode and satisfies the
   destination predicate.  Returns an rtx holding the result, or 0 if
   the pattern generator declined to produce an insn.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  /* Replace a scalar const0_rtx by a zero vector of the operand mode.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* The pattern wants a TImode operand but we only have an SImode
     value: load it into a V4SImode register and view that register
     as TImode.  */
  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result.  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      /* All three operands share one mode: let the common fixup routine
	 legitimize them.  */
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of stores.
   Operand 0 of ICODE is a MEM built from the pointer value of the first
   argument; operand 1 is the value to store.  Always returns 0: a store
   produces no useful value.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.
   If DO_LOAD is nonzero, the single argument is a pointer and the insn
   operand becomes a MEM at that address; otherwise the argument's value
   is used directly.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
			  rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  Their patterns take the input in both operand 1
   and operand 2, so the single argument is duplicated.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  /* Feed the same value to both input operands.  */
  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.
 */

/* D describes the comparison builtin: D->icode is the insn pattern and
   D->comparison the rtx comparison code that becomes operand 3 of the
   pattern.  ARGLIST holds the two operands; TARGET is a suggested
   result location.  Returns the result rtx, or 0 if the pattern
   generator declined.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
			 rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  OP1 is first copied into a fresh register, then the
     two operand pointers are exchanged.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* Operand 3 of the pattern is the comparison rtx itself.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.
*/ 15837 15838static rtx 15839ix86_expand_sse_comi (const struct builtin_description *d, tree arglist, 15840 rtx target) 15841{ 15842 rtx pat; 15843 tree arg0 = TREE_VALUE (arglist); 15844 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 15845 rtx op0 = expand_normal (arg0); 15846 rtx op1 = expand_normal (arg1); 15847 rtx op2; 15848 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; 15849 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; 15850 enum rtx_code comparison = d->comparison; 15851 15852 if (VECTOR_MODE_P (mode0)) 15853 op0 = safe_vector_operand (op0, mode0); 15854 if (VECTOR_MODE_P (mode1)) 15855 op1 = safe_vector_operand (op1, mode1); 15856 15857 /* Swap operands if we have a comparison that isn't available in 15858 hardware. */ 15859 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) 15860 { 15861 rtx tmp = op1; 15862 op1 = op0; 15863 op0 = tmp; 15864 } 15865 15866 target = gen_reg_rtx (SImode); 15867 emit_move_insn (target, const0_rtx); 15868 target = gen_rtx_SUBREG (QImode, target, 0); 15869 15870 if ((optimize && !register_operand (op0, mode0)) 15871 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0)) 15872 op0 = copy_to_mode_reg (mode0, op0); 15873 if ((optimize && !register_operand (op1, mode1)) 15874 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1)) 15875 op1 = copy_to_mode_reg (mode1, op1); 15876 15877 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); 15878 pat = GEN_FCN (d->icode) (op0, op1); 15879 if (! pat) 15880 return 0; 15881 emit_insn (pat); 15882 emit_insn (gen_rtx_SET (VOIDmode, 15883 gen_rtx_STRICT_LOW_PART (VOIDmode, target), 15884 gen_rtx_fmt_ee (comparison, QImode, 15885 SET_DEST (pat), 15886 const0_rtx))); 15887 15888 return SUBREG_REG (target); 15889} 15890 15891/* Return the integer constant in ARG. Constrain it to be in the range 15892 of the subparts of VEC_TYPE; issue an error if not. 
*/ 15893 15894static int 15895get_element_number (tree vec_type, tree arg) 15896{ 15897 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; 15898 15899 if (!host_integerp (arg, 1) 15900 || (elt = tree_low_cst (arg, 1), elt > max)) 15901 { 15902 error ("selector must be an integer constant in the range 0..%wi", max); 15903 return 0; 15904 } 15905 15906 return elt; 15907} 15908 15909/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around 15910 ix86_expand_vector_init. We DO have language-level syntax for this, in 15911 the form of (type){ init-list }. Except that since we can't place emms 15912 instructions from inside the compiler, we can't allow the use of MMX 15913 registers unless the user explicitly asks for it. So we do *not* define 15914 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead 15915 we have builtins invoked by mmintrin.h that gives us license to emit 15916 these sorts of instructions. */ 15917 15918static rtx 15919ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target) 15920{ 15921 enum machine_mode tmode = TYPE_MODE (type); 15922 enum machine_mode inner_mode = GET_MODE_INNER (tmode); 15923 int i, n_elt = GET_MODE_NUNITS (tmode); 15924 rtvec v = rtvec_alloc (n_elt); 15925 15926 gcc_assert (VECTOR_MODE_P (tmode)); 15927 15928 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist)) 15929 { 15930 rtx x = expand_normal (TREE_VALUE (arglist)); 15931 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); 15932 } 15933 15934 gcc_assert (arglist == NULL); 15935 15936 if (!target || !register_operand (target, tmode)) 15937 target = gen_reg_rtx (tmode); 15938 15939 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v)); 15940 return target; 15941} 15942 15943/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around 15944 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we 15945 had a language-level syntax for referencing vector elements. 
 */

/* ARGLIST holds the vector value and the constant element selector;
   TARGET is a suggested place for the extracted scalar.  Returns the
   rtx holding the extracted element.  */

static rtx
ix86_expand_vec_ext_builtin (tree arglist, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  /* TMODE is the element (scalar) mode, MODE0 the vector mode.  */
  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree arglist)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  /* ARG0 is the vector, ARG1 the new element value, ARG2 the constant
     element selector.  */
  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.
     Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  /* EXP is a CALL_EXPR; operand 0 is the function address, whose operand 0
     is the FUNCTION_DECL, and operand 1 is the argument list.  */
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* Builtins with irregular expansion are handled case by case below;
     regular one- and two-operand builtins fall through to the table
     searches at the end of the function.  */
  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_mmx_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sse_sfence ());
      return 0;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? CODE_FOR_mmx_maskmovq
               : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.
       */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      /* The destination address becomes a MEM of the store mode.  */
      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
               : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
               : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
               : CODE_FOR_sse2_loadlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      /* Operand 1 is the existing vector; operand 2 is the memory source.  */
      op0 = force_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || !register_operand (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
               : CODE_FOR_sse_storelps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      /* Operand 0 is the memory destination; operand 1 the vector source.  */
      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      op1 = force_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op1);
      if (!
          pat)
        return 0;
      emit_insn (pat);
      return const0_rtx;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      /* LDMXCSR loads from memory, so spill the value to a stack slot.  */
      op0 = expand_normal (TREE_VALUE (arglist));
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      /* STMXCSR stores to memory; read the result back from the slot.  */
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
               ? CODE_FOR_sse_shufps
               : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if ((optimize && !register_operand (op1, mode1))
          || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      /* The shuffle selector must satisfy the immediate predicate.  */
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
               : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
               : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
               : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      /* The shuffle selector must satisfy the immediate predicate.  */
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (!
          pat)
        return 0;
      emit_insn (pat);
      return target;

    /* Shift-by-immediate builtins: select the insn, then share the
       expansion code at do_pshifti.  */
    case IX86_BUILTIN_PSLLWI128:
      icode = CODE_FOR_ashlv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSLLDI128:
      icode = CODE_FOR_ashlv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSLLQI128:
      icode = CODE_FOR_ashlv2di3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRAWI128:
      icode = CODE_FOR_ashrv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRADI128:
      icode = CODE_FOR_ashrv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLWI128:
      icode = CODE_FOR_lshrv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLDI128:
      icode = CODE_FOR_lshrv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLQI128:
      icode = CODE_FOR_lshrv2di3;
      goto do_pshifti;
    do_pshifti:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);

      if (GET_CODE (op1) != CONST_INT)
        {
          error ("shift must be an immediate");
          return const0_rtx;
        }
      /* Clamp out-of-range counts to 255, matching the hardware's
         "shift out everything" behavior for large counts.  */
      if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
        op1 = GEN_INT (255);

      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_reg (op0);

      target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (!pat)
        return 0;
      emit_insn (pat);
      return target;

    /* Shift-by-register builtins: select the insn, then share the
       expansion code at do_pshift.  */
    case IX86_BUILTIN_PSLLW128:
      icode = CODE_FOR_ashlv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLD128:
      icode = CODE_FOR_ashlv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLQ128:
      icode = CODE_FOR_ashlv2di3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAW128:
      icode = CODE_FOR_ashrv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAD128:
      icode = CODE_FOR_ashrv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLW128:
      icode = CODE_FOR_lshrv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLD128:
      icode = CODE_FOR_lshrv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLQ128:
      icode = CODE_FOR_lshrv2di3;
      goto do_pshift;
    do_pshift:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);

      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_reg (op0);

      /* The count operand is viewed as TImode by the shift patterns.  */
      op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
        op1 = copy_to_reg (op1);

      target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (!pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ?
               CODE_FOR_sse2_ashlti3
               : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        {
          op0 = copy_to_reg (op0);
          op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
        }
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          error ("shift must be an immediate");
          return const0_rtx;
        }
      /* The insn operates on TImode; view the V2DImode result through
         a subreg.  */
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
                             op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_mmx_femms ());
      return NULL_RTX;

    /* 3DNow! builtins expand through the generic unop/binop helpers.  */
    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2,
                                       arglist, target, 0);

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
        op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);

    case IX86_BUILTIN_MONITOR:
      /* MONITOR takes an address plus two hint words; all three must be
         in registers.  */
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (Pmode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
        op2 = copy_to_mode_reg (SImode, op2);
      if (!TARGET_64BIT)
        emit_insn (gen_sse3_monitor (op0, op1, op2));
      else
        emit_insn (gen_sse3_monitor64 (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
                                       target, 1);

    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
      return ix86_expand_vec_ext_builtin (arglist, target);

    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
      return ix86_expand_vec_set_builtin (arglist);

    default:
      break;
    }

  /* Regular builtins: search the two-operand table first.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
        /* Compares are treated specially.
         */
        if (d->icode == CODE_FOR_sse_maskcmpv4sf3
            || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
            || d->icode == CODE_FOR_sse2_maskcmpv2df3
            || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
          return ix86_expand_sse_compare (d, arglist, target);

        return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* Every function code must be handled above or found in a table.  */
  gcc_unreachable ();
}

/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (TARGET_RED_ZONE)
    {
      /* With a red zone we may store below the stack pointer without
         adjusting it.  */
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      /* No red zone in 64-bit mode: push the value (widened to DImode)
         with a pre-decrement store.  */
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (
                     gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (DImode,
                                               gen_rtx_PRE_DEC (DImode,
                                                                stack_pointer_rtx)),
                                  operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      /* 32-bit mode: push in SImode pieces, high word first.  */
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_di (&operand, 1, operands, operands + 1);
            emit_insn (
                       gen_rtx_SET (VOIDmode,
                                    gen_rtx_MEM (SImode,
                                                 gen_rtx_PRE_DEC (Pmode,
                                                                  stack_pointer_rtx)),
                                    operands[1]));
            emit_insn
              (
               gen_rtx_SET (VOIDmode,
                            gen_rtx_MEM (SImode,
                                         gen_rtx_PRE_DEC (Pmode,
                                                          stack_pointer_rtx)),
                            operands[0]));
          }
          break;
        case HImode:
          /* Store HImodes as SImodes.  */
          operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (
                     gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (GET_MODE (operand),
                                               gen_rtx_PRE_DEC (SImode,
                                                                stack_pointer_rtx)),
                                  operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

/* Free operand from the memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  /* Nothing was pushed when a red zone is available, so nothing to free.  */
  if (!TARGET_RED_ZONE)
    {
      int size;

      /* Match the slot size pushed by ix86_force_to_memory.  */
      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
         to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (class == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return class;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.
     This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (class) ? class : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
        return class;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
         zero above.  We only want to wind up preferring 80387 registers if
         we plan on doing computation with them.  */
      if (TARGET_80387
          && standard_80387_constant_p (x))
        {
          /* Limit class to non-sse.  */
          if (class == FLOAT_SSE_REGS)
            return FLOAT_REGS;
          if (class == FP_TOP_SSE_REGS)
            return FP_TOP_REG;
          if (class == FP_SECOND_SSE_REGS)
            return FP_SECOND_REG;
          if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
            return class;
        }

      /* Any other FP constant must come from memory.  */
      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.
*/ 16712 if (GET_MODE (x) == QImode && !CONSTANT_P (x)) 16713 { 16714 if (reg_class_subset_p (class, Q_REGS)) 16715 return class; 16716 if (reg_class_subset_p (Q_REGS, class)) 16717 return Q_REGS; 16718 return NO_REGS; 16719 } 16720 16721 return class; 16722} 16723 16724/* Discourage putting floating-point values in SSE registers unless 16725 SSE math is being used, and likewise for the 387 registers. */ 16726enum reg_class 16727ix86_preferred_output_reload_class (rtx x, enum reg_class class) 16728{ 16729 enum machine_mode mode = GET_MODE (x); 16730 16731 /* Restrict the output reload class to the register bank that we are doing 16732 math on. If we would like not to return a subset of CLASS, reject this 16733 alternative: if reload cannot do this, it will still use its choice. */ 16734 mode = GET_MODE (x); 16735 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) 16736 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS; 16737 16738 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode)) 16739 { 16740 if (class == FP_TOP_SSE_REGS) 16741 return FP_TOP_REG; 16742 else if (class == FP_SECOND_SSE_REGS) 16743 return FP_SECOND_REG; 16744 else 16745 return FLOAT_CLASS_P (class) ? class : NO_REGS; 16746 } 16747 16748 return class; 16749} 16750 16751/* If we are copying between general and FP registers, we need a memory 16752 location. The same is true for SSE and MMX registers. 16753 16754 The macro can't work reliably when one of the CLASSES is class containing 16755 registers from multiple units (SSE, MMX, integer). We avoid this by never 16756 combining those units in single alternative in the machine description. 16757 Ensure that this constraint holds to avoid unexpected surprises. 16758 16759 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not 16760 enforce these sanity checks. 
*/ 16761 16762int 16763ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2, 16764 enum machine_mode mode, int strict) 16765{ 16766 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) 16767 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) 16768 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) 16769 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2) 16770 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1) 16771 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)) 16772 { 16773 gcc_assert (!strict); 16774 return true; 16775 } 16776 16777 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)) 16778 return true; 16779 16780 /* ??? This is a lie. We do have moves between mmx/general, and for 16781 mmx/sse2. But by saying we need secondary memory we discourage the 16782 register allocator from using the mmx registers unless needed. */ 16783 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) 16784 return true; 16785 16786 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) 16787 { 16788 /* SSE1 doesn't have any direct moves from other classes. */ 16789 if (!TARGET_SSE2) 16790 return true; 16791 16792 /* If the target says that inter-unit moves are more expensive 16793 than moving through memory, then don't generate them. */ 16794 if (!TARGET_INTER_UNIT_MOVES && !optimize_size) 16795 return true; 16796 16797 /* Between SSE and general, we have moves no larger than word size. */ 16798 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) 16799 return true; 16800 16801 /* ??? For the cost of one register reformat penalty, we could use 16802 the same instructions to move SFmode and DFmode data, but the 16803 relevant move patterns don't support those alternatives. */ 16804 if (mode == SFmode || mode == DFmode) 16805 return true; 16806 } 16807 16808 return false; 16809} 16810 16811/* Return true if the registers in CLASS cannot represent the change from 16812 modes FROM to TO. 
*/ 16813 16814bool 16815ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to, 16816 enum reg_class class) 16817{ 16818 if (from == to) 16819 return false; 16820 16821 /* x87 registers can't do subreg at all, as all values are reformatted 16822 to extended precision. */ 16823 if (MAYBE_FLOAT_CLASS_P (class)) 16824 return true; 16825 16826 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class)) 16827 { 16828 /* Vector registers do not support QI or HImode loads. If we don't 16829 disallow a change to these modes, reload will assume it's ok to 16830 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects 16831 the vec_dupv4hi pattern. */ 16832 if (GET_MODE_SIZE (from) < 4) 16833 return true; 16834 16835 /* Vector registers do not support subreg with nonzero offsets, which 16836 are otherwise valid for integer registers. Since we can't see 16837 whether we have a nonzero offset from here, prohibit all 16838 nonparadoxical subregs changing size. */ 16839 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from)) 16840 return true; 16841 } 16842 16843 return false; 16844} 16845 16846/* Return the cost of moving data from a register in class CLASS1 to 16847 one in class CLASS2. 16848 16849 It is not required that the cost always equal 2 when FROM is the same as TO; 16850 on some machines it is expensive to move between registers if they are not 16851 general registers. */ 16852 16853int 16854ix86_register_move_cost (enum machine_mode mode, enum reg_class class1, 16855 enum reg_class class2) 16856{ 16857 /* In case we require secondary memory, compute cost of the store followed 16858 by load. In order to avoid bad register allocation choices, we need 16859 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. 
   */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      /* Charge the worse of the load/store direction for each class.  */
      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
                   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
                   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from general_purpose_register we may emit multiple
         stores followed by single load causing memory size mismatch stall.
         Count this as arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
         have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */

bool
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.
   */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
         out of SSE registers, even when no operation instructions
         are available.  */
      return (VALID_SSE_REG_MODE (mode)
              || VALID_SSE2_REG_MODE (mode)
              || VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
         so if the register is available at all, then we can move data of
         the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
         but then they do cause partial register stalls.  */
      if (regno < 4 || TARGET_64BIT)
        return 1;
      if (!TARGET_PARTIAL_REG_STALL)
        return 1;
      /* After reload has started, trust reload's placement.  */
      return reload_in_progress || reload_completed;
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return 1;
  else if (VALID_FP_MODE_P (mode))
    return 1;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return 1;

  return 0;
}

/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.
*/ 16958 16959static bool 16960ix86_tieable_integer_mode_p (enum machine_mode mode) 16961{ 16962 switch (mode) 16963 { 16964 case HImode: 16965 case SImode: 16966 return true; 16967 16968 case QImode: 16969 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL; 16970 16971 case DImode: 16972 return TARGET_64BIT; 16973 16974 default: 16975 return false; 16976 } 16977} 16978 16979/* Return true if MODE1 is accessible in a register that can hold MODE2 16980 without copying. That is, all register classes that can hold MODE2 16981 can also hold MODE1. */ 16982 16983bool 16984ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) 16985{ 16986 if (mode1 == mode2) 16987 return true; 16988 16989 if (ix86_tieable_integer_mode_p (mode1) 16990 && ix86_tieable_integer_mode_p (mode2)) 16991 return true; 16992 16993 /* MODE2 being XFmode implies fp stack or general regs, which means we 16994 can tie any smaller floating point modes to it. Note that we do not 16995 tie this with TFmode. */ 16996 if (mode2 == XFmode) 16997 return mode1 == SFmode || mode1 == DFmode; 16998 16999 /* MODE2 being DFmode implies fp stack, general or sse regs, which means 17000 that we can tie it with SFmode. */ 17001 if (mode2 == DFmode) 17002 return mode1 == SFmode; 17003 17004 /* If MODE2 is only appropriate for an SSE register, then tie with 17005 any other mode acceptable to SSE registers. */ 17006 if (GET_MODE_SIZE (mode2) >= 8 17007 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) 17008 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1); 17009 17010 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie 17011 with any other mode acceptable to MMX registers. */ 17012 if (GET_MODE_SIZE (mode2) == 8 17013 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2)) 17014 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1); 17015 17016 return false; 17017} 17018 17019/* Return the cost of moving data of mode M between a 17020 register and memory. 
A value of 2 is the default; this cost is 17021 relative to those in `REGISTER_MOVE_COST'. 17022 17023 If moving between registers and memory is more expensive than 17024 between two registers, you should define this macro to express the 17025 relative cost. 17026 17027 Model also increased moving costs of QImode registers in non 17028 Q_REGS classes. 17029 */ 17030int 17031ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in) 17032{ 17033 if (FLOAT_CLASS_P (class)) 17034 { 17035 int index; 17036 switch (mode) 17037 { 17038 case SFmode: 17039 index = 0; 17040 break; 17041 case DFmode: 17042 index = 1; 17043 break; 17044 case XFmode: 17045 index = 2; 17046 break; 17047 default: 17048 return 100; 17049 } 17050 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index]; 17051 } 17052 if (SSE_CLASS_P (class)) 17053 { 17054 int index; 17055 switch (GET_MODE_SIZE (mode)) 17056 { 17057 case 4: 17058 index = 0; 17059 break; 17060 case 8: 17061 index = 1; 17062 break; 17063 case 16: 17064 index = 2; 17065 break; 17066 default: 17067 return 100; 17068 } 17069 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index]; 17070 } 17071 if (MMX_CLASS_P (class)) 17072 { 17073 int index; 17074 switch (GET_MODE_SIZE (mode)) 17075 { 17076 case 4: 17077 index = 0; 17078 break; 17079 case 8: 17080 index = 1; 17081 break; 17082 default: 17083 return 100; 17084 } 17085 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index]; 17086 } 17087 switch (GET_MODE_SIZE (mode)) 17088 { 17089 case 1: 17090 if (in) 17091 return (Q_CLASS_P (class) ? ix86_cost->int_load[0] 17092 : ix86_cost->movzbl_load); 17093 else 17094 return (Q_CLASS_P (class) ? ix86_cost->int_store[0] 17095 : ix86_cost->int_store[0] + 4); 17096 break; 17097 case 2: 17098 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1]; 17099 default: 17100 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. 
*/ 17101 if (mode == TFmode) 17102 mode = XFmode; 17103 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2]) 17104 * (((int) GET_MODE_SIZE (mode) 17105 + UNITS_PER_WORD - 1) / UNITS_PER_WORD)); 17106 } 17107} 17108 17109/* Compute a (partial) cost for rtx X. Return true if the complete 17110 cost has been computed, and false if subexpressions should be 17111 scanned. In either case, *TOTAL contains the cost result. */ 17112 17113static bool 17114ix86_rtx_costs (rtx x, int code, int outer_code, int *total) 17115{ 17116 enum machine_mode mode = GET_MODE (x); 17117 17118 switch (code) 17119 { 17120 case CONST_INT: 17121 case CONST: 17122 case LABEL_REF: 17123 case SYMBOL_REF: 17124 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode)) 17125 *total = 3; 17126 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode)) 17127 *total = 2; 17128 else if (flag_pic && SYMBOLIC_CONST (x) 17129 && (!TARGET_64BIT 17130 || (!GET_CODE (x) != LABEL_REF 17131 && (GET_CODE (x) != SYMBOL_REF 17132 || !SYMBOL_REF_LOCAL_P (x))))) 17133 *total = 1; 17134 else 17135 *total = 0; 17136 return true; 17137 17138 case CONST_DOUBLE: 17139 if (mode == VOIDmode) 17140 *total = 0; 17141 else 17142 switch (standard_80387_constant_p (x)) 17143 { 17144 case 1: /* 0.0 */ 17145 *total = 1; 17146 break; 17147 default: /* Other constants */ 17148 *total = 2; 17149 break; 17150 case 0: 17151 case -1: 17152 /* Start with (MEM (SYMBOL_REF)), since that's where 17153 it'll probably end up. Add a penalty for size. */ 17154 *total = (COSTS_N_INSNS (1) 17155 + (flag_pic != 0 && !TARGET_64BIT) 17156 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2)); 17157 break; 17158 } 17159 return true; 17160 17161 case ZERO_EXTEND: 17162 /* The zero extensions is often completely free on x86_64, so make 17163 it as cheap as possible. 
*/ 17164 if (TARGET_64BIT && mode == DImode 17165 && GET_MODE (XEXP (x, 0)) == SImode) 17166 *total = 1; 17167 else if (TARGET_ZERO_EXTEND_WITH_AND) 17168 *total = ix86_cost->add; 17169 else 17170 *total = ix86_cost->movzx; 17171 return false; 17172 17173 case SIGN_EXTEND: 17174 *total = ix86_cost->movsx; 17175 return false; 17176 17177 case ASHIFT: 17178 if (GET_CODE (XEXP (x, 1)) == CONST_INT 17179 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT)) 17180 { 17181 HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 17182 if (value == 1) 17183 { 17184 *total = ix86_cost->add; 17185 return false; 17186 } 17187 if ((value == 2 || value == 3) 17188 && ix86_cost->lea <= ix86_cost->shift_const) 17189 { 17190 *total = ix86_cost->lea; 17191 return false; 17192 } 17193 } 17194 /* FALLTHRU */ 17195 17196 case ROTATE: 17197 case ASHIFTRT: 17198 case LSHIFTRT: 17199 case ROTATERT: 17200 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode) 17201 { 17202 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 17203 { 17204 if (INTVAL (XEXP (x, 1)) > 32) 17205 *total = ix86_cost->shift_const + COSTS_N_INSNS (2); 17206 else 17207 *total = ix86_cost->shift_const * 2; 17208 } 17209 else 17210 { 17211 if (GET_CODE (XEXP (x, 1)) == AND) 17212 *total = ix86_cost->shift_var * 2; 17213 else 17214 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2); 17215 } 17216 } 17217 else 17218 { 17219 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 17220 *total = ix86_cost->shift_const; 17221 else 17222 *total = ix86_cost->shift_var; 17223 } 17224 return false; 17225 17226 case MULT: 17227 if (FLOAT_MODE_P (mode)) 17228 { 17229 *total = ix86_cost->fmul; 17230 return false; 17231 } 17232 else 17233 { 17234 rtx op0 = XEXP (x, 0); 17235 rtx op1 = XEXP (x, 1); 17236 int nbits; 17237 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 17238 { 17239 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 17240 for (nbits = 0; value != 0; value &= value - 1) 17241 nbits++; 17242 } 17243 else 17244 /* This is arbitrary. 
*/ 17245 nbits = 7; 17246 17247 /* Compute costs correctly for widening multiplication. */ 17248 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND) 17249 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2 17250 == GET_MODE_SIZE (mode)) 17251 { 17252 int is_mulwiden = 0; 17253 enum machine_mode inner_mode = GET_MODE (op0); 17254 17255 if (GET_CODE (op0) == GET_CODE (op1)) 17256 is_mulwiden = 1, op1 = XEXP (op1, 0); 17257 else if (GET_CODE (op1) == CONST_INT) 17258 { 17259 if (GET_CODE (op0) == SIGN_EXTEND) 17260 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode) 17261 == INTVAL (op1); 17262 else 17263 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode)); 17264 } 17265 17266 if (is_mulwiden) 17267 op0 = XEXP (op0, 0), mode = GET_MODE (op0); 17268 } 17269 17270 *total = (ix86_cost->mult_init[MODE_INDEX (mode)] 17271 + nbits * ix86_cost->mult_bit 17272 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code)); 17273 17274 return true; 17275 } 17276 17277 case DIV: 17278 case UDIV: 17279 case MOD: 17280 case UMOD: 17281 if (FLOAT_MODE_P (mode)) 17282 *total = ix86_cost->fdiv; 17283 else 17284 *total = ix86_cost->divide[MODE_INDEX (mode)]; 17285 return false; 17286 17287 case PLUS: 17288 if (FLOAT_MODE_P (mode)) 17289 *total = ix86_cost->fadd; 17290 else if (GET_MODE_CLASS (mode) == MODE_INT 17291 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode)) 17292 { 17293 if (GET_CODE (XEXP (x, 0)) == PLUS 17294 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 17295 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT 17296 && CONSTANT_P (XEXP (x, 1))) 17297 { 17298 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); 17299 if (val == 2 || val == 4 || val == 8) 17300 { 17301 *total = ix86_cost->lea; 17302 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code); 17303 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), 17304 outer_code); 17305 *total += rtx_cost (XEXP (x, 1), outer_code); 17306 return true; 17307 } 17308 } 17309 else if 
(GET_CODE (XEXP (x, 0)) == MULT 17310 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT) 17311 { 17312 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); 17313 if (val == 2 || val == 4 || val == 8) 17314 { 17315 *total = ix86_cost->lea; 17316 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code); 17317 *total += rtx_cost (XEXP (x, 1), outer_code); 17318 return true; 17319 } 17320 } 17321 else if (GET_CODE (XEXP (x, 0)) == PLUS) 17322 { 17323 *total = ix86_cost->lea; 17324 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code); 17325 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code); 17326 *total += rtx_cost (XEXP (x, 1), outer_code); 17327 return true; 17328 } 17329 } 17330 /* FALLTHRU */ 17331 17332 case MINUS: 17333 if (FLOAT_MODE_P (mode)) 17334 { 17335 *total = ix86_cost->fadd; 17336 return false; 17337 } 17338 /* FALLTHRU */ 17339 17340 case AND: 17341 case IOR: 17342 case XOR: 17343 if (!TARGET_64BIT && mode == DImode) 17344 { 17345 *total = (ix86_cost->add * 2 17346 + (rtx_cost (XEXP (x, 0), outer_code) 17347 << (GET_MODE (XEXP (x, 0)) != DImode)) 17348 + (rtx_cost (XEXP (x, 1), outer_code) 17349 << (GET_MODE (XEXP (x, 1)) != DImode))); 17350 return true; 17351 } 17352 /* FALLTHRU */ 17353 17354 case NEG: 17355 if (FLOAT_MODE_P (mode)) 17356 { 17357 *total = ix86_cost->fchs; 17358 return false; 17359 } 17360 /* FALLTHRU */ 17361 17362 case NOT: 17363 if (!TARGET_64BIT && mode == DImode) 17364 *total = ix86_cost->add * 2; 17365 else 17366 *total = ix86_cost->add; 17367 return false; 17368 17369 case COMPARE: 17370 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT 17371 && XEXP (XEXP (x, 0), 1) == const1_rtx 17372 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT 17373 && XEXP (x, 1) == const0_rtx) 17374 { 17375 /* This kind of construct is implemented using test[bwl]. 17376 Treat it as if we had an AND. 
*/ 17377 *total = (ix86_cost->add 17378 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code) 17379 + rtx_cost (const1_rtx, outer_code)); 17380 return true; 17381 } 17382 return false; 17383 17384 case FLOAT_EXTEND: 17385 if (!TARGET_SSE_MATH 17386 || mode == XFmode 17387 || (mode == DFmode && !TARGET_SSE2)) 17388 /* For standard 80387 constants, raise the cost to prevent 17389 compress_float_constant() to generate load from memory. */ 17390 switch (standard_80387_constant_p (XEXP (x, 0))) 17391 { 17392 case -1: 17393 case 0: 17394 *total = 0; 17395 break; 17396 case 1: /* 0.0 */ 17397 *total = 1; 17398 break; 17399 default: 17400 *total = (x86_ext_80387_constants & TUNEMASK 17401 || optimize_size 17402 ? 1 : 0); 17403 } 17404 return false; 17405 17406 case ABS: 17407 if (FLOAT_MODE_P (mode)) 17408 *total = ix86_cost->fabs; 17409 return false; 17410 17411 case SQRT: 17412 if (FLOAT_MODE_P (mode)) 17413 *total = ix86_cost->fsqrt; 17414 return false; 17415 17416 case UNSPEC: 17417 if (XINT (x, 1) == UNSPEC_TP) 17418 *total = 0; 17419 return false; 17420 17421 default: 17422 return false; 17423 } 17424} 17425 17426#if TARGET_MACHO 17427 17428static int current_machopic_label_num; 17429 17430/* Given a symbol name and its associated stub, write out the 17431 definition of the stub. */ 17432 17433void 17434machopic_output_stub (FILE *file, const char *symb, const char *stub) 17435{ 17436 unsigned int length; 17437 char *binder_name, *symbol_name, lazy_ptr_name[32]; 17438 int label = ++current_machopic_label_num; 17439 17440 /* For 64-bit we shouldn't get here. */ 17441 gcc_assert (!TARGET_64BIT); 17442 17443 /* Lose our funky encoding stuff so it doesn't contaminate the stub. 
*/ 17444 symb = (*targetm.strip_name_encoding) (symb); 17445 17446 length = strlen (stub); 17447 binder_name = alloca (length + 32); 17448 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length); 17449 17450 length = strlen (symb); 17451 symbol_name = alloca (length + 32); 17452 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); 17453 17454 sprintf (lazy_ptr_name, "L%d$lz", label); 17455 17456 if (MACHOPIC_PURE) 17457 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]); 17458 else 17459 switch_to_section (darwin_sections[machopic_symbol_stub_section]); 17460 17461 fprintf (file, "%s:\n", stub); 17462 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 17463 17464 if (MACHOPIC_PURE) 17465 { 17466 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label); 17467 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label); 17468 fprintf (file, "\tjmp\t*%%edx\n"); 17469 } 17470 else 17471 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name); 17472 17473 fprintf (file, "%s:\n", binder_name); 17474 17475 if (MACHOPIC_PURE) 17476 { 17477 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label); 17478 fprintf (file, "\tpushl\t%%eax\n"); 17479 } 17480 else 17481 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name); 17482 17483 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n"); 17484 17485 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]); 17486 fprintf (file, "%s:\n", lazy_ptr_name); 17487 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 17488 fprintf (file, "\t.long %s\n", binder_name); 17489} 17490 17491void 17492darwin_x86_file_end (void) 17493{ 17494 darwin_file_end (); 17495 ix86_file_end (); 17496} 17497#endif /* TARGET_MACHO */ 17498 17499/* Order the registers for register allocator. */ 17500 17501void 17502x86_order_regs_for_local_alloc (void) 17503{ 17504 int pos = 0; 17505 int i; 17506 17507 /* First allocate the local general purpose registers. 
*/ 17508 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 17509 if (GENERAL_REGNO_P (i) && call_used_regs[i]) 17510 reg_alloc_order [pos++] = i; 17511 17512 /* Global general purpose registers. */ 17513 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 17514 if (GENERAL_REGNO_P (i) && !call_used_regs[i]) 17515 reg_alloc_order [pos++] = i; 17516 17517 /* x87 registers come first in case we are doing FP math 17518 using them. */ 17519 if (!TARGET_SSE_MATH) 17520 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) 17521 reg_alloc_order [pos++] = i; 17522 17523 /* SSE registers. */ 17524 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) 17525 reg_alloc_order [pos++] = i; 17526 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) 17527 reg_alloc_order [pos++] = i; 17528 17529 /* x87 registers. */ 17530 if (TARGET_SSE_MATH) 17531 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) 17532 reg_alloc_order [pos++] = i; 17533 17534 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) 17535 reg_alloc_order [pos++] = i; 17536 17537 /* Initialize the rest of array as we do not allocate some registers 17538 at all. */ 17539 while (pos < FIRST_PSEUDO_REGISTER) 17540 reg_alloc_order [pos++] = 0; 17541} 17542 17543/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in 17544 struct attribute_spec.handler. 
*/ 17545static tree 17546ix86_handle_struct_attribute (tree *node, tree name, 17547 tree args ATTRIBUTE_UNUSED, 17548 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 17549{ 17550 tree *type = NULL; 17551 if (DECL_P (*node)) 17552 { 17553 if (TREE_CODE (*node) == TYPE_DECL) 17554 type = &TREE_TYPE (*node); 17555 } 17556 else 17557 type = node; 17558 17559 if (!(type && (TREE_CODE (*type) == RECORD_TYPE 17560 || TREE_CODE (*type) == UNION_TYPE))) 17561 { 17562 warning (OPT_Wattributes, "%qs attribute ignored", 17563 IDENTIFIER_POINTER (name)); 17564 *no_add_attrs = true; 17565 } 17566 17567 else if ((is_attribute_p ("ms_struct", name) 17568 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) 17569 || ((is_attribute_p ("gcc_struct", name) 17570 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) 17571 { 17572 warning (OPT_Wattributes, "%qs incompatible attribute ignored", 17573 IDENTIFIER_POINTER (name)); 17574 *no_add_attrs = true; 17575 } 17576 17577 return NULL_TREE; 17578} 17579 17580static bool 17581ix86_ms_bitfield_layout_p (tree record_type) 17582{ 17583 return (TARGET_MS_BITFIELD_LAYOUT && 17584 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) 17585 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)); 17586} 17587 17588/* Returns an expression indicating where the this parameter is 17589 located on entry to the FUNCTION. */ 17590 17591static rtx 17592x86_this_parameter (tree function) 17593{ 17594 tree type = TREE_TYPE (function); 17595 17596 if (TARGET_64BIT) 17597 { 17598 int n = aggregate_value_p (TREE_TYPE (type), type) != 0; 17599 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]); 17600 } 17601 17602 if (ix86_function_regparm (type, function) > 0) 17603 { 17604 tree parm; 17605 17606 parm = TYPE_ARG_TYPES (type); 17607 /* Figure out whether or not the function has a variable number of 17608 arguments. 
*/ 17609 for (; parm; parm = TREE_CHAIN (parm)) 17610 if (TREE_VALUE (parm) == void_type_node) 17611 break; 17612 /* If not, the this parameter is in the first argument. */ 17613 if (parm) 17614 { 17615 int regno = 0; 17616 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) 17617 regno = 2; 17618 return gen_rtx_REG (SImode, regno); 17619 } 17620 } 17621 17622 if (aggregate_value_p (TREE_TYPE (type), type)) 17623 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8)); 17624 else 17625 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4)); 17626} 17627 17628/* Determine whether x86_output_mi_thunk can succeed. */ 17629 17630static bool 17631x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED, 17632 HOST_WIDE_INT delta ATTRIBUTE_UNUSED, 17633 HOST_WIDE_INT vcall_offset, tree function) 17634{ 17635 /* 64-bit can handle anything. */ 17636 if (TARGET_64BIT) 17637 return true; 17638 17639 /* For 32-bit, everything's fine if we have one free register. */ 17640 if (ix86_function_regparm (TREE_TYPE (function), function) < 3) 17641 return true; 17642 17643 /* Need a free register for vcall_offset. */ 17644 if (vcall_offset) 17645 return false; 17646 17647 /* Need a free register for GOT references. */ 17648 if (flag_pic && !(*targetm.binds_local_p) (function)) 17649 return false; 17650 17651 /* Otherwise ok. */ 17652 return true; 17653} 17654 17655/* Output the assembler code for a thunk function. THUNK_DECL is the 17656 declaration for the thunk function itself, FUNCTION is the decl for 17657 the target function. DELTA is an immediate constant offset to be 17658 added to THIS. If VCALL_OFFSET is nonzero, the word at 17659 *(*this + vcall_offset) should be added to THIS. 
*/ 17660 17661static void 17662x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, 17663 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, 17664 HOST_WIDE_INT vcall_offset, tree function) 17665{ 17666 rtx xops[3]; 17667 rtx this = x86_this_parameter (function); 17668 rtx this_reg, tmp; 17669 17670 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well 17671 pull it in now and let DELTA benefit. */ 17672 if (REG_P (this)) 17673 this_reg = this; 17674 else if (vcall_offset) 17675 { 17676 /* Put the this parameter into %eax. */ 17677 xops[0] = this; 17678 xops[1] = this_reg = gen_rtx_REG (Pmode, 0); 17679 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); 17680 } 17681 else 17682 this_reg = NULL_RTX; 17683 17684 /* Adjust the this parameter by a fixed constant. */ 17685 if (delta) 17686 { 17687 xops[0] = GEN_INT (delta); 17688 xops[1] = this_reg ? this_reg : this; 17689 if (TARGET_64BIT) 17690 { 17691 if (!x86_64_general_operand (xops[0], DImode)) 17692 { 17693 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */); 17694 xops[1] = tmp; 17695 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops); 17696 xops[0] = tmp; 17697 xops[1] = this; 17698 } 17699 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); 17700 } 17701 else 17702 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); 17703 } 17704 17705 /* Adjust the this parameter by a value stored in the vtable. 
*/ 17706 if (vcall_offset) 17707 { 17708 if (TARGET_64BIT) 17709 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */); 17710 else 17711 { 17712 int tmp_regno = 2 /* ECX */; 17713 if (lookup_attribute ("fastcall", 17714 TYPE_ATTRIBUTES (TREE_TYPE (function)))) 17715 tmp_regno = 0 /* EAX */; 17716 tmp = gen_rtx_REG (SImode, tmp_regno); 17717 } 17718 17719 xops[0] = gen_rtx_MEM (Pmode, this_reg); 17720 xops[1] = tmp; 17721 if (TARGET_64BIT) 17722 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); 17723 else 17724 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); 17725 17726 /* Adjust the this parameter. */ 17727 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset)); 17728 if (TARGET_64BIT && !memory_operand (xops[0], Pmode)) 17729 { 17730 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */); 17731 xops[0] = GEN_INT (vcall_offset); 17732 xops[1] = tmp2; 17733 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); 17734 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2)); 17735 } 17736 xops[1] = this_reg; 17737 if (TARGET_64BIT) 17738 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); 17739 else 17740 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); 17741 } 17742 17743 /* If necessary, drop THIS back to its stack slot. 
*/ 17744 if (this_reg && this_reg != this) 17745 { 17746 xops[0] = this_reg; 17747 xops[1] = this; 17748 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); 17749 } 17750 17751 xops[0] = XEXP (DECL_RTL (function), 0); 17752 if (TARGET_64BIT) 17753 { 17754 if (!flag_pic || (*targetm.binds_local_p) (function)) 17755 output_asm_insn ("jmp\t%P0", xops); 17756 else 17757 { 17758 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL); 17759 tmp = gen_rtx_CONST (Pmode, tmp); 17760 tmp = gen_rtx_MEM (QImode, tmp); 17761 xops[0] = tmp; 17762 output_asm_insn ("jmp\t%A0", xops); 17763 } 17764 } 17765 else 17766 { 17767 if (!flag_pic || (*targetm.binds_local_p) (function)) 17768 output_asm_insn ("jmp\t%P0", xops); 17769 else 17770#if TARGET_MACHO 17771 if (TARGET_MACHO) 17772 { 17773 rtx sym_ref = XEXP (DECL_RTL (function), 0); 17774 tmp = (gen_rtx_SYMBOL_REF 17775 (Pmode, 17776 machopic_indirection_name (sym_ref, /*stub_p=*/true))); 17777 tmp = gen_rtx_MEM (QImode, tmp); 17778 xops[0] = tmp; 17779 output_asm_insn ("jmp\t%0", xops); 17780 } 17781 else 17782#endif /* TARGET_MACHO */ 17783 { 17784 tmp = gen_rtx_REG (SImode, 2 /* ECX */); 17785 output_set_got (tmp, NULL_RTX); 17786 17787 xops[1] = tmp; 17788 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops); 17789 output_asm_insn ("jmp\t{*}%1", xops); 17790 } 17791 } 17792} 17793 17794static void 17795x86_file_start (void) 17796{ 17797 default_file_start (); 17798#if TARGET_MACHO 17799 darwin_file_start (); 17800#endif 17801 if (X86_FILE_START_VERSION_DIRECTIVE) 17802 fputs ("\t.version\t\"01.01\"\n", asm_out_file); 17803 if (X86_FILE_START_FLTUSED) 17804 fputs ("\t.global\t__fltused\n", asm_out_file); 17805 if (ix86_asm_dialect == ASM_INTEL) 17806 fputs ("\t.intel_syntax\n", asm_out_file); 17807} 17808 17809int 17810x86_field_alignment (tree field, int computed) 17811{ 17812 enum machine_mode mode; 17813 tree type = TREE_TYPE (field); 17814 17815 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) 17816 return 
computed; 17817 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE 17818 ? get_inner_array_type (type) : type); 17819 if (mode == DFmode || mode == DCmode 17820 || GET_MODE_CLASS (mode) == MODE_INT 17821 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) 17822 return MIN (32, computed); 17823 return computed; 17824} 17825 17826/* Output assembler code to FILE to increment profiler label # LABELNO 17827 for profiling a function entry. */ 17828void 17829x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) 17830{ 17831 if (TARGET_64BIT) 17832 if (flag_pic) 17833 { 17834#ifndef NO_PROFILE_COUNTERS 17835 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno); 17836#endif 17837 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME); 17838 } 17839 else 17840 { 17841#ifndef NO_PROFILE_COUNTERS 17842 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno); 17843#endif 17844 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); 17845 } 17846 else if (flag_pic) 17847 { 17848#ifndef NO_PROFILE_COUNTERS 17849 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n", 17850 LPREFIX, labelno, PROFILE_COUNT_REGISTER); 17851#endif 17852 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME); 17853 } 17854 else 17855 { 17856#ifndef NO_PROFILE_COUNTERS 17857 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno, 17858 PROFILE_COUNT_REGISTER); 17859#endif 17860 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); 17861 } 17862} 17863 17864/* We don't have exact information about the insn sizes, but we may assume 17865 quite safely that we are informed about all 1 byte insns and memory 17866 address sizes. This is enough to eliminate unnecessary padding in 17867 99% of cases. */ 17868 17869static int 17870min_insn_size (rtx insn) 17871{ 17872 int l = 0; 17873 17874 if (!INSN_P (insn) || !active_insn_p (insn)) 17875 return 0; 17876 17877 /* Discard alignments we've emit and jump instructions. 
*/ 17878 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE 17879 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) 17880 return 0; 17881 if (GET_CODE (insn) == JUMP_INSN 17882 && (GET_CODE (PATTERN (insn)) == ADDR_VEC 17883 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)) 17884 return 0; 17885 17886 /* Important case - calls are always 5 bytes. 17887 It is common to have many calls in the row. */ 17888 if (GET_CODE (insn) == CALL_INSN 17889 && symbolic_reference_mentioned_p (PATTERN (insn)) 17890 && !SIBLING_CALL_P (insn)) 17891 return 5; 17892 if (get_attr_length (insn) <= 1) 17893 return 1; 17894 17895 /* For normal instructions we may rely on the sizes of addresses 17896 and the presence of symbol to require 4 bytes of encoding. 17897 This is not the case for jumps where references are PC relative. */ 17898 if (GET_CODE (insn) != JUMP_INSN) 17899 { 17900 l = get_attr_length_address (insn); 17901 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn))) 17902 l = 4; 17903 } 17904 if (l) 17905 return 1+l; 17906 else 17907 return 2; 17908} 17909 17910/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte 17911 window. */ 17912 17913static void 17914ix86_avoid_jump_misspredicts (void) 17915{ 17916 rtx insn, start = get_insns (); 17917 int nbytes = 0, njumps = 0; 17918 int isjump = 0; 17919 17920 /* Look for all minimal intervals of instructions containing 4 jumps. 17921 The intervals are bounded by START and INSN. NBYTES is the total 17922 size of instructions in the interval including INSN and not including 17923 START. When the NBYTES is smaller than 16 bytes, it is possible 17924 that the end of START and INSN ends up in the same 16byte page. 17925 17926 The smallest offset in the page INSN can start is the case where START 17927 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN). 17928 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN). 
17929 */ 17930 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 17931 { 17932 17933 nbytes += min_insn_size (insn); 17934 if (dump_file) 17935 fprintf(dump_file, "Insn %i estimated to %i bytes\n", 17936 INSN_UID (insn), min_insn_size (insn)); 17937 if ((GET_CODE (insn) == JUMP_INSN 17938 && GET_CODE (PATTERN (insn)) != ADDR_VEC 17939 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) 17940 || GET_CODE (insn) == CALL_INSN) 17941 njumps++; 17942 else 17943 continue; 17944 17945 while (njumps > 3) 17946 { 17947 start = NEXT_INSN (start); 17948 if ((GET_CODE (start) == JUMP_INSN 17949 && GET_CODE (PATTERN (start)) != ADDR_VEC 17950 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC) 17951 || GET_CODE (start) == CALL_INSN) 17952 njumps--, isjump = 1; 17953 else 17954 isjump = 0; 17955 nbytes -= min_insn_size (start); 17956 } 17957 gcc_assert (njumps >= 0); 17958 if (dump_file) 17959 fprintf (dump_file, "Interval %i to %i has %i bytes\n", 17960 INSN_UID (start), INSN_UID (insn), nbytes); 17961 17962 if (njumps == 3 && isjump && nbytes < 16) 17963 { 17964 int padsize = 15 - nbytes + min_insn_size (insn); 17965 17966 if (dump_file) 17967 fprintf (dump_file, "Padding insn %i by %i bytes!\n", 17968 INSN_UID (insn), padsize); 17969 emit_insn_before (gen_align (GEN_INT (padsize)), insn); 17970 } 17971 } 17972} 17973 17974/* AMD Athlon works faster 17975 when RET is not destination of conditional jump or directly preceded 17976 by other jump instruction. We avoid the penalty by inserting NOP just 17977 before the RET instructions in such cases. 
*/ 17978static void 17979ix86_pad_returns (void) 17980{ 17981 edge e; 17982 edge_iterator ei; 17983 17984 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) 17985 { 17986 basic_block bb = e->src; 17987 rtx ret = BB_END (bb); 17988 rtx prev; 17989 bool replace = false; 17990 17991 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN 17992 || !maybe_hot_bb_p (bb)) 17993 continue; 17994 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev)) 17995 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL) 17996 break; 17997 if (prev && GET_CODE (prev) == CODE_LABEL) 17998 { 17999 edge e; 18000 edge_iterator ei; 18001 18002 FOR_EACH_EDGE (e, ei, bb->preds) 18003 if (EDGE_FREQUENCY (e) && e->src->index >= 0 18004 && !(e->flags & EDGE_FALLTHRU)) 18005 replace = true; 18006 } 18007 if (!replace) 18008 { 18009 prev = prev_active_insn (ret); 18010 if (prev 18011 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev)) 18012 || GET_CODE (prev) == CALL_INSN)) 18013 replace = true; 18014 /* Empty functions get branch mispredict even when the jump destination 18015 is not visible to us. */ 18016 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED) 18017 replace = true; 18018 } 18019 if (replace) 18020 { 18021 emit_insn_before (gen_return_internal_long (), ret); 18022 delete_insn (ret); 18023 } 18024 } 18025} 18026 18027/* Implement machine specific optimizations. We implement padding of returns 18028 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ 18029static void 18030ix86_reorg (void) 18031{ 18032 if (TARGET_PAD_RETURNS && optimize && !optimize_size) 18033 ix86_pad_returns (); 18034 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size) 18035 ix86_avoid_jump_misspredicts (); 18036} 18037 18038/* Return nonzero when QImode register that must be represented via REX prefix 18039 is used. 
*/ 18040bool 18041x86_extended_QIreg_mentioned_p (rtx insn) 18042{ 18043 int i; 18044 extract_insn_cached (insn); 18045 for (i = 0; i < recog_data.n_operands; i++) 18046 if (REG_P (recog_data.operand[i]) 18047 && REGNO (recog_data.operand[i]) >= 4) 18048 return true; 18049 return false; 18050} 18051 18052/* Return nonzero when P points to register encoded via REX prefix. 18053 Called via for_each_rtx. */ 18054static int 18055extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED) 18056{ 18057 unsigned int regno; 18058 if (!REG_P (*p)) 18059 return 0; 18060 regno = REGNO (*p); 18061 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno); 18062} 18063 18064/* Return true when INSN mentions register that must be encoded using REX 18065 prefix. */ 18066bool 18067x86_extended_reg_mentioned_p (rtx insn) 18068{ 18069 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL); 18070} 18071 18072/* Generate an unsigned DImode/SImode to FP conversion. This is the same code 18073 optabs would emit if we didn't have TFmode patterns. 
*/ 18074 18075void 18076x86_emit_floatuns (rtx operands[2]) 18077{ 18078 rtx neglab, donelab, i0, i1, f0, in, out; 18079 enum machine_mode mode, inmode; 18080 18081 inmode = GET_MODE (operands[1]); 18082 gcc_assert (inmode == SImode || inmode == DImode); 18083 18084 out = operands[0]; 18085 in = force_reg (inmode, operands[1]); 18086 mode = GET_MODE (out); 18087 neglab = gen_label_rtx (); 18088 donelab = gen_label_rtx (); 18089 i1 = gen_reg_rtx (Pmode); 18090 f0 = gen_reg_rtx (mode); 18091 18092 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab); 18093 18094 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in))); 18095 emit_jump_insn (gen_jump (donelab)); 18096 emit_barrier (); 18097 18098 emit_label (neglab); 18099 18100 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT); 18101 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT); 18102 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT); 18103 expand_float (f0, i0, 0); 18104 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0))); 18105 18106 emit_label (donelab); 18107} 18108 18109/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector 18110 with all elements equal to VAR. Return true if successful. 
*/ 18111 18112static bool 18113ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, 18114 rtx target, rtx val) 18115{ 18116 enum machine_mode smode, wsmode, wvmode; 18117 rtx x; 18118 18119 switch (mode) 18120 { 18121 case V2SImode: 18122 case V2SFmode: 18123 if (!mmx_ok) 18124 return false; 18125 /* FALLTHRU */ 18126 18127 case V2DFmode: 18128 case V2DImode: 18129 case V4SFmode: 18130 case V4SImode: 18131 val = force_reg (GET_MODE_INNER (mode), val); 18132 x = gen_rtx_VEC_DUPLICATE (mode, val); 18133 emit_insn (gen_rtx_SET (VOIDmode, target, x)); 18134 return true; 18135 18136 case V4HImode: 18137 if (!mmx_ok) 18138 return false; 18139 if (TARGET_SSE || TARGET_3DNOW_A) 18140 { 18141 val = gen_lowpart (SImode, val); 18142 x = gen_rtx_TRUNCATE (HImode, val); 18143 x = gen_rtx_VEC_DUPLICATE (mode, x); 18144 emit_insn (gen_rtx_SET (VOIDmode, target, x)); 18145 return true; 18146 } 18147 else 18148 { 18149 smode = HImode; 18150 wsmode = SImode; 18151 wvmode = V2SImode; 18152 goto widen; 18153 } 18154 18155 case V8QImode: 18156 if (!mmx_ok) 18157 return false; 18158 smode = QImode; 18159 wsmode = HImode; 18160 wvmode = V4HImode; 18161 goto widen; 18162 case V8HImode: 18163 if (TARGET_SSE2) 18164 { 18165 rtx tmp1, tmp2; 18166 /* Extend HImode to SImode using a paradoxical SUBREG. */ 18167 tmp1 = gen_reg_rtx (SImode); 18168 emit_move_insn (tmp1, gen_lowpart (SImode, val)); 18169 /* Insert the SImode value as low element of V4SImode vector. */ 18170 tmp2 = gen_reg_rtx (V4SImode); 18171 tmp1 = gen_rtx_VEC_MERGE (V4SImode, 18172 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1), 18173 CONST0_RTX (V4SImode), 18174 const1_rtx); 18175 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1)); 18176 /* Cast the V4SImode vector back to a V8HImode vector. */ 18177 tmp1 = gen_reg_rtx (V8HImode); 18178 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2)); 18179 /* Duplicate the low short through the whole low SImode word. 
*/ 18180 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1)); 18181 /* Cast the V8HImode vector back to a V4SImode vector. */ 18182 tmp2 = gen_reg_rtx (V4SImode); 18183 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); 18184 /* Replicate the low element of the V4SImode vector. */ 18185 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx)); 18186 /* Cast the V2SImode back to V8HImode, and store in target. */ 18187 emit_move_insn (target, gen_lowpart (V8HImode, tmp2)); 18188 return true; 18189 } 18190 smode = HImode; 18191 wsmode = SImode; 18192 wvmode = V4SImode; 18193 goto widen; 18194 case V16QImode: 18195 if (TARGET_SSE2) 18196 { 18197 rtx tmp1, tmp2; 18198 /* Extend QImode to SImode using a paradoxical SUBREG. */ 18199 tmp1 = gen_reg_rtx (SImode); 18200 emit_move_insn (tmp1, gen_lowpart (SImode, val)); 18201 /* Insert the SImode value as low element of V4SImode vector. */ 18202 tmp2 = gen_reg_rtx (V4SImode); 18203 tmp1 = gen_rtx_VEC_MERGE (V4SImode, 18204 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1), 18205 CONST0_RTX (V4SImode), 18206 const1_rtx); 18207 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1)); 18208 /* Cast the V4SImode vector back to a V16QImode vector. */ 18209 tmp1 = gen_reg_rtx (V16QImode); 18210 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2)); 18211 /* Duplicate the low byte through the whole low SImode word. */ 18212 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); 18213 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); 18214 /* Cast the V16QImode vector back to a V4SImode vector. */ 18215 tmp2 = gen_reg_rtx (V4SImode); 18216 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); 18217 /* Replicate the low element of the V4SImode vector. */ 18218 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx)); 18219 /* Cast the V2SImode back to V16QImode, and store in target. 
*/ 18220 emit_move_insn (target, gen_lowpart (V16QImode, tmp2)); 18221 return true; 18222 } 18223 smode = QImode; 18224 wsmode = HImode; 18225 wvmode = V8HImode; 18226 goto widen; 18227 widen: 18228 /* Replicate the value once into the next wider mode and recurse. */ 18229 val = convert_modes (wsmode, smode, val, true); 18230 x = expand_simple_binop (wsmode, ASHIFT, val, 18231 GEN_INT (GET_MODE_BITSIZE (smode)), 18232 NULL_RTX, 1, OPTAB_LIB_WIDEN); 18233 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN); 18234 18235 x = gen_reg_rtx (wvmode); 18236 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val)) 18237 gcc_unreachable (); 18238 emit_move_insn (target, gen_lowpart (mode, x)); 18239 return true; 18240 18241 default: 18242 return false; 18243 } 18244} 18245 18246/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector 18247 whose ONE_VAR element is VAR, and other elements are zero. Return true 18248 if successful. */ 18249 18250static bool 18251ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode, 18252 rtx target, rtx var, int one_var) 18253{ 18254 enum machine_mode vsimode; 18255 rtx new_target; 18256 rtx x, tmp; 18257 18258 switch (mode) 18259 { 18260 case V2SFmode: 18261 case V2SImode: 18262 if (!mmx_ok) 18263 return false; 18264 /* FALLTHRU */ 18265 18266 case V2DFmode: 18267 case V2DImode: 18268 if (one_var != 0) 18269 return false; 18270 var = force_reg (GET_MODE_INNER (mode), var); 18271 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode))); 18272 emit_insn (gen_rtx_SET (VOIDmode, target, x)); 18273 return true; 18274 18275 case V4SFmode: 18276 case V4SImode: 18277 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER) 18278 new_target = gen_reg_rtx (mode); 18279 else 18280 new_target = target; 18281 var = force_reg (GET_MODE_INNER (mode), var); 18282 x = gen_rtx_VEC_DUPLICATE (mode, var); 18283 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx); 18284 
emit_insn (gen_rtx_SET (VOIDmode, new_target, x)); 18285 if (one_var != 0) 18286 { 18287 /* We need to shuffle the value to the correct position, so 18288 create a new pseudo to store the intermediate result. */ 18289 18290 /* With SSE2, we can use the integer shuffle insns. */ 18291 if (mode != V4SFmode && TARGET_SSE2) 18292 { 18293 emit_insn (gen_sse2_pshufd_1 (new_target, new_target, 18294 GEN_INT (1), 18295 GEN_INT (one_var == 1 ? 0 : 1), 18296 GEN_INT (one_var == 2 ? 0 : 1), 18297 GEN_INT (one_var == 3 ? 0 : 1))); 18298 if (target != new_target) 18299 emit_move_insn (target, new_target); 18300 return true; 18301 } 18302 18303 /* Otherwise convert the intermediate result to V4SFmode and 18304 use the SSE1 shuffle instructions. */ 18305 if (mode != V4SFmode) 18306 { 18307 tmp = gen_reg_rtx (V4SFmode); 18308 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target)); 18309 } 18310 else 18311 tmp = new_target; 18312 18313 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp, 18314 GEN_INT (1), 18315 GEN_INT (one_var == 1 ? 0 : 1), 18316 GEN_INT (one_var == 2 ? 0+4 : 1+4), 18317 GEN_INT (one_var == 3 ? 0+4 : 1+4))); 18318 18319 if (mode != V4SFmode) 18320 emit_move_insn (target, gen_lowpart (V4SImode, tmp)); 18321 else if (tmp != target) 18322 emit_move_insn (target, tmp); 18323 } 18324 else if (target != new_target) 18325 emit_move_insn (target, new_target); 18326 return true; 18327 18328 case V8HImode: 18329 case V16QImode: 18330 vsimode = V4SImode; 18331 goto widen; 18332 case V4HImode: 18333 case V8QImode: 18334 if (!mmx_ok) 18335 return false; 18336 vsimode = V2SImode; 18337 goto widen; 18338 widen: 18339 if (one_var != 0) 18340 return false; 18341 18342 /* Zero extend the variable element to SImode and recurse. 
*/ 18343 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true); 18344 18345 x = gen_reg_rtx (vsimode); 18346 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x, 18347 var, one_var)) 18348 gcc_unreachable (); 18349 18350 emit_move_insn (target, gen_lowpart (mode, x)); 18351 return true; 18352 18353 default: 18354 return false; 18355 } 18356} 18357 18358/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector 18359 consisting of the values in VALS. It is known that all elements 18360 except ONE_VAR are constants. Return true if successful. */ 18361 18362static bool 18363ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode, 18364 rtx target, rtx vals, int one_var) 18365{ 18366 rtx var = XVECEXP (vals, 0, one_var); 18367 enum machine_mode wmode; 18368 rtx const_vec, x; 18369 18370 const_vec = copy_rtx (vals); 18371 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode)); 18372 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0)); 18373 18374 switch (mode) 18375 { 18376 case V2DFmode: 18377 case V2DImode: 18378 case V2SFmode: 18379 case V2SImode: 18380 /* For the two element vectors, it's just as easy to use 18381 the general case. */ 18382 return false; 18383 18384 case V4SFmode: 18385 case V4SImode: 18386 case V8HImode: 18387 case V4HImode: 18388 break; 18389 18390 case V16QImode: 18391 wmode = V8HImode; 18392 goto widen; 18393 case V8QImode: 18394 wmode = V4HImode; 18395 goto widen; 18396 widen: 18397 /* There's no way to set one QImode entry easily. Combine 18398 the variable value with its adjacent constant value, and 18399 promote to an HImode set. 
*/ 18400 x = XVECEXP (vals, 0, one_var ^ 1); 18401 if (one_var & 1) 18402 { 18403 var = convert_modes (HImode, QImode, var, true); 18404 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8), 18405 NULL_RTX, 1, OPTAB_LIB_WIDEN); 18406 x = GEN_INT (INTVAL (x) & 0xff); 18407 } 18408 else 18409 { 18410 var = convert_modes (HImode, QImode, var, true); 18411 x = gen_int_mode (INTVAL (x) << 8, HImode); 18412 } 18413 if (x != const0_rtx) 18414 var = expand_simple_binop (HImode, IOR, var, x, var, 18415 1, OPTAB_LIB_WIDEN); 18416 18417 x = gen_reg_rtx (wmode); 18418 emit_move_insn (x, gen_lowpart (wmode, const_vec)); 18419 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1); 18420 18421 emit_move_insn (target, gen_lowpart (mode, x)); 18422 return true; 18423 18424 default: 18425 return false; 18426 } 18427 18428 emit_move_insn (target, const_vec); 18429 ix86_expand_vector_set (mmx_ok, target, var, one_var); 18430 return true; 18431} 18432 18433/* A subroutine of ix86_expand_vector_init. Handle the most general case: 18434 all values variable, and none identical. */ 18435 18436static void 18437ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode, 18438 rtx target, rtx vals) 18439{ 18440 enum machine_mode half_mode = GET_MODE_INNER (mode); 18441 rtx op0 = NULL, op1 = NULL; 18442 bool use_vec_concat = false; 18443 18444 switch (mode) 18445 { 18446 case V2SFmode: 18447 case V2SImode: 18448 if (!mmx_ok && !TARGET_SSE) 18449 break; 18450 /* FALLTHRU */ 18451 18452 case V2DFmode: 18453 case V2DImode: 18454 /* For the two element vectors, we always implement VEC_CONCAT. */ 18455 op0 = XVECEXP (vals, 0, 0); 18456 op1 = XVECEXP (vals, 0, 1); 18457 use_vec_concat = true; 18458 break; 18459 18460 case V4SFmode: 18461 half_mode = V2SFmode; 18462 goto half; 18463 case V4SImode: 18464 half_mode = V2SImode; 18465 goto half; 18466 half: 18467 { 18468 rtvec v; 18469 18470 /* For V4SF and V4SI, we implement a concat of two V2 vectors. 
18471 Recurse to load the two halves. */ 18472 18473 op0 = gen_reg_rtx (half_mode); 18474 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1)); 18475 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v)); 18476 18477 op1 = gen_reg_rtx (half_mode); 18478 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3)); 18479 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v)); 18480 18481 use_vec_concat = true; 18482 } 18483 break; 18484 18485 case V8HImode: 18486 case V16QImode: 18487 case V4HImode: 18488 case V8QImode: 18489 break; 18490 18491 default: 18492 gcc_unreachable (); 18493 } 18494 18495 if (use_vec_concat) 18496 { 18497 if (!register_operand (op0, half_mode)) 18498 op0 = force_reg (half_mode, op0); 18499 if (!register_operand (op1, half_mode)) 18500 op1 = force_reg (half_mode, op1); 18501 18502 emit_insn (gen_rtx_SET (VOIDmode, target, 18503 gen_rtx_VEC_CONCAT (mode, op0, op1))); 18504 } 18505 else 18506 { 18507 int i, j, n_elts, n_words, n_elt_per_word; 18508 enum machine_mode inner_mode; 18509 rtx words[4], shift; 18510 18511 inner_mode = GET_MODE_INNER (mode); 18512 n_elts = GET_MODE_NUNITS (mode); 18513 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; 18514 n_elt_per_word = n_elts / n_words; 18515 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode)); 18516 18517 for (i = 0; i < n_words; ++i) 18518 { 18519 rtx word = NULL_RTX; 18520 18521 for (j = 0; j < n_elt_per_word; ++j) 18522 { 18523 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1); 18524 elt = convert_modes (word_mode, inner_mode, elt, true); 18525 18526 if (j == 0) 18527 word = elt; 18528 else 18529 { 18530 word = expand_simple_binop (word_mode, ASHIFT, word, shift, 18531 word, 1, OPTAB_LIB_WIDEN); 18532 word = expand_simple_binop (word_mode, IOR, word, elt, 18533 word, 1, OPTAB_LIB_WIDEN); 18534 } 18535 } 18536 18537 words[i] = word; 18538 } 18539 18540 if (n_words == 1) 18541 emit_move_insn (target, gen_lowpart (mode, words[0])); 18542 else if 
(n_words == 2) 18543 { 18544 rtx tmp = gen_reg_rtx (mode); 18545 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp)); 18546 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]); 18547 emit_move_insn (gen_highpart (word_mode, tmp), words[1]); 18548 emit_move_insn (target, tmp); 18549 } 18550 else if (n_words == 4) 18551 { 18552 rtx tmp = gen_reg_rtx (V4SImode); 18553 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words)); 18554 ix86_expand_vector_init_general (false, V4SImode, tmp, vals); 18555 emit_move_insn (target, gen_lowpart (mode, tmp)); 18556 } 18557 else 18558 gcc_unreachable (); 18559 } 18560} 18561 18562/* Initialize vector TARGET via VALS. Suppress the use of MMX 18563 instructions unless MMX_OK is true. */ 18564 18565void 18566ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) 18567{ 18568 enum machine_mode mode = GET_MODE (target); 18569 enum machine_mode inner_mode = GET_MODE_INNER (mode); 18570 int n_elts = GET_MODE_NUNITS (mode); 18571 int n_var = 0, one_var = -1; 18572 bool all_same = true, all_const_zero = true; 18573 int i; 18574 rtx x; 18575 18576 for (i = 0; i < n_elts; ++i) 18577 { 18578 x = XVECEXP (vals, 0, i); 18579 if (!CONSTANT_P (x)) 18580 n_var++, one_var = i; 18581 else if (x != CONST0_RTX (inner_mode)) 18582 all_const_zero = false; 18583 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) 18584 all_same = false; 18585 } 18586 18587 /* Constants are best loaded from the constant pool. */ 18588 if (n_var == 0) 18589 { 18590 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); 18591 return; 18592 } 18593 18594 /* If all values are identical, broadcast the value. */ 18595 if (all_same 18596 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target, 18597 XVECEXP (vals, 0, 0))) 18598 return; 18599 18600 /* Values where only one field is non-constant are best loaded from 18601 the pool and overwritten via move later. 
*/ 18602 if (n_var == 1) 18603 { 18604 if (all_const_zero 18605 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target, 18606 XVECEXP (vals, 0, one_var), 18607 one_var)) 18608 return; 18609 18610 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var)) 18611 return; 18612 } 18613 18614 ix86_expand_vector_init_general (mmx_ok, mode, target, vals); 18615} 18616 18617void 18618ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) 18619{ 18620 enum machine_mode mode = GET_MODE (target); 18621 enum machine_mode inner_mode = GET_MODE_INNER (mode); 18622 bool use_vec_merge = false; 18623 rtx tmp; 18624 18625 switch (mode) 18626 { 18627 case V2SFmode: 18628 case V2SImode: 18629 if (mmx_ok) 18630 { 18631 tmp = gen_reg_rtx (GET_MODE_INNER (mode)); 18632 ix86_expand_vector_extract (true, tmp, target, 1 - elt); 18633 if (elt == 0) 18634 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val); 18635 else 18636 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp); 18637 emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); 18638 return; 18639 } 18640 break; 18641 18642 case V2DFmode: 18643 case V2DImode: 18644 { 18645 rtx op0, op1; 18646 18647 /* For the two element vectors, we implement a VEC_CONCAT with 18648 the extraction of the other element. 
*/ 18649 18650 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt))); 18651 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp); 18652 18653 if (elt == 0) 18654 op0 = val, op1 = tmp; 18655 else 18656 op0 = tmp, op1 = val; 18657 18658 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1); 18659 emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); 18660 } 18661 return; 18662 18663 case V4SFmode: 18664 switch (elt) 18665 { 18666 case 0: 18667 use_vec_merge = true; 18668 break; 18669 18670 case 1: 18671 /* tmp = target = A B C D */ 18672 tmp = copy_to_reg (target); 18673 /* target = A A B B */ 18674 emit_insn (gen_sse_unpcklps (target, target, target)); 18675 /* target = X A B B */ 18676 ix86_expand_vector_set (false, target, val, 0); 18677 /* target = A X C D */ 18678 emit_insn (gen_sse_shufps_1 (target, target, tmp, 18679 GEN_INT (1), GEN_INT (0), 18680 GEN_INT (2+4), GEN_INT (3+4))); 18681 return; 18682 18683 case 2: 18684 /* tmp = target = A B C D */ 18685 tmp = copy_to_reg (target); 18686 /* tmp = X B C D */ 18687 ix86_expand_vector_set (false, tmp, val, 0); 18688 /* target = A B X D */ 18689 emit_insn (gen_sse_shufps_1 (target, target, tmp, 18690 GEN_INT (0), GEN_INT (1), 18691 GEN_INT (0+4), GEN_INT (3+4))); 18692 return; 18693 18694 case 3: 18695 /* tmp = target = A B C D */ 18696 tmp = copy_to_reg (target); 18697 /* tmp = X B C D */ 18698 ix86_expand_vector_set (false, tmp, val, 0); 18699 /* target = A B X D */ 18700 emit_insn (gen_sse_shufps_1 (target, target, tmp, 18701 GEN_INT (0), GEN_INT (1), 18702 GEN_INT (2+4), GEN_INT (0+4))); 18703 return; 18704 18705 default: 18706 gcc_unreachable (); 18707 } 18708 break; 18709 18710 case V4SImode: 18711 /* Element 0 handled by vec_merge below. */ 18712 if (elt == 0) 18713 { 18714 use_vec_merge = true; 18715 break; 18716 } 18717 18718 if (TARGET_SSE2) 18719 { 18720 /* With SSE2, use integer shuffles to swap element 0 and ELT, 18721 store into element 0, then shuffle them back. 
*/ 18722 18723 rtx order[4]; 18724 18725 order[0] = GEN_INT (elt); 18726 order[1] = const1_rtx; 18727 order[2] = const2_rtx; 18728 order[3] = GEN_INT (3); 18729 order[elt] = const0_rtx; 18730 18731 emit_insn (gen_sse2_pshufd_1 (target, target, order[0], 18732 order[1], order[2], order[3])); 18733 18734 ix86_expand_vector_set (false, target, val, 0); 18735 18736 emit_insn (gen_sse2_pshufd_1 (target, target, order[0], 18737 order[1], order[2], order[3])); 18738 } 18739 else 18740 { 18741 /* For SSE1, we have to reuse the V4SF code. */ 18742 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target), 18743 gen_lowpart (SFmode, val), elt); 18744 } 18745 return; 18746 18747 case V8HImode: 18748 use_vec_merge = TARGET_SSE2; 18749 break; 18750 case V4HImode: 18751 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); 18752 break; 18753 18754 case V16QImode: 18755 case V8QImode: 18756 default: 18757 break; 18758 } 18759 18760 if (use_vec_merge) 18761 { 18762 tmp = gen_rtx_VEC_DUPLICATE (mode, val); 18763 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt)); 18764 emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); 18765 } 18766 else 18767 { 18768 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false); 18769 18770 emit_move_insn (mem, target); 18771 18772 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode)); 18773 emit_move_insn (tmp, val); 18774 18775 emit_move_insn (target, mem); 18776 } 18777} 18778 18779void 18780ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) 18781{ 18782 enum machine_mode mode = GET_MODE (vec); 18783 enum machine_mode inner_mode = GET_MODE_INNER (mode); 18784 bool use_vec_extr = false; 18785 rtx tmp; 18786 18787 switch (mode) 18788 { 18789 case V2SImode: 18790 case V2SFmode: 18791 if (!mmx_ok) 18792 break; 18793 /* FALLTHRU */ 18794 18795 case V2DFmode: 18796 case V2DImode: 18797 use_vec_extr = true; 18798 break; 18799 18800 case V4SFmode: 18801 switch (elt) 18802 { 18803 case 0: 18804 
tmp = vec; 18805 break; 18806 18807 case 1: 18808 case 3: 18809 tmp = gen_reg_rtx (mode); 18810 emit_insn (gen_sse_shufps_1 (tmp, vec, vec, 18811 GEN_INT (elt), GEN_INT (elt), 18812 GEN_INT (elt+4), GEN_INT (elt+4))); 18813 break; 18814 18815 case 2: 18816 tmp = gen_reg_rtx (mode); 18817 emit_insn (gen_sse_unpckhps (tmp, vec, vec)); 18818 break; 18819 18820 default: 18821 gcc_unreachable (); 18822 } 18823 vec = tmp; 18824 use_vec_extr = true; 18825 elt = 0; 18826 break; 18827 18828 case V4SImode: 18829 if (TARGET_SSE2) 18830 { 18831 switch (elt) 18832 { 18833 case 0: 18834 tmp = vec; 18835 break; 18836 18837 case 1: 18838 case 3: 18839 tmp = gen_reg_rtx (mode); 18840 emit_insn (gen_sse2_pshufd_1 (tmp, vec, 18841 GEN_INT (elt), GEN_INT (elt), 18842 GEN_INT (elt), GEN_INT (elt))); 18843 break; 18844 18845 case 2: 18846 tmp = gen_reg_rtx (mode); 18847 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec)); 18848 break; 18849 18850 default: 18851 gcc_unreachable (); 18852 } 18853 vec = tmp; 18854 use_vec_extr = true; 18855 elt = 0; 18856 } 18857 else 18858 { 18859 /* For SSE1, we have to reuse the V4SF code. */ 18860 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target), 18861 gen_lowpart (V4SFmode, vec), elt); 18862 return; 18863 } 18864 break; 18865 18866 case V8HImode: 18867 use_vec_extr = TARGET_SSE2; 18868 break; 18869 case V4HImode: 18870 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); 18871 break; 18872 18873 case V16QImode: 18874 case V8QImode: 18875 /* ??? Could extract the appropriate HImode element and shift. */ 18876 default: 18877 break; 18878 } 18879 18880 if (use_vec_extr) 18881 { 18882 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt))); 18883 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp); 18884 18885 /* Let the rtl optimizers know about the zero extension performed. 
*/ 18886 if (inner_mode == HImode) 18887 { 18888 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp); 18889 target = gen_lowpart (SImode, target); 18890 } 18891 18892 emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); 18893 } 18894 else 18895 { 18896 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false); 18897 18898 emit_move_insn (mem, vec); 18899 18900 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode)); 18901 emit_move_insn (target, tmp); 18902 } 18903} 18904 18905/* Expand a vector reduction on V4SFmode for SSE1. FN is the binary 18906 pattern to reduce; DEST is the destination; IN is the input vector. */ 18907 18908void 18909ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) 18910{ 18911 rtx tmp1, tmp2, tmp3; 18912 18913 tmp1 = gen_reg_rtx (V4SFmode); 18914 tmp2 = gen_reg_rtx (V4SFmode); 18915 tmp3 = gen_reg_rtx (V4SFmode); 18916 18917 emit_insn (gen_sse_movhlps (tmp1, in, in)); 18918 emit_insn (fn (tmp2, tmp1, in)); 18919 18920 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2, 18921 GEN_INT (1), GEN_INT (1), 18922 GEN_INT (1+4), GEN_INT (1+4))); 18923 emit_insn (fn (dest, tmp2, tmp3)); 18924} 18925 18926/* Target hook for scalar_mode_supported_p. */ 18927static bool 18928ix86_scalar_mode_supported_p (enum machine_mode mode) 18929{ 18930 if (DECIMAL_FLOAT_MODE_P (mode)) 18931 return true; 18932 else 18933 return default_scalar_mode_supported_p (mode); 18934} 18935 18936/* Implements target hook vector_mode_supported_p. */ 18937static bool 18938ix86_vector_mode_supported_p (enum machine_mode mode) 18939{ 18940 if (TARGET_SSE && VALID_SSE_REG_MODE (mode)) 18941 return true; 18942 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) 18943 return true; 18944 if (TARGET_MMX && VALID_MMX_REG_MODE (mode)) 18945 return true; 18946 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode)) 18947 return true; 18948 return false; 18949} 18950 18951/* Worker function for TARGET_MD_ASM_CLOBBERS. 
18952 18953 We do this in the new i386 backend to maintain source compatibility 18954 with the old cc0-based compiler. */ 18955 18956static tree 18957ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED, 18958 tree inputs ATTRIBUTE_UNUSED, 18959 tree clobbers) 18960{ 18961 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"), 18962 clobbers); 18963 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"), 18964 clobbers); 18965 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"), 18966 clobbers); 18967 return clobbers; 18968} 18969 18970/* Return true if this goes in small data/bss. */ 18971 18972static bool 18973ix86_in_large_data_p (tree exp) 18974{ 18975 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC) 18976 return false; 18977 18978 /* Functions are never large data. */ 18979 if (TREE_CODE (exp) == FUNCTION_DECL) 18980 return false; 18981 18982 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) 18983 { 18984 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); 18985 if (strcmp (section, ".ldata") == 0 18986 || strcmp (section, ".lbss") == 0) 18987 return true; 18988 return false; 18989 } 18990 else 18991 { 18992 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); 18993 18994 /* If this is an incomplete type with size 0, then we can't put it 18995 in data because it might be too big when completed. */ 18996 if (!size || size > ix86_section_threshold) 18997 return true; 18998 } 18999 19000 return false; 19001} 19002static void 19003ix86_encode_section_info (tree decl, rtx rtl, int first) 19004{ 19005 default_encode_section_info (decl, rtl, first); 19006 19007 if (TREE_CODE (decl) == VAR_DECL 19008 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)) 19009 && ix86_in_large_data_p (decl)) 19010 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR; 19011} 19012 19013/* Worker function for REVERSE_CONDITION. 
*/ 19014 19015enum rtx_code 19016ix86_reverse_condition (enum rtx_code code, enum machine_mode mode) 19017{ 19018 return (mode != CCFPmode && mode != CCFPUmode 19019 ? reverse_condition (code) 19020 : reverse_condition_maybe_unordered (code)); 19021} 19022 19023/* Output code to perform an x87 FP register move, from OPERANDS[1] 19024 to OPERANDS[0]. */ 19025 19026const char * 19027output_387_reg_move (rtx insn, rtx *operands) 19028{ 19029 if (REG_P (operands[1]) 19030 && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) 19031 { 19032 if (REGNO (operands[0]) == FIRST_STACK_REG) 19033 return output_387_ffreep (operands, 0); 19034 return "fstp\t%y0"; 19035 } 19036 if (STACK_TOP_P (operands[0])) 19037 return "fld%z1\t%y1"; 19038 return "fst\t%y0"; 19039} 19040 19041/* Output code to perform a conditional jump to LABEL, if C2 flag in 19042 FP status register is set. */ 19043 19044void 19045ix86_emit_fp_unordered_jump (rtx label) 19046{ 19047 rtx reg = gen_reg_rtx (HImode); 19048 rtx temp; 19049 19050 emit_insn (gen_x86_fnstsw_1 (reg)); 19051 19052 if (TARGET_USE_SAHF) 19053 { 19054 emit_insn (gen_x86_sahf_1 (reg)); 19055 19056 temp = gen_rtx_REG (CCmode, FLAGS_REG); 19057 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx); 19058 } 19059 else 19060 { 19061 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04))); 19062 19063 temp = gen_rtx_REG (CCNOmode, FLAGS_REG); 19064 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx); 19065 } 19066 19067 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp, 19068 gen_rtx_LABEL_REF (VOIDmode, label), 19069 pc_rtx); 19070 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp); 19071 emit_jump_insn (temp); 19072} 19073 19074/* Output code to perform a log1p XFmode calculation. 
*/ 19075 19076void ix86_emit_i387_log1p (rtx op0, rtx op1) 19077{ 19078 rtx label1 = gen_label_rtx (); 19079 rtx label2 = gen_label_rtx (); 19080 19081 rtx tmp = gen_reg_rtx (XFmode); 19082 rtx tmp2 = gen_reg_rtx (XFmode); 19083 19084 emit_insn (gen_absxf2 (tmp, op1)); 19085 emit_insn (gen_cmpxf (tmp, 19086 CONST_DOUBLE_FROM_REAL_VALUE ( 19087 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode), 19088 XFmode))); 19089 emit_jump_insn (gen_bge (label1)); 19090 19091 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */ 19092 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1)); 19093 emit_jump (label2); 19094 19095 emit_label (label1); 19096 emit_move_insn (tmp, CONST1_RTX (XFmode)); 19097 emit_insn (gen_addxf3 (tmp, op1, tmp)); 19098 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */ 19099 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp)); 19100 19101 emit_label (label2); 19102} 19103 19104/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */ 19105 19106static void 19107i386_solaris_elf_named_section (const char *name, unsigned int flags, 19108 tree decl) 19109{ 19110 /* With Binutils 2.15, the "@unwind" marker must be specified on 19111 every occurrence of the ".eh_frame" section, not just the first 19112 one. */ 19113 if (TARGET_64BIT 19114 && strcmp (name, ".eh_frame") == 0) 19115 { 19116 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name, 19117 flags & SECTION_WRITE ? "aw" : "a"); 19118 return; 19119 } 19120 default_elf_asm_named_section (name, flags, decl); 19121} 19122 19123/* Return the mangling of TYPE if it is an extended fundamental type. */ 19124 19125static const char * 19126ix86_mangle_fundamental_type (tree type) 19127{ 19128 switch (TYPE_MODE (type)) 19129 { 19130 case TFmode: 19131 /* __float128 is "g". */ 19132 return "g"; 19133 case XFmode: 19134 /* "long double" or __float80 is "e". 
*/ 19135 return "e"; 19136 default: 19137 return NULL; 19138 } 19139} 19140 19141/* For 32-bit code we can save PIC register setup by using 19142 __stack_chk_fail_local hidden function instead of calling 19143 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC 19144 register, so it is better to call __stack_chk_fail directly. */ 19145 19146static tree 19147ix86_stack_protect_fail (void) 19148{ 19149 return TARGET_64BIT 19150 ? default_external_stack_protect_fail () 19151 : default_hidden_stack_protect_fail (); 19152} 19153 19154/* Select a format to encode pointers in exception handling data. CODE 19155 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is 19156 true if the symbol may be affected by dynamic relocations. 19157 19158 ??? All x86 object file formats are capable of representing this. 19159 After all, the relocation needed is the same as for the call insn. 19160 Whether or not a particular assembler allows us to enter such, I 19161 guess we'll have to see. */ 19162int 19163asm_preferred_eh_data_format (int code, int global) 19164{ 19165 if (flag_pic) 19166 { 19167 int type = DW_EH_PE_sdata8; 19168 if (!TARGET_64BIT 19169 || ix86_cmodel == CM_SMALL_PIC 19170 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))) 19171 type = DW_EH_PE_sdata4; 19172 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; 19173 } 19174 if (ix86_cmodel == CM_SMALL 19175 || (ix86_cmodel == CM_MEDIUM && code)) 19176 return DW_EH_PE_udata4; 19177 return DW_EH_PE_absptr; 19178} 19179 19180#include "gt-i386.h" 19181