i386.c revision 225736
1/* Subroutines used for code generation on IA-32. 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify 8it under the terms of the GNU General Public License as published by 9the Free Software Foundation; either version 2, or (at your option) 10any later version. 11 12GCC is distributed in the hope that it will be useful, 13but WITHOUT ANY WARRANTY; without even the implied warranty of 14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15GNU General Public License for more details. 16 17You should have received a copy of the GNU General Public License 18along with GCC; see the file COPYING. If not, write to 19the Free Software Foundation, 51 Franklin Street, Fifth Floor, 20Boston, MA 02110-1301, USA. */ 21 22/* $FreeBSD: stable/9/contrib/gcc/config/i386/i386.c 219711 2011-03-17 09:44:33Z mm $ */ 23 24#include "config.h" 25#include "system.h" 26#include "coretypes.h" 27#include "tm.h" 28#include "rtl.h" 29#include "tree.h" 30#include "tm_p.h" 31#include "regs.h" 32#include "hard-reg-set.h" 33#include "real.h" 34#include "insn-config.h" 35#include "conditions.h" 36#include "output.h" 37#include "insn-codes.h" 38#include "insn-attr.h" 39#include "flags.h" 40#include "except.h" 41#include "function.h" 42#include "recog.h" 43#include "expr.h" 44#include "optabs.h" 45#include "toplev.h" 46#include "basic-block.h" 47#include "ggc.h" 48#include "target.h" 49#include "target-def.h" 50#include "langhooks.h" 51#include "cgraph.h" 52#include "tree-gimple.h" 53#include "dwarf2.h" 54#include "tm-constrs.h" 55 56#ifndef CHECK_STACK_LIMIT 57#define CHECK_STACK_LIMIT (-1) 58#endif 59 60/* Return index of given mode in mult and division cost tables. */ 61#define MODE_INDEX(mode) \ 62 ((mode) == QImode ? 0 \ 63 : (mode) == HImode ? 1 \ 64 : (mode) == SImode ? 2 \ 65 : (mode) == DImode ? 
3 \ 66 : 4) 67 68/* Processor costs (relative to an add) */ 69/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */ 70#define COSTS_N_BYTES(N) ((N) * 2) 71 72static const 73struct processor_costs size_cost = { /* costs for tuning for size */ 74 COSTS_N_BYTES (2), /* cost of an add instruction */ 75 COSTS_N_BYTES (3), /* cost of a lea instruction */ 76 COSTS_N_BYTES (2), /* variable shift costs */ 77 COSTS_N_BYTES (3), /* constant shift costs */ 78 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */ 79 COSTS_N_BYTES (3), /* HI */ 80 COSTS_N_BYTES (3), /* SI */ 81 COSTS_N_BYTES (3), /* DI */ 82 COSTS_N_BYTES (5)}, /* other */ 83 0, /* cost of multiply per each bit set */ 84 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */ 85 COSTS_N_BYTES (3), /* HI */ 86 COSTS_N_BYTES (3), /* SI */ 87 COSTS_N_BYTES (3), /* DI */ 88 COSTS_N_BYTES (5)}, /* other */ 89 COSTS_N_BYTES (3), /* cost of movsx */ 90 COSTS_N_BYTES (3), /* cost of movzx */ 91 0, /* "large" insn */ 92 2, /* MOVE_RATIO */ 93 2, /* cost for loading QImode using movzbl */ 94 {2, 2, 2}, /* cost of loading integer registers 95 in QImode, HImode and SImode. 96 Relative to reg-reg move (2). 
*/ 97 {2, 2, 2}, /* cost of storing integer registers */ 98 2, /* cost of reg,reg fld/fst */ 99 {2, 2, 2}, /* cost of loading fp registers 100 in SFmode, DFmode and XFmode */ 101 {2, 2, 2}, /* cost of storing fp registers 102 in SFmode, DFmode and XFmode */ 103 3, /* cost of moving MMX register */ 104 {3, 3}, /* cost of loading MMX registers 105 in SImode and DImode */ 106 {3, 3}, /* cost of storing MMX registers 107 in SImode and DImode */ 108 3, /* cost of moving SSE register */ 109 {3, 3, 3}, /* cost of loading SSE registers 110 in SImode, DImode and TImode */ 111 {3, 3, 3}, /* cost of storing SSE registers 112 in SImode, DImode and TImode */ 113 3, /* MMX or SSE register to integer */ 114 0, /* size of prefetch block */ 115 0, /* number of parallel prefetches */ 116 2, /* Branch cost */ 117 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */ 118 COSTS_N_BYTES (2), /* cost of FMUL instruction. */ 119 COSTS_N_BYTES (2), /* cost of FDIV instruction. */ 120 COSTS_N_BYTES (2), /* cost of FABS instruction. */ 121 COSTS_N_BYTES (2), /* cost of FCHS instruction. */ 122 COSTS_N_BYTES (2), /* cost of FSQRT instruction. 
*/ 123}; 124 125/* Processor costs (relative to an add) */ 126static const 127struct processor_costs i386_cost = { /* 386 specific costs */ 128 COSTS_N_INSNS (1), /* cost of an add instruction */ 129 COSTS_N_INSNS (1), /* cost of a lea instruction */ 130 COSTS_N_INSNS (3), /* variable shift costs */ 131 COSTS_N_INSNS (2), /* constant shift costs */ 132 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */ 133 COSTS_N_INSNS (6), /* HI */ 134 COSTS_N_INSNS (6), /* SI */ 135 COSTS_N_INSNS (6), /* DI */ 136 COSTS_N_INSNS (6)}, /* other */ 137 COSTS_N_INSNS (1), /* cost of multiply per each bit set */ 138 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */ 139 COSTS_N_INSNS (23), /* HI */ 140 COSTS_N_INSNS (23), /* SI */ 141 COSTS_N_INSNS (23), /* DI */ 142 COSTS_N_INSNS (23)}, /* other */ 143 COSTS_N_INSNS (3), /* cost of movsx */ 144 COSTS_N_INSNS (2), /* cost of movzx */ 145 15, /* "large" insn */ 146 3, /* MOVE_RATIO */ 147 4, /* cost for loading QImode using movzbl */ 148 {2, 4, 2}, /* cost of loading integer registers 149 in QImode, HImode and SImode. 150 Relative to reg-reg move (2). */ 151 {2, 4, 2}, /* cost of storing integer registers */ 152 2, /* cost of reg,reg fld/fst */ 153 {8, 8, 8}, /* cost of loading fp registers 154 in SFmode, DFmode and XFmode */ 155 {8, 8, 8}, /* cost of storing fp registers 156 in SFmode, DFmode and XFmode */ 157 2, /* cost of moving MMX register */ 158 {4, 8}, /* cost of loading MMX registers 159 in SImode and DImode */ 160 {4, 8}, /* cost of storing MMX registers 161 in SImode and DImode */ 162 2, /* cost of moving SSE register */ 163 {4, 8, 16}, /* cost of loading SSE registers 164 in SImode, DImode and TImode */ 165 {4, 8, 16}, /* cost of storing SSE registers 166 in SImode, DImode and TImode */ 167 3, /* MMX or SSE register to integer */ 168 0, /* size of prefetch block */ 169 0, /* number of parallel prefetches */ 170 1, /* Branch cost */ 171 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. 
*/ 172 COSTS_N_INSNS (27), /* cost of FMUL instruction. */ 173 COSTS_N_INSNS (88), /* cost of FDIV instruction. */ 174 COSTS_N_INSNS (22), /* cost of FABS instruction. */ 175 COSTS_N_INSNS (24), /* cost of FCHS instruction. */ 176 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */ 177}; 178 179static const 180struct processor_costs i486_cost = { /* 486 specific costs */ 181 COSTS_N_INSNS (1), /* cost of an add instruction */ 182 COSTS_N_INSNS (1), /* cost of a lea instruction */ 183 COSTS_N_INSNS (3), /* variable shift costs */ 184 COSTS_N_INSNS (2), /* constant shift costs */ 185 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */ 186 COSTS_N_INSNS (12), /* HI */ 187 COSTS_N_INSNS (12), /* SI */ 188 COSTS_N_INSNS (12), /* DI */ 189 COSTS_N_INSNS (12)}, /* other */ 190 1, /* cost of multiply per each bit set */ 191 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */ 192 COSTS_N_INSNS (40), /* HI */ 193 COSTS_N_INSNS (40), /* SI */ 194 COSTS_N_INSNS (40), /* DI */ 195 COSTS_N_INSNS (40)}, /* other */ 196 COSTS_N_INSNS (3), /* cost of movsx */ 197 COSTS_N_INSNS (2), /* cost of movzx */ 198 15, /* "large" insn */ 199 3, /* MOVE_RATIO */ 200 4, /* cost for loading QImode using movzbl */ 201 {2, 4, 2}, /* cost of loading integer registers 202 in QImode, HImode and SImode. 203 Relative to reg-reg move (2). 
*/ 204 {2, 4, 2}, /* cost of storing integer registers */ 205 2, /* cost of reg,reg fld/fst */ 206 {8, 8, 8}, /* cost of loading fp registers 207 in SFmode, DFmode and XFmode */ 208 {8, 8, 8}, /* cost of storing fp registers 209 in SFmode, DFmode and XFmode */ 210 2, /* cost of moving MMX register */ 211 {4, 8}, /* cost of loading MMX registers 212 in SImode and DImode */ 213 {4, 8}, /* cost of storing MMX registers 214 in SImode and DImode */ 215 2, /* cost of moving SSE register */ 216 {4, 8, 16}, /* cost of loading SSE registers 217 in SImode, DImode and TImode */ 218 {4, 8, 16}, /* cost of storing SSE registers 219 in SImode, DImode and TImode */ 220 3, /* MMX or SSE register to integer */ 221 0, /* size of prefetch block */ 222 0, /* number of parallel prefetches */ 223 1, /* Branch cost */ 224 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ 225 COSTS_N_INSNS (16), /* cost of FMUL instruction. */ 226 COSTS_N_INSNS (73), /* cost of FDIV instruction. */ 227 COSTS_N_INSNS (3), /* cost of FABS instruction. */ 228 COSTS_N_INSNS (3), /* cost of FCHS instruction. */ 229 COSTS_N_INSNS (83), /* cost of FSQRT instruction. 
*/ 230}; 231 232static const 233struct processor_costs pentium_cost = { 234 COSTS_N_INSNS (1), /* cost of an add instruction */ 235 COSTS_N_INSNS (1), /* cost of a lea instruction */ 236 COSTS_N_INSNS (4), /* variable shift costs */ 237 COSTS_N_INSNS (1), /* constant shift costs */ 238 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */ 239 COSTS_N_INSNS (11), /* HI */ 240 COSTS_N_INSNS (11), /* SI */ 241 COSTS_N_INSNS (11), /* DI */ 242 COSTS_N_INSNS (11)}, /* other */ 243 0, /* cost of multiply per each bit set */ 244 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */ 245 COSTS_N_INSNS (25), /* HI */ 246 COSTS_N_INSNS (25), /* SI */ 247 COSTS_N_INSNS (25), /* DI */ 248 COSTS_N_INSNS (25)}, /* other */ 249 COSTS_N_INSNS (3), /* cost of movsx */ 250 COSTS_N_INSNS (2), /* cost of movzx */ 251 8, /* "large" insn */ 252 6, /* MOVE_RATIO */ 253 6, /* cost for loading QImode using movzbl */ 254 {2, 4, 2}, /* cost of loading integer registers 255 in QImode, HImode and SImode. 256 Relative to reg-reg move (2). */ 257 {2, 4, 2}, /* cost of storing integer registers */ 258 2, /* cost of reg,reg fld/fst */ 259 {2, 2, 6}, /* cost of loading fp registers 260 in SFmode, DFmode and XFmode */ 261 {4, 4, 6}, /* cost of storing fp registers 262 in SFmode, DFmode and XFmode */ 263 8, /* cost of moving MMX register */ 264 {8, 8}, /* cost of loading MMX registers 265 in SImode and DImode */ 266 {8, 8}, /* cost of storing MMX registers 267 in SImode and DImode */ 268 2, /* cost of moving SSE register */ 269 {4, 8, 16}, /* cost of loading SSE registers 270 in SImode, DImode and TImode */ 271 {4, 8, 16}, /* cost of storing SSE registers 272 in SImode, DImode and TImode */ 273 3, /* MMX or SSE register to integer */ 274 0, /* size of prefetch block */ 275 0, /* number of parallel prefetches */ 276 2, /* Branch cost */ 277 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ 278 COSTS_N_INSNS (3), /* cost of FMUL instruction. 
*/ 279 COSTS_N_INSNS (39), /* cost of FDIV instruction. */ 280 COSTS_N_INSNS (1), /* cost of FABS instruction. */ 281 COSTS_N_INSNS (1), /* cost of FCHS instruction. */ 282 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ 283}; 284 285static const 286struct processor_costs pentiumpro_cost = { 287 COSTS_N_INSNS (1), /* cost of an add instruction */ 288 COSTS_N_INSNS (1), /* cost of a lea instruction */ 289 COSTS_N_INSNS (1), /* variable shift costs */ 290 COSTS_N_INSNS (1), /* constant shift costs */ 291 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ 292 COSTS_N_INSNS (4), /* HI */ 293 COSTS_N_INSNS (4), /* SI */ 294 COSTS_N_INSNS (4), /* DI */ 295 COSTS_N_INSNS (4)}, /* other */ 296 0, /* cost of multiply per each bit set */ 297 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */ 298 COSTS_N_INSNS (17), /* HI */ 299 COSTS_N_INSNS (17), /* SI */ 300 COSTS_N_INSNS (17), /* DI */ 301 COSTS_N_INSNS (17)}, /* other */ 302 COSTS_N_INSNS (1), /* cost of movsx */ 303 COSTS_N_INSNS (1), /* cost of movzx */ 304 8, /* "large" insn */ 305 6, /* MOVE_RATIO */ 306 2, /* cost for loading QImode using movzbl */ 307 {4, 4, 4}, /* cost of loading integer registers 308 in QImode, HImode and SImode. 309 Relative to reg-reg move (2). 
*/ 310 {2, 2, 2}, /* cost of storing integer registers */ 311 2, /* cost of reg,reg fld/fst */ 312 {2, 2, 6}, /* cost of loading fp registers 313 in SFmode, DFmode and XFmode */ 314 {4, 4, 6}, /* cost of storing fp registers 315 in SFmode, DFmode and XFmode */ 316 2, /* cost of moving MMX register */ 317 {2, 2}, /* cost of loading MMX registers 318 in SImode and DImode */ 319 {2, 2}, /* cost of storing MMX registers 320 in SImode and DImode */ 321 2, /* cost of moving SSE register */ 322 {2, 2, 8}, /* cost of loading SSE registers 323 in SImode, DImode and TImode */ 324 {2, 2, 8}, /* cost of storing SSE registers 325 in SImode, DImode and TImode */ 326 3, /* MMX or SSE register to integer */ 327 32, /* size of prefetch block */ 328 6, /* number of parallel prefetches */ 329 2, /* Branch cost */ 330 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ 331 COSTS_N_INSNS (5), /* cost of FMUL instruction. */ 332 COSTS_N_INSNS (56), /* cost of FDIV instruction. */ 333 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 334 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 335 COSTS_N_INSNS (56), /* cost of FSQRT instruction. 
*/ 336}; 337 338static const 339struct processor_costs geode_cost = { 340 COSTS_N_INSNS (1), /* cost of an add instruction */ 341 COSTS_N_INSNS (1), /* cost of a lea instruction */ 342 COSTS_N_INSNS (2), /* variable shift costs */ 343 COSTS_N_INSNS (1), /* constant shift costs */ 344 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 345 COSTS_N_INSNS (4), /* HI */ 346 COSTS_N_INSNS (7), /* SI */ 347 COSTS_N_INSNS (7), /* DI */ 348 COSTS_N_INSNS (7)}, /* other */ 349 0, /* cost of multiply per each bit set */ 350 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */ 351 COSTS_N_INSNS (23), /* HI */ 352 COSTS_N_INSNS (39), /* SI */ 353 COSTS_N_INSNS (39), /* DI */ 354 COSTS_N_INSNS (39)}, /* other */ 355 COSTS_N_INSNS (1), /* cost of movsx */ 356 COSTS_N_INSNS (1), /* cost of movzx */ 357 8, /* "large" insn */ 358 4, /* MOVE_RATIO */ 359 1, /* cost for loading QImode using movzbl */ 360 {1, 1, 1}, /* cost of loading integer registers 361 in QImode, HImode and SImode. 362 Relative to reg-reg move (2). */ 363 {1, 1, 1}, /* cost of storing integer registers */ 364 1, /* cost of reg,reg fld/fst */ 365 {1, 1, 1}, /* cost of loading fp registers 366 in SFmode, DFmode and XFmode */ 367 {4, 6, 6}, /* cost of storing fp registers 368 in SFmode, DFmode and XFmode */ 369 370 1, /* cost of moving MMX register */ 371 {1, 1}, /* cost of loading MMX registers 372 in SImode and DImode */ 373 {1, 1}, /* cost of storing MMX registers 374 in SImode and DImode */ 375 1, /* cost of moving SSE register */ 376 {1, 1, 1}, /* cost of loading SSE registers 377 in SImode, DImode and TImode */ 378 {1, 1, 1}, /* cost of storing SSE registers 379 in SImode, DImode and TImode */ 380 1, /* MMX or SSE register to integer */ 381 32, /* size of prefetch block */ 382 1, /* number of parallel prefetches */ 383 1, /* Branch cost */ 384 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ 385 COSTS_N_INSNS (11), /* cost of FMUL instruction. 
*/ 386 COSTS_N_INSNS (47), /* cost of FDIV instruction. */ 387 COSTS_N_INSNS (1), /* cost of FABS instruction. */ 388 COSTS_N_INSNS (1), /* cost of FCHS instruction. */ 389 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */ 390}; 391 392static const 393struct processor_costs k6_cost = { 394 COSTS_N_INSNS (1), /* cost of an add instruction */ 395 COSTS_N_INSNS (2), /* cost of a lea instruction */ 396 COSTS_N_INSNS (1), /* variable shift costs */ 397 COSTS_N_INSNS (1), /* constant shift costs */ 398 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 399 COSTS_N_INSNS (3), /* HI */ 400 COSTS_N_INSNS (3), /* SI */ 401 COSTS_N_INSNS (3), /* DI */ 402 COSTS_N_INSNS (3)}, /* other */ 403 0, /* cost of multiply per each bit set */ 404 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 405 COSTS_N_INSNS (18), /* HI */ 406 COSTS_N_INSNS (18), /* SI */ 407 COSTS_N_INSNS (18), /* DI */ 408 COSTS_N_INSNS (18)}, /* other */ 409 COSTS_N_INSNS (2), /* cost of movsx */ 410 COSTS_N_INSNS (2), /* cost of movzx */ 411 8, /* "large" insn */ 412 4, /* MOVE_RATIO */ 413 3, /* cost for loading QImode using movzbl */ 414 {4, 5, 4}, /* cost of loading integer registers 415 in QImode, HImode and SImode. 416 Relative to reg-reg move (2). 
*/ 417 {2, 3, 2}, /* cost of storing integer registers */ 418 4, /* cost of reg,reg fld/fst */ 419 {6, 6, 6}, /* cost of loading fp registers 420 in SFmode, DFmode and XFmode */ 421 {4, 4, 4}, /* cost of storing fp registers 422 in SFmode, DFmode and XFmode */ 423 2, /* cost of moving MMX register */ 424 {2, 2}, /* cost of loading MMX registers 425 in SImode and DImode */ 426 {2, 2}, /* cost of storing MMX registers 427 in SImode and DImode */ 428 2, /* cost of moving SSE register */ 429 {2, 2, 8}, /* cost of loading SSE registers 430 in SImode, DImode and TImode */ 431 {2, 2, 8}, /* cost of storing SSE registers 432 in SImode, DImode and TImode */ 433 6, /* MMX or SSE register to integer */ 434 32, /* size of prefetch block */ 435 1, /* number of parallel prefetches */ 436 1, /* Branch cost */ 437 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */ 438 COSTS_N_INSNS (2), /* cost of FMUL instruction. */ 439 COSTS_N_INSNS (56), /* cost of FDIV instruction. */ 440 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 441 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 442 COSTS_N_INSNS (56), /* cost of FSQRT instruction. 
*/ 443}; 444 445static const 446struct processor_costs athlon_cost = { 447 COSTS_N_INSNS (1), /* cost of an add instruction */ 448 COSTS_N_INSNS (2), /* cost of a lea instruction */ 449 COSTS_N_INSNS (1), /* variable shift costs */ 450 COSTS_N_INSNS (1), /* constant shift costs */ 451 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */ 452 COSTS_N_INSNS (5), /* HI */ 453 COSTS_N_INSNS (5), /* SI */ 454 COSTS_N_INSNS (5), /* DI */ 455 COSTS_N_INSNS (5)}, /* other */ 456 0, /* cost of multiply per each bit set */ 457 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 458 COSTS_N_INSNS (26), /* HI */ 459 COSTS_N_INSNS (42), /* SI */ 460 COSTS_N_INSNS (74), /* DI */ 461 COSTS_N_INSNS (74)}, /* other */ 462 COSTS_N_INSNS (1), /* cost of movsx */ 463 COSTS_N_INSNS (1), /* cost of movzx */ 464 8, /* "large" insn */ 465 9, /* MOVE_RATIO */ 466 4, /* cost for loading QImode using movzbl */ 467 {3, 4, 3}, /* cost of loading integer registers 468 in QImode, HImode and SImode. 469 Relative to reg-reg move (2). */ 470 {3, 4, 3}, /* cost of storing integer registers */ 471 4, /* cost of reg,reg fld/fst */ 472 {4, 4, 12}, /* cost of loading fp registers 473 in SFmode, DFmode and XFmode */ 474 {6, 6, 8}, /* cost of storing fp registers 475 in SFmode, DFmode and XFmode */ 476 2, /* cost of moving MMX register */ 477 {4, 4}, /* cost of loading MMX registers 478 in SImode and DImode */ 479 {4, 4}, /* cost of storing MMX registers 480 in SImode and DImode */ 481 2, /* cost of moving SSE register */ 482 {4, 4, 6}, /* cost of loading SSE registers 483 in SImode, DImode and TImode */ 484 {4, 4, 5}, /* cost of storing SSE registers 485 in SImode, DImode and TImode */ 486 5, /* MMX or SSE register to integer */ 487 64, /* size of prefetch block */ 488 6, /* number of parallel prefetches */ 489 5, /* Branch cost */ 490 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ 491 COSTS_N_INSNS (4), /* cost of FMUL instruction. */ 492 COSTS_N_INSNS (24), /* cost of FDIV instruction. 
*/ 493 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 494 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 495 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ 496}; 497 498static const 499struct processor_costs k8_cost = { 500 COSTS_N_INSNS (1), /* cost of an add instruction */ 501 COSTS_N_INSNS (2), /* cost of a lea instruction */ 502 COSTS_N_INSNS (1), /* variable shift costs */ 503 COSTS_N_INSNS (1), /* constant shift costs */ 504 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 505 COSTS_N_INSNS (4), /* HI */ 506 COSTS_N_INSNS (3), /* SI */ 507 COSTS_N_INSNS (4), /* DI */ 508 COSTS_N_INSNS (5)}, /* other */ 509 0, /* cost of multiply per each bit set */ 510 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 511 COSTS_N_INSNS (26), /* HI */ 512 COSTS_N_INSNS (42), /* SI */ 513 COSTS_N_INSNS (74), /* DI */ 514 COSTS_N_INSNS (74)}, /* other */ 515 COSTS_N_INSNS (1), /* cost of movsx */ 516 COSTS_N_INSNS (1), /* cost of movzx */ 517 8, /* "large" insn */ 518 9, /* MOVE_RATIO */ 519 4, /* cost for loading QImode using movzbl */ 520 {3, 4, 3}, /* cost of loading integer registers 521 in QImode, HImode and SImode. 522 Relative to reg-reg move (2). 
*/ 523 {3, 4, 3}, /* cost of storing integer registers */ 524 4, /* cost of reg,reg fld/fst */ 525 {4, 4, 12}, /* cost of loading fp registers 526 in SFmode, DFmode and XFmode */ 527 {6, 6, 8}, /* cost of storing fp registers 528 in SFmode, DFmode and XFmode */ 529 2, /* cost of moving MMX register */ 530 {3, 3}, /* cost of loading MMX registers 531 in SImode and DImode */ 532 {4, 4}, /* cost of storing MMX registers 533 in SImode and DImode */ 534 2, /* cost of moving SSE register */ 535 {4, 3, 6}, /* cost of loading SSE registers 536 in SImode, DImode and TImode */ 537 {4, 4, 5}, /* cost of storing SSE registers 538 in SImode, DImode and TImode */ 539 5, /* MMX or SSE register to integer */ 540 64, /* size of prefetch block */ 541 6, /* number of parallel prefetches */ 542 5, /* Branch cost */ 543 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ 544 COSTS_N_INSNS (4), /* cost of FMUL instruction. */ 545 COSTS_N_INSNS (19), /* cost of FDIV instruction. */ 546 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 547 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 548 COSTS_N_INSNS (35), /* cost of FSQRT instruction. 
*/ 549}; 550 551static const 552struct processor_costs pentium4_cost = { 553 COSTS_N_INSNS (1), /* cost of an add instruction */ 554 COSTS_N_INSNS (3), /* cost of a lea instruction */ 555 COSTS_N_INSNS (4), /* variable shift costs */ 556 COSTS_N_INSNS (4), /* constant shift costs */ 557 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */ 558 COSTS_N_INSNS (15), /* HI */ 559 COSTS_N_INSNS (15), /* SI */ 560 COSTS_N_INSNS (15), /* DI */ 561 COSTS_N_INSNS (15)}, /* other */ 562 0, /* cost of multiply per each bit set */ 563 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */ 564 COSTS_N_INSNS (56), /* HI */ 565 COSTS_N_INSNS (56), /* SI */ 566 COSTS_N_INSNS (56), /* DI */ 567 COSTS_N_INSNS (56)}, /* other */ 568 COSTS_N_INSNS (1), /* cost of movsx */ 569 COSTS_N_INSNS (1), /* cost of movzx */ 570 16, /* "large" insn */ 571 6, /* MOVE_RATIO */ 572 2, /* cost for loading QImode using movzbl */ 573 {4, 5, 4}, /* cost of loading integer registers 574 in QImode, HImode and SImode. 575 Relative to reg-reg move (2). */ 576 {2, 3, 2}, /* cost of storing integer registers */ 577 2, /* cost of reg,reg fld/fst */ 578 {2, 2, 6}, /* cost of loading fp registers 579 in SFmode, DFmode and XFmode */ 580 {4, 4, 6}, /* cost of storing fp registers 581 in SFmode, DFmode and XFmode */ 582 2, /* cost of moving MMX register */ 583 {2, 2}, /* cost of loading MMX registers 584 in SImode and DImode */ 585 {2, 2}, /* cost of storing MMX registers 586 in SImode and DImode */ 587 12, /* cost of moving SSE register */ 588 {12, 12, 12}, /* cost of loading SSE registers 589 in SImode, DImode and TImode */ 590 {2, 2, 8}, /* cost of storing SSE registers 591 in SImode, DImode and TImode */ 592 10, /* MMX or SSE register to integer */ 593 64, /* size of prefetch block */ 594 6, /* number of parallel prefetches */ 595 2, /* Branch cost */ 596 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ 597 COSTS_N_INSNS (7), /* cost of FMUL instruction. 
*/ 598 COSTS_N_INSNS (43), /* cost of FDIV instruction. */ 599 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 600 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 601 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */ 602}; 603 604static const 605struct processor_costs nocona_cost = { 606 COSTS_N_INSNS (1), /* cost of an add instruction */ 607 COSTS_N_INSNS (1), /* cost of a lea instruction */ 608 COSTS_N_INSNS (1), /* variable shift costs */ 609 COSTS_N_INSNS (1), /* constant shift costs */ 610 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */ 611 COSTS_N_INSNS (10), /* HI */ 612 COSTS_N_INSNS (10), /* SI */ 613 COSTS_N_INSNS (10), /* DI */ 614 COSTS_N_INSNS (10)}, /* other */ 615 0, /* cost of multiply per each bit set */ 616 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */ 617 COSTS_N_INSNS (66), /* HI */ 618 COSTS_N_INSNS (66), /* SI */ 619 COSTS_N_INSNS (66), /* DI */ 620 COSTS_N_INSNS (66)}, /* other */ 621 COSTS_N_INSNS (1), /* cost of movsx */ 622 COSTS_N_INSNS (1), /* cost of movzx */ 623 16, /* "large" insn */ 624 17, /* MOVE_RATIO */ 625 4, /* cost for loading QImode using movzbl */ 626 {4, 4, 4}, /* cost of loading integer registers 627 in QImode, HImode and SImode. 628 Relative to reg-reg move (2). 
*/ 629 {4, 4, 4}, /* cost of storing integer registers */ 630 3, /* cost of reg,reg fld/fst */ 631 {12, 12, 12}, /* cost of loading fp registers 632 in SFmode, DFmode and XFmode */ 633 {4, 4, 4}, /* cost of storing fp registers 634 in SFmode, DFmode and XFmode */ 635 6, /* cost of moving MMX register */ 636 {12, 12}, /* cost of loading MMX registers 637 in SImode and DImode */ 638 {12, 12}, /* cost of storing MMX registers 639 in SImode and DImode */ 640 6, /* cost of moving SSE register */ 641 {12, 12, 12}, /* cost of loading SSE registers 642 in SImode, DImode and TImode */ 643 {12, 12, 12}, /* cost of storing SSE registers 644 in SImode, DImode and TImode */ 645 8, /* MMX or SSE register to integer */ 646 128, /* size of prefetch block */ 647 8, /* number of parallel prefetches */ 648 1, /* Branch cost */ 649 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ 650 COSTS_N_INSNS (8), /* cost of FMUL instruction. */ 651 COSTS_N_INSNS (40), /* cost of FDIV instruction. */ 652 COSTS_N_INSNS (3), /* cost of FABS instruction. */ 653 COSTS_N_INSNS (3), /* cost of FCHS instruction. */ 654 COSTS_N_INSNS (44), /* cost of FSQRT instruction. 
*/ 655}; 656 657static const 658struct processor_costs core2_cost = { 659 COSTS_N_INSNS (1), /* cost of an add instruction */ 660 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ 661 COSTS_N_INSNS (1), /* variable shift costs */ 662 COSTS_N_INSNS (1), /* constant shift costs */ 663 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 664 COSTS_N_INSNS (3), /* HI */ 665 COSTS_N_INSNS (3), /* SI */ 666 COSTS_N_INSNS (3), /* DI */ 667 COSTS_N_INSNS (3)}, /* other */ 668 0, /* cost of multiply per each bit set */ 669 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */ 670 COSTS_N_INSNS (22), /* HI */ 671 COSTS_N_INSNS (22), /* SI */ 672 COSTS_N_INSNS (22), /* DI */ 673 COSTS_N_INSNS (22)}, /* other */ 674 COSTS_N_INSNS (1), /* cost of movsx */ 675 COSTS_N_INSNS (1), /* cost of movzx */ 676 8, /* "large" insn */ 677 16, /* MOVE_RATIO */ 678 2, /* cost for loading QImode using movzbl */ 679 {6, 6, 6}, /* cost of loading integer registers 680 in QImode, HImode and SImode. 681 Relative to reg-reg move (2). */ 682 {4, 4, 4}, /* cost of storing integer registers */ 683 2, /* cost of reg,reg fld/fst */ 684 {6, 6, 6}, /* cost of loading fp registers 685 in SFmode, DFmode and XFmode */ 686 {4, 4, 4}, /* cost of loading integer registers */ 687 2, /* cost of moving MMX register */ 688 {6, 6}, /* cost of loading MMX registers 689 in SImode and DImode */ 690 {4, 4}, /* cost of storing MMX registers 691 in SImode and DImode */ 692 2, /* cost of moving SSE register */ 693 {6, 6, 6}, /* cost of loading SSE registers 694 in SImode, DImode and TImode */ 695 {4, 4, 4}, /* cost of storing SSE registers 696 in SImode, DImode and TImode */ 697 2, /* MMX or SSE register to integer */ 698 128, /* size of prefetch block */ 699 8, /* number of parallel prefetches */ 700 3, /* Branch cost */ 701 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ 702 COSTS_N_INSNS (5), /* cost of FMUL instruction. */ 703 COSTS_N_INSNS (32), /* cost of FDIV instruction. 
*/ 704 COSTS_N_INSNS (1), /* cost of FABS instruction. */ 705 COSTS_N_INSNS (1), /* cost of FCHS instruction. */ 706 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */ 707}; 708 709/* Generic64 should produce code tuned for Nocona and K8. */ 710static const 711struct processor_costs generic64_cost = { 712 COSTS_N_INSNS (1), /* cost of an add instruction */ 713 /* On all chips taken into consideration lea is 2 cycles and more. With 714 this cost however our current implementation of synth_mult results in 715 use of unnecessary temporary registers causing regression on several 716 SPECfp benchmarks. */ 717 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ 718 COSTS_N_INSNS (1), /* variable shift costs */ 719 COSTS_N_INSNS (1), /* constant shift costs */ 720 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 721 COSTS_N_INSNS (4), /* HI */ 722 COSTS_N_INSNS (3), /* SI */ 723 COSTS_N_INSNS (4), /* DI */ 724 COSTS_N_INSNS (2)}, /* other */ 725 0, /* cost of multiply per each bit set */ 726 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 727 COSTS_N_INSNS (26), /* HI */ 728 COSTS_N_INSNS (42), /* SI */ 729 COSTS_N_INSNS (74), /* DI */ 730 COSTS_N_INSNS (74)}, /* other */ 731 COSTS_N_INSNS (1), /* cost of movsx */ 732 COSTS_N_INSNS (1), /* cost of movzx */ 733 8, /* "large" insn */ 734 17, /* MOVE_RATIO */ 735 4, /* cost for loading QImode using movzbl */ 736 {4, 4, 4}, /* cost of loading integer registers 737 in QImode, HImode and SImode. 738 Relative to reg-reg move (2). 
*/ 739 {4, 4, 4}, /* cost of storing integer registers */ 740 4, /* cost of reg,reg fld/fst */ 741 {12, 12, 12}, /* cost of loading fp registers 742 in SFmode, DFmode and XFmode */ 743 {6, 6, 8}, /* cost of storing fp registers 744 in SFmode, DFmode and XFmode */ 745 2, /* cost of moving MMX register */ 746 {8, 8}, /* cost of loading MMX registers 747 in SImode and DImode */ 748 {8, 8}, /* cost of storing MMX registers 749 in SImode and DImode */ 750 2, /* cost of moving SSE register */ 751 {8, 8, 8}, /* cost of loading SSE registers 752 in SImode, DImode and TImode */ 753 {8, 8, 8}, /* cost of storing SSE registers 754 in SImode, DImode and TImode */ 755 5, /* MMX or SSE register to integer */ 756 64, /* size of prefetch block */ 757 6, /* number of parallel prefetches */ 758 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value 759 is increased to perhaps more appropriate value of 5. */ 760 3, /* Branch cost */ 761 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ 762 COSTS_N_INSNS (8), /* cost of FMUL instruction. */ 763 COSTS_N_INSNS (20), /* cost of FDIV instruction. */ 764 COSTS_N_INSNS (8), /* cost of FABS instruction. */ 765 COSTS_N_INSNS (8), /* cost of FCHS instruction. */ 766 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ 767}; 768 769/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. 
*/ 770static const 771struct processor_costs generic32_cost = { 772 COSTS_N_INSNS (1), /* cost of an add instruction */ 773 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ 774 COSTS_N_INSNS (1), /* variable shift costs */ 775 COSTS_N_INSNS (1), /* constant shift costs */ 776 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 777 COSTS_N_INSNS (4), /* HI */ 778 COSTS_N_INSNS (3), /* SI */ 779 COSTS_N_INSNS (4), /* DI */ 780 COSTS_N_INSNS (2)}, /* other */ 781 0, /* cost of multiply per each bit set */ 782 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 783 COSTS_N_INSNS (26), /* HI */ 784 COSTS_N_INSNS (42), /* SI */ 785 COSTS_N_INSNS (74), /* DI */ 786 COSTS_N_INSNS (74)}, /* other */ 787 COSTS_N_INSNS (1), /* cost of movsx */ 788 COSTS_N_INSNS (1), /* cost of movzx */ 789 8, /* "large" insn */ 790 17, /* MOVE_RATIO */ 791 4, /* cost for loading QImode using movzbl */ 792 {4, 4, 4}, /* cost of loading integer registers 793 in QImode, HImode and SImode. 794 Relative to reg-reg move (2). */ 795 {4, 4, 4}, /* cost of storing integer registers */ 796 4, /* cost of reg,reg fld/fst */ 797 {12, 12, 12}, /* cost of loading fp registers 798 in SFmode, DFmode and XFmode */ 799 {6, 6, 8}, /* cost of storing fp registers 800 in SFmode, DFmode and XFmode */ 801 2, /* cost of moving MMX register */ 802 {8, 8}, /* cost of loading MMX registers 803 in SImode and DImode */ 804 {8, 8}, /* cost of storing MMX registers 805 in SImode and DImode */ 806 2, /* cost of moving SSE register */ 807 {8, 8, 8}, /* cost of loading SSE registers 808 in SImode, DImode and TImode */ 809 {8, 8, 8}, /* cost of storing SSE registers 810 in SImode, DImode and TImode */ 811 5, /* MMX or SSE register to integer */ 812 64, /* size of prefetch block */ 813 6, /* number of parallel prefetches */ 814 3, /* Branch cost */ 815 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ 816 COSTS_N_INSNS (8), /* cost of FMUL instruction. */ 817 COSTS_N_INSNS (20), /* cost of FDIV instruction. 
*/ 818 COSTS_N_INSNS (8), /* cost of FABS instruction. */ 819 COSTS_N_INSNS (8), /* cost of FCHS instruction. */ 820 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ 821}; 822 823const struct processor_costs *ix86_cost = &pentium_cost; 824 825/* Processor feature/optimization bitmasks. */ 826#define m_386 (1<<PROCESSOR_I386) 827#define m_486 (1<<PROCESSOR_I486) 828#define m_PENT (1<<PROCESSOR_PENTIUM) 829#define m_PPRO (1<<PROCESSOR_PENTIUMPRO) 830#define m_GEODE (1<<PROCESSOR_GEODE) 831#define m_K6_GEODE (m_K6 | m_GEODE) 832#define m_K6 (1<<PROCESSOR_K6) 833#define m_ATHLON (1<<PROCESSOR_ATHLON) 834#define m_PENT4 (1<<PROCESSOR_PENTIUM4) 835#define m_K8 (1<<PROCESSOR_K8) 836#define m_ATHLON_K8 (m_K8 | m_ATHLON) 837#define m_NOCONA (1<<PROCESSOR_NOCONA) 838#define m_CORE2 (1<<PROCESSOR_CORE2) 839#define m_GENERIC32 (1<<PROCESSOR_GENERIC32) 840#define m_GENERIC64 (1<<PROCESSOR_GENERIC64) 841#define m_GENERIC (m_GENERIC32 | m_GENERIC64) 842 843/* Generic instruction choice should be common subset of supported CPUs 844 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */ 845 846/* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for 847 Generic64 seems like good code size tradeoff. We can't enable it for 32bit 848 generic because it is not working well with PPro base chips. 
*/ 849const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_CORE2 | m_GENERIC64; 850const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; 851const int x86_zero_extend_with_and = m_486 | m_PENT; 852const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */; 853const int x86_double_with_add = ~m_386; 854const int x86_use_bit_test = m_386; 855const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC; 856const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA; 857const int x86_3dnow_a = m_ATHLON_K8; 858const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; 859/* Branch hints were put in P4 based on simulation result. But 860 after P4 was made, no performance benefit was observed with 861 branch hints. It also increases the code size. As the result, 862 icc never generates branch hints. */ 863const int x86_branch_hints = 0; 864const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */ 865/* We probably ought to watch for partial register stalls on Generic32 866 compilation setting as well. However in current implementation the 867 partial register stalls are not eliminated very well - they can 868 be introduced via subregs synthesized by combine and can happen 869 in caller/callee saving sequences. 870 Because this option pays back little on PPro based chips and is in conflict 871 with partial reg. dependencies used by Athlon/P4 based chips, it is better 872 to leave it off for generic32 for now. 
*/ 873const int x86_partial_reg_stall = m_PPRO; 874const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC; 875const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE; 876const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC); 877const int x86_use_mov0 = m_K6; 878const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC); 879const int x86_read_modify_write = ~m_PENT; 880const int x86_read_modify = ~(m_PENT | m_PPRO); 881const int x86_split_long_moves = m_PPRO; 882const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */ 883const int x86_fast_prefix = ~(m_PENT | m_486 | m_386); 884const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA; 885const int x86_qimode_math = ~(0); 886const int x86_promote_qi_regs = 0; 887/* On PPro this flag is meant to avoid partial register stalls. Just like 888 the x86_partial_reg_stall this option might be considered for Generic32 889 if our scheme for avoiding partial stalls was more effective. 
*/ 890const int x86_himode_math = ~(m_PPRO); 891const int x86_promote_hi_regs = m_PPRO; 892const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; 893const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; 894const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; 895const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; 896const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE); 897const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; 898const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; 899const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC; 900const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC; 901const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC; 902const int x86_shift1 = ~m_486; 903const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; 904/* In Generic model we have an conflict here in between PPro/Pentium4 based chips 905 that thread 128bit SSE registers as single units versus K8 based chips that 906 divide SSE registers to two 64bit halves. 907 x86_sse_partial_reg_dependency promote all store destinations to be 128bit 908 to allow register renaming on 128bit SSE units, but usually results in one 909 extra microop on 64bit SSE units. Experimental results shows that disabling 910 this option on P4 brings over 20% SPECfp regression, while enabling it on 911 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling 912 of moves. 
*/ 913const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC; 914/* Set for machines where the type and dependencies are resolved on SSE 915 register parts instead of whole registers, so we may maintain just 916 lower part of scalar values in proper format leaving the upper part 917 undefined. */ 918const int x86_sse_split_regs = m_ATHLON_K8; 919const int x86_sse_typeless_stores = m_ATHLON_K8; 920const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA; 921const int x86_use_ffreep = m_ATHLON_K8; 922const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2; 923const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC); 924 925/* ??? Allowing interunit moves makes it all too easy for the compiler to put 926 integer data in xmm registers. Which results in pretty abysmal code. */ 927const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */; 928 929const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4 | m_NOCONA | m_CORE2 | m_PPRO | m_GENERIC32; 930/* Some CPU cores are not able to predict more than 4 branch instructions in 931 the 16 byte window. */ 932const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; 933const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC; 934const int x86_use_bt = m_ATHLON_K8; 935/* Compare and exchange was added for 80486. */ 936const int x86_cmpxchg = ~m_386; 937/* Compare and exchange 8 bytes was added for pentium. */ 938const int x86_cmpxchg8b = ~(m_386 | m_486); 939/* Compare and exchange 16 bytes was added for nocona. */ 940const int x86_cmpxchg16b = m_NOCONA | m_CORE2; 941/* Exchange and add was added for 80486. */ 942const int x86_xadd = ~m_386; 943const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC; 944 945/* In case the average insn count for single function invocation is 946 lower than this constant, emit fast (but longer) prologue and 947 epilogue code. 
*/ 948#define FAST_PROLOGUE_INSN_COUNT 20 949 950/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */ 951static const char *const qi_reg_name[] = QI_REGISTER_NAMES; 952static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; 953static const char *const hi_reg_name[] = HI_REGISTER_NAMES; 954 955/* Array of the smallest class containing reg number REGNO, indexed by 956 REGNO. Used by REGNO_REG_CLASS in i386.h. */ 957 958enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = 959{ 960 /* ax, dx, cx, bx */ 961 AREG, DREG, CREG, BREG, 962 /* si, di, bp, sp */ 963 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS, 964 /* FP registers */ 965 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS, 966 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, 967 /* arg pointer */ 968 NON_Q_REGS, 969 /* flags, fpsr, dirflag, frame */ 970 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS, 971 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 972 SSE_REGS, SSE_REGS, 973 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, 974 MMX_REGS, MMX_REGS, 975 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 976 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 977 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 978 SSE_REGS, SSE_REGS, 979}; 980 981/* The "default" register map used in 32bit mode. 
*/ 982 983int const dbx_register_map[FIRST_PSEUDO_REGISTER] = 984{ 985 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */ 986 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */ 987 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 988 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */ 989 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */ 990 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 991 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 992}; 993 994static int const x86_64_int_parameter_registers[6] = 995{ 996 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/, 997 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */ 998}; 999 1000static int const x86_64_int_return_registers[4] = 1001{ 1002 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/ 1003}; 1004 1005/* The "default" register map used in 64bit mode. */ 1006int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = 1007{ 1008 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */ 1009 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */ 1010 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 1011 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */ 1012 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */ 1013 8,9,10,11,12,13,14,15, /* extended integer registers */ 1014 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */ 1015}; 1016 1017/* Define the register numbers to be used in Dwarf debugging information. 1018 The SVR4 reference port C compiler uses the following register numbers 1019 in its Dwarf output code: 1020 0 for %eax (gcc regno = 0) 1021 1 for %ecx (gcc regno = 2) 1022 2 for %edx (gcc regno = 1) 1023 3 for %ebx (gcc regno = 3) 1024 4 for %esp (gcc regno = 7) 1025 5 for %ebp (gcc regno = 6) 1026 6 for %esi (gcc regno = 4) 1027 7 for %edi (gcc regno = 5) 1028 The following three DWARF register numbers are never generated by 1029 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 1030 believes these numbers have these meanings. 
1031 8 for %eip (no gcc equivalent) 1032 9 for %eflags (gcc regno = 17) 1033 10 for %trapno (no gcc equivalent) 1034 It is not at all clear how we should number the FP stack registers 1035 for the x86 architecture. If the version of SDB on x86/svr4 were 1036 a bit less brain dead with respect to floating-point then we would 1037 have a precedent to follow with respect to DWARF register numbers 1038 for x86 FP registers, but the SDB on x86/svr4 is so completely 1039 broken with respect to FP registers that it is hardly worth thinking 1040 of it as something to strive for compatibility with. 1041 The version of x86/svr4 SDB I have at the moment does (partially) 1042 seem to believe that DWARF register number 11 is associated with 1043 the x86 register %st(0), but that's about all. Higher DWARF 1044 register numbers don't seem to be associated with anything in 1045 particular, and even for DWARF regno 11, SDB only seems to under- 1046 stand that it should say that a variable lives in %st(0) (when 1047 asked via an `=' command) if we said it was in DWARF regno 11, 1048 but SDB still prints garbage when asked for the value of the 1049 variable in question (via a `/' command). 1050 (Also note that the labels SDB prints for various FP stack regs 1051 when doing an `x' command are all wrong.) 1052 Note that these problems generally don't affect the native SVR4 1053 C compiler because it doesn't allow the use of -O with -g and 1054 because when it is *not* optimizing, it allocates a memory 1055 location for each floating-point variable, and the memory 1056 location is what gets described in the DWARF AT_location 1057 attribute for the variable in question. 1058 Regardless of the severe mental illness of the x86/svr4 SDB, we 1059 do something sensible here and we use the following DWARF 1060 register numbers. Note that these are all stack-top-relative 1061 numbers. 
1062 11 for %st(0) (gcc regno = 8) 1063 12 for %st(1) (gcc regno = 9) 1064 13 for %st(2) (gcc regno = 10) 1065 14 for %st(3) (gcc regno = 11) 1066 15 for %st(4) (gcc regno = 12) 1067 16 for %st(5) (gcc regno = 13) 1068 17 for %st(6) (gcc regno = 14) 1069 18 for %st(7) (gcc regno = 15) 1070*/ 1071int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = 1072{ 1073 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */ 1074 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */ 1075 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 1076 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */ 1077 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */ 1078 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 1079 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 1080}; 1081 1082/* Test and compare insns in i386.md store the information needed to 1083 generate branch and scc insns here. */ 1084 1085rtx ix86_compare_op0 = NULL_RTX; 1086rtx ix86_compare_op1 = NULL_RTX; 1087rtx ix86_compare_emitted = NULL_RTX; 1088 1089/* Size of the register save area. */ 1090#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16) 1091 1092/* Define the structure for the machine field in struct function. */ 1093 1094struct stack_local_entry GTY(()) 1095{ 1096 unsigned short mode; 1097 unsigned short n; 1098 rtx rtl; 1099 struct stack_local_entry *next; 1100}; 1101 1102/* Structure describing stack frame layout. 
1103 Stack grows downward: 1104 1105 [arguments] 1106 <- ARG_POINTER 1107 saved pc 1108 1109 saved frame pointer if frame_pointer_needed 1110 <- HARD_FRAME_POINTER 1111 [saved regs] 1112 1113 [padding1] \ 1114 ) 1115 [va_arg registers] ( 1116 > to_allocate <- FRAME_POINTER 1117 [frame] ( 1118 ) 1119 [padding2] / 1120 */ 1121struct ix86_frame 1122{ 1123 int nregs; 1124 int padding1; 1125 int va_arg_size; 1126 HOST_WIDE_INT frame; 1127 int padding2; 1128 int outgoing_arguments_size; 1129 int red_zone_size; 1130 1131 HOST_WIDE_INT to_allocate; 1132 /* The offsets relative to ARG_POINTER. */ 1133 HOST_WIDE_INT frame_pointer_offset; 1134 HOST_WIDE_INT hard_frame_pointer_offset; 1135 HOST_WIDE_INT stack_pointer_offset; 1136 1137 /* When save_regs_using_mov is set, emit prologue using 1138 move instead of push instructions. */ 1139 bool save_regs_using_mov; 1140}; 1141 1142/* Code model option. */ 1143enum cmodel ix86_cmodel; 1144/* Asm dialect. */ 1145enum asm_dialect ix86_asm_dialect = ASM_ATT; 1146/* TLS dialects. */ 1147enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU; 1148 1149/* Which unit we are generating floating point math for. */ 1150enum fpmath_unit ix86_fpmath; 1151 1152/* Which cpu are we scheduling for. */ 1153enum processor_type ix86_tune; 1154/* Which instruction set architecture to use. */ 1155enum processor_type ix86_arch; 1156 1157/* true if sse prefetch instruction is not NOOP. */ 1158int x86_prefetch_sse; 1159 1160/* ix86_regparm_string as a number */ 1161static int ix86_regparm; 1162 1163/* -mstackrealign option */ 1164extern int ix86_force_align_arg_pointer; 1165static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer"; 1166 1167/* Preferred alignment for stack boundary in bits. */ 1168unsigned int ix86_preferred_stack_boundary; 1169 1170/* Values 1-5: see jump.c */ 1171int ix86_branch_cost; 1172 1173/* Variables which are this size or smaller are put in the data/bss 1174 or ldata/lbss sections. 
*/ 1175 1176int ix86_section_threshold = 65536; 1177 1178/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */ 1179char internal_label_prefix[16]; 1180int internal_label_prefix_len; 1181 1182static bool ix86_handle_option (size_t, const char *, int); 1183static void output_pic_addr_const (FILE *, rtx, int); 1184static void put_condition_code (enum rtx_code, enum machine_mode, 1185 int, int, FILE *); 1186static const char *get_some_local_dynamic_name (void); 1187static int get_some_local_dynamic_name_1 (rtx *, void *); 1188static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx); 1189static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *, 1190 rtx *); 1191static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *); 1192static enum machine_mode ix86_cc_modes_compatible (enum machine_mode, 1193 enum machine_mode); 1194static rtx get_thread_pointer (int); 1195static rtx legitimize_tls_address (rtx, enum tls_model, int); 1196static void get_pc_thunk_name (char [32], unsigned int); 1197static rtx gen_push (rtx); 1198static int ix86_flags_dependent (rtx, rtx, enum attr_type); 1199static int ix86_agi_dependent (rtx, rtx, enum attr_type); 1200static struct machine_function * ix86_init_machine_status (void); 1201static int ix86_split_to_parts (rtx, rtx *, enum machine_mode); 1202static int ix86_nsaved_regs (void); 1203static void ix86_emit_save_regs (void); 1204static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT); 1205static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int); 1206static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT); 1207static HOST_WIDE_INT ix86_GOT_alias_set (void); 1208static void ix86_adjust_counter (rtx, HOST_WIDE_INT); 1209static rtx ix86_expand_aligntest (rtx, int); 1210static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx); 1211static int ix86_issue_rate (void); 1212static int ix86_adjust_cost (rtx, rtx, rtx, int); 1213static int ia32_multipass_dfa_lookahead (void); 1214static 
void ix86_init_mmx_sse_builtins (void); 1215static rtx x86_this_parameter (tree); 1216static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, 1217 HOST_WIDE_INT, tree); 1218static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); 1219static void x86_file_start (void); 1220static void ix86_reorg (void); 1221static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*); 1222static tree ix86_build_builtin_va_list (void); 1223static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, 1224 tree, int *, int); 1225static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *); 1226static bool ix86_scalar_mode_supported_p (enum machine_mode); 1227static bool ix86_vector_mode_supported_p (enum machine_mode); 1228 1229static int ix86_address_cost (rtx); 1230static bool ix86_cannot_force_const_mem (rtx); 1231static rtx ix86_delegitimize_address (rtx); 1232 1233static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; 1234 1235struct builtin_description; 1236static rtx ix86_expand_sse_comi (const struct builtin_description *, 1237 tree, rtx); 1238static rtx ix86_expand_sse_compare (const struct builtin_description *, 1239 tree, rtx); 1240static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx); 1241static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int); 1242static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx); 1243static rtx ix86_expand_store_builtin (enum insn_code, tree); 1244static rtx safe_vector_operand (rtx, enum machine_mode); 1245static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *); 1246static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code); 1247static int ix86_fp_comparison_fcomi_cost (enum rtx_code code); 1248static int ix86_fp_comparison_sahf_cost (enum rtx_code code); 1249static int ix86_fp_comparison_cost (enum rtx_code code); 1250static unsigned int ix86_select_alt_pic_regnum (void); 1251static int ix86_save_reg 
(unsigned int, int); 1252static void ix86_compute_frame_layout (struct ix86_frame *); 1253static int ix86_comp_type_attributes (tree, tree); 1254static int ix86_function_regparm (tree, tree); 1255const struct attribute_spec ix86_attribute_table[]; 1256static bool ix86_function_ok_for_sibcall (tree, tree); 1257static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *); 1258static int ix86_value_regno (enum machine_mode, tree, tree); 1259static bool contains_128bit_aligned_vector_p (tree); 1260static rtx ix86_struct_value_rtx (tree, int); 1261static bool ix86_ms_bitfield_layout_p (tree); 1262static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *); 1263static int extended_reg_mentioned_1 (rtx *, void *); 1264static bool ix86_rtx_costs (rtx, int, int, int *); 1265static int min_insn_size (rtx); 1266static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers); 1267static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type); 1268static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, 1269 tree, bool); 1270static void ix86_init_builtins (void); 1271static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int); 1272static const char *ix86_mangle_fundamental_type (tree); 1273static tree ix86_stack_protect_fail (void); 1274static rtx ix86_internal_arg_pointer (void); 1275static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int); 1276 1277/* This function is only used on Solaris. */ 1278static void i386_solaris_elf_named_section (const char *, unsigned int, tree) 1279 ATTRIBUTE_UNUSED; 1280 1281/* Register class used for passing given 64bit part of the argument. 1282 These represent classes as documented by the PS ABI, with the exception 1283 of SSESF, SSEDF classes, that are basically SSE class, just gcc will 1284 use SF or DFmode move instead of DImode to avoid reformatting penalties. 
1285 1286 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves 1287 whenever possible (upper half does contain padding). 1288 */ 1289enum x86_64_reg_class 1290 { 1291 X86_64_NO_CLASS, 1292 X86_64_INTEGER_CLASS, 1293 X86_64_INTEGERSI_CLASS, 1294 X86_64_SSE_CLASS, 1295 X86_64_SSESF_CLASS, 1296 X86_64_SSEDF_CLASS, 1297 X86_64_SSEUP_CLASS, 1298 X86_64_X87_CLASS, 1299 X86_64_X87UP_CLASS, 1300 X86_64_COMPLEX_X87_CLASS, 1301 X86_64_MEMORY_CLASS 1302 }; 1303static const char * const x86_64_reg_class_name[] = { 1304 "no", "integer", "integerSI", "sse", "sseSF", "sseDF", 1305 "sseup", "x87", "x87up", "cplx87", "no" 1306}; 1307 1308#define MAX_CLASSES 4 1309 1310/* Table of constants used by fldpi, fldln2, etc.... */ 1311static REAL_VALUE_TYPE ext_80387_constants_table [5]; 1312static bool ext_80387_constants_init = 0; 1313static void init_ext_80387_constants (void); 1314static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED; 1315static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED; 1316static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED; 1317static section *x86_64_elf_select_section (tree decl, int reloc, 1318 unsigned HOST_WIDE_INT align) 1319 ATTRIBUTE_UNUSED; 1320 1321/* Initialize the GCC target structure. 
*/ 1322#undef TARGET_ATTRIBUTE_TABLE 1323#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table 1324#if TARGET_DLLIMPORT_DECL_ATTRIBUTES 1325# undef TARGET_MERGE_DECL_ATTRIBUTES 1326# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes 1327#endif 1328 1329#undef TARGET_COMP_TYPE_ATTRIBUTES 1330#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes 1331 1332#undef TARGET_INIT_BUILTINS 1333#define TARGET_INIT_BUILTINS ix86_init_builtins 1334#undef TARGET_EXPAND_BUILTIN 1335#define TARGET_EXPAND_BUILTIN ix86_expand_builtin 1336 1337#undef TARGET_ASM_FUNCTION_EPILOGUE 1338#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue 1339 1340#undef TARGET_ENCODE_SECTION_INFO 1341#ifndef SUBTARGET_ENCODE_SECTION_INFO 1342#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info 1343#else 1344#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO 1345#endif 1346 1347#undef TARGET_ASM_OPEN_PAREN 1348#define TARGET_ASM_OPEN_PAREN "" 1349#undef TARGET_ASM_CLOSE_PAREN 1350#define TARGET_ASM_CLOSE_PAREN "" 1351 1352#undef TARGET_ASM_ALIGNED_HI_OP 1353#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT 1354#undef TARGET_ASM_ALIGNED_SI_OP 1355#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG 1356#ifdef ASM_QUAD 1357#undef TARGET_ASM_ALIGNED_DI_OP 1358#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD 1359#endif 1360 1361#undef TARGET_ASM_UNALIGNED_HI_OP 1362#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP 1363#undef TARGET_ASM_UNALIGNED_SI_OP 1364#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP 1365#undef TARGET_ASM_UNALIGNED_DI_OP 1366#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP 1367 1368#undef TARGET_SCHED_ADJUST_COST 1369#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost 1370#undef TARGET_SCHED_ISSUE_RATE 1371#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate 1372#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD 1373#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ 1374 ia32_multipass_dfa_lookahead 1375 1376#undef 
TARGET_FUNCTION_OK_FOR_SIBCALL 1377#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall 1378 1379#ifdef HAVE_AS_TLS 1380#undef TARGET_HAVE_TLS 1381#define TARGET_HAVE_TLS true 1382#endif 1383#undef TARGET_CANNOT_FORCE_CONST_MEM 1384#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem 1385#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P 1386#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true 1387 1388#undef TARGET_DELEGITIMIZE_ADDRESS 1389#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address 1390 1391#undef TARGET_MS_BITFIELD_LAYOUT_P 1392#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p 1393 1394#if TARGET_MACHO 1395#undef TARGET_BINDS_LOCAL_P 1396#define TARGET_BINDS_LOCAL_P darwin_binds_local_p 1397#endif 1398 1399#undef TARGET_ASM_OUTPUT_MI_THUNK 1400#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk 1401#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 1402#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk 1403 1404#undef TARGET_ASM_FILE_START 1405#define TARGET_ASM_FILE_START x86_file_start 1406 1407#undef TARGET_DEFAULT_TARGET_FLAGS 1408#define TARGET_DEFAULT_TARGET_FLAGS \ 1409 (TARGET_DEFAULT \ 1410 | TARGET_64BIT_DEFAULT \ 1411 | TARGET_SUBTARGET_DEFAULT \ 1412 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT) 1413 1414#undef TARGET_HANDLE_OPTION 1415#define TARGET_HANDLE_OPTION ix86_handle_option 1416 1417#undef TARGET_RTX_COSTS 1418#define TARGET_RTX_COSTS ix86_rtx_costs 1419#undef TARGET_ADDRESS_COST 1420#define TARGET_ADDRESS_COST ix86_address_cost 1421 1422#undef TARGET_FIXED_CONDITION_CODE_REGS 1423#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs 1424#undef TARGET_CC_MODES_COMPATIBLE 1425#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible 1426 1427#undef TARGET_MACHINE_DEPENDENT_REORG 1428#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg 1429 1430#undef TARGET_BUILD_BUILTIN_VA_LIST 1431#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list 1432 1433#undef 
TARGET_MD_ASM_CLOBBERS 1434#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers 1435 1436#undef TARGET_PROMOTE_PROTOTYPES 1437#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true 1438#undef TARGET_STRUCT_VALUE_RTX 1439#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx 1440#undef TARGET_SETUP_INCOMING_VARARGS 1441#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs 1442#undef TARGET_MUST_PASS_IN_STACK 1443#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack 1444#undef TARGET_PASS_BY_REFERENCE 1445#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference 1446#undef TARGET_INTERNAL_ARG_POINTER 1447#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer 1448#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC 1449#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec 1450 1451#undef TARGET_GIMPLIFY_VA_ARG_EXPR 1452#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg 1453 1454#undef TARGET_SCALAR_MODE_SUPPORTED_P 1455#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p 1456 1457#undef TARGET_VECTOR_MODE_SUPPORTED_P 1458#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p 1459 1460#ifdef HAVE_AS_TLS 1461#undef TARGET_ASM_OUTPUT_DWARF_DTPREL 1462#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel 1463#endif 1464 1465#ifdef SUBTARGET_INSERT_ATTRIBUTES 1466#undef TARGET_INSERT_ATTRIBUTES 1467#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES 1468#endif 1469 1470#undef TARGET_MANGLE_FUNDAMENTAL_TYPE 1471#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type 1472 1473#undef TARGET_STACK_PROTECT_FAIL 1474#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail 1475 1476#undef TARGET_FUNCTION_VALUE 1477#define TARGET_FUNCTION_VALUE ix86_function_value 1478 1479struct gcc_target targetm = TARGET_INITIALIZER; 1480 1481 1482/* The svr4 ABI for the i386 says that records and unions are returned 1483 in memory. 
*/ 1484#ifndef DEFAULT_PCC_STRUCT_RETURN 1485#define DEFAULT_PCC_STRUCT_RETURN 1 1486#endif 1487 1488/* Implement TARGET_HANDLE_OPTION. */ 1489 1490static bool 1491ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) 1492{ 1493 switch (code) 1494 { 1495 case OPT_m3dnow: 1496 if (!value) 1497 { 1498 target_flags &= ~MASK_3DNOW_A; 1499 target_flags_explicit |= MASK_3DNOW_A; 1500 } 1501 return true; 1502 1503 case OPT_mmmx: 1504 if (!value) 1505 { 1506 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A); 1507 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A; 1508 } 1509 return true; 1510 1511 case OPT_msse: 1512 if (!value) 1513 { 1514 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3); 1515 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSSE3; 1516 } 1517 return true; 1518 1519 case OPT_msse2: 1520 if (!value) 1521 { 1522 target_flags &= ~(MASK_SSE3 | MASK_SSSE3); 1523 target_flags_explicit |= MASK_SSE3 | MASK_SSSE3; 1524 } 1525 return true; 1526 1527 case OPT_msse3: 1528 if (!value) 1529 { 1530 target_flags &= ~MASK_SSSE3; 1531 target_flags_explicit |= MASK_SSSE3; 1532 } 1533 return true; 1534 1535 default: 1536 return true; 1537 } 1538} 1539 1540/* Sometimes certain combinations of command options do not make 1541 sense on a particular target machine. You can define a macro 1542 `OVERRIDE_OPTIONS' to take account of this. This macro, if 1543 defined, is executed once just after all the command options have 1544 been parsed. 1545 1546 Don't use this macro to turn on various extra optimizations for 1547 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ 1548 1549void 1550override_options (void) 1551{ 1552 int i; 1553 int ix86_tune_defaulted = 0; 1554 1555 /* Comes from final.c -- no real reason to change it. */ 1556#define MAX_CODE_ALIGN 16 1557 1558 static struct ptt 1559 { 1560 const struct processor_costs *cost; /* Processor costs */ 1561 const int target_enable; /* Target flags to enable. 
*/ 1562 const int target_disable; /* Target flags to disable. */ 1563 const int align_loop; /* Default alignments. */ 1564 const int align_loop_max_skip; 1565 const int align_jump; 1566 const int align_jump_max_skip; 1567 const int align_func; 1568 } 1569 const processor_target_table[PROCESSOR_max] = 1570 { 1571 {&i386_cost, 0, 0, 4, 3, 4, 3, 4}, 1572 {&i486_cost, 0, 0, 16, 15, 16, 15, 16}, 1573 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16}, 1574 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16}, 1575 {&geode_cost, 0, 0, 0, 0, 0, 0, 0}, 1576 {&k6_cost, 0, 0, 32, 7, 32, 7, 32}, 1577 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16}, 1578 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0}, 1579 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}, 1580 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}, 1581 {&core2_cost, 0, 0, 16, 7, 16, 7, 16}, 1582 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16}, 1583 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16} 1584 }; 1585 1586 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES; 1587 static struct pta 1588 { 1589 const char *const name; /* processor name or nickname. 
*/ 1590 const enum processor_type processor; 1591 const enum pta_flags 1592 { 1593 PTA_SSE = 1, 1594 PTA_SSE2 = 2, 1595 PTA_SSE3 = 4, 1596 PTA_MMX = 8, 1597 PTA_PREFETCH_SSE = 16, 1598 PTA_3DNOW = 32, 1599 PTA_3DNOW_A = 64, 1600 PTA_64BIT = 128, 1601 PTA_SSSE3 = 256 1602 } flags; 1603 } 1604 const processor_alias_table[] = 1605 { 1606 {"i386", PROCESSOR_I386, 0}, 1607 {"i486", PROCESSOR_I486, 0}, 1608 {"i586", PROCESSOR_PENTIUM, 0}, 1609 {"pentium", PROCESSOR_PENTIUM, 0}, 1610 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX}, 1611 {"winchip-c6", PROCESSOR_I486, PTA_MMX}, 1612 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, 1613 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, 1614 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE}, 1615 {"i686", PROCESSOR_PENTIUMPRO, 0}, 1616 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0}, 1617 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX}, 1618 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, 1619 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, 1620 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2}, 1621 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 1622 | PTA_MMX | PTA_PREFETCH_SSE}, 1623 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 1624 | PTA_MMX | PTA_PREFETCH_SSE}, 1625 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 1626 | PTA_MMX | PTA_PREFETCH_SSE}, 1627 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT 1628 | PTA_MMX | PTA_PREFETCH_SSE}, 1629 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 1630 | PTA_64BIT | PTA_MMX 1631 | PTA_PREFETCH_SSE}, 1632 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1633 | PTA_3DNOW_A}, 1634 {"k6", PROCESSOR_K6, PTA_MMX}, 1635 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 1636 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 1637 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1638 | PTA_3DNOW_A}, 1639 
{"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE 1640 | PTA_3DNOW | PTA_3DNOW_A}, 1641 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1642 | PTA_3DNOW_A | PTA_SSE}, 1643 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1644 | PTA_3DNOW_A | PTA_SSE}, 1645 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1646 | PTA_3DNOW_A | PTA_SSE}, 1647 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT 1648 | PTA_SSE | PTA_SSE2 }, 1649 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1650 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1651 {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1652 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 1653 | PTA_SSE3 }, 1654 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1655 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1656 {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1657 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 1658 | PTA_SSE3 }, 1659 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1660 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1661 {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1662 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 1663 | PTA_SSE3 }, 1664 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1665 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1666 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ }, 1667 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ }, 1668 }; 1669 1670 int const pta_size = ARRAY_SIZE (processor_alias_table); 1671 1672#ifdef SUBTARGET_OVERRIDE_OPTIONS 1673 SUBTARGET_OVERRIDE_OPTIONS; 1674#endif 1675 1676#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS 1677 SUBSUBTARGET_OVERRIDE_OPTIONS; 1678#endif 1679 1680 /* -fPIC is the default for x86_64. 
*/ 1681 if (TARGET_MACHO && TARGET_64BIT) 1682 flag_pic = 2; 1683 1684 /* Set the default values for switches whose default depends on TARGET_64BIT 1685 in case they weren't overwritten by command line options. */ 1686 if (TARGET_64BIT) 1687 { 1688 /* Mach-O doesn't support omitting the frame pointer for now. */ 1689 if (flag_omit_frame_pointer == 2) 1690 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1); 1691 if (flag_asynchronous_unwind_tables == 2) 1692 flag_asynchronous_unwind_tables = 1; 1693 if (flag_pcc_struct_return == 2) 1694 flag_pcc_struct_return = 0; 1695 } 1696 else 1697 { 1698 if (flag_omit_frame_pointer == 2) 1699 flag_omit_frame_pointer = 0; 1700 if (flag_asynchronous_unwind_tables == 2) 1701 flag_asynchronous_unwind_tables = 0; 1702 if (flag_pcc_struct_return == 2) 1703 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; 1704 } 1705 1706 /* Need to check -mtune=generic first. */ 1707 if (ix86_tune_string) 1708 { 1709 if (!strcmp (ix86_tune_string, "generic") 1710 || !strcmp (ix86_tune_string, "i686") 1711 /* As special support for cross compilers we read -mtune=native 1712 as -mtune=generic. With native compilers we won't see the 1713 -mtune=native, as it was changed by the driver. */ 1714 || !strcmp (ix86_tune_string, "native")) 1715 { 1716 if (TARGET_64BIT) 1717 ix86_tune_string = "generic64"; 1718 else 1719 ix86_tune_string = "generic32"; 1720 } 1721 else if (!strncmp (ix86_tune_string, "generic", 7)) 1722 error ("bad value (%s) for -mtune= switch", ix86_tune_string); 1723 } 1724 else 1725 { 1726 if (ix86_arch_string) 1727 ix86_tune_string = ix86_arch_string; 1728 if (!ix86_tune_string) 1729 { 1730 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT]; 1731 ix86_tune_defaulted = 1; 1732 } 1733 1734 /* ix86_tune_string is set to ix86_arch_string or defaulted. We 1735 need to use a sensible tune option. 
*/ 1736 if (!strcmp (ix86_tune_string, "generic") 1737 || !strcmp (ix86_tune_string, "x86-64") 1738 || !strcmp (ix86_tune_string, "i686")) 1739 { 1740 if (TARGET_64BIT) 1741 ix86_tune_string = "generic64"; 1742 else 1743 ix86_tune_string = "generic32"; 1744 } 1745 } 1746 if (!strcmp (ix86_tune_string, "x86-64")) 1747 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or " 1748 "-mtune=generic instead as appropriate."); 1749 1750 if (!ix86_arch_string) 1751 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486"; 1752 if (!strcmp (ix86_arch_string, "generic")) 1753 error ("generic CPU can be used only for -mtune= switch"); 1754 if (!strncmp (ix86_arch_string, "generic", 7)) 1755 error ("bad value (%s) for -march= switch", ix86_arch_string); 1756 1757 if (ix86_cmodel_string != 0) 1758 { 1759 if (!strcmp (ix86_cmodel_string, "small")) 1760 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 1761 else if (!strcmp (ix86_cmodel_string, "medium")) 1762 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM; 1763 else if (flag_pic) 1764 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string); 1765 else if (!strcmp (ix86_cmodel_string, "32")) 1766 ix86_cmodel = CM_32; 1767 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic) 1768 ix86_cmodel = CM_KERNEL; 1769 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic) 1770 ix86_cmodel = CM_LARGE; 1771 else 1772 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string); 1773 } 1774 else 1775 { 1776 ix86_cmodel = CM_32; 1777 if (TARGET_64BIT) 1778 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 1779 } 1780 if (ix86_asm_string != 0) 1781 { 1782 if (! 
TARGET_MACHO 1783 && !strcmp (ix86_asm_string, "intel")) 1784 ix86_asm_dialect = ASM_INTEL; 1785 else if (!strcmp (ix86_asm_string, "att")) 1786 ix86_asm_dialect = ASM_ATT; 1787 else 1788 error ("bad value (%s) for -masm= switch", ix86_asm_string); 1789 } 1790 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32)) 1791 error ("code model %qs not supported in the %s bit mode", 1792 ix86_cmodel_string, TARGET_64BIT ? "64" : "32"); 1793 if (ix86_cmodel == CM_LARGE) 1794 sorry ("code model %<large%> not supported yet"); 1795 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0)) 1796 sorry ("%i-bit mode not compiled in", 1797 (target_flags & MASK_64BIT) ? 64 : 32); 1798 1799 for (i = 0; i < pta_size; i++) 1800 if (! strcmp (ix86_arch_string, processor_alias_table[i].name)) 1801 { 1802 ix86_arch = processor_alias_table[i].processor; 1803 /* Default cpu tuning to the architecture. */ 1804 ix86_tune = ix86_arch; 1805 if (processor_alias_table[i].flags & PTA_MMX 1806 && !(target_flags_explicit & MASK_MMX)) 1807 target_flags |= MASK_MMX; 1808 if (processor_alias_table[i].flags & PTA_3DNOW 1809 && !(target_flags_explicit & MASK_3DNOW)) 1810 target_flags |= MASK_3DNOW; 1811 if (processor_alias_table[i].flags & PTA_3DNOW_A 1812 && !(target_flags_explicit & MASK_3DNOW_A)) 1813 target_flags |= MASK_3DNOW_A; 1814 if (processor_alias_table[i].flags & PTA_SSE 1815 && !(target_flags_explicit & MASK_SSE)) 1816 target_flags |= MASK_SSE; 1817 if (processor_alias_table[i].flags & PTA_SSE2 1818 && !(target_flags_explicit & MASK_SSE2)) 1819 target_flags |= MASK_SSE2; 1820 if (processor_alias_table[i].flags & PTA_SSE3 1821 && !(target_flags_explicit & MASK_SSE3)) 1822 target_flags |= MASK_SSE3; 1823 if (processor_alias_table[i].flags & PTA_SSSE3 1824 && !(target_flags_explicit & MASK_SSSE3)) 1825 target_flags |= MASK_SSSE3; 1826 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) 1827 x86_prefetch_sse = true; 1828 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) 
1829 error ("CPU you selected does not support x86-64 " 1830 "instruction set"); 1831 break; 1832 } 1833 1834 if (i == pta_size) 1835 error ("bad value (%s) for -march= switch", ix86_arch_string); 1836 1837 for (i = 0; i < pta_size; i++) 1838 if (! strcmp (ix86_tune_string, processor_alias_table[i].name)) 1839 { 1840 ix86_tune = processor_alias_table[i].processor; 1841 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) 1842 { 1843 if (ix86_tune_defaulted) 1844 { 1845 ix86_tune_string = "x86-64"; 1846 for (i = 0; i < pta_size; i++) 1847 if (! strcmp (ix86_tune_string, 1848 processor_alias_table[i].name)) 1849 break; 1850 ix86_tune = processor_alias_table[i].processor; 1851 } 1852 else 1853 error ("CPU you selected does not support x86-64 " 1854 "instruction set"); 1855 } 1856 /* Intel CPUs have always interpreted SSE prefetch instructions as 1857 NOPs; so, we can enable SSE prefetch instructions even when 1858 -mtune (rather than -march) points us to a processor that has them. 1859 However, the VIA C3 gives a SIGILL, so we only do that for i686 and 1860 higher processors. */ 1861 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE)) 1862 x86_prefetch_sse = true; 1863 break; 1864 } 1865 if (i == pta_size) 1866 error ("bad value (%s) for -mtune= switch", ix86_tune_string); 1867 1868 if (optimize_size) 1869 ix86_cost = &size_cost; 1870 else 1871 ix86_cost = processor_target_table[ix86_tune].cost; 1872 target_flags |= processor_target_table[ix86_tune].target_enable; 1873 target_flags &= ~processor_target_table[ix86_tune].target_disable; 1874 1875 /* Arrange to set up i386_stack_locals for all functions. */ 1876 init_machine_status = ix86_init_machine_status; 1877 1878 /* Validate -mregparm= value. 
*/ 1879 if (ix86_regparm_string) 1880 { 1881 i = atoi (ix86_regparm_string); 1882 if (i < 0 || i > REGPARM_MAX) 1883 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX); 1884 else 1885 ix86_regparm = i; 1886 } 1887 else 1888 if (TARGET_64BIT) 1889 ix86_regparm = REGPARM_MAX; 1890 1891 /* If the user has provided any of the -malign-* options, 1892 warn and use that value only if -falign-* is not set. 1893 Remove this code in GCC 3.2 or later. */ 1894 if (ix86_align_loops_string) 1895 { 1896 warning (0, "-malign-loops is obsolete, use -falign-loops"); 1897 if (align_loops == 0) 1898 { 1899 i = atoi (ix86_align_loops_string); 1900 if (i < 0 || i > MAX_CODE_ALIGN) 1901 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1902 else 1903 align_loops = 1 << i; 1904 } 1905 } 1906 1907 if (ix86_align_jumps_string) 1908 { 1909 warning (0, "-malign-jumps is obsolete, use -falign-jumps"); 1910 if (align_jumps == 0) 1911 { 1912 i = atoi (ix86_align_jumps_string); 1913 if (i < 0 || i > MAX_CODE_ALIGN) 1914 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1915 else 1916 align_jumps = 1 << i; 1917 } 1918 } 1919 1920 if (ix86_align_funcs_string) 1921 { 1922 warning (0, "-malign-functions is obsolete, use -falign-functions"); 1923 if (align_functions == 0) 1924 { 1925 i = atoi (ix86_align_funcs_string); 1926 if (i < 0 || i > MAX_CODE_ALIGN) 1927 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1928 else 1929 align_functions = 1 << i; 1930 } 1931 } 1932 1933 /* Default align_* from the processor table. 
*/ 1934 if (align_loops == 0) 1935 { 1936 align_loops = processor_target_table[ix86_tune].align_loop; 1937 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip; 1938 } 1939 if (align_jumps == 0) 1940 { 1941 align_jumps = processor_target_table[ix86_tune].align_jump; 1942 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip; 1943 } 1944 if (align_functions == 0) 1945 { 1946 align_functions = processor_target_table[ix86_tune].align_func; 1947 } 1948 1949 /* Validate -mbranch-cost= value, or provide default. */ 1950 ix86_branch_cost = ix86_cost->branch_cost; 1951 if (ix86_branch_cost_string) 1952 { 1953 i = atoi (ix86_branch_cost_string); 1954 if (i < 0 || i > 5) 1955 error ("-mbranch-cost=%d is not between 0 and 5", i); 1956 else 1957 ix86_branch_cost = i; 1958 } 1959 if (ix86_section_threshold_string) 1960 { 1961 i = atoi (ix86_section_threshold_string); 1962 if (i < 0) 1963 error ("-mlarge-data-threshold=%d is negative", i); 1964 else 1965 ix86_section_threshold = i; 1966 } 1967 1968 if (ix86_tls_dialect_string) 1969 { 1970 if (strcmp (ix86_tls_dialect_string, "gnu") == 0) 1971 ix86_tls_dialect = TLS_DIALECT_GNU; 1972 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0) 1973 ix86_tls_dialect = TLS_DIALECT_GNU2; 1974 else if (strcmp (ix86_tls_dialect_string, "sun") == 0) 1975 ix86_tls_dialect = TLS_DIALECT_SUN; 1976 else 1977 error ("bad value (%s) for -mtls-dialect= switch", 1978 ix86_tls_dialect_string); 1979 } 1980 1981 /* Keep nonleaf frame pointers. */ 1982 if (flag_omit_frame_pointer) 1983 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; 1984 else if (TARGET_OMIT_LEAF_FRAME_POINTER) 1985 flag_omit_frame_pointer = 1; 1986 1987 /* If we're doing fast math, we don't care about comparison order 1988 wrt NaNs. This lets us use a shorter comparison sequence. 
*/ 1989 if (flag_finite_math_only) 1990 target_flags &= ~MASK_IEEE_FP; 1991 1992 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, 1993 since the insns won't need emulation. */ 1994 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch)) 1995 target_flags &= ~MASK_NO_FANCY_MATH_387; 1996 1997 /* Likewise, if the target doesn't have a 387, or we've specified 1998 software floating point, don't use 387 inline intrinsics. */ 1999 if (!TARGET_80387) 2000 target_flags |= MASK_NO_FANCY_MATH_387; 2001 2002 /* Turn on SSE3 builtins for -mssse3. */ 2003 if (TARGET_SSSE3) 2004 target_flags |= MASK_SSE3; 2005 2006 /* Turn on SSE2 builtins for -msse3. */ 2007 if (TARGET_SSE3) 2008 target_flags |= MASK_SSE2; 2009 2010 /* Turn on SSE builtins for -msse2. */ 2011 if (TARGET_SSE2) 2012 target_flags |= MASK_SSE; 2013 2014 /* Turn on MMX builtins for -msse. */ 2015 if (TARGET_SSE) 2016 { 2017 target_flags |= MASK_MMX & ~target_flags_explicit; 2018 x86_prefetch_sse = true; 2019 } 2020 2021 /* Turn on MMX builtins for 3Dnow. */ 2022 if (TARGET_3DNOW) 2023 target_flags |= MASK_MMX; 2024 2025 if (TARGET_64BIT) 2026 { 2027 if (TARGET_ALIGN_DOUBLE) 2028 error ("-malign-double makes no sense in the 64bit mode"); 2029 if (TARGET_RTD) 2030 error ("-mrtd calling convention not supported in the 64bit mode"); 2031 2032 /* Enable by default the SSE and MMX builtins. Do allow the user to 2033 explicitly disable any of these. In particular, disabling SSE and 2034 MMX for kernel code is extremely useful. */ 2035 target_flags 2036 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE) 2037 & ~target_flags_explicit); 2038 } 2039 else 2040 { 2041 /* i386 ABI does not specify red zone. It still makes sense to use it 2042 when programmer takes care to stack from being destroyed. */ 2043 if (!(target_flags_explicit & MASK_NO_RED_ZONE)) 2044 target_flags |= MASK_NO_RED_ZONE; 2045 } 2046 2047 /* Validate -mpreferred-stack-boundary= value, or provide default. 
2048 The default of 128 bits is for Pentium III's SSE __m128. We can't 2049 change it because of optimize_size. Otherwise, we can't mix object 2050 files compiled with -Os and -On. */ 2051 ix86_preferred_stack_boundary = 128; 2052 if (ix86_preferred_stack_boundary_string) 2053 { 2054 i = atoi (ix86_preferred_stack_boundary_string); 2055 if (i < (TARGET_64BIT ? 4 : 2) || i > 12) 2056 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i, 2057 TARGET_64BIT ? 4 : 2); 2058 else 2059 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT; 2060 } 2061 2062 /* Accept -msseregparm only if at least SSE support is enabled. */ 2063 if (TARGET_SSEREGPARM 2064 && ! TARGET_SSE) 2065 error ("-msseregparm used without SSE enabled"); 2066 2067 ix86_fpmath = TARGET_FPMATH_DEFAULT; 2068 2069 if (ix86_fpmath_string != 0) 2070 { 2071 if (! strcmp (ix86_fpmath_string, "387")) 2072 ix86_fpmath = FPMATH_387; 2073 else if (! strcmp (ix86_fpmath_string, "sse")) 2074 { 2075 if (!TARGET_SSE) 2076 { 2077 warning (0, "SSE instruction set disabled, using 387 arithmetics"); 2078 ix86_fpmath = FPMATH_387; 2079 } 2080 else 2081 ix86_fpmath = FPMATH_SSE; 2082 } 2083 else if (! strcmp (ix86_fpmath_string, "387,sse") 2084 || ! strcmp (ix86_fpmath_string, "sse,387")) 2085 { 2086 if (!TARGET_SSE) 2087 { 2088 warning (0, "SSE instruction set disabled, using 387 arithmetics"); 2089 ix86_fpmath = FPMATH_387; 2090 } 2091 else if (!TARGET_80387) 2092 { 2093 warning (0, "387 instruction set disabled, using SSE arithmetics"); 2094 ix86_fpmath = FPMATH_SSE; 2095 } 2096 else 2097 ix86_fpmath = FPMATH_SSE | FPMATH_387; 2098 } 2099 else 2100 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string); 2101 } 2102 2103 /* If the i387 is disabled, then do not return values in it. 
*/ 2104 if (!TARGET_80387) 2105 target_flags &= ~MASK_FLOAT_RETURNS; 2106 2107 if ((x86_accumulate_outgoing_args & TUNEMASK) 2108 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 2109 && !optimize_size) 2110 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 2111 2112 /* ??? Unwind info is not correct around the CFG unless either a frame 2113 pointer is present or M_A_O_A is set. Fixing this requires rewriting 2114 unwind info generation to be aware of the CFG and propagating states 2115 around edges. */ 2116 if ((flag_unwind_tables || flag_asynchronous_unwind_tables 2117 || flag_exceptions || flag_non_call_exceptions) 2118 && flag_omit_frame_pointer 2119 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) 2120 { 2121 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 2122 warning (0, "unwind tables currently require either a frame pointer " 2123 "or -maccumulate-outgoing-args for correctness"); 2124 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 2125 } 2126 2127 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ 2128 { 2129 char *p; 2130 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); 2131 p = strchr (internal_label_prefix, 'X'); 2132 internal_label_prefix_len = p - internal_label_prefix; 2133 *p = '\0'; 2134 } 2135 2136 /* When scheduling description is not available, disable scheduler pass 2137 so it won't slow down the compilation and make x87 code slower. */ 2138 if (!TARGET_SCHEDULE) 2139 flag_schedule_insns_after_reload = flag_schedule_insns = 0; 2140} 2141 2142/* switch to the appropriate section for output of DECL. 2143 DECL is either a `VAR_DECL' node or a constant of some sort. 2144 RELOC indicates whether forming the initial value of DECL requires 2145 link-time relocations. 
*/

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  /* In the medium code model, data larger than -mlarge-data-threshold
     lives in separate .l* ("large") sections; pick the matching one.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  /* Read-only data: drop SECTION_WRITE.  */
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  /* Small-data sections do not exist on x86-64.  */
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  /* Everything else uses the normal ELF section selection rules.  */
  return default_elf_select_section (decl, reloc, align);
}

/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.
*/

static void
x86_64_elf_unique_section (tree decl, int reloc)
{
  /* Same large-section handling as x86_64_elf_select_section, but here we
     build a per-decl section name (prefix + decl's assembler name).  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  /* Small-data sections do not exist on x86-64.  */
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name;
	  size_t nlen, plen;
	  char *string;
	  plen = strlen (prefix);

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);
	  nlen = strlen (name);

	  /* Concatenate prefix + stripped name (incl. trailing NUL).  */
	  string = alloca (nlen + plen + 1);
	  memcpy (string, prefix, plen);
	  memcpy (string + plen, name, nlen + 1);

	  DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}

#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.
   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  /* Objects above -mlarge-data-threshold in medium code model get
     .largecomm; everything else uses the normal COMMON_ASM_OP.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fprintf (file, ".largecomm\t");
  else
    fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  /* Emit ",<size>,<byte alignment>".  */
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	   size, align / BITS_PER_UNIT);
}

/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  /* Large BSS objects go into .lbss for medium code model; see
     x86_elf_aligned_common above for the matching common handling.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  /* Reserve the space; a zero-sized object still gets one byte so the
     label refers to distinct storage.  */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
#endif

/* Set default optimization-dependent flags; LEVEL is the -O level.
   Runs before override_options.  */
void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  if (TARGET_MACHO)
    /* The Darwin libraries never set errno, so we might as well
       avoid calling them when that's the only reason we would.  */
    flag_errno_math = 0;

  /* The default values of these switches depend on TARGET_64BIT,
     which is not known at this moment.  Mark these values with 2 and
     let the user override them.  In case there is no command line option
     specifying them, we will set the defaults in override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
  SUBTARGET_OPTIMIZATION_OPTIONS;
#endif
}

/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
  /* force_align_arg_pointer says this function realigns the stack at entry.
*/
  /* The attribute name is taken from the variable so the option string and
     the attribute always agree.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL }
};

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  Returns true when a sibcall is safe.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree func;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  */
  if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* Find the function type of the callee: the DECL if we have one,
     otherwise strip the pointer off the CALL_EXPR's function operand.  */
  if (decl)
    func = decl;
  else
    {
      func = TREE_TYPE (TREE_OPERAND (exp, 0));
      if (POINTER_TYPE_P (func))
	func = TREE_TYPE (func);
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), func, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  /* If this call is indirect, we'll need to be able to use a call-clobbered
     register for the address of the target function.  Make sure that all
     such registers are not used for passing parameters.  */
  if (!decl && !TARGET_64BIT)
    {
      tree type;

      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = TREE_OPERAND (exp, 0);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */

      if (ix86_function_regparm (type, NULL) >= 3)
	{
	  /* ??? Need to count the actual number of registers to be used,
	     not the possible number of registers.  Fix later.  */
	  return false;
	}
    }

#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* Dllimport'd functions are also called indirectly.  */
  if (decl && DECL_DLLIMPORT_P (decl)
      && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
    return false;
#endif

  /* If we forced aligned the stack, then sibcalling would unalign the
     stack, which may break the called function.  */
  if (cfun->machine->force_align_arg_pointer)
    return false;

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}

/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
   calling convention attributes;
   arguments as in struct attribute_spec.handler.
*/

static tree
ix86_handle_cconv_attribute (tree *node, tree name,
			     tree args,
			     int flags ATTRIBUTE_UNUSED,
			     bool *no_add_attrs)
{
  /* These attributes only make sense on function types (FIELD_DECL and
     TYPE_DECL are tolerated so the attribute can reach the underlying
     function type later).  */
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qs attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("regparm", name))
    {
      tree cst;

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}

      /* Validate the attribute argument: an integer constant no larger
	 than REGPARM_MAX.  */
      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qs attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}

      /* NOTE(review): compare_tree_int returns nonzero for *any* value
	 different from REGPARM_MAX-1, so this errors for smaller values
	 too, not just larger ones — presumably "> 0" was intended;
	 confirm against upstream before changing.  */
      if (!TARGET_64BIT
	  && lookup_attribute (ix86_force_align_arg_pointer_string,
			       TYPE_ATTRIBUTES (*node))
	  && compare_tree_int (cst, REGPARM_MAX-1))
	{
	  error ("%s functions limited to %d register parameters",
		 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
	}

      return NULL_TREE;
    }

  /* On x86-64 the remaining attributes are meaningless; the 64-bit ABI
     fixes the calling convention.  */
  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qs attribute ignored",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
  if (is_attribute_p ("fastcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and stdcall attributes are not compatible");
	}
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}
    }

  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and fastcall attributes are not compatible");
	}
    }

  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and cdecl attributes are not compatible");
	}
    }

  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (tree type1, tree type2)
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched fastcall/regparm types.
*/ 2594 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1)) 2595 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2))) 2596 || (ix86_function_regparm (type1, NULL) 2597 != ix86_function_regparm (type2, NULL))) 2598 return 0; 2599 2600 /* Check for mismatched sseregparm types. */ 2601 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1)) 2602 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2))) 2603 return 0; 2604 2605 /* Check for mismatched return types (cdecl vs stdcall). */ 2606 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1)) 2607 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2))) 2608 return 0; 2609 2610 return 1; 2611} 2612 2613/* Return the regparm value for a function with the indicated TYPE and DECL. 2614 DECL may be NULL when calling function indirectly 2615 or considering a libcall. */ 2616 2617static int 2618ix86_function_regparm (tree type, tree decl) 2619{ 2620 tree attr; 2621 int regparm = ix86_regparm; 2622 bool user_convention = false; 2623 2624 if (!TARGET_64BIT) 2625 { 2626 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type)); 2627 if (attr) 2628 { 2629 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); 2630 user_convention = true; 2631 } 2632 2633 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) 2634 { 2635 regparm = 2; 2636 user_convention = true; 2637 } 2638 2639 /* Use register calling convention for local functions when possible. */ 2640 if (!TARGET_64BIT && !user_convention && decl 2641 && flag_unit_at_a_time && !profile_flag) 2642 { 2643 struct cgraph_local_info *i = cgraph_local_info (decl); 2644 if (i && i->local) 2645 { 2646 int local_regparm, globals = 0, regno; 2647 2648 /* Make sure no regparm register is taken by a global register 2649 variable. 
*/ 2650 for (local_regparm = 0; local_regparm < 3; local_regparm++) 2651 if (global_regs[local_regparm]) 2652 break; 2653 /* We can't use regparm(3) for nested functions as these use 2654 static chain pointer in third argument. */ 2655 if (local_regparm == 3 2656 && decl_function_context (decl) 2657 && !DECL_NO_STATIC_CHAIN (decl)) 2658 local_regparm = 2; 2659 /* If the function realigns its stackpointer, the 2660 prologue will clobber %ecx. If we've already 2661 generated code for the callee, the callee 2662 DECL_STRUCT_FUNCTION is gone, so we fall back to 2663 scanning the attributes for the self-realigning 2664 property. */ 2665 if ((DECL_STRUCT_FUNCTION (decl) 2666 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer) 2667 || (!DECL_STRUCT_FUNCTION (decl) 2668 && lookup_attribute (ix86_force_align_arg_pointer_string, 2669 TYPE_ATTRIBUTES (TREE_TYPE (decl))))) 2670 local_regparm = 2; 2671 /* Each global register variable increases register preassure, 2672 so the more global reg vars there are, the smaller regparm 2673 optimization use, unless requested by the user explicitly. */ 2674 for (regno = 0; regno < 6; regno++) 2675 if (global_regs[regno]) 2676 globals++; 2677 local_regparm 2678 = globals < local_regparm ? local_regparm - globals : 0; 2679 2680 if (local_regparm > regparm) 2681 regparm = local_regparm; 2682 } 2683 } 2684 } 2685 return regparm; 2686} 2687 2688/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and 2689 DFmode (2) arguments in SSE registers for a function with the 2690 indicated TYPE and DECL. DECL may be NULL when calling function 2691 indirectly or considering a libcall. Otherwise return 0. */ 2692 2693static int 2694ix86_function_sseregparm (tree type, tree decl) 2695{ 2696 /* Use SSE registers to pass SFmode and DFmode arguments if requested 2697 by the sseregparm attribute. 
*/ 2698 if (TARGET_SSEREGPARM 2699 || (type 2700 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type)))) 2701 { 2702 if (!TARGET_SSE) 2703 { 2704 if (decl) 2705 error ("Calling %qD with attribute sseregparm without " 2706 "SSE/SSE2 enabled", decl); 2707 else 2708 error ("Calling %qT with attribute sseregparm without " 2709 "SSE/SSE2 enabled", type); 2710 return 0; 2711 } 2712 2713 return 2; 2714 } 2715 2716 /* For local functions, pass up to SSE_REGPARM_MAX SFmode 2717 (and DFmode for SSE2) arguments in SSE registers, 2718 even for 32-bit targets. */ 2719 if (!TARGET_64BIT && decl 2720 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag) 2721 { 2722 struct cgraph_local_info *i = cgraph_local_info (decl); 2723 if (i && i->local) 2724 return TARGET_SSE2 ? 2 : 1; 2725 } 2726 2727 return 0; 2728} 2729 2730/* Return true if EAX is live at the start of the function. Used by 2731 ix86_expand_prologue to determine if we need special help before 2732 calling allocate_stack_worker. */ 2733 2734static bool 2735ix86_eax_live_at_start_p (void) 2736{ 2737 /* Cheat. Don't bother working forward from ix86_function_regparm 2738 to the function type to whether an actual argument is located in 2739 eax. Instead just look at cfg info, which is still close enough 2740 to correct at this point. This gives false positives for broken 2741 functions that might use uninitialized data that happens to be 2742 allocated in eax, but who cares? */ 2743 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0); 2744} 2745 2746/* Value is the number of bytes of arguments automatically 2747 popped when returning from a subroutine call. 2748 FUNDECL is the declaration node of the function (as a tree), 2749 FUNTYPE is the data type of the function (as a tree), 2750 or for a library call it is an identifier node for the subroutine name. 2751 SIZE is the number of bytes of arguments passed on the stack. 
2752 2753 On the 80386, the RTD insn may be used to pop them if the number 2754 of args is fixed, but if the number is variable then the caller 2755 must pop them all. RTD can't be used for library calls now 2756 because the library is compiled with the Unix compiler. 2757 Use of RTD is a selectable option, since it is incompatible with 2758 standard Unix calling sequences. If the option is not selected, 2759 the caller must always pop the args. 2760 2761 The attribute stdcall is equivalent to RTD on a per module basis. */ 2762 2763int 2764ix86_return_pops_args (tree fundecl, tree funtype, int size) 2765{ 2766 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE); 2767 2768 /* Cdecl functions override -mrtd, and never pop the stack. */ 2769 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) { 2770 2771 /* Stdcall and fastcall functions will pop the stack if not 2772 variable args. */ 2773 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)) 2774 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))) 2775 rtd = 1; 2776 2777 if (rtd 2778 && (TYPE_ARG_TYPES (funtype) == NULL_TREE 2779 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype))) 2780 == void_type_node))) 2781 return size; 2782 } 2783 2784 /* Lose any fake structure return argument if it is passed on the stack. */ 2785 if (aggregate_value_p (TREE_TYPE (funtype), fundecl) 2786 && !TARGET_64BIT 2787 && !KEEP_AGGREGATE_RETURN_POINTER) 2788 { 2789 int nregs = ix86_function_regparm (funtype, fundecl); 2790 2791 if (!nregs) 2792 return GET_MODE_SIZE (Pmode); 2793 } 2794 2795 return 0; 2796} 2797 2798/* Argument support functions. */ 2799 2800/* Return true when register may be used to pass function parameters. 
*/ 2801bool 2802ix86_function_arg_regno_p (int regno) 2803{ 2804 int i; 2805 if (!TARGET_64BIT) 2806 { 2807 if (TARGET_MACHO) 2808 return (regno < REGPARM_MAX 2809 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno])); 2810 else 2811 return (regno < REGPARM_MAX 2812 || (TARGET_MMX && MMX_REGNO_P (regno) 2813 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX)) 2814 || (TARGET_SSE && SSE_REGNO_P (regno) 2815 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))); 2816 } 2817 2818 if (TARGET_MACHO) 2819 { 2820 if (SSE_REGNO_P (regno) && TARGET_SSE) 2821 return true; 2822 } 2823 else 2824 { 2825 if (TARGET_SSE && SSE_REGNO_P (regno) 2826 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)) 2827 return true; 2828 } 2829 /* RAX is used as hidden argument to va_arg functions. */ 2830 if (!regno) 2831 return true; 2832 for (i = 0; i < REGPARM_MAX; i++) 2833 if (regno == x86_64_int_parameter_registers[i]) 2834 return true; 2835 return false; 2836} 2837 2838/* Return if we do not know how to pass TYPE solely in registers. */ 2839 2840static bool 2841ix86_must_pass_in_stack (enum machine_mode mode, tree type) 2842{ 2843 if (must_pass_in_stack_var_size_or_pad (mode, type)) 2844 return true; 2845 2846 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out! 2847 The layout_type routine is crafty and tries to trick us into passing 2848 currently unsupported vector types on the stack by using TImode. */ 2849 return (!TARGET_64BIT && mode == TImode 2850 && type && TREE_CODE (type) != VECTOR_TYPE); 2851} 2852 2853/* Initialize a variable CUM of type CUMULATIVE_ARGS 2854 for a call to a function whose data type is FNTYPE. 2855 For a library call, FNTYPE is 0. 
*/ 2856 2857void 2858init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ 2859 tree fntype, /* tree ptr for function decl */ 2860 rtx libname, /* SYMBOL_REF of library name or 0 */ 2861 tree fndecl) 2862{ 2863 static CUMULATIVE_ARGS zero_cum; 2864 tree param, next_param; 2865 2866 if (TARGET_DEBUG_ARG) 2867 { 2868 fprintf (stderr, "\ninit_cumulative_args ("); 2869 if (fntype) 2870 fprintf (stderr, "fntype code = %s, ret code = %s", 2871 tree_code_name[(int) TREE_CODE (fntype)], 2872 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]); 2873 else 2874 fprintf (stderr, "no fntype"); 2875 2876 if (libname) 2877 fprintf (stderr, ", libname = %s", XSTR (libname, 0)); 2878 } 2879 2880 *cum = zero_cum; 2881 2882 /* Set up the number of registers to use for passing arguments. */ 2883 cum->nregs = ix86_regparm; 2884 if (TARGET_SSE) 2885 cum->sse_nregs = SSE_REGPARM_MAX; 2886 if (TARGET_MMX) 2887 cum->mmx_nregs = MMX_REGPARM_MAX; 2888 cum->warn_sse = true; 2889 cum->warn_mmx = true; 2890 cum->maybe_vaarg = false; 2891 2892 /* Use ecx and edx registers if function has fastcall attribute, 2893 else look for regparm information. */ 2894 if (fntype && !TARGET_64BIT) 2895 { 2896 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype))) 2897 { 2898 cum->nregs = 2; 2899 cum->fastcall = 1; 2900 } 2901 else 2902 cum->nregs = ix86_function_regparm (fntype, fndecl); 2903 } 2904 2905 /* Set up the number of SSE registers used for passing SFmode 2906 and DFmode arguments. Warn for mismatching ABI. */ 2907 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl); 2908 2909 /* Determine if this function has variable arguments. This is 2910 indicated by the last argument being 'void_type_mode' if there 2911 are no variable arguments. If there are variable arguments, then 2912 we won't pass anything in registers in 32-bit mode. */ 2913 2914 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs) 2915 { 2916 for (param = (fntype) ? 
TYPE_ARG_TYPES (fntype) : 0; 2917 param != 0; param = next_param) 2918 { 2919 next_param = TREE_CHAIN (param); 2920 if (next_param == 0 && TREE_VALUE (param) != void_type_node) 2921 { 2922 if (!TARGET_64BIT) 2923 { 2924 cum->nregs = 0; 2925 cum->sse_nregs = 0; 2926 cum->mmx_nregs = 0; 2927 cum->warn_sse = 0; 2928 cum->warn_mmx = 0; 2929 cum->fastcall = 0; 2930 cum->float_in_sse = 0; 2931 } 2932 cum->maybe_vaarg = true; 2933 } 2934 } 2935 } 2936 if ((!fntype && !libname) 2937 || (fntype && !TYPE_ARG_TYPES (fntype))) 2938 cum->maybe_vaarg = true; 2939 2940 if (TARGET_DEBUG_ARG) 2941 fprintf (stderr, ", nregs=%d )\n", cum->nregs); 2942 2943 return; 2944} 2945 2946/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE. 2947 But in the case of vector types, it is some vector mode. 2948 2949 When we have only some of our vector isa extensions enabled, then there 2950 are some modes for which vector_mode_supported_p is false. For these 2951 modes, the generic vector support in gcc will choose some non-vector mode 2952 in order to implement the type. By computing the natural mode, we'll 2953 select the proper ABI location for the operand and not depend on whatever 2954 the middle-end decides to do with these vector types. */ 2955 2956static enum machine_mode 2957type_natural_mode (tree type) 2958{ 2959 enum machine_mode mode = TYPE_MODE (type); 2960 2961 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode)) 2962 { 2963 HOST_WIDE_INT size = int_size_in_bytes (type); 2964 if ((size == 8 || size == 16) 2965 /* ??? Generic code allows us to create width 1 vectors. Ignore. */ 2966 && TYPE_VECTOR_SUBPARTS (type) > 1) 2967 { 2968 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); 2969 2970 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) 2971 mode = MIN_MODE_VECTOR_FLOAT; 2972 else 2973 mode = MIN_MODE_VECTOR_INT; 2974 2975 /* Get the mode which has this inner mode and number of units. 
*/ 2976 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) 2977 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type) 2978 && GET_MODE_INNER (mode) == innermode) 2979 return mode; 2980 2981 gcc_unreachable (); 2982 } 2983 } 2984 2985 return mode; 2986} 2987 2988/* We want to pass a value in REGNO whose "natural" mode is MODE. However, 2989 this may not agree with the mode that the type system has chosen for the 2990 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can 2991 go ahead and use it. Otherwise we have to build a PARALLEL instead. */ 2992 2993static rtx 2994gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode, 2995 unsigned int regno) 2996{ 2997 rtx tmp; 2998 2999 if (orig_mode != BLKmode) 3000 tmp = gen_rtx_REG (orig_mode, regno); 3001 else 3002 { 3003 tmp = gen_rtx_REG (mode, regno); 3004 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx); 3005 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp)); 3006 } 3007 3008 return tmp; 3009} 3010 3011/* x86-64 register passing implementation. See x86-64 ABI for details. Goal 3012 of this code is to classify each 8bytes of incoming argument by the register 3013 class and assign registers accordingly. */ 3014 3015/* Return the union class of CLASS1 and CLASS2. 3016 See the x86-64 PS ABI for details. */ 3017 3018static enum x86_64_reg_class 3019merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) 3020{ 3021 /* Rule #1: If both classes are equal, this is the resulting class. */ 3022 if (class1 == class2) 3023 return class1; 3024 3025 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is 3026 the other class. */ 3027 if (class1 == X86_64_NO_CLASS) 3028 return class2; 3029 if (class2 == X86_64_NO_CLASS) 3030 return class1; 3031 3032 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. 
*/ 3033 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) 3034 return X86_64_MEMORY_CLASS; 3035 3036 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ 3037 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) 3038 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) 3039 return X86_64_INTEGERSI_CLASS; 3040 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS 3041 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) 3042 return X86_64_INTEGER_CLASS; 3043 3044 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, 3045 MEMORY is used. */ 3046 if (class1 == X86_64_X87_CLASS 3047 || class1 == X86_64_X87UP_CLASS 3048 || class1 == X86_64_COMPLEX_X87_CLASS 3049 || class2 == X86_64_X87_CLASS 3050 || class2 == X86_64_X87UP_CLASS 3051 || class2 == X86_64_COMPLEX_X87_CLASS) 3052 return X86_64_MEMORY_CLASS; 3053 3054 /* Rule #6: Otherwise class SSE is used. */ 3055 return X86_64_SSE_CLASS; 3056} 3057 3058/* Classify the argument of type TYPE and mode MODE. 3059 CLASSES will be filled by the register class used to pass each word 3060 of the operand. The number of words is returned. In case the parameter 3061 should be passed in memory, 0 is returned. As a special case for zero 3062 sized containers, classes[0] will be NO_CLASS and 1 is returned. 3063 3064 BIT_OFFSET is used internally for handling records and specifies offset 3065 of the offset in bits modulo 256 to avoid overflow cases. 3066 3067 See the x86-64 PS ABI for details. 3068*/ 3069 3070static int 3071classify_argument (enum machine_mode mode, tree type, 3072 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) 3073{ 3074 HOST_WIDE_INT bytes = 3075 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 3076 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 3077 3078 /* Variable sized entities are always passed/returned in memory. 
*/ 3079 if (bytes < 0) 3080 return 0; 3081 3082 if (mode != VOIDmode 3083 && targetm.calls.must_pass_in_stack (mode, type)) 3084 return 0; 3085 3086 if (type && AGGREGATE_TYPE_P (type)) 3087 { 3088 int i; 3089 tree field; 3090 enum x86_64_reg_class subclasses[MAX_CLASSES]; 3091 3092 /* On x86-64 we pass structures larger than 16 bytes on the stack. */ 3093 if (bytes > 16) 3094 return 0; 3095 3096 for (i = 0; i < words; i++) 3097 classes[i] = X86_64_NO_CLASS; 3098 3099 /* Zero sized arrays or structures are NO_CLASS. We return 0 to 3100 signalize memory class, so handle it as special case. */ 3101 if (!words) 3102 { 3103 classes[0] = X86_64_NO_CLASS; 3104 return 1; 3105 } 3106 3107 /* Classify each field of record and merge classes. */ 3108 switch (TREE_CODE (type)) 3109 { 3110 case RECORD_TYPE: 3111 /* For classes first merge in the field of the subclasses. */ 3112 if (TYPE_BINFO (type)) 3113 { 3114 tree binfo, base_binfo; 3115 int basenum; 3116 3117 for (binfo = TYPE_BINFO (type), basenum = 0; 3118 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++) 3119 { 3120 int num; 3121 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8; 3122 tree type = BINFO_TYPE (base_binfo); 3123 3124 num = classify_argument (TYPE_MODE (type), 3125 type, subclasses, 3126 (offset + bit_offset) % 256); 3127 if (!num) 3128 return 0; 3129 for (i = 0; i < num; i++) 3130 { 3131 int pos = (offset + (bit_offset % 64)) / 8 / 8; 3132 classes[i + pos] = 3133 merge_classes (subclasses[i], classes[i + pos]); 3134 } 3135 } 3136 } 3137 /* And now merge the fields of structure. */ 3138 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 3139 { 3140 if (TREE_CODE (field) == FIELD_DECL) 3141 { 3142 int num; 3143 3144 if (TREE_TYPE (field) == error_mark_node) 3145 continue; 3146 3147 /* Bitfields are always classified as integer. Handle them 3148 early, since later code would consider them to be 3149 misaligned integers. 
*/ 3150 if (DECL_BIT_FIELD (field)) 3151 { 3152 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; 3153 i < ((int_bit_position (field) + (bit_offset % 64)) 3154 + tree_low_cst (DECL_SIZE (field), 0) 3155 + 63) / 8 / 8; i++) 3156 classes[i] = 3157 merge_classes (X86_64_INTEGER_CLASS, 3158 classes[i]); 3159 } 3160 else 3161 { 3162 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 3163 TREE_TYPE (field), subclasses, 3164 (int_bit_position (field) 3165 + bit_offset) % 256); 3166 if (!num) 3167 return 0; 3168 for (i = 0; i < num; i++) 3169 { 3170 int pos = 3171 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; 3172 classes[i + pos] = 3173 merge_classes (subclasses[i], classes[i + pos]); 3174 } 3175 } 3176 } 3177 } 3178 break; 3179 3180 case ARRAY_TYPE: 3181 /* Arrays are handled as small records. */ 3182 { 3183 int num; 3184 num = classify_argument (TYPE_MODE (TREE_TYPE (type)), 3185 TREE_TYPE (type), subclasses, bit_offset); 3186 if (!num) 3187 return 0; 3188 3189 /* The partial classes are now full classes. */ 3190 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) 3191 subclasses[0] = X86_64_SSE_CLASS; 3192 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4) 3193 subclasses[0] = X86_64_INTEGER_CLASS; 3194 3195 for (i = 0; i < words; i++) 3196 classes[i] = subclasses[i % num]; 3197 3198 break; 3199 } 3200 case UNION_TYPE: 3201 case QUAL_UNION_TYPE: 3202 /* Unions are similar to RECORD_TYPE but offset is always 0. 3203 */ 3204 3205 /* Unions are not derived. 
*/ 3206 gcc_assert (!TYPE_BINFO (type) 3207 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type))); 3208 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 3209 { 3210 if (TREE_CODE (field) == FIELD_DECL) 3211 { 3212 int num; 3213 3214 if (TREE_TYPE (field) == error_mark_node) 3215 continue; 3216 3217 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 3218 TREE_TYPE (field), subclasses, 3219 bit_offset); 3220 if (!num) 3221 return 0; 3222 for (i = 0; i < num; i++) 3223 classes[i] = merge_classes (subclasses[i], classes[i]); 3224 } 3225 } 3226 break; 3227 3228 default: 3229 gcc_unreachable (); 3230 } 3231 3232 /* Final merger cleanup. */ 3233 for (i = 0; i < words; i++) 3234 { 3235 /* If one class is MEMORY, everything should be passed in 3236 memory. */ 3237 if (classes[i] == X86_64_MEMORY_CLASS) 3238 return 0; 3239 3240 /* The X86_64_SSEUP_CLASS should be always preceded by 3241 X86_64_SSE_CLASS. */ 3242 if (classes[i] == X86_64_SSEUP_CLASS 3243 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS)) 3244 classes[i] = X86_64_SSE_CLASS; 3245 3246 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */ 3247 if (classes[i] == X86_64_X87UP_CLASS 3248 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS)) 3249 classes[i] = X86_64_SSE_CLASS; 3250 } 3251 return words; 3252 } 3253 3254 /* Compute alignment needed. We align all types to natural boundaries with 3255 exception of XFmode that is aligned to 64bits. */ 3256 if (mode != VOIDmode && mode != BLKmode) 3257 { 3258 int mode_alignment = GET_MODE_BITSIZE (mode); 3259 3260 if (mode == XFmode) 3261 mode_alignment = 128; 3262 else if (mode == XCmode) 3263 mode_alignment = 256; 3264 if (COMPLEX_MODE_P (mode)) 3265 mode_alignment /= 2; 3266 /* Misaligned fields are always returned in memory. 
*/ 3267 if (bit_offset % mode_alignment) 3268 return 0; 3269 } 3270 3271 /* for V1xx modes, just use the base mode */ 3272 if (VECTOR_MODE_P (mode) 3273 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes) 3274 mode = GET_MODE_INNER (mode); 3275 3276 /* Classification of atomic types. */ 3277 switch (mode) 3278 { 3279 case SDmode: 3280 case DDmode: 3281 classes[0] = X86_64_SSE_CLASS; 3282 return 1; 3283 case TDmode: 3284 classes[0] = X86_64_SSE_CLASS; 3285 classes[1] = X86_64_SSEUP_CLASS; 3286 return 2; 3287 case DImode: 3288 case SImode: 3289 case HImode: 3290 case QImode: 3291 case CSImode: 3292 case CHImode: 3293 case CQImode: 3294 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) 3295 classes[0] = X86_64_INTEGERSI_CLASS; 3296 else 3297 classes[0] = X86_64_INTEGER_CLASS; 3298 return 1; 3299 case CDImode: 3300 case TImode: 3301 classes[0] = classes[1] = X86_64_INTEGER_CLASS; 3302 return 2; 3303 case CTImode: 3304 return 0; 3305 case SFmode: 3306 if (!(bit_offset % 64)) 3307 classes[0] = X86_64_SSESF_CLASS; 3308 else 3309 classes[0] = X86_64_SSE_CLASS; 3310 return 1; 3311 case DFmode: 3312 classes[0] = X86_64_SSEDF_CLASS; 3313 return 1; 3314 case XFmode: 3315 classes[0] = X86_64_X87_CLASS; 3316 classes[1] = X86_64_X87UP_CLASS; 3317 return 2; 3318 case TFmode: 3319 classes[0] = X86_64_SSE_CLASS; 3320 classes[1] = X86_64_SSEUP_CLASS; 3321 return 2; 3322 case SCmode: 3323 classes[0] = X86_64_SSE_CLASS; 3324 return 1; 3325 case DCmode: 3326 classes[0] = X86_64_SSEDF_CLASS; 3327 classes[1] = X86_64_SSEDF_CLASS; 3328 return 2; 3329 case XCmode: 3330 classes[0] = X86_64_COMPLEX_X87_CLASS; 3331 return 1; 3332 case TCmode: 3333 /* This modes is larger than 16 bytes. 
*/ 3334 return 0; 3335 case V4SFmode: 3336 case V4SImode: 3337 case V16QImode: 3338 case V8HImode: 3339 case V2DFmode: 3340 case V2DImode: 3341 classes[0] = X86_64_SSE_CLASS; 3342 classes[1] = X86_64_SSEUP_CLASS; 3343 return 2; 3344 case V2SFmode: 3345 case V2SImode: 3346 case V4HImode: 3347 case V8QImode: 3348 classes[0] = X86_64_SSE_CLASS; 3349 return 1; 3350 case BLKmode: 3351 case VOIDmode: 3352 return 0; 3353 default: 3354 gcc_assert (VECTOR_MODE_P (mode)); 3355 3356 if (bytes > 16) 3357 return 0; 3358 3359 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT); 3360 3361 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) 3362 classes[0] = X86_64_INTEGERSI_CLASS; 3363 else 3364 classes[0] = X86_64_INTEGER_CLASS; 3365 classes[1] = X86_64_INTEGER_CLASS; 3366 return 1 + (bytes > 8); 3367 } 3368} 3369 3370/* Examine the argument and return set number of register required in each 3371 class. Return 0 iff parameter should be passed in memory. */ 3372static int 3373examine_argument (enum machine_mode mode, tree type, int in_return, 3374 int *int_nregs, int *sse_nregs) 3375{ 3376 enum x86_64_reg_class class[MAX_CLASSES]; 3377 int n = classify_argument (mode, type, class, 0); 3378 3379 *int_nregs = 0; 3380 *sse_nregs = 0; 3381 if (!n) 3382 return 0; 3383 for (n--; n >= 0; n--) 3384 switch (class[n]) 3385 { 3386 case X86_64_INTEGER_CLASS: 3387 case X86_64_INTEGERSI_CLASS: 3388 (*int_nregs)++; 3389 break; 3390 case X86_64_SSE_CLASS: 3391 case X86_64_SSESF_CLASS: 3392 case X86_64_SSEDF_CLASS: 3393 (*sse_nregs)++; 3394 break; 3395 case X86_64_NO_CLASS: 3396 case X86_64_SSEUP_CLASS: 3397 break; 3398 case X86_64_X87_CLASS: 3399 case X86_64_X87UP_CLASS: 3400 if (!in_return) 3401 return 0; 3402 break; 3403 case X86_64_COMPLEX_X87_CLASS: 3404 return in_return ? 2 : 0; 3405 case X86_64_MEMORY_CLASS: 3406 gcc_unreachable (); 3407 } 3408 return 1; 3409} 3410 3411/* Construct container for the argument used by GCC interface. See 3412 FUNCTION_ARG for the detailed description. 
*/ 3413 3414static rtx 3415construct_container (enum machine_mode mode, enum machine_mode orig_mode, 3416 tree type, int in_return, int nintregs, int nsseregs, 3417 const int *intreg, int sse_regno) 3418{ 3419 /* The following variables hold the static issued_error state. */ 3420 static bool issued_sse_arg_error; 3421 static bool issued_sse_ret_error; 3422 static bool issued_x87_ret_error; 3423 3424 enum machine_mode tmpmode; 3425 int bytes = 3426 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 3427 enum x86_64_reg_class class[MAX_CLASSES]; 3428 int n; 3429 int i; 3430 int nexps = 0; 3431 int needed_sseregs, needed_intregs; 3432 rtx exp[MAX_CLASSES]; 3433 rtx ret; 3434 3435 n = classify_argument (mode, type, class, 0); 3436 if (TARGET_DEBUG_ARG) 3437 { 3438 if (!n) 3439 fprintf (stderr, "Memory class\n"); 3440 else 3441 { 3442 fprintf (stderr, "Classes:"); 3443 for (i = 0; i < n; i++) 3444 { 3445 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]); 3446 } 3447 fprintf (stderr, "\n"); 3448 } 3449 } 3450 if (!n) 3451 return NULL; 3452 if (!examine_argument (mode, type, in_return, &needed_intregs, 3453 &needed_sseregs)) 3454 return NULL; 3455 if (needed_intregs > nintregs || needed_sseregs > nsseregs) 3456 return NULL; 3457 3458 /* We allowed the user to turn off SSE for kernel mode. Don't crash if 3459 some less clueful developer tries to use floating-point anyway. */ 3460 if (needed_sseregs && !TARGET_SSE) 3461 { 3462 if (in_return) 3463 { 3464 if (!issued_sse_ret_error) 3465 { 3466 error ("SSE register return with SSE disabled"); 3467 issued_sse_ret_error = true; 3468 } 3469 } 3470 else if (!issued_sse_arg_error) 3471 { 3472 error ("SSE register argument with SSE disabled"); 3473 issued_sse_arg_error = true; 3474 } 3475 return NULL; 3476 } 3477 3478 /* Likewise, error if the ABI requires us to return values in the 3479 x87 registers and the user specified -mno-80387. 
*/
  /* x87 return values are unusable when the x87 unit is disabled; diagnose
     once per compilation and force the value into memory (NULL).  */
  if (!TARGET_80387 && in_return)
    for (i = 0; i < n; i++)
      if (class[i] == X86_64_X87_CLASS
	  || class[i] == X86_64_X87UP_CLASS
	  || class[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (class[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode, *intreg),
					     GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (SFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (DFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  /* A following SSEUP class means the value occupies a full
	     16-byte XMM register, so use TImode and skip the pair.  */
	  if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
	    tmpmode = TImode;
	  else
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  if (tmpmode == TImode)
	    i++;
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
		      tree type, int named)
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type);

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
	     "mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, cum->sse_nregs,
	     GET_MODE_NAME (mode), named);

  if (TARGET_64BIT)
    {
      int int_nregs, sse_nregs;
      /* If the argument cannot go in registers at all, it is passed on
	 the stack; otherwise consume the required GP/SSE registers.  */
      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
	cum->words += words;
      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
	{
	  cum->nregs -= int_nregs;
	  cum->sse_nregs -= sse_nregs;
	  cum->regno += int_nregs;
	  cum->sse_regno += sse_nregs;
	}
      else
	cum->words += words;
    }
  else
    {
      switch (mode)
	{
	default:
	  break;

	case BLKmode:
	  if (bytes < 0)
	    break;
	  /* FALLTHRU */

	case DImode:
	case SImode:
	case HImode:
	case QImode:
	  cum->words += words;
	  cum->nregs -= words;
	  cum->regno += words;

	  if (cum->nregs <= 0)
	    {
	      cum->nregs = 0;
	      cum->regno = 0;
	    }
	  break;

	case DFmode:
	  if (cum->float_in_sse < 2)
	    break;
	  /* FALLTHRU */
	case SFmode:
	  if (cum->float_in_sse < 1)
	    break;
	  /* FALLTHRU */

	case TImode:
	case V16QImode:
	case V8HImode:
	case V4SImode:
	case V2DImode:
	case V4SFmode:
	case V2DFmode:
	  if (!type || !AGGREGATE_TYPE_P (type))
	    {
	      cum->sse_words += words;
	      cum->sse_nregs -= 1;
	      cum->sse_regno += 1;
	      if (cum->sse_nregs <= 0)
		{
		  cum->sse_nregs = 0;
		  cum->sse_regno = 0;
		}
	    }
	  break;

	case V8QImode:
	case V4HImode:
	case V2SImode:
	case V2SFmode:
	  if (!type || !AGGREGATE_TYPE_P (type))
	    {
	      cum->mmx_words += words;
	      cum->mmx_nregs -= 1;
	      cum->mmx_regno += 1;
	      if (cum->mmx_nregs <= 0)
		{
		  cum->mmx_nregs = 0;
		  cum->mmx_regno = 0;
		}
	    }
	  break;
	}
    }
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
	      tree type, int named)
{
  enum machine_mode mode = orig_mode;
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  static bool warnedsse, warnedmmx;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type);

  /* Handle a hidden AL argument containing number of registers for varargs
     x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
     any AL settings.  */
  if (mode == VOIDmode)
    {
      if (TARGET_64BIT)
	return GEN_INT (cum->maybe_vaarg
			? (cum->sse_nregs < 0
			   ? SSE_REGPARM_MAX
			   : cum->sse_regno)
			: -1);
      else
	return constm1_rtx;
    }
  if (TARGET_64BIT)
    ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
			       cum->sse_nregs,
			       &x86_64_int_parameter_registers [cum->regno],
			       cum->sse_regno);
  else
    switch (mode)
      {
	/* For now, pass fp/complex values on the stack.
*/
      default:
	break;

      case BLKmode:
	if (bytes < 0)
	  break;
	/* FALLTHRU */
      case DImode:
      case SImode:
      case HImode:
      case QImode:
	if (words <= cum->nregs)
	  {
	    int regno = cum->regno;

	    /* Fastcall allocates the first two DWORD (SImode) or
	       smaller arguments to ECX and EDX.  */
	    if (cum->fastcall)
	      {
		if (mode == BLKmode || mode == DImode)
		  break;

		/* ECX not EAX is the first allocated register.  */
		if (regno == 0)
		  regno = 2;
	      }
	    ret = gen_rtx_REG (mode, regno);
	  }
	break;
      case DFmode:
	if (cum->float_in_sse < 2)
	  break;
	/* FALLTHRU */
      case SFmode:
	if (cum->float_in_sse < 1)
	  break;
	/* FALLTHRU */
      case TImode:
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
      case V4SFmode:
      case V2DFmode:
	if (!type || !AGGREGATE_TYPE_P (type))
	  {
	    /* Warn once per compilation: passing vectors in SSE registers
	       without SSE enabled is an ABI-affecting decision.  */
	    if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	      {
		warnedsse = true;
		warning (0, "SSE vector argument without SSE enabled "
			 "changes the ABI");
	      }
	    if (cum->sse_nregs)
	      ret = gen_reg_or_parallel (mode, orig_mode,
					 cum->sse_regno + FIRST_SSE_REG);
	  }
	break;
      case V8QImode:
      case V4HImode:
      case V2SImode:
      case V2SFmode:
	if (!type || !AGGREGATE_TYPE_P (type))
	  {
	    if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	      {
		warnedmmx = true;
		warning (0, "MMX vector argument without MMX enabled "
			 "changes the ABI");
	      }
	    if (cum->mmx_nregs)
	      ret = gen_reg_or_parallel (mode, orig_mode,
					 cum->mmx_regno + FIRST_MMX_REG);
	  }
	break;
      }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	print_simple_rtl (stderr, ret);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}

/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			tree type, bool named ATTRIBUTE_UNUSED)
{
  if (!TARGET_64BIT)
    return 0;

  /* Variable-sized types (int_size_in_bytes == -1) are passed by
     reference on x86-64.  */
  if (type && int_size_in_bytes (type) == -1)
    {
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference\n");
      return 1;
    }

  return 0;
}

/* Return true when TYPE should be 128bit aligned for 32bit argument passing
   ABI.  Only called if TARGET_SSE.  */
static bool
contains_128bit_aligned_vector_p (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (SSE_REG_MODE_P (mode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Check C++ base classes first, if any.  */
	    if (TYPE_BINFO (type))
	      {
		tree binfo, base_binfo;
		int i;

		for (binfo = TYPE_BINFO (type), i = 0;
		     BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
		  if (contains_128bit_aligned_vector_p
		      (BINFO_TYPE (base_binfo)))
		    return true;
	      }
	    /* And now merge the fields of structure.
*/
	    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}

/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

int
ix86_function_arg_boundary (enum machine_mode mode, tree type)
{
  int align;
  if (type)
    align = TYPE_ALIGN (type);
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  if (!TARGET_64BIT)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!TARGET_SSE)
	align = PARM_BOUNDARY;
      else if (!type)
	{
	  if (!SSE_REG_MODE_P (mode))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!contains_128bit_aligned_vector_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  /* Argument alignment is capped at 128 bits.  */
  if (align > 128)
    align = 128;
  return align;
}

/* Return true if N is a possible register number of function value.
*/
bool
ix86_function_value_regno_p (int regno)
{
  if (TARGET_MACHO)
    {
      if (!TARGET_64BIT)
	{
	  return ((regno) == 0
		  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
		  || ((regno) == FIRST_SSE_REG && TARGET_SSE));
	}
      return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
	      || ((regno) == FIRST_SSE_REG && TARGET_SSE)
	      || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
    }
  else
    {
      /* %eax always; %st(0) and %xmm0 when the respective unit is used
	 for return values.  */
      if (regno == 0
	  || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
	  || (regno == FIRST_SSE_REG && TARGET_SSE))
	return true;

      /* %mm0 only on 32-bit targets with MMX.  */
      if (!TARGET_64BIT
	  && (regno == FIRST_MMX_REG && TARGET_MMX))
	return true;

      return false;
    }
}

/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */
rtx
ix86_function_value (tree valtype, tree fntype_or_decl,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode natmode = type_natural_mode (valtype);

  if (TARGET_64BIT)
    {
      rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
				     1, REGPARM_MAX, SSE_REGPARM_MAX,
				     x86_64_int_return_registers, 0);
      /* For zero sized structures, construct_container return NULL, but we
	 need to keep rest of compiler happy by returning meaningful value.  */
      if (!ret)
	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
      return ret;
    }
  else
    {
      /* FNTYPE_OR_DECL may be either a FUNCTION_DECL or a function type;
	 split it into the two forms ix86_value_regno wants.  */
      tree fn = NULL_TREE, fntype;
      if (fntype_or_decl
	  && DECL_P (fntype_or_decl))
	fn = fntype_or_decl;
      fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
      return gen_rtx_REG (TYPE_MODE (valtype),
			  ix86_value_regno (natmode, fn, fntype));
    }
}

/* Return true iff type is returned in memory.  */
int
ix86_return_in_memory (tree type)
{
  int needed_intregs, needed_sseregs, size;
  enum machine_mode mode = type_natural_mode (type);

  if (TARGET_64BIT)
    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);

  if (mode == BLKmode)
    return 1;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return 0;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return 0;

      /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exist.  */
      if (size == 8)
	return (TARGET_MMX ? 0 : 1);

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return (TARGET_SSE ? 0 : 1);
    }

  if (mode == XFmode)
    return 0;

  if (mode == TDmode)
    return 1;

  if (size > 12)
    return 1;
  return 0;
}

/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  static bool warnedsse, warnedmmx;

  if (type)
    {
      /* Look at the return type of the function, not the function type.
*/
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      /* Each warning fires at most once per compilation.  */
      if (!TARGET_SSE && !warnedsse)
	{
	  if (mode == TImode
	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
	    {
	      warnedsse = true;
	      warning (0, "SSE vector return without SSE enabled "
		       "changes the ABI");
	    }
	}

      if (!TARGET_MMX && !warnedmmx)
	{
	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector return without MMX enabled "
		       "changes the ABI");
	    }
	}
    }

  /* Never force the return value through a hidden pointer here.  */
  return NULL;
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */
rtx
ix86_libcall_value (enum machine_mode mode)
{
  if (TARGET_64BIT)
    {
      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	case TFmode:
	case SDmode:
	case DDmode:
	case TDmode:
	  return gen_rtx_REG (mode, FIRST_SSE_REG);
	case XFmode:
	case XCmode:
	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
	case TCmode:
	  return NULL;
	default:
	  return gen_rtx_REG (mode, 0);
	}
    }
  else
    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
}

/* Given a mode, return the register to use for a return value.  */

static int
ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
{
  gcc_assert (!TARGET_64BIT);

  /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    return TARGET_MMX ? FIRST_MMX_REG : 0;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    return TARGET_SSE ? FIRST_SSE_REG : 0;

  /* Decimal floating point values can go in %eax, unlike other float modes.  */
  if (DECIMAL_FLOAT_MODE_P (mode))
    return 0;

  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
  if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
    return 0;

  /* Floating point return values in %st(0), except for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((func || fntype)
      && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, func);
      if ((sse_level >= 1 && mode == SFmode)
	  || (sse_level == 2 && mode == DFmode))
	return FIRST_SSE_REG;
    }

  return FIRST_FLOAT_REG;
}

/* Create the va_list data type.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.
*/
  if (!TARGET_64BIT)
    return build_pointer_type (char_type_node);

  /* x86-64 va_list is the four-field __va_list_tag record mandated by
     the psABI: gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}

/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  tree fntype;
  int stdarg_p;
  int i;

  if (!TARGET_64BIT)
    return;

  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;

  cfun->stack_alignment_needed = 128;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  if (!no_rtl)
    save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  /* Save the integer argument registers that may carry variadic
     arguments, limited by how much of va_list the function uses.  */
  for (i = next_cum.regno;
       i < ix86_regparm
       && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
       i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
					x86_64_int_parameter_registers[i]));
    }

  if (next_cum.sse_nregs && cfun->va_list_fpr_size)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function.  We use
	 sse_prologue_save insn template that produces computed jump across
	 SSE saves.  We need some preparation work to get this working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to :
	 label - 5*eax + nnamed_sse_arguments*5  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      gen_rtx_MULT (Pmode, nsse_reg,
					    GEN_INT (4))));
      if (next_cum.sse_regno)
	emit_move_insn
	  (nsse_reg,
	   gen_rtx_CONST (DImode,
			  gen_rtx_PLUS (DImode,
					label_ref,
					GEN_INT (next_cum.sse_regno * 4))));
      else
	emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use pointer
	 pointing 127 bytes after first byte to store - this is needed to keep
	 instruction size limited by 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      plus_constant (save_area,
					     8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
					GEN_INT (next_cum.sse_regno), label));
    }

}

/* Implement va_start.  */

void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;

  /* Only 64bit target needs something special.
*/
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  /* Pick the four __va_list_tag fields apart in declaration order.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
	     (int) words, (int) n_gpr, (int) n_fpr);

  /* gp_offset counts bytes into the save area: 8 per GP register.  */
  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type, gpr,
		  build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* fp_offset: 16-byte SSE slots located after the 8*REGPARM_MAX bytes
     of GP slots.  */
  if (cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  t = make_tree (type, virtual_incoming_args_rtx);
  if (words != 0)
    t = build2 (PLUS_EXPR, type, t,
		build_int_cst (type, words * UNITS_PER_WORD));
  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
    {
      /* Find the register save area.
	 Prologue of the function save it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}

/* Implement va_arg.  */

tree
ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;

  /* Only 64bit target needs something special.
*/
  if (!TARGET_64BIT)
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build_va_arg_indirect_ref (valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Arguments passed by reference yield a pointer here; fetch the
     pointer and dereference at the end.  */
  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type);
  container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label ();
      lab_over = create_artificial_label ();

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
	 on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	  DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_convert (ptr_type_node, gpr);
	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_convert (ptr_type_node, fpr);
	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (need_temp)
	{
	  /* The value is scattered over the save area; assemble it
	     piecewise into a stack temporary.  */
	  int i;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  t = build2 (MODIFY_EXPR, void_type_node, addr, t);
	  gimplify_and_add (t, pre_p);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      tree addr_type = build_pointer_type (piece_type);
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
				       size_int (src_offset)));
	      src = build_va_arg_indirect_ref (src_addr);

	      dest_addr = fold_convert (addr_type, addr);
	      dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
					size_int (INTVAL (XEXP (slot, 1)))));
	      dest = build_va_arg_indirect_ref (dest_addr);

	      t = build2 (MODIFY_EXPR, void_type_node, dest, src);
	      gimplify_and_add (t, pre_p);
	    }
	}

      /* Advance the offsets past the registers just consumed.  */
      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
	  gimplify_and_add (t, pre_p);
	}

      t = build1 (GOTO_EXPR, void_type_node, lab_over);
      gimplify_and_add (t, pre_p);

      t = build1 (LABEL_EXPR, void_type_node, lab_false);
      append_to_statement_list (t, pre_p);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
      || integer_zerop (TYPE_SIZE (type)))
    t = ovf;
  else
    {
      /* Round the overflow pointer up to the argument's alignment.  */
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
		  build_int_cst (TREE_TYPE (ovf), align - 1));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }
  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);

  t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
  gimplify_and_add (t2, pre_p);

  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  gimplify_and_add (t, pre_p);

  if (container)
    {
      t = build1 (LABEL_EXPR, void_type_node, lab_over);
      append_to_statement_list (t, pre_p);
    }

  ptrtype = build_pointer_type (type);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}

/* Return nonzero if OPNUM's MEM should be matched
   in movabs* patterns.
*/

int
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  /* Look through any paradoxical/partial SUBREGs to the MEM itself.  */
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (GET_CODE (mem) == MEM);
  return (volatile_ok || !MEM_VOLATILE_P (mem));
}

/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi  */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}

/* Return true if the constant is something that can be loaded with
   a special instruction.
   Returns -1 if X is not a float constant; 0 if no special load exists;
   1 for fldz, 2 for fld1, and 3..7 indexing the table above (see
   standard_80387_constant_opcode).  */

int
standard_80387_constant_p (rtx x)
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;

  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (GET_MODE (x) == XFmode
      && (optimize_size || x86_ext_80387_constants & TUNEMASK))
    {
      REAL_VALUE_TYPE r;
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    default:
      gcc_unreachable ();
    }
}

/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
				       XFmode);
}

/* Return 1 if mode is a valid mode for sse.  */
static int
standard_sse_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      return 1;

    default:
      return 0;
    }
}

/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4827 */ 4828int 4829standard_sse_constant_p (rtx x) 4830{ 4831 enum machine_mode mode = GET_MODE (x); 4832 4833 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x))) 4834 return 1; 4835 if (vector_all_ones_operand (x, mode) 4836 && standard_sse_mode_p (mode)) 4837 return TARGET_SSE2 ? 2 : -1; 4838 4839 return 0; 4840} 4841 4842/* Return the opcode of the special instruction to be used to load 4843 the constant X. */ 4844 4845const char * 4846standard_sse_constant_opcode (rtx insn, rtx x) 4847{ 4848 switch (standard_sse_constant_p (x)) 4849 { 4850 case 1: 4851 if (get_attr_mode (insn) == MODE_V4SF) 4852 return "xorps\t%0, %0"; 4853 else if (get_attr_mode (insn) == MODE_V2DF) 4854 return "xorpd\t%0, %0"; 4855 else 4856 return "pxor\t%0, %0"; 4857 case 2: 4858 return "pcmpeqd\t%0, %0"; 4859 } 4860 gcc_unreachable (); 4861} 4862 4863/* Returns 1 if OP contains a symbol reference */ 4864 4865int 4866symbolic_reference_mentioned_p (rtx op) 4867{ 4868 const char *fmt; 4869 int i; 4870 4871 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) 4872 return 1; 4873 4874 fmt = GET_RTX_FORMAT (GET_CODE (op)); 4875 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) 4876 { 4877 if (fmt[i] == 'E') 4878 { 4879 int j; 4880 4881 for (j = XVECLEN (op, i) - 1; j >= 0; j--) 4882 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) 4883 return 1; 4884 } 4885 4886 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) 4887 return 1; 4888 } 4889 4890 return 0; 4891} 4892 4893/* Return 1 if it is appropriate to emit `ret' instructions in the 4894 body of a function. Do this only if the epilogue is simple, needing a 4895 couple of insns. Prior to reloading, we can't tell how many registers 4896 must be saved, so return 0 then. Return 0 if there is no frame 4897 marker to de-allocate. */ 4898 4899int 4900ix86_can_use_return_insn_p (void) 4901{ 4902 struct ix86_frame frame; 4903 4904 if (! 
reload_completed || frame_pointer_needed) 4905 return 0; 4906 4907 /* Don't allow more than 32 pop, since that's all we can do 4908 with one instruction. */ 4909 if (current_function_pops_args 4910 && current_function_args_size >= 32768) 4911 return 0; 4912 4913 ix86_compute_frame_layout (&frame); 4914 return frame.to_allocate == 0 && frame.nregs == 0; 4915} 4916 4917/* Value should be nonzero if functions must have frame pointers. 4918 Zero means the frame pointer need not be set up (and parms may 4919 be accessed via the stack pointer) in functions that seem suitable. */ 4920 4921int 4922ix86_frame_pointer_required (void) 4923{ 4924 /* If we accessed previous frames, then the generated code expects 4925 to be able to access the saved ebp value in our frame. */ 4926 if (cfun->machine->accesses_prev_frame) 4927 return 1; 4928 4929 /* Several x86 os'es need a frame pointer for other reasons, 4930 usually pertaining to setjmp. */ 4931 if (SUBTARGET_FRAME_POINTER_REQUIRED) 4932 return 1; 4933 4934 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off 4935 the frame pointer by default. Turn it back on now if we've not 4936 got a leaf function. */ 4937 if (TARGET_OMIT_LEAF_FRAME_POINTER 4938 && (!current_function_is_leaf 4939 || ix86_current_function_calls_tls_descriptor)) 4940 return 1; 4941 4942 if (current_function_profile) 4943 return 1; 4944 4945 return 0; 4946} 4947 4948/* Record that the current function accesses previous call frames. */ 4949 4950void 4951ix86_setup_frame_addresses (void) 4952{ 4953 cfun->machine->accesses_prev_frame = 1; 4954} 4955 4956#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO 4957# define USE_HIDDEN_LINKONCE 1 4958#else 4959# define USE_HIDDEN_LINKONCE 0 4960#endif 4961 4962static int pic_labels_used; 4963 4964/* Fills in the label name that should be used for a pc thunk for 4965 the given register. 
*/

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  /* pc thunks are a 32-bit PIC mechanism only.  */
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}


/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

void
ix86_file_end (void)
{
  rtx xops[2];
  int regno;

  /* Emit one thunk body for every register recorded in pic_labels_used
     by output_set_got.  Only the 8 non-REX integer regs can be used.  */
  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  /* On Darwin, emit the thunk as a coalesced weak definition.  */
	  switch_to_section (darwin_sections[text_coal_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n", asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  /* Emit the thunk in its own link-once section with hidden
	     visibility so duplicate copies across objects are merged.  */
	  tree decl;

	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  /* Fallback: a local label in the ordinary text section.  */
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      /* Thunk body: load the return address (at the top of the stack
	 after the call) into the register, then return.  */
      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}

/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
{
  rtx xops[3];

  xops[0] = dest;
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      /* Classic sequence: call a local label and pop the return
	 address, or just a direct move when not PIC.  */
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

      if (!flag_pic)
	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
	output_asm_insn ("call\t%a2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif

      (*targetm.asm_out.internal_label) (asm_out_file, "L",
				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      if (flag_pic)
	output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      /* Deep-branch-prediction CPUs: call a pc thunk instead, keeping
	 the call/return stack balanced.  Record the register so
	 ix86_file_end will emit the thunk body.  */
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referenced by the Mach-O PIC subsystem.  */
#if TARGET_MACHO
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
      else
        targetm.asm_out.internal_label (asm_out_file, "L",
				   CODE_LABEL_NUMBER (label));
#endif
    }

  if (TARGET_MACHO)
    return "";

  /* Add the GOT base offset to turn the return address into the GOT
     pointer.  */
  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);

  return "";
}

/* Generate an "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (Pmode,
				   gen_rtx_PRE_DEC (Pmode,
						    stack_pointer_rtx)),
		      arg);
}

/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (current_function_is_leaf && !current_function_profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i;
      /* Try hard regs 2..0 (%ecx, %edx, %eax) in turn.  */
      for (i = 2; i >= 0; --i)
	if (!regs_ever_live[i])
	  return i;
    }

  return INVALID_REGNUM;
}

/* Return 1 if we need to save REGNO.
*/ 5139static int 5140ix86_save_reg (unsigned int regno, int maybe_eh_return) 5141{ 5142 if (pic_offset_table_rtx 5143 && regno == REAL_PIC_OFFSET_TABLE_REGNUM 5144 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM] 5145 || current_function_profile 5146 || current_function_calls_eh_return 5147 || current_function_uses_const_pool)) 5148 { 5149 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM) 5150 return 0; 5151 return 1; 5152 } 5153 5154 if (current_function_calls_eh_return && maybe_eh_return) 5155 { 5156 unsigned i; 5157 for (i = 0; ; i++) 5158 { 5159 unsigned test = EH_RETURN_DATA_REGNO (i); 5160 if (test == INVALID_REGNUM) 5161 break; 5162 if (test == regno) 5163 return 1; 5164 } 5165 } 5166 5167 if (cfun->machine->force_align_arg_pointer 5168 && regno == REGNO (cfun->machine->force_align_arg_pointer)) 5169 return 1; 5170 5171 return (regs_ever_live[regno] 5172 && !call_used_regs[regno] 5173 && !fixed_regs[regno] 5174 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); 5175} 5176 5177/* Return number of registers to be saved on the stack. */ 5178 5179static int 5180ix86_nsaved_regs (void) 5181{ 5182 int nregs = 0; 5183 int regno; 5184 5185 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) 5186 if (ix86_save_reg (regno, true)) 5187 nregs++; 5188 return nregs; 5189} 5190 5191/* Return the offset between two registers, one to be eliminated, and the other 5192 its replacement, at the start of a routine. 
*/

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      /* Only the stack pointer remains as a valid elimination target.  */
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}

/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  HOST_WIDE_INT total_size;
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  /* NOTE(review): total_size is assigned but not otherwise read in this
     function as visible here.  */
  total_size = size;

  stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
  if (!optimize_size
      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;

      cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
        cfun->machine->use_fast_prologue_epilogue = false;
      else
        cfun->machine->use_fast_prologue_epilogue
	   = !expensive_function_p (count);
    }
  if (TARGET_PROLOGUE_USING_MOVE
      && cfun->machine->use_fast_prologue_epilogue)
    frame->save_regs_using_mov = true;
  else
    frame->save_regs_using_mov = false;


  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since i386 port is the only using those features
     that may break easily.  */

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (stack_alignment_needed
	      <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
		     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last current_function_outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || current_function_calls_alloca
	  || ix86_current_function_calls_tls_descriptor))
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || current_function_calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    frame->padding2 = ((offset + preferred_alignment - 1)
		       & -preferred_alignment) - offset;
  else
    frame->padding2 = 0;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  /* Pushes are cheaper for tiny frames; moves are unusable when the
     allocation does not fit a 32-bit displacement on 64-bit targets.  */
  if ((!frame->to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
	   frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}

/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  /* Push in descending regno order; the epilogue pops in the
     opposite order.  */
  for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
    if (ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to save registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */
static void
ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
{
  unsigned int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
					       Pmode, offset),
			       gen_rtx_REG (Pmode, regno));
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
      }
}

/* Expand prologue or epilogue stack adjustment.
   The pattern exist to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
{
  rtx insn;

  if (! TARGET_64BIT)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
  else
    {
      rtx r11;
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  ATM indirect sibcall
	 shouldn't be used together with huge frame sizes in one
	 function because of the frame_size check in sibcall.c.  */
      gcc_assert (style);
      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
      if (style < 0)
	RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
							       offset));
    }
  if (style < 0)
    RTX_FRAME_RELATED_P (insn) = 1;
}

/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  bool has_force_align_arg_pointer =
    (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
			    TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
  /* Realign the stack for main (when configured to), for an explicit
     -mstackrealign, or for the force_align_arg_pointer attribute.  */
  if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
       && DECL_NAME (current_function_decl)
       && MAIN_NAME_P (DECL_NAME (current_function_decl))
       && DECL_FILE_SCOPE_P (current_function_decl))
      || ix86_force_align_arg_pointer
      || has_force_align_arg_pointer)
    {
      /* Nested functions can't realign the stack due to a register
	 conflict.  */
      if (DECL_CONTEXT (current_function_decl)
	  && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
	{
	  if (ix86_force_align_arg_pointer)
	    warning (0, "-mstackrealign ignored for nested functions");
	  if (has_force_align_arg_pointer)
	    error ("%s not supported for nested functions",
		   ix86_force_align_arg_pointer_string);
	  return virtual_incoming_args_rtx;
	}
      cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
      return copy_to_reg (cfun->machine->force_align_arg_pointer);
    }
  else
    return virtual_incoming_args_rtx;
}

/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
   This is called from dwarf2out.c to emit call frame instructions
   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs.
*/
static void
ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

  switch (index)
    {
    case UNSPEC_REG_SAVE:
      dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
			      SET_DEST (pattern));
      break;
    case UNSPEC_DEF_CFA:
      dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
			 INTVAL (XVECEXP (unspec, 0, 0)));
      break;
    default:
      gcc_unreachable ();
    }
}

/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  /* Stack realignment: save the incoming argument pointer, align %esp,
     and re-push the return address so unwind info stays consistent.  */
  if (cfun->machine->force_align_arg_pointer)
    {
      rtx x, y;

      /* Grab the argument pointer.  */
      x = plus_constant (stack_pointer_rtx, 4);
      y = cfun->machine->force_align_arg_pointer;
      insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* The unwind info consists of two parts: install the fafp as the cfa,
	 and record the fafp as the "save register" of the stack pointer.
	 The later is there in order that the unwinder can see where it
	 should restore the stack pointer across the and insn.  */
      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
      x = gen_rtx_SET (VOIDmode, y, x);
      RTX_FRAME_RELATED_P (x) = 1;
      y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
			  UNSPEC_REG_SAVE);
      y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
      RTX_FRAME_RELATED_P (y) = 1;
      x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
      REG_NOTES (insn) = x;

      /* Align the stack.  */
      emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
			     GEN_INT (-16)));

      /* And here we cheat like madmen with the unwind info.  We force the
	 cfa register back to sp+4, which is exactly what it was at the
	 start of the function.  Re-pushing the return address results in
	 the return at the same spot relative to the cfa, and thus is
	 correct wrt the unwind info.  */
      x = cfun->machine->force_align_arg_pointer;
      x = gen_frame_mem (Pmode, plus_constant (x, -4));
      insn = emit_insn (gen_push (x));
      RTX_FRAME_RELATED_P (insn) = 1;

      x = GEN_INT (4);
      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
      REG_NOTES (insn) = x;
    }

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;

  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();
  else
    allocate += frame.nregs * UNITS_PER_WORD;

  /* When using red zone we may start register saving before allocating
     the stack frame saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
				   : stack_pointer_rtx,
				   -frame.nregs * UNITS_PER_WORD);

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (-allocate), -1);
  else
    {
      /* Only valid for Win32.  */
      rtx eax = gen_rtx_REG (SImode, 0);
      bool eax_live = ix86_eax_live_at_start_p ();
      rtx t;

      gcc_assert (!TARGET_64BIT);

      /* The stack-probe worker clobbers %eax; preserve it if live.  */
      if (eax_live)
	{
	  emit_insn (gen_push (eax));
	  allocate -= 4;
	}

      emit_move_insn (eax, GEN_INT (allocate));

      insn = emit_insn (gen_allocate_stack_worker (eax));
      RTX_FRAME_RELATED_P (insn) = 1;
      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
					    t, REG_NOTES (insn));

      if (eax_live)
	{
	  if (frame_pointer_needed)
	    t = plus_constant (hard_frame_pointer_rtx,
			       allocate
			       - frame.to_allocate
			       - frame.nregs * UNITS_PER_WORD);
	  else
	    t = plus_constant (stack_pointer_rtx, allocate);
	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
	}
    }

  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
        insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
      else
        insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controlled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from be scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}

/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
				  int maybe_eh_return)
{
  int regno;
  rtx base_address = gen_rtx_MEM (Pmode, pointer);

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
	/* Ensure that adjust_address won't be forced to produce pointer
	   out of range allowed by x86-64 instruction set.  */
	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
	  {
	    rtx r11;

	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	    emit_move_insn (r11, GEN_INT (offset));
	    emit_insn (gen_adddi3 (r11, r11, pointer));
	    base_address = gen_rtx_MEM (Pmode, r11);
	    offset = 0;
	  }
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (base_address, Pmode, offset));
	offset += UNITS_PER_WORD;
      }
}

/* Restore function stack, frame, and registers.
*/ 5708 5709void 5710ix86_expand_epilogue (int style) 5711{ 5712 int regno; 5713 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging; 5714 struct ix86_frame frame; 5715 HOST_WIDE_INT offset; 5716 5717 ix86_compute_frame_layout (&frame); 5718 5719 /* Calculate start of saved registers relative to ebp. Special care 5720 must be taken for the normal return case of a function using 5721 eh_return: the eax and edx registers are marked as saved, but not 5722 restored along this path. */ 5723 offset = frame.nregs; 5724 if (current_function_calls_eh_return && style != 2) 5725 offset -= 2; 5726 offset *= -UNITS_PER_WORD; 5727 5728 /* If we're only restoring one register and sp is not valid then 5729 using a move instruction to restore the register since it's 5730 less work than reloading sp and popping the register. 5731 5732 The default code result in stack adjustment using add/lea instruction, 5733 while this code results in LEAVE instruction (or discrete equivalent), 5734 so it is profitable in some other cases as well. Especially when there 5735 are no registers to restore. We also use this code when TARGET_USE_LEAVE 5736 and there is exactly one register to pop. This heuristic may need some 5737 tuning in future. */ 5738 if ((!sp_valid && frame.nregs <= 1) 5739 || (TARGET_EPILOGUE_USING_MOVE 5740 && cfun->machine->use_fast_prologue_epilogue 5741 && (frame.nregs > 1 || frame.to_allocate)) 5742 || (frame_pointer_needed && !frame.nregs && frame.to_allocate) 5743 || (frame_pointer_needed && TARGET_USE_LEAVE 5744 && cfun->machine->use_fast_prologue_epilogue 5745 && frame.nregs == 1) 5746 || current_function_calls_eh_return) 5747 { 5748 /* Restore registers. We can use ebp or esp to address the memory 5749 locations. If both are available, default to ebp, since offsets 5750 are known to be small. Only exception is esp pointing directly to the 5751 end of block of saved registers, where we may simplify addressing 5752 mode. 
*/ 5753 5754 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate)) 5755 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, 5756 frame.to_allocate, style == 2); 5757 else 5758 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, 5759 offset, style == 2); 5760 5761 /* eh_return epilogues need %ecx added to the stack pointer. */ 5762 if (style == 2) 5763 { 5764 rtx tmp, sa = EH_RETURN_STACKADJ_RTX; 5765 5766 if (frame_pointer_needed) 5767 { 5768 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); 5769 tmp = plus_constant (tmp, UNITS_PER_WORD); 5770 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp)); 5771 5772 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx); 5773 emit_move_insn (hard_frame_pointer_rtx, tmp); 5774 5775 pro_epilogue_adjust_stack (stack_pointer_rtx, sa, 5776 const0_rtx, style); 5777 } 5778 else 5779 { 5780 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); 5781 tmp = plus_constant (tmp, (frame.to_allocate 5782 + frame.nregs * UNITS_PER_WORD)); 5783 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp)); 5784 } 5785 } 5786 else if (!frame_pointer_needed) 5787 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 5788 GEN_INT (frame.to_allocate 5789 + frame.nregs * UNITS_PER_WORD), 5790 style); 5791 /* If not an i386, mov & pop is faster than "leave". */ 5792 else if (TARGET_USE_LEAVE || optimize_size 5793 || !cfun->machine->use_fast_prologue_epilogue) 5794 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ()); 5795 else 5796 { 5797 pro_epilogue_adjust_stack (stack_pointer_rtx, 5798 hard_frame_pointer_rtx, 5799 const0_rtx, style); 5800 if (TARGET_64BIT) 5801 emit_insn (gen_popdi1 (hard_frame_pointer_rtx)); 5802 else 5803 emit_insn (gen_popsi1 (hard_frame_pointer_rtx)); 5804 } 5805 } 5806 else 5807 { 5808 /* First step is to deallocate the stack frame so that we can 5809 pop the registers. 
*/ 5810 if (!sp_valid) 5811 { 5812 gcc_assert (frame_pointer_needed); 5813 pro_epilogue_adjust_stack (stack_pointer_rtx, 5814 hard_frame_pointer_rtx, 5815 GEN_INT (offset), style); 5816 } 5817 else if (frame.to_allocate) 5818 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 5819 GEN_INT (frame.to_allocate), style); 5820 5821 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 5822 if (ix86_save_reg (regno, false)) 5823 { 5824 if (TARGET_64BIT) 5825 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno))); 5826 else 5827 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno))); 5828 } 5829 if (frame_pointer_needed) 5830 { 5831 /* Leave results in shorter dependency chains on CPUs that are 5832 able to grok it fast. */ 5833 if (TARGET_USE_LEAVE) 5834 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ()); 5835 else if (TARGET_64BIT) 5836 emit_insn (gen_popdi1 (hard_frame_pointer_rtx)); 5837 else 5838 emit_insn (gen_popsi1 (hard_frame_pointer_rtx)); 5839 } 5840 } 5841 5842 if (cfun->machine->force_align_arg_pointer) 5843 { 5844 emit_insn (gen_addsi3 (stack_pointer_rtx, 5845 cfun->machine->force_align_arg_pointer, 5846 GEN_INT (-4))); 5847 } 5848 5849 /* Sibcall epilogues don't want a return instruction. */ 5850 if (style == 0) 5851 return; 5852 5853 if (current_function_pops_args && current_function_args_size) 5854 { 5855 rtx popc = GEN_INT (current_function_pops_args); 5856 5857 /* i386 can only pop 64K bytes. If asked to pop more, pop 5858 return address, do explicit add, and jump indirectly to the 5859 caller. */ 5860 5861 if (current_function_pops_args >= 65536) 5862 { 5863 rtx ecx = gen_rtx_REG (SImode, 2); 5864 5865 /* There is no "pascal" calling convention in 64bit ABI. 
*/ 5866 gcc_assert (!TARGET_64BIT); 5867 5868 emit_insn (gen_popsi1 (ecx)); 5869 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc)); 5870 emit_jump_insn (gen_return_indirect_internal (ecx)); 5871 } 5872 else 5873 emit_jump_insn (gen_return_pop_internal (popc)); 5874 } 5875 else 5876 emit_jump_insn (gen_return_internal ()); 5877} 5878 5879/* Reset from the function's potential modifications. */ 5880 5881static void 5882ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, 5883 HOST_WIDE_INT size ATTRIBUTE_UNUSED) 5884{ 5885 if (pic_offset_table_rtx) 5886 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM; 5887#if TARGET_MACHO 5888 /* Mach-O doesn't support labels at the end of objects, so if 5889 it looks like we might want one, insert a NOP. */ 5890 { 5891 rtx insn = get_last_insn (); 5892 while (insn 5893 && NOTE_P (insn) 5894 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL) 5895 insn = PREV_INSN (insn); 5896 if (insn 5897 && (LABEL_P (insn) 5898 || (NOTE_P (insn) 5899 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL))) 5900 fputs ("\tnop\n", file); 5901 } 5902#endif 5903 5904} 5905 5906/* Extract the parts of an RTL expression that is a valid memory address 5907 for an instruction. Return 0 if the structure of the address is 5908 grossly off. Return -1 if the address contains ASHIFT, so it is not 5909 strictly valid, but still used for computing length of lea instruction. 
*/ 5910 5911int 5912ix86_decompose_address (rtx addr, struct ix86_address *out) 5913{ 5914 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; 5915 rtx base_reg, index_reg; 5916 HOST_WIDE_INT scale = 1; 5917 rtx scale_rtx = NULL_RTX; 5918 int retval = 1; 5919 enum ix86_address_seg seg = SEG_DEFAULT; 5920 5921 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG) 5922 base = addr; 5923 else if (GET_CODE (addr) == PLUS) 5924 { 5925 rtx addends[4], op; 5926 int n = 0, i; 5927 5928 op = addr; 5929 do 5930 { 5931 if (n >= 4) 5932 return 0; 5933 addends[n++] = XEXP (op, 1); 5934 op = XEXP (op, 0); 5935 } 5936 while (GET_CODE (op) == PLUS); 5937 if (n >= 4) 5938 return 0; 5939 addends[n] = op; 5940 5941 for (i = n; i >= 0; --i) 5942 { 5943 op = addends[i]; 5944 switch (GET_CODE (op)) 5945 { 5946 case MULT: 5947 if (index) 5948 return 0; 5949 index = XEXP (op, 0); 5950 scale_rtx = XEXP (op, 1); 5951 break; 5952 5953 case UNSPEC: 5954 if (XINT (op, 1) == UNSPEC_TP 5955 && TARGET_TLS_DIRECT_SEG_REFS 5956 && seg == SEG_DEFAULT) 5957 seg = TARGET_64BIT ? SEG_FS : SEG_GS; 5958 else 5959 return 0; 5960 break; 5961 5962 case REG: 5963 case SUBREG: 5964 if (!base) 5965 base = op; 5966 else if (!index) 5967 index = op; 5968 else 5969 return 0; 5970 break; 5971 5972 case CONST: 5973 case CONST_INT: 5974 case SYMBOL_REF: 5975 case LABEL_REF: 5976 if (disp) 5977 return 0; 5978 disp = op; 5979 break; 5980 5981 default: 5982 return 0; 5983 } 5984 } 5985 } 5986 else if (GET_CODE (addr) == MULT) 5987 { 5988 index = XEXP (addr, 0); /* index*scale */ 5989 scale_rtx = XEXP (addr, 1); 5990 } 5991 else if (GET_CODE (addr) == ASHIFT) 5992 { 5993 rtx tmp; 5994 5995 /* We're called for lea too, which implements ashift on occasion. 
*/ 5996 index = XEXP (addr, 0); 5997 tmp = XEXP (addr, 1); 5998 if (GET_CODE (tmp) != CONST_INT) 5999 return 0; 6000 scale = INTVAL (tmp); 6001 if ((unsigned HOST_WIDE_INT) scale > 3) 6002 return 0; 6003 scale = 1 << scale; 6004 retval = -1; 6005 } 6006 else 6007 disp = addr; /* displacement */ 6008 6009 /* Extract the integral value of scale. */ 6010 if (scale_rtx) 6011 { 6012 if (GET_CODE (scale_rtx) != CONST_INT) 6013 return 0; 6014 scale = INTVAL (scale_rtx); 6015 } 6016 6017 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base; 6018 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index; 6019 6020 /* Allow arg pointer and stack pointer as index if there is not scaling. */ 6021 if (base_reg && index_reg && scale == 1 6022 && (index_reg == arg_pointer_rtx 6023 || index_reg == frame_pointer_rtx 6024 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM))) 6025 { 6026 rtx tmp; 6027 tmp = base, base = index, index = tmp; 6028 tmp = base_reg, base_reg = index_reg, index_reg = tmp; 6029 } 6030 6031 /* Special case: %ebp cannot be encoded as a base without a displacement. */ 6032 if ((base_reg == hard_frame_pointer_rtx 6033 || base_reg == frame_pointer_rtx 6034 || base_reg == arg_pointer_rtx) && !disp) 6035 disp = const0_rtx; 6036 6037 /* Special case: on K6, [%esi] makes the instruction vector decoded. 6038 Avoid this by transforming to [%esi+0]. */ 6039 if (ix86_tune == PROCESSOR_K6 && !optimize_size 6040 && base_reg && !index_reg && !disp 6041 && REG_P (base_reg) 6042 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG) 6043 disp = const0_rtx; 6044 6045 /* Special case: encode reg+reg instead of reg*2. */ 6046 if (!base && index && scale && scale == 2) 6047 base = index, base_reg = index_reg, scale = 1; 6048 6049 /* Special case: scaling cannot be encoded without base or displacement. 
*/ 6050 if (!base && !disp && index && scale != 1) 6051 disp = const0_rtx; 6052 6053 out->base = base; 6054 out->index = index; 6055 out->disp = disp; 6056 out->scale = scale; 6057 out->seg = seg; 6058 6059 return retval; 6060} 6061 6062/* Return cost of the memory address x. 6063 For i386, it is better to use a complex address than let gcc copy 6064 the address into a reg and make a new pseudo. But not if the address 6065 requires to two regs - that would mean more pseudos with longer 6066 lifetimes. */ 6067static int 6068ix86_address_cost (rtx x) 6069{ 6070 struct ix86_address parts; 6071 int cost = 1; 6072 int ok = ix86_decompose_address (x, &parts); 6073 6074 gcc_assert (ok); 6075 6076 if (parts.base && GET_CODE (parts.base) == SUBREG) 6077 parts.base = SUBREG_REG (parts.base); 6078 if (parts.index && GET_CODE (parts.index) == SUBREG) 6079 parts.index = SUBREG_REG (parts.index); 6080 6081 /* More complex memory references are better. */ 6082 if (parts.disp && parts.disp != const0_rtx) 6083 cost--; 6084 if (parts.seg != SEG_DEFAULT) 6085 cost--; 6086 6087 /* Attempt to minimize number of registers in the address. */ 6088 if ((parts.base 6089 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)) 6090 || (parts.index 6091 && (!REG_P (parts.index) 6092 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER))) 6093 cost++; 6094 6095 if (parts.base 6096 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) 6097 && parts.index 6098 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) 6099 && parts.base != parts.index) 6100 cost++; 6101 6102 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b, 6103 since it's predecode logic can't detect the length of instructions 6104 and it degenerates to vector decoded. Increase cost of such 6105 addresses here. The penalty is minimally 2 cycles. It may be worthwhile 6106 to split such addresses or even refuse such addresses at all. 
6107 6108 Following addressing modes are affected: 6109 [base+scale*index] 6110 [scale*index+disp] 6111 [base+index] 6112 6113 The first and last case may be avoidable by explicitly coding the zero in 6114 memory address, but I don't have AMD-K6 machine handy to check this 6115 theory. */ 6116 6117 if (TARGET_K6 6118 && ((!parts.disp && parts.base && parts.index && parts.scale != 1) 6119 || (parts.disp && !parts.base && parts.index && parts.scale != 1) 6120 || (!parts.disp && parts.base && parts.index && parts.scale == 1))) 6121 cost += 10; 6122 6123 return cost; 6124} 6125 6126/* If X is a machine specific address (i.e. a symbol or label being 6127 referenced as a displacement from the GOT implemented using an 6128 UNSPEC), then return the base term. Otherwise return X. */ 6129 6130rtx 6131ix86_find_base_term (rtx x) 6132{ 6133 rtx term; 6134 6135 if (TARGET_64BIT) 6136 { 6137 if (GET_CODE (x) != CONST) 6138 return x; 6139 term = XEXP (x, 0); 6140 if (GET_CODE (term) == PLUS 6141 && (GET_CODE (XEXP (term, 1)) == CONST_INT 6142 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE)) 6143 term = XEXP (term, 0); 6144 if (GET_CODE (term) != UNSPEC 6145 || XINT (term, 1) != UNSPEC_GOTPCREL) 6146 return x; 6147 6148 term = XVECEXP (term, 0, 0); 6149 6150 if (GET_CODE (term) != SYMBOL_REF 6151 && GET_CODE (term) != LABEL_REF) 6152 return x; 6153 6154 return term; 6155 } 6156 6157 term = ix86_delegitimize_address (x); 6158 6159 if (GET_CODE (term) != SYMBOL_REF 6160 && GET_CODE (term) != LABEL_REF) 6161 return x; 6162 6163 return term; 6164} 6165 6166/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as 6167 this is used for to form addresses to local data when -fPIC is in 6168 use. 
*/ 6169 6170static bool 6171darwin_local_data_pic (rtx disp) 6172{ 6173 if (GET_CODE (disp) == MINUS) 6174 { 6175 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF 6176 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF) 6177 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF) 6178 { 6179 const char *sym_name = XSTR (XEXP (disp, 1), 0); 6180 if (! strcmp (sym_name, "<pic base>")) 6181 return true; 6182 } 6183 } 6184 6185 return false; 6186} 6187 6188/* Determine if a given RTX is a valid constant. We already know this 6189 satisfies CONSTANT_P. */ 6190 6191bool 6192legitimate_constant_p (rtx x) 6193{ 6194 switch (GET_CODE (x)) 6195 { 6196 case CONST: 6197 x = XEXP (x, 0); 6198 6199 if (GET_CODE (x) == PLUS) 6200 { 6201 if (GET_CODE (XEXP (x, 1)) != CONST_INT) 6202 return false; 6203 x = XEXP (x, 0); 6204 } 6205 6206 if (TARGET_MACHO && darwin_local_data_pic (x)) 6207 return true; 6208 6209 /* Only some unspecs are valid as "constants". */ 6210 if (GET_CODE (x) == UNSPEC) 6211 switch (XINT (x, 1)) 6212 { 6213 case UNSPEC_GOTOFF: 6214 return TARGET_64BIT; 6215 case UNSPEC_TPOFF: 6216 case UNSPEC_NTPOFF: 6217 x = XVECEXP (x, 0, 0); 6218 return (GET_CODE (x) == SYMBOL_REF 6219 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); 6220 case UNSPEC_DTPOFF: 6221 x = XVECEXP (x, 0, 0); 6222 return (GET_CODE (x) == SYMBOL_REF 6223 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC); 6224 default: 6225 return false; 6226 } 6227 6228 /* We must have drilled down to a symbol. */ 6229 if (GET_CODE (x) == LABEL_REF) 6230 return true; 6231 if (GET_CODE (x) != SYMBOL_REF) 6232 return false; 6233 /* FALLTHRU */ 6234 6235 case SYMBOL_REF: 6236 /* TLS symbols are never valid. 
*/ 6237 if (SYMBOL_REF_TLS_MODEL (x)) 6238 return false; 6239 break; 6240 6241 case CONST_DOUBLE: 6242 if (GET_MODE (x) == TImode 6243 && x != CONST0_RTX (TImode) 6244 && !TARGET_64BIT) 6245 return false; 6246 break; 6247 6248 case CONST_VECTOR: 6249 if (x == CONST0_RTX (GET_MODE (x))) 6250 return true; 6251 return false; 6252 6253 default: 6254 break; 6255 } 6256 6257 /* Otherwise we handle everything else in the move patterns. */ 6258 return true; 6259} 6260 6261/* Determine if it's legal to put X into the constant pool. This 6262 is not possible for the address of thread-local symbols, which 6263 is checked above. */ 6264 6265static bool 6266ix86_cannot_force_const_mem (rtx x) 6267{ 6268 /* We can always put integral constants and vectors in memory. */ 6269 switch (GET_CODE (x)) 6270 { 6271 case CONST_INT: 6272 case CONST_DOUBLE: 6273 case CONST_VECTOR: 6274 return false; 6275 6276 default: 6277 break; 6278 } 6279 return !legitimate_constant_p (x); 6280} 6281 6282/* Determine if a given RTX is a valid constant address. */ 6283 6284bool 6285constant_address_p (rtx x) 6286{ 6287 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1); 6288} 6289 6290/* Nonzero if the constant value X is a legitimate general operand 6291 when generating PIC code. It is given that flag_pic is on and 6292 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ 6293 6294bool 6295legitimate_pic_operand_p (rtx x) 6296{ 6297 rtx inner; 6298 6299 switch (GET_CODE (x)) 6300 { 6301 case CONST: 6302 inner = XEXP (x, 0); 6303 if (GET_CODE (inner) == PLUS 6304 && GET_CODE (XEXP (inner, 1)) == CONST_INT) 6305 inner = XEXP (inner, 0); 6306 6307 /* Only some unspecs are valid as "constants". 
*/ 6308 if (GET_CODE (inner) == UNSPEC) 6309 switch (XINT (inner, 1)) 6310 { 6311 case UNSPEC_GOTOFF: 6312 return TARGET_64BIT; 6313 case UNSPEC_TPOFF: 6314 x = XVECEXP (inner, 0, 0); 6315 return (GET_CODE (x) == SYMBOL_REF 6316 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); 6317 default: 6318 return false; 6319 } 6320 /* FALLTHRU */ 6321 6322 case SYMBOL_REF: 6323 case LABEL_REF: 6324 return legitimate_pic_address_disp_p (x); 6325 6326 default: 6327 return true; 6328 } 6329} 6330 6331/* Determine if a given CONST RTX is a valid memory displacement 6332 in PIC mode. */ 6333 6334int 6335legitimate_pic_address_disp_p (rtx disp) 6336{ 6337 bool saw_plus; 6338 6339 /* In 64bit mode we can allow direct addresses of symbols and labels 6340 when they are not dynamic symbols. */ 6341 if (TARGET_64BIT) 6342 { 6343 rtx op0 = disp, op1; 6344 6345 switch (GET_CODE (disp)) 6346 { 6347 case LABEL_REF: 6348 return true; 6349 6350 case CONST: 6351 if (GET_CODE (XEXP (disp, 0)) != PLUS) 6352 break; 6353 op0 = XEXP (XEXP (disp, 0), 0); 6354 op1 = XEXP (XEXP (disp, 0), 1); 6355 if (GET_CODE (op1) != CONST_INT 6356 || INTVAL (op1) >= 16*1024*1024 6357 || INTVAL (op1) < -16*1024*1024) 6358 break; 6359 if (GET_CODE (op0) == LABEL_REF) 6360 return true; 6361 if (GET_CODE (op0) != SYMBOL_REF) 6362 break; 6363 /* FALLTHRU */ 6364 6365 case SYMBOL_REF: 6366 /* TLS references should always be enclosed in UNSPEC. */ 6367 if (SYMBOL_REF_TLS_MODEL (op0)) 6368 return false; 6369 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)) 6370 return true; 6371 break; 6372 6373 default: 6374 break; 6375 } 6376 } 6377 if (GET_CODE (disp) != CONST) 6378 return 0; 6379 disp = XEXP (disp, 0); 6380 6381 if (TARGET_64BIT) 6382 { 6383 /* We are unsafe to allow PLUS expressions. This limit allowed distance 6384 of GOT tables. We should not need these anyway. 
*/ 6385 if (GET_CODE (disp) != UNSPEC 6386 || (XINT (disp, 1) != UNSPEC_GOTPCREL 6387 && XINT (disp, 1) != UNSPEC_GOTOFF)) 6388 return 0; 6389 6390 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF 6391 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF) 6392 return 0; 6393 return 1; 6394 } 6395 6396 saw_plus = false; 6397 if (GET_CODE (disp) == PLUS) 6398 { 6399 if (GET_CODE (XEXP (disp, 1)) != CONST_INT) 6400 return 0; 6401 disp = XEXP (disp, 0); 6402 saw_plus = true; 6403 } 6404 6405 if (TARGET_MACHO && darwin_local_data_pic (disp)) 6406 return 1; 6407 6408 if (GET_CODE (disp) != UNSPEC) 6409 return 0; 6410 6411 switch (XINT (disp, 1)) 6412 { 6413 case UNSPEC_GOT: 6414 if (saw_plus) 6415 return false; 6416 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF; 6417 case UNSPEC_GOTOFF: 6418 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used. 6419 While ABI specify also 32bit relocation but we don't produce it in 6420 small PIC model at all. */ 6421 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF 6422 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF) 6423 && !TARGET_64BIT) 6424 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode); 6425 return false; 6426 case UNSPEC_GOTTPOFF: 6427 case UNSPEC_GOTNTPOFF: 6428 case UNSPEC_INDNTPOFF: 6429 if (saw_plus) 6430 return false; 6431 disp = XVECEXP (disp, 0, 0); 6432 return (GET_CODE (disp) == SYMBOL_REF 6433 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC); 6434 case UNSPEC_NTPOFF: 6435 disp = XVECEXP (disp, 0, 0); 6436 return (GET_CODE (disp) == SYMBOL_REF 6437 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC); 6438 case UNSPEC_DTPOFF: 6439 disp = XVECEXP (disp, 0, 0); 6440 return (GET_CODE (disp) == SYMBOL_REF 6441 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC); 6442 } 6443 6444 return 0; 6445} 6446 6447/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid 6448 memory address for an instruction. 
The MODE argument is the machine mode 6449 for the MEM expression that wants to use this address. 6450 6451 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should 6452 convert common non-canonical forms to canonical form so that they will 6453 be recognized. */ 6454 6455int 6456legitimate_address_p (enum machine_mode mode, rtx addr, int strict) 6457{ 6458 struct ix86_address parts; 6459 rtx base, index, disp; 6460 HOST_WIDE_INT scale; 6461 const char *reason = NULL; 6462 rtx reason_rtx = NULL_RTX; 6463 6464 if (TARGET_DEBUG_ADDR) 6465 { 6466 fprintf (stderr, 6467 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n", 6468 GET_MODE_NAME (mode), strict); 6469 debug_rtx (addr); 6470 } 6471 6472 if (ix86_decompose_address (addr, &parts) <= 0) 6473 { 6474 reason = "decomposition failed"; 6475 goto report_error; 6476 } 6477 6478 base = parts.base; 6479 index = parts.index; 6480 disp = parts.disp; 6481 scale = parts.scale; 6482 6483 /* Validate base register. 6484 6485 Don't allow SUBREG's that span more than a word here. It can lead to spill 6486 failures when the base is one word out of a two word structure, which is 6487 represented internally as a DImode int. */ 6488 6489 if (base) 6490 { 6491 rtx reg; 6492 reason_rtx = base; 6493 6494 if (REG_P (base)) 6495 reg = base; 6496 else if (GET_CODE (base) == SUBREG 6497 && REG_P (SUBREG_REG (base)) 6498 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base))) 6499 <= UNITS_PER_WORD) 6500 reg = SUBREG_REG (base); 6501 else 6502 { 6503 reason = "base is not a register"; 6504 goto report_error; 6505 } 6506 6507 if (GET_MODE (base) != Pmode) 6508 { 6509 reason = "base is not in Pmode"; 6510 goto report_error; 6511 } 6512 6513 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg)) 6514 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg))) 6515 { 6516 reason = "base is not valid"; 6517 goto report_error; 6518 } 6519 } 6520 6521 /* Validate index register. 
6522 6523 Don't allow SUBREG's that span more than a word here -- same as above. */ 6524 6525 if (index) 6526 { 6527 rtx reg; 6528 reason_rtx = index; 6529 6530 if (REG_P (index)) 6531 reg = index; 6532 else if (GET_CODE (index) == SUBREG 6533 && REG_P (SUBREG_REG (index)) 6534 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index))) 6535 <= UNITS_PER_WORD) 6536 reg = SUBREG_REG (index); 6537 else 6538 { 6539 reason = "index is not a register"; 6540 goto report_error; 6541 } 6542 6543 if (GET_MODE (index) != Pmode) 6544 { 6545 reason = "index is not in Pmode"; 6546 goto report_error; 6547 } 6548 6549 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg)) 6550 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg))) 6551 { 6552 reason = "index is not valid"; 6553 goto report_error; 6554 } 6555 } 6556 6557 /* Validate scale factor. */ 6558 if (scale != 1) 6559 { 6560 reason_rtx = GEN_INT (scale); 6561 if (!index) 6562 { 6563 reason = "scale without index"; 6564 goto report_error; 6565 } 6566 6567 if (scale != 2 && scale != 4 && scale != 8) 6568 { 6569 reason = "scale is not a valid multiplier"; 6570 goto report_error; 6571 } 6572 } 6573 6574 /* Validate displacement. */ 6575 if (disp) 6576 { 6577 reason_rtx = disp; 6578 6579 if (GET_CODE (disp) == CONST 6580 && GET_CODE (XEXP (disp, 0)) == UNSPEC) 6581 switch (XINT (XEXP (disp, 0), 1)) 6582 { 6583 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when 6584 used. While ABI specify also 32bit relocations, we don't produce 6585 them at all and use IP relative instead. 
*/ 6586 case UNSPEC_GOT: 6587 case UNSPEC_GOTOFF: 6588 gcc_assert (flag_pic); 6589 if (!TARGET_64BIT) 6590 goto is_legitimate_pic; 6591 reason = "64bit address unspec"; 6592 goto report_error; 6593 6594 case UNSPEC_GOTPCREL: 6595 gcc_assert (flag_pic); 6596 goto is_legitimate_pic; 6597 6598 case UNSPEC_GOTTPOFF: 6599 case UNSPEC_GOTNTPOFF: 6600 case UNSPEC_INDNTPOFF: 6601 case UNSPEC_NTPOFF: 6602 case UNSPEC_DTPOFF: 6603 break; 6604 6605 default: 6606 reason = "invalid address unspec"; 6607 goto report_error; 6608 } 6609 6610 else if (SYMBOLIC_CONST (disp) 6611 && (flag_pic 6612 || (TARGET_MACHO 6613#if TARGET_MACHO 6614 && MACHOPIC_INDIRECT 6615 && !machopic_operand_p (disp) 6616#endif 6617 ))) 6618 { 6619 6620 is_legitimate_pic: 6621 if (TARGET_64BIT && (index || base)) 6622 { 6623 /* foo@dtpoff(%rX) is ok. */ 6624 if (GET_CODE (disp) != CONST 6625 || GET_CODE (XEXP (disp, 0)) != PLUS 6626 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC 6627 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT 6628 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF 6629 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF)) 6630 { 6631 reason = "non-constant pic memory reference"; 6632 goto report_error; 6633 } 6634 } 6635 else if (! legitimate_pic_address_disp_p (disp)) 6636 { 6637 reason = "displacement is an invalid pic construct"; 6638 goto report_error; 6639 } 6640 6641 /* This code used to verify that a symbolic pic displacement 6642 includes the pic_offset_table_rtx register. 6643 6644 While this is good idea, unfortunately these constructs may 6645 be created by "adds using lea" optimization for incorrect 6646 code like: 6647 6648 int a; 6649 int foo(int i) 6650 { 6651 return *(&a+i); 6652 } 6653 6654 This code is nonsensical, but results in addressing 6655 GOT table with pic_offset_table_rtx base. We can't 6656 just refuse it easily, since it gets matched by 6657 "addsi3" pattern, that later gets split to lea in the 6658 case output register differs from input. 
While this 6659 can be handled by separate addsi pattern for this case 6660 that never results in lea, this seems to be easier and 6661 correct fix for crash to disable this test. */ 6662 } 6663 else if (GET_CODE (disp) != LABEL_REF 6664 && GET_CODE (disp) != CONST_INT 6665 && (GET_CODE (disp) != CONST 6666 || !legitimate_constant_p (disp)) 6667 && (GET_CODE (disp) != SYMBOL_REF 6668 || !legitimate_constant_p (disp))) 6669 { 6670 reason = "displacement is not constant"; 6671 goto report_error; 6672 } 6673 else if (TARGET_64BIT 6674 && !x86_64_immediate_operand (disp, VOIDmode)) 6675 { 6676 reason = "displacement is out of range"; 6677 goto report_error; 6678 } 6679 } 6680 6681 /* Everything looks valid. */ 6682 if (TARGET_DEBUG_ADDR) 6683 fprintf (stderr, "Success.\n"); 6684 return TRUE; 6685 6686 report_error: 6687 if (TARGET_DEBUG_ADDR) 6688 { 6689 fprintf (stderr, "Error: %s\n", reason); 6690 debug_rtx (reason_rtx); 6691 } 6692 return FALSE; 6693} 6694 6695/* Return a unique alias set for the GOT. */ 6696 6697static HOST_WIDE_INT 6698ix86_GOT_alias_set (void) 6699{ 6700 static HOST_WIDE_INT set = -1; 6701 if (set == -1) 6702 set = new_alias_set (); 6703 return set; 6704} 6705 6706/* Return a legitimate reference for ORIG (an address) using the 6707 register REG. If REG is 0, a new pseudo is generated. 6708 6709 There are two types of references that must be handled: 6710 6711 1. Global data references must load the address from the GOT, via 6712 the PIC reg. An insn is emitted to do this load, and the reg is 6713 returned. 6714 6715 2. Static data references, constant pool addresses, and code labels 6716 compute the address as an offset from the GOT, whose base is in 6717 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to 6718 differentiate them from global data objects. The returned 6719 address is the PIC reg + an unspec constant. 6720 6721 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC 6722 reg also appears in the address. 
*/ 6723 6724static rtx 6725legitimize_pic_address (rtx orig, rtx reg) 6726{ 6727 rtx addr = orig; 6728 rtx new = orig; 6729 rtx base; 6730 6731#if TARGET_MACHO 6732 if (TARGET_MACHO && !TARGET_64BIT) 6733 { 6734 if (reg == 0) 6735 reg = gen_reg_rtx (Pmode); 6736 /* Use the generic Mach-O PIC machinery. */ 6737 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg); 6738 } 6739#endif 6740 6741 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr)) 6742 new = addr; 6743 else if (TARGET_64BIT 6744 && ix86_cmodel != CM_SMALL_PIC 6745 && local_symbolic_operand (addr, Pmode)) 6746 { 6747 rtx tmpreg; 6748 /* This symbol may be referenced via a displacement from the PIC 6749 base address (@GOTOFF). */ 6750 6751 if (reload_in_progress) 6752 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; 6753 if (GET_CODE (addr) == CONST) 6754 addr = XEXP (addr, 0); 6755 if (GET_CODE (addr) == PLUS) 6756 { 6757 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF); 6758 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1)); 6759 } 6760 else 6761 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); 6762 new = gen_rtx_CONST (Pmode, new); 6763 if (!reg) 6764 tmpreg = gen_reg_rtx (Pmode); 6765 else 6766 tmpreg = reg; 6767 emit_move_insn (tmpreg, new); 6768 6769 if (reg != 0) 6770 { 6771 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx, 6772 tmpreg, 1, OPTAB_DIRECT); 6773 new = reg; 6774 } 6775 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg); 6776 } 6777 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode)) 6778 { 6779 /* This symbol may be referenced via a displacement from the PIC 6780 base address (@GOTOFF). 
*/ 6781 6782 if (reload_in_progress) 6783 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; 6784 if (GET_CODE (addr) == CONST) 6785 addr = XEXP (addr, 0); 6786 if (GET_CODE (addr) == PLUS) 6787 { 6788 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF); 6789 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1)); 6790 } 6791 else 6792 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); 6793 new = gen_rtx_CONST (Pmode, new); 6794 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); 6795 6796 if (reg != 0) 6797 { 6798 emit_move_insn (reg, new); 6799 new = reg; 6800 } 6801 } 6802 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0) 6803 { 6804 if (TARGET_64BIT) 6805 { 6806 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL); 6807 new = gen_rtx_CONST (Pmode, new); 6808 new = gen_const_mem (Pmode, new); 6809 set_mem_alias_set (new, ix86_GOT_alias_set ()); 6810 6811 if (reg == 0) 6812 reg = gen_reg_rtx (Pmode); 6813 /* Use directly gen_movsi, otherwise the address is loaded 6814 into register for CSE. We don't want to CSE this addresses, 6815 instead we CSE addresses from the GOT table, so skip this. */ 6816 emit_insn (gen_movsi (reg, new)); 6817 new = reg; 6818 } 6819 else 6820 { 6821 /* This symbol must be referenced via a load from the 6822 Global Offset Table (@GOT). 
*/ 6823 6824 if (reload_in_progress) 6825 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; 6826 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); 6827 new = gen_rtx_CONST (Pmode, new); 6828 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); 6829 new = gen_const_mem (Pmode, new); 6830 set_mem_alias_set (new, ix86_GOT_alias_set ()); 6831 6832 if (reg == 0) 6833 reg = gen_reg_rtx (Pmode); 6834 emit_move_insn (reg, new); 6835 new = reg; 6836 } 6837 } 6838 else 6839 { 6840 if (GET_CODE (addr) == CONST_INT 6841 && !x86_64_immediate_operand (addr, VOIDmode)) 6842 { 6843 if (reg) 6844 { 6845 emit_move_insn (reg, addr); 6846 new = reg; 6847 } 6848 else 6849 new = force_reg (Pmode, addr); 6850 } 6851 else if (GET_CODE (addr) == CONST) 6852 { 6853 addr = XEXP (addr, 0); 6854 6855 /* We must match stuff we generate before. Assume the only 6856 unspecs that can get here are ours. Not that we could do 6857 anything with them anyway.... */ 6858 if (GET_CODE (addr) == UNSPEC 6859 || (GET_CODE (addr) == PLUS 6860 && GET_CODE (XEXP (addr, 0)) == UNSPEC)) 6861 return orig; 6862 gcc_assert (GET_CODE (addr) == PLUS); 6863 } 6864 if (GET_CODE (addr) == PLUS) 6865 { 6866 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); 6867 6868 /* Check first to see if this is a constant offset from a @GOTOFF 6869 symbol reference. 
*/ 6870 if (local_symbolic_operand (op0, Pmode) 6871 && GET_CODE (op1) == CONST_INT) 6872 { 6873 if (!TARGET_64BIT) 6874 { 6875 if (reload_in_progress) 6876 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; 6877 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 6878 UNSPEC_GOTOFF); 6879 new = gen_rtx_PLUS (Pmode, new, op1); 6880 new = gen_rtx_CONST (Pmode, new); 6881 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); 6882 6883 if (reg != 0) 6884 { 6885 emit_move_insn (reg, new); 6886 new = reg; 6887 } 6888 } 6889 else 6890 { 6891 if (INTVAL (op1) < -16*1024*1024 6892 || INTVAL (op1) >= 16*1024*1024) 6893 { 6894 if (!x86_64_immediate_operand (op1, Pmode)) 6895 op1 = force_reg (Pmode, op1); 6896 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1); 6897 } 6898 } 6899 } 6900 else 6901 { 6902 base = legitimize_pic_address (XEXP (addr, 0), reg); 6903 new = legitimize_pic_address (XEXP (addr, 1), 6904 base == reg ? NULL_RTX : reg); 6905 6906 if (GET_CODE (new) == CONST_INT) 6907 new = plus_constant (base, INTVAL (new)); 6908 else 6909 { 6910 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1))) 6911 { 6912 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0)); 6913 new = XEXP (new, 1); 6914 } 6915 new = gen_rtx_PLUS (Pmode, base, new); 6916 } 6917 } 6918 } 6919 } 6920 return new; 6921} 6922 6923/* Load the thread pointer. If TO_REG is true, force it into a register. */ 6924 6925static rtx 6926get_thread_pointer (int to_reg) 6927{ 6928 rtx tp, reg, insn; 6929 6930 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP); 6931 if (!to_reg) 6932 return tp; 6933 6934 reg = gen_reg_rtx (Pmode); 6935 insn = gen_rtx_SET (VOIDmode, reg, tp); 6936 insn = emit_insn (insn); 6937 6938 return reg; 6939} 6940 6941/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is 6942 false if we expect this to be used for a memory address and true if 6943 we expect to load the address into a register. 
*/ 6944 6945static rtx 6946legitimize_tls_address (rtx x, enum tls_model model, int for_mov) 6947{ 6948 rtx dest, base, off, pic, tp; 6949 int type; 6950 6951 switch (model) 6952 { 6953 case TLS_MODEL_GLOBAL_DYNAMIC: 6954 dest = gen_reg_rtx (Pmode); 6955 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0; 6956 6957 if (TARGET_64BIT && ! TARGET_GNU2_TLS) 6958 { 6959 rtx rax = gen_rtx_REG (Pmode, 0), insns; 6960 6961 start_sequence (); 6962 emit_call_insn (gen_tls_global_dynamic_64 (rax, x)); 6963 insns = get_insns (); 6964 end_sequence (); 6965 6966 emit_libcall_block (insns, dest, rax, x); 6967 } 6968 else if (TARGET_64BIT && TARGET_GNU2_TLS) 6969 emit_insn (gen_tls_global_dynamic_64 (dest, x)); 6970 else 6971 emit_insn (gen_tls_global_dynamic_32 (dest, x)); 6972 6973 if (TARGET_GNU2_TLS) 6974 { 6975 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest)); 6976 6977 set_unique_reg_note (get_last_insn (), REG_EQUIV, x); 6978 } 6979 break; 6980 6981 case TLS_MODEL_LOCAL_DYNAMIC: 6982 base = gen_reg_rtx (Pmode); 6983 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0; 6984 6985 if (TARGET_64BIT && ! 
TARGET_GNU2_TLS) 6986 { 6987 rtx rax = gen_rtx_REG (Pmode, 0), insns, note; 6988 6989 start_sequence (); 6990 emit_call_insn (gen_tls_local_dynamic_base_64 (rax)); 6991 insns = get_insns (); 6992 end_sequence (); 6993 6994 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL); 6995 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note); 6996 emit_libcall_block (insns, base, rax, note); 6997 } 6998 else if (TARGET_64BIT && TARGET_GNU2_TLS) 6999 emit_insn (gen_tls_local_dynamic_base_64 (base)); 7000 else 7001 emit_insn (gen_tls_local_dynamic_base_32 (base)); 7002 7003 if (TARGET_GNU2_TLS) 7004 { 7005 rtx x = ix86_tls_module_base (); 7006 7007 set_unique_reg_note (get_last_insn (), REG_EQUIV, 7008 gen_rtx_MINUS (Pmode, x, tp)); 7009 } 7010 7011 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF); 7012 off = gen_rtx_CONST (Pmode, off); 7013 7014 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off)); 7015 7016 if (TARGET_GNU2_TLS) 7017 { 7018 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp)); 7019 7020 set_unique_reg_note (get_last_insn (), REG_EQUIV, x); 7021 } 7022 7023 break; 7024 7025 case TLS_MODEL_INITIAL_EXEC: 7026 if (TARGET_64BIT) 7027 { 7028 pic = NULL; 7029 type = UNSPEC_GOTNTPOFF; 7030 } 7031 else if (flag_pic) 7032 { 7033 if (reload_in_progress) 7034 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; 7035 pic = pic_offset_table_rtx; 7036 type = TARGET_ANY_GNU_TLS ? 
UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF; 7037 } 7038 else if (!TARGET_ANY_GNU_TLS) 7039 { 7040 pic = gen_reg_rtx (Pmode); 7041 emit_insn (gen_set_got (pic)); 7042 type = UNSPEC_GOTTPOFF; 7043 } 7044 else 7045 { 7046 pic = NULL; 7047 type = UNSPEC_INDNTPOFF; 7048 } 7049 7050 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type); 7051 off = gen_rtx_CONST (Pmode, off); 7052 if (pic) 7053 off = gen_rtx_PLUS (Pmode, pic, off); 7054 off = gen_const_mem (Pmode, off); 7055 set_mem_alias_set (off, ix86_GOT_alias_set ()); 7056 7057 if (TARGET_64BIT || TARGET_ANY_GNU_TLS) 7058 { 7059 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS); 7060 off = force_reg (Pmode, off); 7061 return gen_rtx_PLUS (Pmode, base, off); 7062 } 7063 else 7064 { 7065 base = get_thread_pointer (true); 7066 dest = gen_reg_rtx (Pmode); 7067 emit_insn (gen_subsi3 (dest, base, off)); 7068 } 7069 break; 7070 7071 case TLS_MODEL_LOCAL_EXEC: 7072 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), 7073 (TARGET_64BIT || TARGET_ANY_GNU_TLS) 7074 ? UNSPEC_NTPOFF : UNSPEC_TPOFF); 7075 off = gen_rtx_CONST (Pmode, off); 7076 7077 if (TARGET_64BIT || TARGET_ANY_GNU_TLS) 7078 { 7079 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS); 7080 return gen_rtx_PLUS (Pmode, base, off); 7081 } 7082 else 7083 { 7084 base = get_thread_pointer (true); 7085 dest = gen_reg_rtx (Pmode); 7086 emit_insn (gen_subsi3 (dest, base, off)); 7087 } 7088 break; 7089 7090 default: 7091 gcc_unreachable (); 7092 } 7093 7094 return dest; 7095} 7096 7097/* Try machine-dependent ways of modifying an illegitimate address 7098 to be legitimate. If we find one, return the new, valid address. 7099 This macro is used in only one place: `memory_address' in explow.c. 7100 7101 OLDX is the address as it was before break_out_memory_refs was called. 7102 In some cases it is useful to look at this to decide what needs to be done. 7103 7104 MODE and WIN are passed so that this macro can use 7105 GO_IF_LEGITIMATE_ADDRESS. 
7106 7107 It is always safe for this macro to do nothing. It exists to recognize 7108 opportunities to optimize the output. 7109 7110 For the 80386, we handle X+REG by loading X into a register R and 7111 using R+REG. R will go in a general reg and indexing will be used. 7112 However, if REG is a broken-out memory address or multiplication, 7113 nothing needs to be done because REG can certainly go in a general reg. 7114 7115 When -fpic is used, special handling is needed for symbolic references. 7116 See comments by legitimize_pic_address in i386.c for details. */ 7117 7118rtx 7119legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode) 7120{ 7121 int changed = 0; 7122 unsigned log; 7123 7124 if (TARGET_DEBUG_ADDR) 7125 { 7126 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n", 7127 GET_MODE_NAME (mode)); 7128 debug_rtx (x); 7129 } 7130 7131 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0; 7132 if (log) 7133 return legitimize_tls_address (x, log, false); 7134 if (GET_CODE (x) == CONST 7135 && GET_CODE (XEXP (x, 0)) == PLUS 7136 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF 7137 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)))) 7138 { 7139 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false); 7140 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1)); 7141 } 7142 7143 if (flag_pic && SYMBOLIC_CONST (x)) 7144 return legitimize_pic_address (x, 0); 7145 7146 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */ 7147 if (GET_CODE (x) == ASHIFT 7148 && GET_CODE (XEXP (x, 1)) == CONST_INT 7149 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4) 7150 { 7151 changed = 1; 7152 log = INTVAL (XEXP (x, 1)); 7153 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), 7154 GEN_INT (1 << log)); 7155 } 7156 7157 if (GET_CODE (x) == PLUS) 7158 { 7159 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. 
*/ 7160 7161 if (GET_CODE (XEXP (x, 0)) == ASHIFT 7162 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT 7163 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4) 7164 { 7165 changed = 1; 7166 log = INTVAL (XEXP (XEXP (x, 0), 1)); 7167 XEXP (x, 0) = gen_rtx_MULT (Pmode, 7168 force_reg (Pmode, XEXP (XEXP (x, 0), 0)), 7169 GEN_INT (1 << log)); 7170 } 7171 7172 if (GET_CODE (XEXP (x, 1)) == ASHIFT 7173 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT 7174 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4) 7175 { 7176 changed = 1; 7177 log = INTVAL (XEXP (XEXP (x, 1), 1)); 7178 XEXP (x, 1) = gen_rtx_MULT (Pmode, 7179 force_reg (Pmode, XEXP (XEXP (x, 1), 0)), 7180 GEN_INT (1 << log)); 7181 } 7182 7183 /* Put multiply first if it isn't already. */ 7184 if (GET_CODE (XEXP (x, 1)) == MULT) 7185 { 7186 rtx tmp = XEXP (x, 0); 7187 XEXP (x, 0) = XEXP (x, 1); 7188 XEXP (x, 1) = tmp; 7189 changed = 1; 7190 } 7191 7192 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const))) 7193 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be 7194 created by virtual register instantiation, register elimination, and 7195 similar optimizations. */ 7196 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS) 7197 { 7198 changed = 1; 7199 x = gen_rtx_PLUS (Pmode, 7200 gen_rtx_PLUS (Pmode, XEXP (x, 0), 7201 XEXP (XEXP (x, 1), 0)), 7202 XEXP (XEXP (x, 1), 1)); 7203 } 7204 7205 /* Canonicalize 7206 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const) 7207 into (plus (plus (mult (reg) (const)) (reg)) (const)). 
*/ 7208 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS 7209 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 7210 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS 7211 && CONSTANT_P (XEXP (x, 1))) 7212 { 7213 rtx constant; 7214 rtx other = NULL_RTX; 7215 7216 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 7217 { 7218 constant = XEXP (x, 1); 7219 other = XEXP (XEXP (XEXP (x, 0), 1), 1); 7220 } 7221 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT) 7222 { 7223 constant = XEXP (XEXP (XEXP (x, 0), 1), 1); 7224 other = XEXP (x, 1); 7225 } 7226 else 7227 constant = 0; 7228 7229 if (constant) 7230 { 7231 changed = 1; 7232 x = gen_rtx_PLUS (Pmode, 7233 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0), 7234 XEXP (XEXP (XEXP (x, 0), 1), 0)), 7235 plus_constant (other, INTVAL (constant))); 7236 } 7237 } 7238 7239 if (changed && legitimate_address_p (mode, x, FALSE)) 7240 return x; 7241 7242 if (GET_CODE (XEXP (x, 0)) == MULT) 7243 { 7244 changed = 1; 7245 XEXP (x, 0) = force_operand (XEXP (x, 0), 0); 7246 } 7247 7248 if (GET_CODE (XEXP (x, 1)) == MULT) 7249 { 7250 changed = 1; 7251 XEXP (x, 1) = force_operand (XEXP (x, 1), 0); 7252 } 7253 7254 if (changed 7255 && GET_CODE (XEXP (x, 1)) == REG 7256 && GET_CODE (XEXP (x, 0)) == REG) 7257 return x; 7258 7259 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1))) 7260 { 7261 changed = 1; 7262 x = legitimize_pic_address (x, 0); 7263 } 7264 7265 if (changed && legitimate_address_p (mode, x, FALSE)) 7266 return x; 7267 7268 if (GET_CODE (XEXP (x, 0)) == REG) 7269 { 7270 rtx temp = gen_reg_rtx (Pmode); 7271 rtx val = force_operand (XEXP (x, 1), temp); 7272 if (val != temp) 7273 emit_move_insn (temp, val); 7274 7275 XEXP (x, 1) = temp; 7276 return x; 7277 } 7278 7279 else if (GET_CODE (XEXP (x, 1)) == REG) 7280 { 7281 rtx temp = gen_reg_rtx (Pmode); 7282 rtx val = force_operand (XEXP (x, 0), temp); 7283 if (val != temp) 7284 emit_move_insn (temp, val); 7285 7286 XEXP (x, 0) = temp; 7287 return x; 7288 } 7289 } 7290 7291 return x; 
7292} 7293 7294/* Print an integer constant expression in assembler syntax. Addition 7295 and subtraction are the only arithmetic that may appear in these 7296 expressions. FILE is the stdio stream to write to, X is the rtx, and 7297 CODE is the operand print code from the output string. */ 7298 7299static void 7300output_pic_addr_const (FILE *file, rtx x, int code) 7301{ 7302 char buf[256]; 7303 7304 switch (GET_CODE (x)) 7305 { 7306 case PC: 7307 gcc_assert (flag_pic); 7308 putc ('.', file); 7309 break; 7310 7311 case SYMBOL_REF: 7312 if (! TARGET_MACHO || TARGET_64BIT) 7313 output_addr_const (file, x); 7314 else 7315 { 7316 const char *name = XSTR (x, 0); 7317 7318 /* Mark the decl as referenced so that cgraph will output the function. */ 7319 if (SYMBOL_REF_DECL (x)) 7320 mark_decl_referenced (SYMBOL_REF_DECL (x)); 7321 7322#if TARGET_MACHO 7323 if (MACHOPIC_INDIRECT 7324 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) 7325 name = machopic_indirection_name (x, /*stub_p=*/true); 7326#endif 7327 assemble_name (file, name); 7328 } 7329 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x)) 7330 fputs ("@PLT", file); 7331 break; 7332 7333 case LABEL_REF: 7334 x = XEXP (x, 0); 7335 /* FALLTHRU */ 7336 case CODE_LABEL: 7337 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); 7338 assemble_name (asm_out_file, buf); 7339 break; 7340 7341 case CONST_INT: 7342 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 7343 break; 7344 7345 case CONST: 7346 /* This used to output parentheses around the expression, 7347 but that does not work on the 386 (either ATT or BSD assembler). */ 7348 output_pic_addr_const (file, XEXP (x, 0), code); 7349 break; 7350 7351 case CONST_DOUBLE: 7352 if (GET_MODE (x) == VOIDmode) 7353 { 7354 /* We can use %d if the number is <32 bits and positive. 
*/ 7355 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0) 7356 fprintf (file, "0x%lx%08lx", 7357 (unsigned long) CONST_DOUBLE_HIGH (x), 7358 (unsigned long) CONST_DOUBLE_LOW (x)); 7359 else 7360 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x)); 7361 } 7362 else 7363 /* We can't handle floating point constants; 7364 PRINT_OPERAND must handle them. */ 7365 output_operand_lossage ("floating constant misused"); 7366 break; 7367 7368 case PLUS: 7369 /* Some assemblers need integer constants to appear first. */ 7370 if (GET_CODE (XEXP (x, 0)) == CONST_INT) 7371 { 7372 output_pic_addr_const (file, XEXP (x, 0), code); 7373 putc ('+', file); 7374 output_pic_addr_const (file, XEXP (x, 1), code); 7375 } 7376 else 7377 { 7378 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT); 7379 output_pic_addr_const (file, XEXP (x, 1), code); 7380 putc ('+', file); 7381 output_pic_addr_const (file, XEXP (x, 0), code); 7382 } 7383 break; 7384 7385 case MINUS: 7386 if (!TARGET_MACHO) 7387 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file); 7388 output_pic_addr_const (file, XEXP (x, 0), code); 7389 putc ('-', file); 7390 output_pic_addr_const (file, XEXP (x, 1), code); 7391 if (!TARGET_MACHO) 7392 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file); 7393 break; 7394 7395 case UNSPEC: 7396 gcc_assert (XVECLEN (x, 0) == 1); 7397 output_pic_addr_const (file, XVECEXP (x, 0, 0), code); 7398 switch (XINT (x, 1)) 7399 { 7400 case UNSPEC_GOT: 7401 fputs ("@GOT", file); 7402 break; 7403 case UNSPEC_GOTOFF: 7404 fputs ("@GOTOFF", file); 7405 break; 7406 case UNSPEC_GOTPCREL: 7407 fputs ("@GOTPCREL(%rip)", file); 7408 break; 7409 case UNSPEC_GOTTPOFF: 7410 /* FIXME: This might be @TPOFF in Sun ld too. 
*/ 7411 fputs ("@GOTTPOFF", file); 7412 break; 7413 case UNSPEC_TPOFF: 7414 fputs ("@TPOFF", file); 7415 break; 7416 case UNSPEC_NTPOFF: 7417 if (TARGET_64BIT) 7418 fputs ("@TPOFF", file); 7419 else 7420 fputs ("@NTPOFF", file); 7421 break; 7422 case UNSPEC_DTPOFF: 7423 fputs ("@DTPOFF", file); 7424 break; 7425 case UNSPEC_GOTNTPOFF: 7426 if (TARGET_64BIT) 7427 fputs ("@GOTTPOFF(%rip)", file); 7428 else 7429 fputs ("@GOTNTPOFF", file); 7430 break; 7431 case UNSPEC_INDNTPOFF: 7432 fputs ("@INDNTPOFF", file); 7433 break; 7434 default: 7435 output_operand_lossage ("invalid UNSPEC as operand"); 7436 break; 7437 } 7438 break; 7439 7440 default: 7441 output_operand_lossage ("invalid expression as operand"); 7442 } 7443} 7444 7445/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. 7446 We need to emit DTP-relative relocations. */ 7447 7448static void 7449i386_output_dwarf_dtprel (FILE *file, int size, rtx x) 7450{ 7451 fputs (ASM_LONG, file); 7452 output_addr_const (file, x); 7453 fputs ("@DTPOFF", file); 7454 switch (size) 7455 { 7456 case 4: 7457 break; 7458 case 8: 7459 fputs (", 0", file); 7460 break; 7461 default: 7462 gcc_unreachable (); 7463 } 7464} 7465 7466/* In the name of slightly smaller debug output, and to cater to 7467 general assembler lossage, recognize PIC+GOTOFF and turn it back 7468 into a direct symbol reference. 7469 7470 On Darwin, this is necessary to avoid a crash, because Darwin 7471 has a different PIC label for each routine but the DWARF debugging 7472 information is not associated with any particular routine, so it's 7473 necessary to remove references to the PIC label from RTL stored by 7474 the DWARF output code. */ 7475 7476static rtx 7477ix86_delegitimize_address (rtx orig_x) 7478{ 7479 rtx x = orig_x; 7480 /* reg_addend is NULL or a multiple of some register. */ 7481 rtx reg_addend = NULL_RTX; 7482 /* const_addend is NULL or a const_int. */ 7483 rtx const_addend = NULL_RTX; 7484 /* This is the result, or NULL. 
*/ 7485 rtx result = NULL_RTX; 7486 7487 if (GET_CODE (x) == MEM) 7488 x = XEXP (x, 0); 7489 7490 if (TARGET_64BIT) 7491 { 7492 if (GET_CODE (x) != CONST 7493 || GET_CODE (XEXP (x, 0)) != UNSPEC 7494 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL 7495 || GET_CODE (orig_x) != MEM) 7496 return orig_x; 7497 return XVECEXP (XEXP (x, 0), 0, 0); 7498 } 7499 7500 if (GET_CODE (x) != PLUS 7501 || GET_CODE (XEXP (x, 1)) != CONST) 7502 return orig_x; 7503 7504 if (GET_CODE (XEXP (x, 0)) == REG 7505 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM) 7506 /* %ebx + GOT/GOTOFF */ 7507 ; 7508 else if (GET_CODE (XEXP (x, 0)) == PLUS) 7509 { 7510 /* %ebx + %reg * scale + GOT/GOTOFF */ 7511 reg_addend = XEXP (x, 0); 7512 if (GET_CODE (XEXP (reg_addend, 0)) == REG 7513 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM) 7514 reg_addend = XEXP (reg_addend, 1); 7515 else if (GET_CODE (XEXP (reg_addend, 1)) == REG 7516 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM) 7517 reg_addend = XEXP (reg_addend, 0); 7518 else 7519 return orig_x; 7520 if (GET_CODE (reg_addend) != REG 7521 && GET_CODE (reg_addend) != MULT 7522 && GET_CODE (reg_addend) != ASHIFT) 7523 return orig_x; 7524 } 7525 else 7526 return orig_x; 7527 7528 x = XEXP (XEXP (x, 1), 0); 7529 if (GET_CODE (x) == PLUS 7530 && GET_CODE (XEXP (x, 1)) == CONST_INT) 7531 { 7532 const_addend = XEXP (x, 1); 7533 x = XEXP (x, 0); 7534 } 7535 7536 if (GET_CODE (x) == UNSPEC 7537 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM) 7538 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM))) 7539 result = XVECEXP (x, 0, 0); 7540 7541 if (TARGET_MACHO && darwin_local_data_pic (x) 7542 && GET_CODE (orig_x) != MEM) 7543 result = XEXP (x, 0); 7544 7545 if (! 
result) 7546 return orig_x; 7547 7548 if (const_addend) 7549 result = gen_rtx_PLUS (Pmode, result, const_addend); 7550 if (reg_addend) 7551 result = gen_rtx_PLUS (Pmode, reg_addend, result); 7552 return result; 7553} 7554 7555static void 7556put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse, 7557 int fp, FILE *file) 7558{ 7559 const char *suffix; 7560 7561 if (mode == CCFPmode || mode == CCFPUmode) 7562 { 7563 enum rtx_code second_code, bypass_code; 7564 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 7565 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN); 7566 code = ix86_fp_compare_code_to_integer (code); 7567 mode = CCmode; 7568 } 7569 if (reverse) 7570 code = reverse_condition (code); 7571 7572 switch (code) 7573 { 7574 case EQ: 7575 suffix = "e"; 7576 break; 7577 case NE: 7578 suffix = "ne"; 7579 break; 7580 case GT: 7581 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode); 7582 suffix = "g"; 7583 break; 7584 case GTU: 7585 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers. 7586 Those same assemblers have the same but opposite lossage on cmov. */ 7587 gcc_assert (mode == CCmode); 7588 suffix = fp ? "nbe" : "a"; 7589 break; 7590 case LT: 7591 switch (mode) 7592 { 7593 case CCNOmode: 7594 case CCGOCmode: 7595 suffix = "s"; 7596 break; 7597 7598 case CCmode: 7599 case CCGCmode: 7600 suffix = "l"; 7601 break; 7602 7603 default: 7604 gcc_unreachable (); 7605 } 7606 break; 7607 case LTU: 7608 gcc_assert (mode == CCmode); 7609 suffix = "b"; 7610 break; 7611 case GE: 7612 switch (mode) 7613 { 7614 case CCNOmode: 7615 case CCGOCmode: 7616 suffix = "ns"; 7617 break; 7618 7619 case CCmode: 7620 case CCGCmode: 7621 suffix = "ge"; 7622 break; 7623 7624 default: 7625 gcc_unreachable (); 7626 } 7627 break; 7628 case GEU: 7629 /* ??? As above. */ 7630 gcc_assert (mode == CCmode); 7631 suffix = fp ? 
"nb" : "ae"; 7632 break; 7633 case LE: 7634 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode); 7635 suffix = "le"; 7636 break; 7637 case LEU: 7638 gcc_assert (mode == CCmode); 7639 suffix = "be"; 7640 break; 7641 case UNORDERED: 7642 suffix = fp ? "u" : "p"; 7643 break; 7644 case ORDERED: 7645 suffix = fp ? "nu" : "np"; 7646 break; 7647 default: 7648 gcc_unreachable (); 7649 } 7650 fputs (suffix, file); 7651} 7652 7653/* Print the name of register X to FILE based on its machine mode and number. 7654 If CODE is 'w', pretend the mode is HImode. 7655 If CODE is 'b', pretend the mode is QImode. 7656 If CODE is 'k', pretend the mode is SImode. 7657 If CODE is 'q', pretend the mode is DImode. 7658 If CODE is 'h', pretend the reg is the 'high' byte register. 7659 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */ 7660 7661void 7662print_reg (rtx x, int code, FILE *file) 7663{ 7664 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM 7665 && REGNO (x) != FRAME_POINTER_REGNUM 7666 && REGNO (x) != FLAGS_REG 7667 && REGNO (x) != FPSR_REG); 7668 7669 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0) 7670 putc ('%', file); 7671 7672 if (code == 'w' || MMX_REG_P (x)) 7673 code = 2; 7674 else if (code == 'b') 7675 code = 1; 7676 else if (code == 'k') 7677 code = 4; 7678 else if (code == 'q') 7679 code = 8; 7680 else if (code == 'y') 7681 code = 3; 7682 else if (code == 'h') 7683 code = 0; 7684 else 7685 code = GET_MODE_SIZE (GET_MODE (x)); 7686 7687 /* Irritatingly, AMD extended registers use different naming convention 7688 from the normal registers. 
*/ 7689 if (REX_INT_REG_P (x)) 7690 { 7691 gcc_assert (TARGET_64BIT); 7692 switch (code) 7693 { 7694 case 0: 7695 error ("extended registers have no high halves"); 7696 break; 7697 case 1: 7698 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8); 7699 break; 7700 case 2: 7701 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8); 7702 break; 7703 case 4: 7704 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8); 7705 break; 7706 case 8: 7707 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8); 7708 break; 7709 default: 7710 error ("unsupported operand size for extended register"); 7711 break; 7712 } 7713 return; 7714 } 7715 switch (code) 7716 { 7717 case 3: 7718 if (STACK_TOP_P (x)) 7719 { 7720 fputs ("st(0)", file); 7721 break; 7722 } 7723 /* FALLTHRU */ 7724 case 8: 7725 case 4: 7726 case 12: 7727 if (! ANY_FP_REG_P (x)) 7728 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file); 7729 /* FALLTHRU */ 7730 case 16: 7731 case 2: 7732 normal: 7733 fputs (hi_reg_name[REGNO (x)], file); 7734 break; 7735 case 1: 7736 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name)) 7737 goto normal; 7738 fputs (qi_reg_name[REGNO (x)], file); 7739 break; 7740 case 0: 7741 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name)) 7742 goto normal; 7743 fputs (qi_high_reg_name[REGNO (x)], file); 7744 break; 7745 default: 7746 gcc_unreachable (); 7747 } 7748} 7749 7750/* Locate some local-dynamic symbol still in use by this function 7751 so that we can print its name in some tls_local_dynamic_base 7752 pattern. 
*/ 7753 7754static const char * 7755get_some_local_dynamic_name (void) 7756{ 7757 rtx insn; 7758 7759 if (cfun->machine->some_ld_name) 7760 return cfun->machine->some_ld_name; 7761 7762 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) 7763 if (INSN_P (insn) 7764 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) 7765 return cfun->machine->some_ld_name; 7766 7767 gcc_unreachable (); 7768} 7769 7770static int 7771get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED) 7772{ 7773 rtx x = *px; 7774 7775 if (GET_CODE (x) == SYMBOL_REF 7776 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC) 7777 { 7778 cfun->machine->some_ld_name = XSTR (x, 0); 7779 return 1; 7780 } 7781 7782 return 0; 7783} 7784 7785/* Meaning of CODE: 7786 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. 7787 C -- print opcode suffix for set/cmov insn. 7788 c -- like C, but print reversed condition 7789 F,f -- likewise, but for floating-point. 7790 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", 7791 otherwise nothing 7792 R -- print the prefix for register names. 7793 z -- print the opcode suffix for the size of the current operand. 7794 * -- print a star (in certain assembler syntax) 7795 A -- print an absolute memory reference. 7796 w -- print the operand as if it's a "word" (HImode) even if it isn't. 7797 s -- print a shift double count, followed by the assemblers argument 7798 delimiter. 7799 b -- print the QImode name of the register for the indicated operand. 7800 %b0 would print %al if operands[0] is reg 0. 7801 w -- likewise, print the HImode name of the register. 7802 k -- likewise, print the SImode name of the register. 7803 q -- likewise, print the DImode name of the register. 7804 h -- print the QImode name for a "high" register, either ah, bh, ch or dh. 7805 y -- print "st(0)" instead of "st" as a register. 7806 D -- print condition for SSE cmp instruction. 7807 P -- if PIC, print an @PLT suffix. 
7808 X -- don't print any sort of PIC '@' suffix for a symbol. 7809 & -- print some in-use local-dynamic symbol name. 7810 H -- print a memory address offset by 8; used for sse high-parts 7811 */ 7812 7813void 7814print_operand (FILE *file, rtx x, int code) 7815{ 7816 if (code) 7817 { 7818 switch (code) 7819 { 7820 case '*': 7821 if (ASSEMBLER_DIALECT == ASM_ATT) 7822 putc ('*', file); 7823 return; 7824 7825 case '&': 7826 assemble_name (file, get_some_local_dynamic_name ()); 7827 return; 7828 7829 case 'A': 7830 switch (ASSEMBLER_DIALECT) 7831 { 7832 case ASM_ATT: 7833 putc ('*', file); 7834 break; 7835 7836 case ASM_INTEL: 7837 /* Intel syntax. For absolute addresses, registers should not 7838 be surrounded by braces. */ 7839 if (GET_CODE (x) != REG) 7840 { 7841 putc ('[', file); 7842 PRINT_OPERAND (file, x, 0); 7843 putc (']', file); 7844 return; 7845 } 7846 break; 7847 7848 default: 7849 gcc_unreachable (); 7850 } 7851 7852 PRINT_OPERAND (file, x, 0); 7853 return; 7854 7855 7856 case 'L': 7857 if (ASSEMBLER_DIALECT == ASM_ATT) 7858 putc ('l', file); 7859 return; 7860 7861 case 'W': 7862 if (ASSEMBLER_DIALECT == ASM_ATT) 7863 putc ('w', file); 7864 return; 7865 7866 case 'B': 7867 if (ASSEMBLER_DIALECT == ASM_ATT) 7868 putc ('b', file); 7869 return; 7870 7871 case 'Q': 7872 if (ASSEMBLER_DIALECT == ASM_ATT) 7873 putc ('l', file); 7874 return; 7875 7876 case 'S': 7877 if (ASSEMBLER_DIALECT == ASM_ATT) 7878 putc ('s', file); 7879 return; 7880 7881 case 'T': 7882 if (ASSEMBLER_DIALECT == ASM_ATT) 7883 putc ('t', file); 7884 return; 7885 7886 case 'z': 7887 /* 387 opcodes don't get size suffixes if the operands are 7888 registers. */ 7889 if (STACK_REG_P (x)) 7890 return; 7891 7892 /* Likewise if using Intel opcodes. */ 7893 if (ASSEMBLER_DIALECT == ASM_INTEL) 7894 return; 7895 7896 /* This is the size of op from size of operand. 
*/ 7897 switch (GET_MODE_SIZE (GET_MODE (x))) 7898 { 7899 case 2: 7900#ifdef HAVE_GAS_FILDS_FISTS 7901 putc ('s', file); 7902#endif 7903 return; 7904 7905 case 4: 7906 if (GET_MODE (x) == SFmode) 7907 { 7908 putc ('s', file); 7909 return; 7910 } 7911 else 7912 putc ('l', file); 7913 return; 7914 7915 case 12: 7916 case 16: 7917 putc ('t', file); 7918 return; 7919 7920 case 8: 7921 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) 7922 { 7923#ifdef GAS_MNEMONICS 7924 putc ('q', file); 7925#else 7926 putc ('l', file); 7927 putc ('l', file); 7928#endif 7929 } 7930 else 7931 putc ('l', file); 7932 return; 7933 7934 default: 7935 gcc_unreachable (); 7936 } 7937 7938 case 'b': 7939 case 'w': 7940 case 'k': 7941 case 'q': 7942 case 'h': 7943 case 'y': 7944 case 'X': 7945 case 'P': 7946 break; 7947 7948 case 's': 7949 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT) 7950 { 7951 PRINT_OPERAND (file, x, 0); 7952 putc (',', file); 7953 } 7954 return; 7955 7956 case 'D': 7957 /* Little bit of braindamage here. The SSE compare instructions 7958 does use completely different names for the comparisons that the 7959 fp conditional moves. 
*/ 7960 switch (GET_CODE (x)) 7961 { 7962 case EQ: 7963 case UNEQ: 7964 fputs ("eq", file); 7965 break; 7966 case LT: 7967 case UNLT: 7968 fputs ("lt", file); 7969 break; 7970 case LE: 7971 case UNLE: 7972 fputs ("le", file); 7973 break; 7974 case UNORDERED: 7975 fputs ("unord", file); 7976 break; 7977 case NE: 7978 case LTGT: 7979 fputs ("neq", file); 7980 break; 7981 case UNGE: 7982 case GE: 7983 fputs ("nlt", file); 7984 break; 7985 case UNGT: 7986 case GT: 7987 fputs ("nle", file); 7988 break; 7989 case ORDERED: 7990 fputs ("ord", file); 7991 break; 7992 default: 7993 gcc_unreachable (); 7994 } 7995 return; 7996 case 'O': 7997#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 7998 if (ASSEMBLER_DIALECT == ASM_ATT) 7999 { 8000 switch (GET_MODE (x)) 8001 { 8002 case HImode: putc ('w', file); break; 8003 case SImode: 8004 case SFmode: putc ('l', file); break; 8005 case DImode: 8006 case DFmode: putc ('q', file); break; 8007 default: gcc_unreachable (); 8008 } 8009 putc ('.', file); 8010 } 8011#endif 8012 return; 8013 case 'C': 8014 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file); 8015 return; 8016 case 'F': 8017#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 8018 if (ASSEMBLER_DIALECT == ASM_ATT) 8019 putc ('.', file); 8020#endif 8021 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file); 8022 return; 8023 8024 /* Like above, but reverse condition */ 8025 case 'c': 8026 /* Check to see if argument to %c is really a constant 8027 and not a condition code which needs to be reversed. 
	 */
      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
	  return;
	}
      put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
      return;
    case 'f':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('.', file);
#endif
      put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
      return;

    case 'H':
      /* It doesn't actually matter what mode we use here, as we're
	 only going to use this for printing.  */
      x = adjust_address_nv (x, DImode, 8);
      break;

    case '+':
      /* '+' on a conditional branch: optionally emit a ds/cs segment
	 prefix as a static branch-prediction hint, based on the
	 REG_BR_PROB note attached to the insn being output.  */
      {
	rtx x;

	if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
	  return;

	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	if (x)
	  {
	    int pred_val = INTVAL (XEXP (x, 0));

	    /* Only hint when the prediction is reasonably far from
	       50/50; near-even probabilities are not worth a prefix.  */
	    if (pred_val < REG_BR_PROB_BASE * 45 / 100
		|| pred_val > REG_BR_PROB_BASE * 55 / 100)
	      {
		int taken = pred_val > REG_BR_PROB_BASE / 2;
		int cputaken = final_forward_branch_p (current_output_insn) == 0;

		/* Emit hints only in the case default branch prediction
		   heuristics would fail.  */
		if (taken != cputaken)
		  {
		    /* We use 3e (DS) prefix for taken branches and
		       2e (CS) prefix for not taken branches.  */
		    if (taken)
		      fputs ("ds ; ", file);
		    else
		      fputs ("cs ; ", file);
		  }
	      }
	  }
	return;
      }
    default:
      output_operand_lossage ("invalid operand code '%c'", code);
    }

  if (GET_CODE (x) == REG)
    print_reg (x, code, file);

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
	{
	  const char * size;
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    case 16: size = "XMMWORD"; break;
	    default:
	      gcc_unreachable ();
	    }

	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
	  && GET_CODE (x) != CONST_INT)
	output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      /* SFmode immediates are printed as their 32-bit target image
	 in hex (e.g. $0x3f800000).  */
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      fprintf (file, "0x%08lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
	   && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
	 In 64-bit mode, we should probably support all 8-byte vectors,
	 since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
	{
	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
	  x = const0_rtx;
	}

      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}

/* Print a memory operand whose address is ADDR.  Handles both the
   AT&T form disp(base,index,scale) and the Intel form
   [base+index*scale+disp], plus fs:/gs: segment overrides and
   64-bit RIP-relative addressing.  */

void
print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (USER_LABEL_PREFIX[0] == 0)
	putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      gcc_unreachable ();
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    {
	      /* Intel syntax needs an explicit ds: on a bare constant
		 address so the assembler treats it as memory.  */
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);

      /* Use one byte shorter RIP relative addressing for 64bit mode.  */
      if (TARGET_64BIT)
	{
	  /* Strip a constant addend first so a symbol+offset
	     displacement still qualifies for (%rip).  */
	  if (GET_CODE (disp) == CONST
	      && GET_CODE (XEXP (disp, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
	    disp = XEXP (XEXP (disp, 0), 0);
	  if (GET_CODE (disp) == LABEL_REF
	      || (GET_CODE (disp) == SYMBOL_REF
		  && SYMBOL_REF_TLS_MODEL (disp) == 0))
	    fputs ("(%rip)", file);
	}
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  /* AT&T: disp(base,index,scale).  */
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  /* Intel: [base+index*scale+disp].  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, 0, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}

/* Output the TLS relocation suffix (e.g. @GOTTPOFF, @TPOFF) for the
   UNSPEC wrapper X, printing the wrapped operand first.  Returns true
   if X was an UNSPEC this function knows how to print.  */

bool
output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@GOTTPOFF", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@TPOFF", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@TPOFF", file);
      else
	fputs ("@NTPOFF", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@DTPOFF", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@GOTTPOFF(%rip)", file);
      else
	fputs ("@GOTNTPOFF", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@INDNTPOFF", file);
      break;

    default:
      return false;
    }

  return true;
}

/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuse to split volatile memory addresses,
         but we still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, SImode, 0);
	  hi_half[num] = adjust_address (op, SImode, 4);
	}
      else
	{
	  /* Constants (CONST_INT/CONST_DOUBLE) have VOIDmode, so
	     treat them as DImode for subreg purposes.  */
	  lo_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 4);
	}
    }
}
/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.
"operands" is a pointer to an array of DImode RTL to 8425 split and "num" is its length. lo_half and hi_half are output arrays 8426 that parallel "operands". */ 8427 8428void 8429split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) 8430{ 8431 while (num--) 8432 { 8433 rtx op = operands[num]; 8434 8435 /* simplify_subreg refuse to split volatile memory addresses, but we 8436 still have to handle it. */ 8437 if (GET_CODE (op) == MEM) 8438 { 8439 lo_half[num] = adjust_address (op, DImode, 0); 8440 hi_half[num] = adjust_address (op, DImode, 8); 8441 } 8442 else 8443 { 8444 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0); 8445 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8); 8446 } 8447 } 8448} 8449 8450/* Output code to perform a 387 binary operation in INSN, one of PLUS, 8451 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] 8452 is the expression of the binary operation. The output may either be 8453 emitted here, or returned to the caller, like all output_* functions. 8454 8455 There is no guarantee that the operands are the same mode, as they 8456 might be within FLOAT or FLOAT_EXTEND expressions. */ 8457 8458#ifndef SYSV386_COMPAT 8459/* Set to 1 for compatibility with brain-damaged assemblers. No-one 8460 wants to fix the assemblers because that causes incompatibility 8461 with gcc. No-one wants to fix gcc because that causes 8462 incompatibility with assemblers... You can use the option of 8463 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */ 8464#define SYSV386_COMPAT 1 8465#endif 8466 8467const char * 8468output_387_binary_op (rtx insn, rtx *operands) 8469{ 8470 static char buf[30]; 8471 const char *p; 8472 const char *ssep; 8473 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]); 8474 8475#ifdef ENABLE_CHECKING 8476 /* Even if we do not want to check the inputs, this documents input 8477 constraints. Which helps in understanding the following code. 
*/ 8478 if (STACK_REG_P (operands[0]) 8479 && ((REG_P (operands[1]) 8480 && REGNO (operands[0]) == REGNO (operands[1]) 8481 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM)) 8482 || (REG_P (operands[2]) 8483 && REGNO (operands[0]) == REGNO (operands[2]) 8484 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM))) 8485 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2]))) 8486 ; /* ok */ 8487 else 8488 gcc_assert (is_sse); 8489#endif 8490 8491 switch (GET_CODE (operands[3])) 8492 { 8493 case PLUS: 8494 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 8495 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 8496 p = "fiadd"; 8497 else 8498 p = "fadd"; 8499 ssep = "add"; 8500 break; 8501 8502 case MINUS: 8503 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 8504 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 8505 p = "fisub"; 8506 else 8507 p = "fsub"; 8508 ssep = "sub"; 8509 break; 8510 8511 case MULT: 8512 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 8513 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 8514 p = "fimul"; 8515 else 8516 p = "fmul"; 8517 ssep = "mul"; 8518 break; 8519 8520 case DIV: 8521 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 8522 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 8523 p = "fidiv"; 8524 else 8525 p = "fdiv"; 8526 ssep = "div"; 8527 break; 8528 8529 default: 8530 gcc_unreachable (); 8531 } 8532 8533 if (is_sse) 8534 { 8535 strcpy (buf, ssep); 8536 if (GET_MODE (operands[0]) == SFmode) 8537 strcat (buf, "ss\t{%2, %0|%0, %2}"); 8538 else 8539 strcat (buf, "sd\t{%2, %0|%0, %2}"); 8540 return buf; 8541 } 8542 strcpy (buf, p); 8543 8544 switch (GET_CODE (operands[3])) 8545 { 8546 case MULT: 8547 case PLUS: 8548 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) 8549 { 8550 rtx temp = operands[2]; 8551 operands[2] = operands[1]; 8552 operands[1] = temp; 8553 } 8554 8555 /* know operands[0] == operands[1]. 
*/ 8556 8557 if (GET_CODE (operands[2]) == MEM) 8558 { 8559 p = "%z2\t%2"; 8560 break; 8561 } 8562 8563 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) 8564 { 8565 if (STACK_TOP_P (operands[0])) 8566 /* How is it that we are storing to a dead operand[2]? 8567 Well, presumably operands[1] is dead too. We can't 8568 store the result to st(0) as st(0) gets popped on this 8569 instruction. Instead store to operands[2] (which I 8570 think has to be st(1)). st(1) will be popped later. 8571 gcc <= 2.8.1 didn't have this check and generated 8572 assembly code that the Unixware assembler rejected. */ 8573 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ 8574 else 8575 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ 8576 break; 8577 } 8578 8579 if (STACK_TOP_P (operands[0])) 8580 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ 8581 else 8582 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ 8583 break; 8584 8585 case MINUS: 8586 case DIV: 8587 if (GET_CODE (operands[1]) == MEM) 8588 { 8589 p = "r%z1\t%1"; 8590 break; 8591 } 8592 8593 if (GET_CODE (operands[2]) == MEM) 8594 { 8595 p = "%z2\t%2"; 8596 break; 8597 } 8598 8599 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) 8600 { 8601#if SYSV386_COMPAT 8602 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T 8603 derived assemblers, confusingly reverse the direction of 8604 the operation for fsub{r} and fdiv{r} when the 8605 destination register is not st(0). The Intel assembler 8606 doesn't have this brain damage. Read !SYSV386_COMPAT to 8607 figure out what the hardware really does. */ 8608 if (STACK_TOP_P (operands[0])) 8609 p = "{p\t%0, %2|rp\t%2, %0}"; 8610 else 8611 p = "{rp\t%2, %0|p\t%0, %2}"; 8612#else 8613 if (STACK_TOP_P (operands[0])) 8614 /* As above for fmul/fadd, we can't store to st(0). 
*/ 8615 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ 8616 else 8617 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ 8618#endif 8619 break; 8620 } 8621 8622 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) 8623 { 8624#if SYSV386_COMPAT 8625 if (STACK_TOP_P (operands[0])) 8626 p = "{rp\t%0, %1|p\t%1, %0}"; 8627 else 8628 p = "{p\t%1, %0|rp\t%0, %1}"; 8629#else 8630 if (STACK_TOP_P (operands[0])) 8631 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */ 8632 else 8633 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */ 8634#endif 8635 break; 8636 } 8637 8638 if (STACK_TOP_P (operands[0])) 8639 { 8640 if (STACK_TOP_P (operands[1])) 8641 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ 8642 else 8643 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */ 8644 break; 8645 } 8646 else if (STACK_TOP_P (operands[1])) 8647 { 8648#if SYSV386_COMPAT 8649 p = "{\t%1, %0|r\t%0, %1}"; 8650#else 8651 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */ 8652#endif 8653 } 8654 else 8655 { 8656#if SYSV386_COMPAT 8657 p = "{r\t%2, %0|\t%0, %2}"; 8658#else 8659 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ 8660#endif 8661 } 8662 break; 8663 8664 default: 8665 gcc_unreachable (); 8666 } 8667 8668 strcat (buf, p); 8669 return buf; 8670} 8671 8672/* Return needed mode for entity in optimize_mode_switching pass. */ 8673 8674int 8675ix86_mode_needed (int entity, rtx insn) 8676{ 8677 enum attr_i387_cw mode; 8678 8679 /* The mode UNINITIALIZED is used to store control word after a 8680 function call or ASM pattern. The mode ANY specify that function 8681 has no requirements on the control word and make no changes in the 8682 bits we are interested in. 
*/ 8683 8684 if (CALL_P (insn) 8685 || (NONJUMP_INSN_P (insn) 8686 && (asm_noperands (PATTERN (insn)) >= 0 8687 || GET_CODE (PATTERN (insn)) == ASM_INPUT))) 8688 return I387_CW_UNINITIALIZED; 8689 8690 if (recog_memoized (insn) < 0) 8691 return I387_CW_ANY; 8692 8693 mode = get_attr_i387_cw (insn); 8694 8695 switch (entity) 8696 { 8697 case I387_TRUNC: 8698 if (mode == I387_CW_TRUNC) 8699 return mode; 8700 break; 8701 8702 case I387_FLOOR: 8703 if (mode == I387_CW_FLOOR) 8704 return mode; 8705 break; 8706 8707 case I387_CEIL: 8708 if (mode == I387_CW_CEIL) 8709 return mode; 8710 break; 8711 8712 case I387_MASK_PM: 8713 if (mode == I387_CW_MASK_PM) 8714 return mode; 8715 break; 8716 8717 default: 8718 gcc_unreachable (); 8719 } 8720 8721 return I387_CW_ANY; 8722} 8723 8724/* Output code to initialize control word copies used by trunc?f?i and 8725 rounding patterns. CURRENT_MODE is set to current control word, 8726 while NEW_MODE is set to new control word. */ 8727 8728void 8729emit_i387_cw_initialization (int mode) 8730{ 8731 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED); 8732 rtx new_mode; 8733 8734 int slot; 8735 8736 rtx reg = gen_reg_rtx (HImode); 8737 8738 emit_insn (gen_x86_fnstcw_1 (stored_mode)); 8739 emit_move_insn (reg, stored_mode); 8740 8741 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size) 8742 { 8743 switch (mode) 8744 { 8745 case I387_CW_TRUNC: 8746 /* round toward zero (truncate) */ 8747 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00))); 8748 slot = SLOT_CW_TRUNC; 8749 break; 8750 8751 case I387_CW_FLOOR: 8752 /* round down toward -oo */ 8753 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); 8754 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400))); 8755 slot = SLOT_CW_FLOOR; 8756 break; 8757 8758 case I387_CW_CEIL: 8759 /* round up toward +oo */ 8760 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); 8761 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800))); 8762 slot = SLOT_CW_CEIL; 8763 break; 8764 8765 case 
I387_CW_MASK_PM: 8766 /* mask precision exception for nearbyint() */ 8767 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020))); 8768 slot = SLOT_CW_MASK_PM; 8769 break; 8770 8771 default: 8772 gcc_unreachable (); 8773 } 8774 } 8775 else 8776 { 8777 switch (mode) 8778 { 8779 case I387_CW_TRUNC: 8780 /* round toward zero (truncate) */ 8781 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc))); 8782 slot = SLOT_CW_TRUNC; 8783 break; 8784 8785 case I387_CW_FLOOR: 8786 /* round down toward -oo */ 8787 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4))); 8788 slot = SLOT_CW_FLOOR; 8789 break; 8790 8791 case I387_CW_CEIL: 8792 /* round up toward +oo */ 8793 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8))); 8794 slot = SLOT_CW_CEIL; 8795 break; 8796 8797 case I387_CW_MASK_PM: 8798 /* mask precision exception for nearbyint() */ 8799 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020))); 8800 slot = SLOT_CW_MASK_PM; 8801 break; 8802 8803 default: 8804 gcc_unreachable (); 8805 } 8806 } 8807 8808 gcc_assert (slot < MAX_386_STACK_LOCALS); 8809 8810 new_mode = assign_386_stack_local (HImode, slot); 8811 emit_move_insn (new_mode, reg); 8812} 8813 8814/* Output code for INSN to convert a float to a signed int. OPERANDS 8815 are the insn operands. The output may be [HSD]Imode and the input 8816 operand may be [SDX]Fmode. */ 8817 8818const char * 8819output_fix_trunc (rtx insn, rtx *operands, int fisttp) 8820{ 8821 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; 8822 int dimode_p = GET_MODE (operands[0]) == DImode; 8823 int round_mode = get_attr_i387_cw (insn); 8824 8825 /* Jump through a hoop or two for DImode, since the hardware has no 8826 non-popping instruction. We used to do this a different way, but 8827 that was somewhat fragile and broke with post-reload splitters. 
*/ 8828 if ((dimode_p || fisttp) && !stack_top_dies) 8829 output_asm_insn ("fld\t%y1", operands); 8830 8831 gcc_assert (STACK_TOP_P (operands[1])); 8832 gcc_assert (GET_CODE (operands[0]) == MEM); 8833 8834 if (fisttp) 8835 output_asm_insn ("fisttp%z0\t%0", operands); 8836 else 8837 { 8838 if (round_mode != I387_CW_ANY) 8839 output_asm_insn ("fldcw\t%3", operands); 8840 if (stack_top_dies || dimode_p) 8841 output_asm_insn ("fistp%z0\t%0", operands); 8842 else 8843 output_asm_insn ("fist%z0\t%0", operands); 8844 if (round_mode != I387_CW_ANY) 8845 output_asm_insn ("fldcw\t%2", operands); 8846 } 8847 8848 return ""; 8849} 8850 8851/* Output code for x87 ffreep insn. The OPNO argument, which may only 8852 have the values zero or one, indicates the ffreep insn's operand 8853 from the OPERANDS array. */ 8854 8855static const char * 8856output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno) 8857{ 8858 if (TARGET_USE_FFREEP) 8859#if HAVE_AS_IX86_FFREEP 8860 return opno ? "ffreep\t%y1" : "ffreep\t%y0"; 8861#else 8862 switch (REGNO (operands[opno])) 8863 { 8864 case FIRST_STACK_REG + 0: return ".word\t0xc0df"; 8865 case FIRST_STACK_REG + 1: return ".word\t0xc1df"; 8866 case FIRST_STACK_REG + 2: return ".word\t0xc2df"; 8867 case FIRST_STACK_REG + 3: return ".word\t0xc3df"; 8868 case FIRST_STACK_REG + 4: return ".word\t0xc4df"; 8869 case FIRST_STACK_REG + 5: return ".word\t0xc5df"; 8870 case FIRST_STACK_REG + 6: return ".word\t0xc6df"; 8871 case FIRST_STACK_REG + 7: return ".word\t0xc7df"; 8872 } 8873#endif 8874 8875 return opno ? "fstp\t%y1" : "fstp\t%y0"; 8876} 8877 8878 8879/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi 8880 should be used. UNORDERED_P is true when fucom should be used. 
 */

const char *
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  /* For fcomi-style compares the two inputs are operands[0]/[1];
     for fnstsw-style compares operands[0] is the status-word output
     and the inputs are operands[1]/[2].  */
  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "ucomiss\t{%1, %0|%0, %1}";
	else
	  return "comiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "ucomisd\t{%1, %0|%0, %1}";
	else
	  return "comisd\t{%1, %0|%0, %1}";
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  /* Compare against zero uses ftst, popping st(0) afterwards if it dies.  */
  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
	{
	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
	  return output_387_ffreep (operands, 1);
	}
      else
	return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return output_387_ffreep (operands, 0);
	}
      else
	{
	  if (unordered_p)
	    return "fucompp\n\tfnstsw\t%0";
	  else
	    return "fcompp\n\tfnstsw\t%0";
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}

/* Output one absolute element of an address vector (jump table),
   e.g. `.long L5' (`.quad' in 64-bit mode when the assembler has it).  */

void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

/* Output one relative (label-difference) element of an address vector,
   as used for PIC jump tables.  */

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-%s%d\n",
	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
      machopic_output_function_base_name (file);
      fprintf(file, "\n");
    }
#endif
  else
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}

/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      /* The xor form clobbers the flags register (hard reg 17).  */
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}

/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.
 */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}

/* Expand a scalar move of MODE from operands[1] to operands[0],
   legitimizing TLS symbols and PIC addresses and forcing operands
   into registers/memory as the target requires.  */

void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  int strict = (reload_in_progress || reload_completed);
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
	{
	  op1 = legitimize_tls_address (op1, model, true);
	  op1 = force_operand (op1, op0);
	  if (op1 == op0)
	    return;
	}
    }
  else if (GET_CODE (op1) == CONST
	   && GET_CODE (XEXP (op1, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      /* TLS symbol plus constant addend: legitimize the symbol and
	 re-add the addend.  */
      model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
      if (model)
	{
	  rtx addend = XEXP (XEXP (op1, 0), 1);
	  op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
	  op1 = force_operand (op1, NULL);
	  op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
				     op0, 1, OPTAB_DIRECT);
	  if (op1 == op0)
	    return;
	}
    }

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
	{
#if TARGET_MACHO
	  if (MACHOPIC_PURE)
	    {
	      rtx temp = ((reload_in_progress
			   || ((op0 && GET_CODE (op0) == REG)
			       && mode == Pmode))
			  ? op0 : gen_reg_rtx (Pmode));
	      op1 = machopic_indirect_data_reference (op1, temp);
	      op1 = machopic_legitimize_pic_address (op1, mode,
						     temp == op1 ? 0 : temp);
	    }
	  else if (MACHOPIC_INDIRECT)
	    op1 = machopic_indirect_data_reference (op1, 0);
	  if (op0 == op1)
	    return;
#endif
	}
      else
	{
	  if (GET_CODE (op0) == MEM)
	    op1 = force_reg (Pmode, op1);
	  else
	    op1 = legitimize_address (op1, op1, Pmode);
	}
    }
  else
    {
      /* Disallow mem->mem moves except for pushes.  */
      if (GET_CODE (op0) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && GET_CODE (op1) == MEM)
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (op1, mode)
	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
	  && !register_operand (op0, mode)
	  && optimize && !reload_completed && !reload_in_progress)
	op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (op1) == CONST_DOUBLE)
	    {
	      op1 = validize_mem (force_const_mem (mode, op1));
	      if (!register_operand (op0, mode))
		{
		  rtx temp = gen_reg_rtx (mode);
		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
		  emit_move_insn (op0, temp);
		  return;
		}
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}

/* Expand a vector move of MODE from operands[1] to operands[0].  */

void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (op0, mode)
      && CONSTANT_P (op1)
      && standard_sse_constant_p (op1) <= 0)
    op1 = validize_mem (force_const_mem (mode, op1));

  /* Make operand1 a register if it isn't already.  */
  if (!no_new_pseudos
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}

/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (MEM_P (op1))
    {
      /* Unaligned load.  */
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  rtx zero;

	  /* When SSE registers are split into halves, we can avoid
	     writing to the top half twice.  */
	  if (TARGET_SSE_SPLIT_REGS)
	    {
	      emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
	      zero = op0;
	    }
	  else
	    {
	      /* ??? Not sure about the best option for the Intel chips.
		 The following would seem to satisfy; the register is
		 entirely cleared, breaking the dependency chain.  We
		 then store to the upper half, with a dependency depth
		 of one.  A rumor has it that Intel recommends two movsd
		 followed by an unpacklpd, but this is unconfirmed.  And
		 given that the dependency depth of the unpacklpd would
		 still be one, I'm not sure why this would be better.  */
	      zero = CONST0_RTX (V2DFmode);
	    }

	  m = adjust_address (op1, DFmode, 0);
	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
	  m = adjust_address (op1, DFmode, 8);
	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
	}
      else
	{
	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
	    emit_move_insn (op0, CONST0_RTX (mode));
	  else
	    emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));

	  if (mode != V4SFmode)
	    op0 = gen_lowpart (V4SFmode, op0);
	  m = adjust_address (op1, V2SFmode, 0);
	  emit_insn (gen_sse_loadlps (op0, op0, m));
	  m = adjust_address (op1, V2SFmode, 8);
	  emit_insn (gen_sse_loadhps (op0, op0, m));
	}
    }
  else if (MEM_P (op0))
    {
      /* Unaligned store.  */
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? Similar to above, only less clear because of quote
	 typeless stores unquote.  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  m = adjust_address (op0, DFmode, 0);
	  emit_insn (gen_sse2_storelpd (m, op1));
	  m = adjust_address (op0, DFmode, 8);
	  emit_insn (gen_sse2_storehpd (m, op1));
	}
      else
	{
	  if (mode != V4SFmode)
	    op1 = gen_lowpart (V4SFmode, op1);
	  m = adjust_address (op0, V2SFmode, 0);
	  emit_insn (gen_sse_storelps (m, op1));
	  m = adjust_address (op0, V2SFmode, 8);
	  emit_insn (gen_sse_storehps (m, op1));
	}
    }
  else
    gcc_unreachable ();
}

/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  /* Adjust the stack pointer down by the mode's size, then store X
     at the new top of stack.  */
  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
			     GEN_INT (-GET_MODE_SIZE (mode)),
			     stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
  emit_move_insn (tmp, x);
}

/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.
 */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
                            rtx operands[])
{
  int matching_memory;
  rtx src1, src2, dst;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
      && (rtx_equal_p (dst, src2)
          || immediate_operand (src1, mode)))
    {
      /* Swap the sources so a matching or immediate operand ends up in
         the canonical position.  */
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
        matching_memory = 1;
      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
               && rtx_equal_p (dst, src2))
        matching_memory = 2;
      else
        dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      /* Keep whichever source matches the memory destination, and force
         the other one into a register.  */
      if (matching_memory != 2)
        src2 = force_reg (mode, src2);
      else
        src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    src1 = force_reg (mode, src1);

  /* Write the canonicalized sources back so the caller sees them.  */
  src1 = operands[1] = src1;
  src2 = operands[2] = src2;
  return dst;
}

/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
                                    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}

/* Attempt to expand a binary operator.
 Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
                             rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  /* Canonicalize the operands first; DST may become a fresh pseudo.  */
  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else
    {
      /* The arithmetic insn patterns clobber the flags register.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (enum rtx_code code,
                         enum machine_mode mode ATTRIBUTE_UNUSED,
                         rtx operands[3])
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
            || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
                && rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and the source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}

/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
                            rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
        matching_memory = 1;
      else
        dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      /* All other unary patterns clobber the flags register.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.
 */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
                        enum machine_mode mode ATTRIBUTE_UNUSED,
                        rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}

/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
   Create a mask for the sign bit in MODE for an SSE register.  If VECT is
   true, then replicate the mask for all elements of the vector register.
   If INVERT is true, then create a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtvec v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  if (mode == SFmode)
    lo = 0x80000000, hi = lo < 0;
  else if (HOST_BITS_PER_WIDE_INT >= 64)
    lo = (HOST_WIDE_INT)1 << shift, hi = -1;
  else
    lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

  /* An inverted mask selects everything BUT the sign bit.  */
  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
  mask = gen_lowpart (mode, mask);

  if (mode == SFmode)
    {
      if (vect)
        v = gen_rtvec (4, mask, mask, mask, mask);
      else
        v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
                       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
      vec_mode = V4SFmode;
    }
  else
    {
      if (vect)
        v = gen_rtvec (2, mask, mask);
      else
        v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
      vec_mode = V2DFmode;
    }

  return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
}

/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
                                rtx operands[])
{
  rtx mask, set, use, clob, dst, src;
  bool matching_memory;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode elt_mode = mode;

  if (vector_mode)
    {
      elt_mode = GET_MODE_INNER (mode);
      use_sse = true;
    }
  else if (TARGET_SSE_MATH)
    use_sse = SSE_FLOAT_MODE_P (mode);

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we don't have matching source
     operands or we're using the x87, do things in registers.  */
  matching_memory = false;
  if (MEM_P (dst))
    {
      if (use_sse && rtx_equal_p (dst, src))
        matching_memory = true;
      else
        dst = gen_reg_rtx (mode);
    }
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  if (vector_mode)
    {
      /* NEG flips the sign bit (XOR); ABS clears it (AND with the
         inverted mask built above).  */
      set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
      set = gen_rtx_SET (VOIDmode, dst, set);
      emit_insn (set);
    }
  else
    {
      set = gen_rtx_fmt_e (code, mode, src);
      set = gen_rtx_SET (VOIDmode, dst, set);
      if (mask)
        {
          /* The SSE pattern uses the mask and clobbers the flags.  */
          use = gen_rtx_USE (VOIDmode, mask);
          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
          emit_insn (gen_rtx_PARALLEL (VOIDmode,
                                       gen_rtvec (3, set, use, clob)));
        }
      else
        emit_insn (set);
    }

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);
  vmode = mode == SFmode ? V4SFmode : V2DFmode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtvec v;

      /* Only the magnitude of OP0 matters; strip a negative sign.  */
      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
        op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (op0 == CONST0_RTX (mode))
        op0 = CONST0_RTX (vmode);
      else
        {
          /* Widen the scalar constant into a vector constant with the
             remaining elements zero.  */
          if (mode == SFmode)
            v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
                           CONST0_RTX (SFmode), CONST0_RTX (SFmode));
          else
            v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
          op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
        }

      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
        emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
      else
        emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
    }
  else
    {
      /* Variable magnitude: need both the sign mask and its inverse.  */
      nmask = ix86_build_signbit_mask (mode, 0, 1);
      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
        emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
      else
        emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, x;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  /* Extract the sign of OP1: dest = dest & signbit-mask.  */
  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  /* OR in the (non-zero) constant magnitude.  */
  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
         we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  /* The alternatives below refer to the register-tying alternatives of
     the copysign*_var insn pattern this splitter serves.  */
  if (REG_P (mask) && REGNO (dest) == REGNO (mask))     /* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      /* scratch = sign of op1.  */
      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      /* dest = magnitude of op0 (mask with the sign bit cleared).  */
      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))               /* alternative 1,3 */
        {
          x = gen_rtx_AND (vmode, scratch, mask);
        }
      else                                              /* alternative 2,4 */
        {
          gcc_assert (REGNO (mask) == REGNO (scratch));
          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
          x = gen_rtx_AND (vmode, scratch, op1);
        }
      /* scratch now holds the sign of op1.  */
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))                  /* alternative 1,2 */
        {
          dest = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, nmask);
        }
      else                                              /* alternative 3,4 */
        {
          gcc_assert (REGNO (nmask) == REGNO (dest));
          dest = nmask;
          op0 = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, op0);
        }
      /* dest now holds the magnitude of op0.  */
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  /* Combine magnitude and sign.  */
  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.
 */

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  /* The cases below fall through: each CC mode accepts request modes
     at least as constrained as itself.  */
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
        return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
        return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

/* Return the CC mode needed to compare OP0 and OP1 with comparison
   code CODE.  */

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:                    /* ZF=0 */
    case NE:                    /* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:                   /* CF=0 */
    case GTU:                   /* CF=0 & ZF=0 */
    case LTU:                   /* CF=1 */
    case LEU:                   /* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:                    /* SF=OF   or   SF=0 */
    case LT:                    /* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
        return CCGOCmode;
      else
        /* For other cases Carry flag is not required.  */
        return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:                    /* ZF=0 & SF=OF */
    case LE:                    /* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
        return CCNOmode;
      else
        return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
         mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}

/* Return the fixed registers used for condition codes.
 */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  /* CCGC and CCGOC are compatible; the more constrained CCGC wins.  */
  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCZmode:
      /* Any two distinct integer CC modes join to the full CCmode.  */
      switch (m2)
        {
        default:
          return VOIDmode;

        case CCmode:
        case CCGCmode:
        case CCGOCmode:
        case CCNOmode:
        case CCZmode:
          return CCmode;
        }

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
         checked above.  */
      return VOIDmode;
    }
}

/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* Use fcomi when it is at least as cheap as the best alternative for
     either the code itself or its swapped form.  */
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
          || (ix86_fp_comparison_cost (swapped_code)
              == ix86_fp_comparison_fcomi_cost (swapped_code)));
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.
 */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
          || (op_mode == XFmode
              && ! (standard_80387_constant_p (op0) == 1
                    || standard_80387_constant_p (op1) == 1)
              && GET_CODE (op1) != FLOAT)
          || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (GET_CODE (op0) == MEM
              && ! (standard_80387_constant_p (op1) == 0
                    || GET_CODE (op1) == MEM)))
        {
          rtx tmp;
          tmp = op0, op0 = op1, op1 = tmp;
          code = swap_condition (code);
        }

      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
        {
          /* standard_80387_constant_p: 0 = not loadable by fld insns,
             1 = fldz/fld1-style constant, else other special constant.  */
          int tmp = standard_80387_constant_p (op1);
          if (tmp == 0)
            op1 = validize_mem (force_const_mem (op_mode, op1));
          else if (tmp == 1)
            {
              if (TARGET_CMOVE)
                op1 = force_reg (op_mode, op1);
            }
          else
            op1 = force_reg (op_mode, op1);
        }
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
      break;
    case UNEQ:
      return EQ;
      break;
    case UNLT:
      return LTU;
      break;
    case UNLE:
      return LEU;
      break;
    case LTGT:
      return NE;
      break;
    default:
      return UNKNOWN;
    }
}

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If some of branches
   is not required, set value to UNKNOWN.
   We never require more than two branches.
*/ 10143 10144void 10145ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code, 10146 enum rtx_code *first_code, 10147 enum rtx_code *second_code) 10148{ 10149 *first_code = code; 10150 *bypass_code = UNKNOWN; 10151 *second_code = UNKNOWN; 10152 10153 /* The fcomi comparison sets flags as follows: 10154 10155 cmp ZF PF CF 10156 > 0 0 0 10157 < 0 0 1 10158 = 1 0 0 10159 un 1 1 1 */ 10160 10161 switch (code) 10162 { 10163 case GT: /* GTU - CF=0 & ZF=0 */ 10164 case GE: /* GEU - CF=0 */ 10165 case ORDERED: /* PF=0 */ 10166 case UNORDERED: /* PF=1 */ 10167 case UNEQ: /* EQ - ZF=1 */ 10168 case UNLT: /* LTU - CF=1 */ 10169 case UNLE: /* LEU - CF=1 | ZF=1 */ 10170 case LTGT: /* EQ - ZF=0 */ 10171 break; 10172 case LT: /* LTU - CF=1 - fails on unordered */ 10173 *first_code = UNLT; 10174 *bypass_code = UNORDERED; 10175 break; 10176 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */ 10177 *first_code = UNLE; 10178 *bypass_code = UNORDERED; 10179 break; 10180 case EQ: /* EQ - ZF=1 - fails on unordered */ 10181 *first_code = UNEQ; 10182 *bypass_code = UNORDERED; 10183 break; 10184 case NE: /* NE - ZF=0 - fails on unordered */ 10185 *first_code = LTGT; 10186 *second_code = UNORDERED; 10187 break; 10188 case UNGE: /* GEU - CF=0 - fails on unordered */ 10189 *first_code = GE; 10190 *second_code = UNORDERED; 10191 break; 10192 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */ 10193 *first_code = GT; 10194 *second_code = UNORDERED; 10195 break; 10196 default: 10197 gcc_unreachable (); 10198 } 10199 if (!TARGET_IEEE_FP) 10200 { 10201 *second_code = UNKNOWN; 10202 *bypass_code = UNKNOWN; 10203 } 10204} 10205 10206/* Return cost of comparison done fcom + arithmetics operations on AX. 10207 All following functions do use number of instructions as a cost metrics. 10208 In future this should be tweaked to compute bytes for optimize_size and 10209 take into account performance of various instructions on various CPUs. 
*/ 10210static int 10211ix86_fp_comparison_arithmetics_cost (enum rtx_code code) 10212{ 10213 if (!TARGET_IEEE_FP) 10214 return 4; 10215 /* The cost of code output by ix86_expand_fp_compare. */ 10216 switch (code) 10217 { 10218 case UNLE: 10219 case UNLT: 10220 case LTGT: 10221 case GT: 10222 case GE: 10223 case UNORDERED: 10224 case ORDERED: 10225 case UNEQ: 10226 return 4; 10227 break; 10228 case LT: 10229 case NE: 10230 case EQ: 10231 case UNGE: 10232 return 5; 10233 break; 10234 case LE: 10235 case UNGT: 10236 return 6; 10237 break; 10238 default: 10239 gcc_unreachable (); 10240 } 10241} 10242 10243/* Return cost of comparison done using fcomi operation. 10244 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 10245static int 10246ix86_fp_comparison_fcomi_cost (enum rtx_code code) 10247{ 10248 enum rtx_code bypass_code, first_code, second_code; 10249 /* Return arbitrarily high cost when instruction is not supported - this 10250 prevents gcc from using it. */ 10251 if (!TARGET_CMOVE) 10252 return 1024; 10253 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 10254 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2; 10255} 10256 10257/* Return cost of comparison done using sahf operation. 10258 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 10259static int 10260ix86_fp_comparison_sahf_cost (enum rtx_code code) 10261{ 10262 enum rtx_code bypass_code, first_code, second_code; 10263 /* Return arbitrarily high cost when instruction is not preferred - this 10264 avoids gcc from using it. */ 10265 if (!TARGET_USE_SAHF && !optimize_size) 10266 return 1024; 10267 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 10268 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3; 10269} 10270 10271/* Compute cost of the comparison done using any method. 10272 See ix86_fp_comparison_arithmetics_cost for the metrics. 
*/ 10273static int 10274ix86_fp_comparison_cost (enum rtx_code code) 10275{ 10276 int fcomi_cost, sahf_cost, arithmetics_cost = 1024; 10277 int min; 10278 10279 fcomi_cost = ix86_fp_comparison_fcomi_cost (code); 10280 sahf_cost = ix86_fp_comparison_sahf_cost (code); 10281 10282 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code); 10283 if (min > sahf_cost) 10284 min = sahf_cost; 10285 if (min > fcomi_cost) 10286 min = fcomi_cost; 10287 return min; 10288} 10289 10290/* Generate insn patterns to do a floating point compare of OPERANDS. */ 10291 10292static rtx 10293ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch, 10294 rtx *second_test, rtx *bypass_test) 10295{ 10296 enum machine_mode fpcmp_mode, intcmp_mode; 10297 rtx tmp, tmp2; 10298 int cost = ix86_fp_comparison_cost (code); 10299 enum rtx_code bypass_code, first_code, second_code; 10300 10301 fpcmp_mode = ix86_fp_compare_mode (code); 10302 code = ix86_prepare_fp_compare_args (code, &op0, &op1); 10303 10304 if (second_test) 10305 *second_test = NULL_RTX; 10306 if (bypass_test) 10307 *bypass_test = NULL_RTX; 10308 10309 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 10310 10311 /* Do fcomi/sahf based test when profitable. */ 10312 if ((bypass_code == UNKNOWN || bypass_test) 10313 && (second_code == UNKNOWN || second_test) 10314 && ix86_fp_comparison_arithmetics_cost (code) > cost) 10315 { 10316 if (TARGET_CMOVE) 10317 { 10318 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 10319 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), 10320 tmp); 10321 emit_insn (tmp); 10322 } 10323 else 10324 { 10325 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 10326 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); 10327 if (!scratch) 10328 scratch = gen_reg_rtx (HImode); 10329 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); 10330 emit_insn (gen_x86_sahf_1 (scratch)); 10331 } 10332 10333 /* The FP codes work out to act like unsigned. 
*/ 10334 intcmp_mode = fpcmp_mode; 10335 code = first_code; 10336 if (bypass_code != UNKNOWN) 10337 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode, 10338 gen_rtx_REG (intcmp_mode, FLAGS_REG), 10339 const0_rtx); 10340 if (second_code != UNKNOWN) 10341 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode, 10342 gen_rtx_REG (intcmp_mode, FLAGS_REG), 10343 const0_rtx); 10344 } 10345 else 10346 { 10347 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */ 10348 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 10349 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); 10350 if (!scratch) 10351 scratch = gen_reg_rtx (HImode); 10352 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); 10353 10354 /* In the unordered case, we have to check C2 for NaN's, which 10355 doesn't happen to work out to anything nice combination-wise. 10356 So do some bit twiddling on the value we've got in AH to come 10357 up with an appropriate set of condition codes. */ 10358 10359 intcmp_mode = CCNOmode; 10360 switch (code) 10361 { 10362 case GT: 10363 case UNGT: 10364 if (code == GT || !TARGET_IEEE_FP) 10365 { 10366 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); 10367 code = EQ; 10368 } 10369 else 10370 { 10371 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 10372 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); 10373 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44))); 10374 intcmp_mode = CCmode; 10375 code = GEU; 10376 } 10377 break; 10378 case LT: 10379 case UNLT: 10380 if (code == LT && TARGET_IEEE_FP) 10381 { 10382 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 10383 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01))); 10384 intcmp_mode = CCmode; 10385 code = EQ; 10386 } 10387 else 10388 { 10389 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01))); 10390 code = NE; 10391 } 10392 break; 10393 case GE: 10394 case UNGE: 10395 if (code == GE || !TARGET_IEEE_FP) 10396 { 10397 emit_insn 
(gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05))); 10398 code = EQ; 10399 } 10400 else 10401 { 10402 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 10403 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, 10404 GEN_INT (0x01))); 10405 code = NE; 10406 } 10407 break; 10408 case LE: 10409 case UNLE: 10410 if (code == LE && TARGET_IEEE_FP) 10411 { 10412 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 10413 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); 10414 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); 10415 intcmp_mode = CCmode; 10416 code = LTU; 10417 } 10418 else 10419 { 10420 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); 10421 code = NE; 10422 } 10423 break; 10424 case EQ: 10425 case UNEQ: 10426 if (code == EQ && TARGET_IEEE_FP) 10427 { 10428 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 10429 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); 10430 intcmp_mode = CCmode; 10431 code = EQ; 10432 } 10433 else 10434 { 10435 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); 10436 code = NE; 10437 break; 10438 } 10439 break; 10440 case NE: 10441 case LTGT: 10442 if (code == NE && TARGET_IEEE_FP) 10443 { 10444 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 10445 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, 10446 GEN_INT (0x40))); 10447 code = NE; 10448 } 10449 else 10450 { 10451 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); 10452 code = EQ; 10453 } 10454 break; 10455 10456 case UNORDERED: 10457 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); 10458 code = NE; 10459 break; 10460 case ORDERED: 10461 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); 10462 code = EQ; 10463 break; 10464 10465 default: 10466 gcc_unreachable (); 10467 } 10468 } 10469 10470 /* Return the test that should be put into the flags user, i.e. 10471 the bcc, scc, or cmov instruction. 
*/
  /* Hand back the comparison against the flags register; the caller
     (bcc/scc/cmov expander) consumes it.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}

/* Expand a comparison of ix86_compare_op0 against ix86_compare_op1
   with comparison code CODE and return the rtx to be placed in the
   flags user.  *SECOND_TEST / *BYPASS_TEST (may be NULL) receive the
   auxiliary tests needed by multi-jump FP comparisons, or NULL_RTX.
   If a compare has already been emitted (ix86_compare_emitted), reuse
   it instead of emitting a new one.  */
rtx
ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (ix86_compare_emitted)
    {
      /* A compare insn already exists; just build the flags test and
	 clear the latch so it is consumed only once.  */
      ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
      ix86_compare_emitted = NULL_RTX;
    }
  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
				  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if the CODE will result in nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Without cmov every FP branch needs the fnstsw/sahf dance.  */
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != UNKNOWN || second_code != UNKNOWN;
}

/* Emit a conditional jump to LABEL, testing ix86_compare_op0 against
   ix86_compare_op1 with comparison code CODE.  Doubleword integer
   compares are split into word-sized compare+branch sequences.  */
void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  /* If we have emitted a compare insn, go straight to simple.
     ix86_expand_compare won't emit anything if ix86_compare_emitted
     is non NULL.
*/

	label2 = gen_label_rtx ();

	/* code1 branches to the target on the high word, code2 falls
	   out to LABEL2 (comparison decided false), code3 decides on
	   the low word using the unsigned variant of CODE.  */
	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  /* Non-strict orderings decompose into their strict parts.  */
	  case LE:  code1 = LT;  code2 = GT;  break;
	  case GE:  code1 = GT;  code2 = LT;  break;
	  case LEU: code1 = LTU; code2 = GTU; break;
	  case GEU: code1 = GTU; code2 = LTU; break;

	  /* Equality needs no first branch (high words compared via
	     code2/code3 only); UNKNOWN suppresses the jump below.  */
	  case EQ: code1 = UNKNOWN; code2 = NE; break;
	  case NE: code2 = UNKNOWN; break;

	  default:
	    gcc_unreachable ();
	  }

	/*
	 * a < b =>
	 *  if (hi(a) < hi(b)) goto true;
	 *  if (hi(a) > hi(b)) goto false;
	 *  if (lo(a) < lo(b)) goto true;
	 * false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != UNKNOWN)
	  ix86_expand_branch (code1, label);
	if (code2 != UNKNOWN)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != UNKNOWN)
	  emit_label (label2);
	return;
      }

    default:
      gcc_unreachable ();
    }
}

/* Split branch based on floating point condition.  Jump to TARGET1 if
   CODE holds for OP1 <cmp> OP2, else to TARGET2 (one of the targets is
   pc_rtx, i.e. fall through).  TMP is an optional HImode scratch for
   fnstsw; PUSHED, if set, is an operand that was spilled to memory and
   must be released afterwards.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  /* Canonicalize so that TARGET2 is the fall-through (pc_rtx).  */
  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  /* Remove pushed operand from stack.
*/
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume the BYPASS and SECOND to be always test
	 for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }
  /* The bypass test jumps around the main conditional jump when the
     operands are unordered.  */
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }
  /* Main conditional jump.  */
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));
  /* Secondary jump for comparisons that need two flag tests.  */
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 target2)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}

/* Expand a setcc: set DEST (a QImode register) to 1 if the comparison
   of ix86_compare_op0/op1 with code CODE holds, else to 0.  Return 1
   on success, 0 (FAIL) for doubleword-mode compares which must go
   through the branch-based path.  */
int
ix86_expand_setcc (enum rtx_code code, rtx dest)
{
  rtx ret, tmp, tmpreg, equiv;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
    return 0; /* FAIL */

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  /* FP comparisons may need a second setcc combined with AND/OR.  */
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  gcc_assert (!second_test);
	  test = bypass_test;
	  bypass = 1;
	  /* The bypass test is for the opposite (unordered) case;
	     reverse it so it can be ANDed in.  */
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  /* Attach a REG_EQUAL note describing the comparison result.  */
  if (ix86_compare_op0 && ix86_compare_op1)
    {
      equiv = simplify_gen_relational (code, QImode,
				       GET_MODE (ix86_compare_op0),
				       ix86_compare_op0, ix86_compare_op1);
      set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
    }

  return 1; /* DONE */
}

/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle DImode compares that go through special path.  Also we can't
     deal with FP compares yet.  This is possible to add.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;
  if (FLOAT_MODE_P (mode))
    {
      rtx second_test = NULL, bypass_test = NULL;
      rtx compare_op, compare_seq;

      /* Shortcut: the following common codes never translate into
	 carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
	  || code == ORDERED || code == UNORDERED)
	return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
	  && !TARGET_IEEE_FP)
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = swap_condition (code);
	}

      /* Try to expand the comparison and verify that we end up with a
	 carry-flag based comparison.  This fails to be true only when
	 we decide to expand the comparison using arithmetic, which is
	 not a common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
					   &second_test, &bypass_test);
      compare_seq = get_insns ();
      end_sequence ();

      if (second_test || bypass_test)
	return false;
      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
	code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
	code = GET_CODE (compare_op);
      /* Only LTU/GEU are pure carry-flag tests.  */
      if (code != LTU && code != GEU)
	return false;
      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }
  if (!INTEGRAL_MODE_P (mode))
    return false;
  switch (code)
    {
    case LTU:
    case GEU:
      break;

      /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
	return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

      /* Convert a>b into b<a or a>=b-1.
*/
    case GTU:
    case LEU:
      if (GET_CODE (op1) == CONST_INT)
	{
	  /* a>N becomes a>=N+1 (GEU); a<=N becomes a<N+1 (LTU).  */
	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We still can swap operands but that
	     would force loading of the constant into register.  */
	  if (op1 == const0_rtx
	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
	    return false;
	  code = (code == GTU ? GEU : LTU);
	}
      else
	{
	  /* Non-constant: swap the operands instead.  */
	  rtx tmp = op1;
	  op1 = op0;
	  op0 = tmp;
	  code = (code == GTU ? LTU : GEU);
	}
      break;

      /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      /* a<=-1 / a>-1 likewise test the sign bit.  */
      if (mode == DImode || op1 != constm1_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.
*/ 10962 if (!nonimmediate_operand (op0, VOIDmode)) 10963 { 10964 if (no_new_pseudos) 10965 return false; 10966 op0 = force_reg (mode, op0); 10967 } 10968 ix86_compare_op0 = op0; 10969 ix86_compare_op1 = op1; 10970 *pop = ix86_expand_compare (code, NULL, NULL); 10971 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU); 10972 return true; 10973} 10974 10975int 10976ix86_expand_int_movcc (rtx operands[]) 10977{ 10978 enum rtx_code code = GET_CODE (operands[1]), compare_code; 10979 rtx compare_seq, compare_op; 10980 rtx second_test, bypass_test; 10981 enum machine_mode mode = GET_MODE (operands[0]); 10982 bool sign_bit_compare_p = false;; 10983 10984 start_sequence (); 10985 compare_op = ix86_expand_compare (code, &second_test, &bypass_test); 10986 compare_seq = get_insns (); 10987 end_sequence (); 10988 10989 compare_code = GET_CODE (compare_op); 10990 10991 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT)) 10992 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE))) 10993 sign_bit_compare_p = true; 10994 10995 /* Don't attempt mode expansion here -- if we had to expand 5 or 6 10996 HImode insns, we'd be swallowed in word prefix ops. */ 10997 10998 if ((mode != HImode || TARGET_FAST_PREFIX) 10999 && (mode != (TARGET_64BIT ? TImode : DImode)) 11000 && GET_CODE (operands[2]) == CONST_INT 11001 && GET_CODE (operands[3]) == CONST_INT) 11002 { 11003 rtx out = operands[0]; 11004 HOST_WIDE_INT ct = INTVAL (operands[2]); 11005 HOST_WIDE_INT cf = INTVAL (operands[3]); 11006 HOST_WIDE_INT diff; 11007 11008 diff = ct - cf; 11009 /* Sign bit compares are better done using shifts than we do by using 11010 sbb. */ 11011 if (sign_bit_compare_p 11012 || ix86_expand_carry_flag_compare (code, ix86_compare_op0, 11013 ix86_compare_op1, &compare_op)) 11014 { 11015 /* Detect overlap between destination and compare sources. 
*/
	  rtx tmp = out;

	  if (!sign_bit_compare_p)
	    {
	      bool fpcmp = false;

	      compare_code = GET_CODE (compare_op);

	      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
		  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
		{
		  fpcmp = true;
		  compare_code = ix86_fp_compare_code_to_integer (compare_code);
		}

	      /* To simplify rest of code, restrict to the GEU case.  */
	      if (compare_code == LTU)
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  compare_code = reverse_condition (compare_code);
		  code = reverse_condition (code);
		}
	      else
		{
		  if (fpcmp)
		    PUT_CODE (compare_op,
			      reverse_condition_maybe_unordered
			        (GET_CODE (compare_op)));
		  else
		    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
		}
	      diff = ct - cf;

	      if (reg_overlap_mentioned_p (out, ix86_compare_op0)
		  || reg_overlap_mentioned_p (out, ix86_compare_op1))
		tmp = gen_reg_rtx (mode);

	      /* Materialize -1/0 from the carry via sbb.  */
	      if (mode == DImode)
		emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
	      else
		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
	    }
	  else
	    {
	      /* Sign-bit compare: get -1/0 with a store-flag (shift).  */
	      if (code == GT || code == GE)
		code = reverse_condition (code);
	      else
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  diff = ct - cf;
		}
	      tmp = emit_store_flag (tmp, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, -1);
	    }

	  /* TMP now holds -1 when the condition is true, 0 otherwise;
	     massage it into ct/cf with cheap arithmetic.  */
	  if (diff == 1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [addl dest, ct]
	       *
	       * Size 5 - 8.
	       */
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (cf == -1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * orl $ct, dest
	       *
	       * Size 8.
	       */
	      tmp = expand_simple_binop (mode, IOR,
					 tmp, GEN_INT (ct),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (diff == -1 && ct)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * notl dest
	       * [addl dest, cf]
	       *
	       * Size 8 - 11.
	       */
	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
	      if (cf)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (cf),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [notl dest]
	       * andl cf - ct, dest
	       * [addl dest, ct]
	       *
	       * Size 8 - 11.
	       */

	      if (cf == 0)
		{
		  cf = ct;
		  ct = 0;
		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
		}

	      tmp = expand_simple_binop (mode, AND,
					 copy_rtx (tmp),
					 gen_int_mode (cf - ct, mode),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }

	  if (!rtx_equal_p (tmp, out))
	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));

	  return 1; /* DONE */
	}

      if (diff < 0)
	{
	  HOST_WIDE_INT tmp;
	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;
	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
	    {
	      /* We may be reversing unordered compare to normal compare, that
		 is not valid in general (we may convert non-trapping condition
		 to trapping one), however on i386 we currently emit all
		 comparisons unordered.
*/
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}

      /* Recognize sign-bit tests so the sar-based sequences below can
	 be used; UNKNOWN means "not a sign-bit compare".  */
      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
	  && GET_CODE (ix86_compare_op1) == CONST_INT)
	{
	  if (ix86_compare_op1 == const0_rtx
	      && (code == LT || code == GE))
	    compare_code = code;
	  else if (ix86_compare_op1 == constm1_rtx)
	    {
	      if (code == LE)
		compare_code = LT;
	      else if (code == GT)
		compare_code = GE;
	    }
	}

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
	  && (cf == -1 || ct == -1))
	{
	  /* If lea code below could be used, only optimize
	     if it results in a 2 insn sequence.  */

	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
		 || diff == 3 || diff == 5 || diff == 9)
	      || (compare_code == LT && ct == -1)
	      || (compare_code == GE && cf == -1))
	    {
	      /*
	       * notl op1 (if necessary)
	       * sarl $31, op1
	       * orl cf, op1
	       */
	      if (ct != -1)
		{
		  cf = ct;
		  ct = -1;
		  code = reverse_condition (code);
		}

	      out = emit_store_flag (out, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, -1);

	      out = expand_simple_binop (mode, IOR,
					 out, GEN_INT (cf),
					 out, 1, OPTAB_DIRECT);
	      if (out != operands[0])
		emit_move_insn (operands[0], out);

	      return 1; /* DONE */
	    }
	}


      /* Setcc + lea: possible when diff is a valid lea scale (or
	 scale+base) and the mode doesn't hit partial-register stalls.  */
      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
	   || diff == 3 || diff == 5 || diff == 9)
	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
	  && (mode != DImode
	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * Size 14.
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get arithmetics done in proper mode to match.
*/
	  if (diff == 1)
	    tmp = copy_rtx (out);
	  else
	    {
	      /* Build dest*(diff&~1) [+ dest] [+ cf] as an lea address.  */
	      rtx out1;
	      out1 = copy_rtx (out);
	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (mode, tmp, out1);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (!rtx_equal_p (tmp, out))
	    {
	      if (nops == 1)
		out = force_operand (tmp, copy_rtx (out));
	      else
		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
	    }
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return 1; /* DONE */
	}

      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
	  && BRANCH_COST >= 2)
	{
	  if (cf == 0)
	    {
	      cf = ct;
	      ct = 0;
	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
		/* We may be reversing unordered compare to normal compare,
		   that is not valid in general (we may convert non-trapping
		   condition to trapping one), however on i386 we currently
		   emit all comparisons unordered.
*/
		code = reverse_condition_maybe_unordered (code);
	      else
		{
		  code = reverse_condition (code);
		  if (compare_code != UNKNOWN)
		    compare_code = reverse_condition (compare_code);
		}
	    }

	  if (compare_code != UNKNOWN)
	    {
	      /* notl op1	(if needed)
		 sarl $31, op1
		 andl (cf-ct), op1
		 addl ct, op1

		 For x < 0 (resp. x <= -1) there will be no notl,
		 so if possible swap the constants to get rid of the
		 complement.
		 True/false will be -1/0 while code below (store flag
		 followed by decrement) is 0/-1, so the constants need
		 to be exchanged once more.  */

	      if (compare_code == GE || !cf)
		{
		  code = reverse_condition (code);
		  compare_code = LT;
		}
	      else
		{
		  HOST_WIDE_INT tmp = cf;
		  cf = ct;
		  ct = tmp;
		}

	      out = emit_store_flag (out, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, -1);
	    }
	  else
	    {
	      /* setcc gives 0/1; decrement to get -1/0 for the mask.  */
	      out = emit_store_flag (out, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, 1);

	      out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
					 copy_rtx (out), 1, OPTAB_DIRECT);
	    }

	  /* Mask the -1/0 value into cf-ct, then shift to ct/cf.  */
	  out = expand_simple_binop (mode, AND, copy_rtx (out),
				     gen_int_mode (cf - ct, mode),
				     copy_rtx (out), 1, OPTAB_DIRECT);
	  if (ct)
	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
				       copy_rtx (out), 1, OPTAB_DIRECT);
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return 1; /* DONE */
	}
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.
*/ 11378 11379 optab op; 11380 rtx var, orig_out, out, tmp; 11381 11382 if (BRANCH_COST <= 2) 11383 return 0; /* FAIL */ 11384 11385 /* If one of the two operands is an interesting constant, load a 11386 constant with the above and mask it in with a logical operation. */ 11387 11388 if (GET_CODE (operands[2]) == CONST_INT) 11389 { 11390 var = operands[3]; 11391 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx) 11392 operands[3] = constm1_rtx, op = and_optab; 11393 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx) 11394 operands[3] = const0_rtx, op = ior_optab; 11395 else 11396 return 0; /* FAIL */ 11397 } 11398 else if (GET_CODE (operands[3]) == CONST_INT) 11399 { 11400 var = operands[2]; 11401 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx) 11402 operands[2] = constm1_rtx, op = and_optab; 11403 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx) 11404 operands[2] = const0_rtx, op = ior_optab; 11405 else 11406 return 0; /* FAIL */ 11407 } 11408 else 11409 return 0; /* FAIL */ 11410 11411 orig_out = operands[0]; 11412 tmp = gen_reg_rtx (mode); 11413 operands[0] = tmp; 11414 11415 /* Recurse to get the constant loaded. */ 11416 if (ix86_expand_int_movcc (operands) == 0) 11417 return 0; /* FAIL */ 11418 11419 /* Mask in the interesting variable. */ 11420 out = expand_binop (mode, op, var, tmp, orig_out, 0, 11421 OPTAB_WIDEN); 11422 if (!rtx_equal_p (out, orig_out)) 11423 emit_move_insn (copy_rtx (orig_out), copy_rtx (out)); 11424 11425 return 1; /* DONE */ 11426 } 11427 11428 /* 11429 * For comparison with above, 11430 * 11431 * movl cf,dest 11432 * movl ct,tmp 11433 * cmpl op1,op2 11434 * cmovcc tmp,dest 11435 * 11436 * Size 15. 11437 */ 11438 11439 if (! nonimmediate_operand (operands[2], mode)) 11440 operands[2] = force_reg (mode, operands[2]); 11441 if (! 
nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  /* If the destination overlaps an operand needed by the extra FP
     tests, work on a fresh register so the first cmov doesn't clobber
     an input of the second.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  /* cmov wants at least one register operand; QImode needs both in
     registers.  */
  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
	  || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode,
						compare_op, operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
			    gen_rtx_IF_THEN_ELSE (mode,
						  bypass_test,
						  copy_rtx (operands[3]),
						  copy_rtx (operands[0]))));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
			    gen_rtx_IF_THEN_ELSE (mode,
						  second_test,
						  copy_rtx (operands[2]),
						  copy_rtx (operands[0]))));

  return 1; /* DONE */
}

/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.
*/ 11494 11495static enum rtx_code 11496ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, 11497 rtx *pop0, rtx *pop1) 11498{ 11499 rtx tmp; 11500 11501 switch (code) 11502 { 11503 case LTGT: 11504 case UNEQ: 11505 /* We have no LTGT as an operator. We could implement it with 11506 NE & ORDERED, but this requires an extra temporary. It's 11507 not clear that it's worth it. */ 11508 return UNKNOWN; 11509 11510 case LT: 11511 case LE: 11512 case UNGT: 11513 case UNGE: 11514 /* These are supported directly. */ 11515 break; 11516 11517 case EQ: 11518 case NE: 11519 case UNORDERED: 11520 case ORDERED: 11521 /* For commutative operators, try to canonicalize the destination 11522 operand to be first in the comparison - this helps reload to 11523 avoid extra moves. */ 11524 if (!dest || !rtx_equal_p (dest, *pop1)) 11525 break; 11526 /* FALLTHRU */ 11527 11528 case GE: 11529 case GT: 11530 case UNLE: 11531 case UNLT: 11532 /* These are not supported directly. Swap the comparison operands 11533 to transform into something that is supported. */ 11534 tmp = *pop0; 11535 *pop0 = *pop1; 11536 *pop1 = tmp; 11537 code = swap_condition (code); 11538 break; 11539 11540 default: 11541 gcc_unreachable (); 11542 } 11543 11544 return code; 11545} 11546 11547/* Detect conditional moves that exactly match min/max operational 11548 semantics. Note that this is IEEE safe, as long as we don't 11549 interchange the operands. 11550 11551 Returns FALSE if this conditional move doesn't match a MIN/MAX, 11552 and TRUE if the operation is successful and instructions are emitted. 
*/

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
			   rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  /* Only LT (natural form) and UNGE (inverted form) can match; the
     UNGE case swaps the arms rather than the compare operands, which
     keeps the operand order IEEE-safe.  */
  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  /* cmp_op0 < cmp_op1 ? cmp_op0 : cmp_op1 is MIN; the mirrored
     pairing is MAX.  */
  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      /* Wrap in an UNSPEC so later passes can't commute the operands
	 and break the IEEE semantics.  */
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}

/* Expand an sse vector comparison.  Return the register with the result.
*/ 11604 11605static rtx 11606ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, 11607 rtx op_true, rtx op_false) 11608{ 11609 enum machine_mode mode = GET_MODE (dest); 11610 rtx x; 11611 11612 cmp_op0 = force_reg (mode, cmp_op0); 11613 if (!nonimmediate_operand (cmp_op1, mode)) 11614 cmp_op1 = force_reg (mode, cmp_op1); 11615 11616 if (optimize 11617 || reg_overlap_mentioned_p (dest, op_true) 11618 || reg_overlap_mentioned_p (dest, op_false)) 11619 dest = gen_reg_rtx (mode); 11620 11621 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1); 11622 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 11623 11624 return dest; 11625} 11626 11627/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical 11628 operations. This is used for both scalar and vector conditional moves. */ 11629 11630static void 11631ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) 11632{ 11633 enum machine_mode mode = GET_MODE (dest); 11634 rtx t2, t3, x; 11635 11636 if (op_false == CONST0_RTX (mode)) 11637 { 11638 op_true = force_reg (mode, op_true); 11639 x = gen_rtx_AND (mode, cmp, op_true); 11640 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 11641 } 11642 else if (op_true == CONST0_RTX (mode)) 11643 { 11644 op_false = force_reg (mode, op_false); 11645 x = gen_rtx_NOT (mode, cmp); 11646 x = gen_rtx_AND (mode, x, op_false); 11647 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 11648 } 11649 else 11650 { 11651 op_true = force_reg (mode, op_true); 11652 op_false = force_reg (mode, op_false); 11653 11654 t2 = gen_reg_rtx (mode); 11655 if (optimize) 11656 t3 = gen_reg_rtx (mode); 11657 else 11658 t3 = dest; 11659 11660 x = gen_rtx_AND (mode, op_true, cmp); 11661 emit_insn (gen_rtx_SET (VOIDmode, t2, x)); 11662 11663 x = gen_rtx_NOT (mode, cmp); 11664 x = gen_rtx_AND (mode, x, op_false); 11665 emit_insn (gen_rtx_SET (VOIDmode, t3, x)); 11666 11667 x = gen_rtx_IOR (mode, t3, t2); 11668 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 11669 } 11670} 11671 
/* Expand a floating-point conditional move.  Return true if successful.
   operands[0] is the destination, operands[1] the comparison (whose
   operands live in the globals ix86_compare_op0/ix86_compare_op1), and
   operands[2]/operands[3] are the true/false arms.  */

int
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op, second_test, bypass_test;

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
	 allocation just to gain access to it.  Deny movcc when the
	 comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (ix86_compare_op0);
      if (cmode == VOIDmode)
	cmode = GET_MODE (ix86_compare_op1);
      if (cmode != mode)
	return 0;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					       &ix86_compare_op0,
					       &ix86_compare_op1);
      if (code == UNKNOWN)
	return 0;

      /* Prefer a direct min/max instruction when the cmove matches one.  */
      if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
				     ix86_compare_op1, operands[2],
				     operands[3]))
	return 1;

      /* Otherwise build a compare mask and merge the two arms.  */
      tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
				 ix86_compare_op1, operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* When the comparison is not expressible as a single fcmov condition,
     materialize it with setcc and cmove on the NE of that result.  */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      gcc_assert (!second_test && !bypass_test);
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  /* A follow-up bypass/second cmove reads operands[3]/operands[2] after
     operands[0] has been written; copy the arm to a fresh register when
     it overlaps the destination.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
						operands[2], operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode, bypass_test,
						  operands[3], operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode, second_test,
						  operands[2], operands[0])));

  return 1;
}

/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.
 */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  /* operands[3] holds the comparison, operands[4]/[5] its arguments,
     operands[1]/[2] the true/false arms.  */
  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					   &operands[4], &operands[5]);
  if (code == UNKNOWN)
    return false;

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
				 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
			     operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}

/* Expand a signed integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Canonicalize the comparison to EQ, GT, GTU.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GTU:
      break;

    case NE:
    case LE:
    case LEU:
      /* Invert the comparison; NEGATE swaps the select arms below.  */
      code = reverse_condition (code);
      negate = true;
      break;

    case GE:
    case GEU:
      /* GE(U) is inverted to LT(U), then falls through to the operand
	 swap that turns LT(U) into GT(U).  */
      code = reverse_condition (code);
      negate = true;
      /* FALLTHRU */

    case LT:
    case LTU:
      code = swap_condition (code);
      x = cop0, cop0 = cop1, cop1 = x;
      break;

    default:
      gcc_unreachable ();
    }

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
  if (code == GTU)
    {
      cop0 = force_reg (mode, cop0);

      switch (mode)
	{
	case V4SImode:
	  {
	    rtx t1, t2, mask;

	    /* Perform a parallel modulo subtraction.  */
	    t1 = gen_reg_rtx (mode);
	    emit_insn (gen_subv4si3 (t1, cop0, cop1));

	    /* Extract the original sign bit of op0.  */
	    /* NOTE(review): -0x80000000 involves an unsigned constant in
	       the negation; on hosts where HOST_WIDE_INT is 32 bits the
	       resulting conversion is implementation-defined — confirm it
	       always produces the 0x80000000 bit pattern.  */
	    mask = GEN_INT (-0x80000000);
	    mask = gen_rtx_CONST_VECTOR (mode,
					 gen_rtvec (4, mask, mask, mask, mask));
	    mask = force_reg (mode, mask);
	    t2 = gen_reg_rtx (mode);
	    emit_insn (gen_andv4si3 (t2, cop0, mask));

	    /* XOR it back into the result of the subtraction.  This results
	       in the sign bit set iff we saw unsigned underflow.  */
	    x = gen_reg_rtx (mode);
	    emit_insn (gen_xorv4si3 (x, t1, t2));

	    code = GT;
	  }
	  break;

	case V16QImode:
	case V8HImode:
	  /* Perform a parallel unsigned saturating subtraction.  */
	  x = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, x,
				  gen_rtx_US_MINUS (mode, cop0, cop1)));

	  /* cop0 <=u cop1 iff the saturating difference is zero, so test
	     EQ against zero and flip which arm is selected.  */
	  code = EQ;
	  negate = !negate;
	  break;

	default:
	  gcc_unreachable ();
	}

      cop0 = x;
      cop1 = CONST0_RTX (mode);
    }

  /* NEGATE selects the arms in swapped order instead of inverting the
     comparison mask.  */
  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
			   operands[1+negate], operands[2-negate]);

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
			 operands[2-negate]);
  return true;
}

/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.
 */
int
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Only +1/-1 adjustments can be expressed with a single adc/sbb.  */
  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
				       ix86_compare_op1, &compare_op))
    return 0;
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      /* The condition does not match the carry flag directly: reverse it
	 and compensate by adding/subtracting -1 instead of 0.  */
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
	{
	case QImode:
	  emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case HImode:
	  emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case SImode:
	  emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case DImode:
	  emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (GET_MODE (operands[0]))
	{
	case QImode:
	  emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case HImode:
	  emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case SImode:
	  emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case DImode:
	  emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  return 1; /* DONE */
}


/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  /* Number of word-sized parts: on 32-bit targets XFmode needs 3
     SImode parts; on 64-bit targets parts are DImode-sized.  */
  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 3);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
	 the operand may actually have a different mode now.  That's
	 ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Consecutive hard registers hold the parts; only valid
		 after reload has assigned them.  */
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  gcc_unreachable ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  /* XFmode's upper part is only 4 bytes (12-byte object);
	     TFmode's is a full DImode.  */
	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);

	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);

	      if (upper_mode == SImode)
		parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
		parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }

  return size;
}

/* Emit insns to perform a move or push of DI, DF, and XF values.
   The destination parts are placed in operands 2-4 and the source parts
   in operands 5-7, in the order that avoids overwriting a source part
   before it is copied, and the part moves are emitted here.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (GET_CODE (operands[0]) != MEM
		|| offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  part[1][1] = replace_equiv_address (part[1][1],
					      plus_constant (base, UNITS_PER_WORD));
	  if (nparts == 3)
	    part[1][2] = replace_equiv_address (part[1][2],
						plus_constant (base, 8));
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* Keep the 16-byte slot: pre-decrement by the 4 bytes the
		 three 4-byte pushes won't cover.  */
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      switch (GET_CODE (part[1][1]))
		{
		case MEM:
		  part[1][1] = adjust_address (part[1][1], DImode, 0);
		  break;

		case REG:
		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
		  break;

		default:
		  gcc_unreachable ();
		}

	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Copy from the highest-numbered part downwards.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_size)
    {
      if (GET_CODE (operands[5]) == CONST_INT
	  && operands[5] != const0_rtx
	  && REG_P (operands[2]))
	{
	  if (GET_CODE (operands[6]) == CONST_INT
	      && INTVAL (operands[6]) == INTVAL (operands[5]))
	    operands[6] = operands[2];

	  if (nparts == 3
	      && GET_CODE (operands[7]) == CONST_INT
	      && INTVAL (operands[7]) == INTVAL (operands[5]))
	    operands[7] = operands[2];
	}

      if (nparts == 3
	  && GET_CODE (operands[6]) == CONST_INT
	  && operands[6] != const0_rtx
	  && REG_P (operands[3])
	  && GET_CODE (operands[7]) == CONST_INT
	  && INTVAL (operands[7]) == INTVAL (operands[6]))
	operands[7] = operands[3];
    }

  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}

/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  Note that MODE is the double-word
   mode being split, so the half-word generators look inverted:
   splitting DImode operates on SImode halves, TImode on DImode halves.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  if (count == 1)
    {
      emit_insn ((mode == DImode
		 ? gen_addsi3
		 : gen_adddi3) (operand, operand, operand));
    }
  else if (!optimize_size
	   && count * ix86_cost->add <= ix86_cost->shift_const)
    {
      /* A short run of adds is cheaper than a shift on this CPU.  */
      int i;
      for (i=0; i<count; i++)
	{
	  emit_insn ((mode == DImode
		     ? gen_addsi3
		     : gen_adddi3) (operand, operand, operand));
	}
    }
  else
    emit_insn ((mode == DImode
	       ? gen_ashlsi3
	       : gen_ashldi3) (operand, operand, GEN_INT (count)));
}

/* Split a double-word left shift (MODE is DImode for 32-bit halves,
   TImode for 64-bit halves) into single-word operations.  SCRATCH,
   together with cmov, avoids a branch for variable shift counts.  */

void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
	{
	  /* Whole low word shifts out: move low into high and clear low.  */
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > single_width)
	    ix86_expand_ashl_const (high[0], count - single_width, mode);
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		     ? gen_x86_shld_1
		     : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
	  ix86_expand_ashl_const (low[0], count, mode);
	}
      return;
    }

  (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen a QImode capable registers, then 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
	{
	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

	  ix86_expand_clear (low[0]);
	  ix86_expand_clear (high[0]);
	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));

	  d = gen_lowpart (QImode, low[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));

	  d = gen_lowpart (QImode, high[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_NE (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
	}

      /* Otherwise, we can get the same results by manually performing
	 a bit extract operation on bit 5/6, and then performing the two
	 shifts.  The two methods of getting 0/1 into low/high are exactly
	 the same size.  Avoiding the shift in the bit extract case helps
	 pentium4 a bit; no one else seems to care much either way.  */
      else
	{
	  rtx x;

	  if (TARGET_PARTIAL_REG_STALL && !optimize_size)
	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
	  else
	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

	  emit_insn ((mode == DImode
		      ? gen_lshrsi3
		      : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
	  emit_insn ((mode == DImode
		      ? gen_andsi3
		      : gen_anddi3) (high[0], high[0], GEN_INT (1)));
	  emit_move_insn (low[0], high[0]);
	  emit_insn ((mode == DImode
		      ? gen_xorsi3
		      : gen_xordi3) (low[0], low[0], GEN_INT (1)));
	}

      emit_insn ((mode == DImode
		    ? gen_ashlsi3
		    : gen_ashldi3) (low[0], low[0], operands[2]));
      emit_insn ((mode == DImode
		    ? gen_ashlsi3
		    : gen_ashldi3) (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
	 know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_size)
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
      emit_insn ((mode == DImode
		  ? gen_x86_shld_1
		  : gen_x86_64_shld) (high[0], low[0], operands[2]));
    }

  emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));

  /* Fix up for counts >= single_width: either branch-free via cmov on
     SCRATCH, or with the branching adjustment pattern.  */
  if (TARGET_CMOVE && scratch)
    {
      ix86_expand_clear (scratch);
      emit_insn ((mode == DImode
		  ? gen_x86_shift_adj_1
		  : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
    }
  else
    emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
}

/* Split a double-word arithmetic right shift into single-word
   operations; same MODE/SCRATCH conventions as ix86_split_ashl.  */

void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count == single_width * 2 - 1)
	{
	  /* Shifting by all bits leaves only the sign, replicated into
	     both words.  */
	  emit_move_insn (high[0], high[1]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
	  emit_move_insn (low[0], high[0]);

	}
      else if (count >= single_width)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], low[0]);
	  /* High word becomes the sign extension of the old high word.  */
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
	  if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_ashrsi3
			: gen_ashrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd_1
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
		  ? gen_x86_shrd_1
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_ashrsi3
		  : gen_ashrdi3) (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  /* SCRATCH holds the sign word for the >= single_width case.  */
	  emit_move_insn (scratch, high[0]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (scratch, scratch,
				      GEN_INT (single_width - 1)));
	  emit_insn ((mode == DImode
		      ? gen_x86_shift_adj_1
		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
					       scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}

/* Split a double-word logical right shift into single-word operations;
   same MODE/SCRATCH conventions as ix86_split_ashl.  */

void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
	{
	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	  if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_lshrsi3
			: gen_lshrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd_1
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_lshrsi3
		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
		  ? gen_x86_shrd_1
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_lshrsi3
		  : gen_lshrdi3) (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && scratch)
	{
	  ix86_expand_clear (scratch);
	  emit_insn ((mode == DImode
		      ? gen_x86_shift_adj_1
		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
					       scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}

/* Helper function for the string operations below.  Emit code that
   tests (VARIABLE & VALUE) and jumps to a fresh label when the result
   is zero (i.e. the tested alignment bit(s) of VARIABLE are clear).
   Returns the label so the caller can emit the unaligned path and then
   bind it.  */
static rtx
ix86_expand_aligntest (rtx variable, int value)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  return label;
}

/* Adjust COUNTER by the VALUE (emits a subtraction of VALUE).  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}

/* Expand string move (memcpy) operation.
   Use i386 string operations when
   profitable.  expand_clrmem contains similar code.
   DST and SRC are the destination and source MEMs, COUNT_EXP the byte
   count and ALIGN_EXP the known alignment.  Return 1 when expanded
   inline, 0 when the caller should fall back to a library call.  */
int
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
{
  rtx srcreg, destreg, countreg, srcexp, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi or edi.  */
  if (global_regs[4] || global_regs[5])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT
      || GET_MODE (count_exp) == SImode
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  gcc_assert (counter_mode == SImode || counter_mode == DImode);

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
  if (srcreg != XEXP (src, 0))
    src = replace_equiv_address_nv (src, srcreg);

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
     sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
     Size of (movsl;)*(movsw;)?(movsb;)? sequence is
     count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
     but we don't know whether upper 24 (resp. 56) bits of %ecx will be
     known to be zero or not.  The rep; movsb sequence causes higher
     register pressure though, so take that into account.  */

  if ((!optimize || optimize_size)
      && (count == 0
	  || ((count & 0x03)
	      && (!optimize_size
		  || count > 5 * 4
		  || (count & 3) + count / 4 > 6))))
    {
      emit_insn (gen_cld ());
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
			      destexp, srcexp));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      unsigned HOST_WIDE_INT offset = 0;
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      rtx srcmem, dstmem;

      emit_insn (gen_cld ());
      if (count & ~(size - 1))
	{
	  if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
	    {
	      /* Few words: emit individual movs insns instead of a
		 rep-prefixed one.  */
	      enum machine_mode movs_mode = size == 4 ? SImode : DImode;

	      while (offset < (count & ~(size - 1)))
		{
		  srcmem = adjust_automodify_address_nv (src, movs_mode,
							 srcreg, offset);
		  dstmem = adjust_automodify_address_nv (dst, movs_mode,
							 destreg, offset);
		  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
		  offset += size;
		}
	    }
	  else
	    {
	      /* countreg is the element (not byte) count; on 32-bit
		 targets it fits in 30 bits, hence the mask.  */
	      countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
				  & (TARGET_64BIT ? -1 : 0x3fffffff));
	      countreg = copy_to_mode_reg (counter_mode, countreg);
	      countreg = ix86_zero_extend_to_Pmode (countreg);

	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
					GEN_INT (size == 4 ? 2 : 3));
	      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);

	      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
				      countreg, destexp, srcexp));
	      offset = count & ~(size - 1);
	    }
	}
      /* Copy the remaining tail (up to size-1 bytes) with progressively
	 narrower single moves.  */
      if (size == 8 && (count & 0x04))
	{
	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  offset += 4;
	}
      if (count & 0x02)
	{
	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  offset += 2;
	}
      if (count & 0x01)
	{
	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
       allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      rtx srcmem, dstmem;
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);
      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);
      src = change_address (src, BLKmode, srcreg);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
	 than 4 bytes, because gcc is able to optimize such code better (in
	 the case the destination or the count really is aligned, gcc is often
	 able to predict the branches) and also it is friendlier to the
	 hardware branch prediction.

	 Using loops is beneficial for generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costly.  Maybe we can revisit this decision later or
	 add some customizability to this code.  */

      /* If the count is unknown, skip the alignment fixups entirely for
	 small counts.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}
      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
	}
      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
			      countreg2, destexp, srcexp));

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      /* Copy the remaining 0..(word size - 1) bytes; runtime tests are
	 only emitted when the count is unknown or alignment does not
	 settle the question statically.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	{
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if ((align <= 4 || count == 0) && TARGET_64BIT)
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	{
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	{
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }

  return 1;
}

/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movmem contains similar code.
   DST is the destination MEM, COUNT_EXP the byte count and ALIGN_EXP the
   known alignment.  Return 1 when expanded inline, 0 when the caller
   should fall back to a library call.  */
int
ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
{
  rtx destreg, zeroreg, countreg, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi.  */
  if (global_regs[4])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT
      || GET_MODE (count_exp) == SImode
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);


  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
     sequence is 7 bytes long, so if optimizing for size and count is
     small enough that some stosl, stosw and stosb instructions without
     rep are shorter, fall back into the next if.
 */

  if ((!optimize || optimize_size)
      && (count == 0
	  || ((count & 0x03)
	      && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
    {
      emit_insn (gen_cld ());

      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
    }
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      /* Constant aligned (or small unaligned) clears: rep stosl/stosq
	 for the bulk, individual stores for the tail.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      unsigned HOST_WIDE_INT offset = 0;

      emit_insn (gen_cld ());

      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
	{
	  unsigned HOST_WIDE_INT repcount;
	  unsigned int max_nonrep;

	  repcount = count >> (size == 4 ? 2 : 3);
	  if (!TARGET_64BIT)
	    repcount &= 0x3fffffff;

	  /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
	     movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
	     bytes.  In both cases the latter seems to be faster for small
	     values of N.  */
	  max_nonrep = size == 4 ? 7 : 4;
	  if (!optimize_size)
	    switch (ix86_tune)
	      {
	      case PROCESSOR_PENTIUM4:
	      case PROCESSOR_NOCONA:
		max_nonrep = 3;
		break;
	      default:
		break;
	      }

	  if (repcount <= max_nonrep)
	    while (repcount-- > 0)
	      {
		rtx mem = adjust_automodify_address_nv (dst,
							GET_MODE (zeroreg),
							destreg, offset);
		emit_insn (gen_strset (destreg, mem, zeroreg));
		offset += size;
	      }
	  else
	    {
	      countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
	      countreg = ix86_zero_extend_to_Pmode (countreg);
	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
					GEN_INT (size == 4 ? 2 : 3));
	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
	      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
				       destexp));
	      offset = count & ~(size - 1);
	    }
	}
      /* Tail stores use narrower SUBREGs of the zero register.  */
      if (size == 8 && (count & 0x04))
	{
	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  offset += 4;
	}
      if (count & 0x02)
	{
	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  offset += 2;
	}
      if (count & 0x01)
	{
	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	}
    }
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
      /* Get rid of MEM_OFFSET, it won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);

      /* Skip the alignment fixups for small unknown counts; same scheme
	 as in ix86_expand_movmem above.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strset (destreg, dst,
				 (TARGET_64BIT
				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
				  : zeroreg)));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}

      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
	}
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      /* Clear the remaining 0..(word size - 1) bytes.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  return 1;
}

/* Expand strlen.  OUT is the result register, SRC the source MEM,
   EOSCHAR the terminator character and ALIGN the known alignment rtx.
   Return 1 if expanded inline, 0 if the library function should be
   used instead.  */
int
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid it's
     expanding unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.
 */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      /* Generic path: repnz scasb via the strlenqi_1 pattern, then
	 OUT = ~scratch1 - 1 recovers the length from the scan count.  */
      rtx unspec;
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
						 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      if (TARGET_64BIT)
	{
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
    }
  return 1;
}

/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.
     (word - 0x01010101) & ~word & 0x80808080 -- the classic
     zero-byte detection trick.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  if (TARGET_CMOVE)
    {
       rtx reg = gen_reg_rtx (SImode);
       rtx reg2 = gen_reg_rtx (Pmode);
       emit_move_insn (reg, tmpreg);
       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

       /* If zero is not in the first two bytes, move two bytes forward.  */
       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
						     reg,
						     tmpreg)));
       /* Emit lea manually to avoid clobbering of flags.  */
       /* NOTE(review): SImode as the first argument of gen_rtx_SET is
	  unusual (VOIDmode elsewhere in this file) -- confirm this is
	  intentional before touching it.  */
       emit_insn (gen_rtx_SET (SImode, reg2,
			       gen_rtx_PLUS (Pmode, out, const2_rtx)));

       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, out,
			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						     reg2,
						     out)));

    }
  else
    {
       rtx end_2_label = gen_label_rtx ();
       /* Is zero in the first two bytes? */

       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				   gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				   pc_rtx);
       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
       JUMP_LABEL (tmp) = end_2_label;

       /* Not in the first two.  Move two bytes forward.  */
       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
       if (TARGET_64BIT)
	 emit_insn (gen_adddi3 (out, out, const2_rtx));
       else
	 emit_insn (gen_addsi3 (out, out, const2_rtx));

       emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  Doubling the low byte moves its
     0x80 marker bit into the carry flag; the subtract-with-carry below
     then backs OUT up by 3 or 4 as appropriate.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  /* NOTE(review): hard reg 17 here appears to be FLAGS_REG (used by
     name elsewhere in this function) -- confirm.  */
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
}

/* Emit the call sequence.  RETVAL, when non-NULL, receives the call's
   value.  FNADDR is a MEM wrapping the call target, CALLARG1 the stack
   argument size rtx; on 64-bit targets a non-negative CALLARG2 is
   loaded into AL before the call (varargs convention -- see below).
   POP is the byte count the callee pops (32-bit only), SIBCALL is
   nonzero for a tail call.  */
void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
		  rtx callarg2 ATTRIBUTE_UNUSED,
		  rtx pop, int sibcall)
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
#if TARGET_MACHO
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
	fnaddr = machopic_indirect_call_target (fnaddr);
#endif
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  */
      if (! TARGET_64BIT && flag_pic
	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
	use_reg (&use, pic_offset_table_rtx);
    }

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      /* Hard reg 0 in QImode is AL.  */
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (!
call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      /* 64-bit sibcalls must go through a call-clobbered register that
	 is not used for argument passing; R11 is used here.  */
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      /* Represent the callee-pop stack adjustment in the call pattern.  */
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}


/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  /* ggc_alloc_cleared zeroes the struct; only the non-zero defaults
     need explicit initialization.  */
  f = ggc_alloc_cleared (sizeof (struct machine_function));
  f->use_fast_prologue_epilogue_nregs = -1;
  f->tls_descriptor_call_expanded_p = 0;

  return f;
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  /* Virtual slot is valid only before vregs are instantiated.  */
  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);

  /* Reuse a previously allocated slot of the same mode and index.  */
  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->rtl;

  s = (struct stack_local_entry *)
    ggc_alloc (sizeof (struct stack_local_entry));
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return s->rtl;
}

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;
rtx
ix86_tls_get_addr (void)
{
  /* Lazily created and cached in GC-protected static storage.  */
  if (!ix86_tls_symbol)
    {
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
					    (TARGET_ANY_GNU_TLS
					     && !TARGET_64BIT)
					    ? "___tls_get_addr"
					    : "__tls_get_addr");
    }

  return ix86_tls_symbol;
}

/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;
rtx
ix86_tls_module_base (void)
{
  /* Lazily created and cached; marked as a global-dynamic TLS symbol.  */
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
							"_TLS_MODULE_BASE_");
      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}

/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.
   Returns the number of extra encoding bytes (SIB and/or displacement);
   auto-modifying addresses yield 0.  */

int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* Constraint K: 8-bit signed immediate, so a 1-byte disp.  */
	  if (base && satisfies_constraint_K (disp))
	    len = 1;
	  else
	    len = 4;
	}
      /* ebp always wants a displacement.  */
      else if (base == hard_frame_pointer_rtx)
	len = 1;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp, which always wants an index.  */
	  || base == stack_pointer_rtx
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx)
	len += 1;
    }

  return len;
}

/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that insn have 8bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, int shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	/* At most one immediate operand is expected per insn.  */
	gcc_assert (!len);
	if (shortform && satisfies_constraint_K (recog_data.operand[i]))
	  len = 1;
	else
	  {
	    switch (get_attr_mode (insn))
	      {
		case MODE_QI:
		  len+=1;
		  break;
		case MODE_HI:
		  len+=2;
		  break;
		case MODE_SI:
		  len+=4;
		  break;
		/* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
		case MODE_DI:
		  len+=4;
		  break;
		default:
		  fatal_insn ("unknown insn mode", insn);
	      }
	  }
      }
  return len;
}
/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn);

      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      return memory_address_length (SET_SRC (set));
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      {
	return memory_address_length (XEXP (recog_data.operand[i], 0));
	/* NOTE(review): this break is unreachable after the return
	   above; harmless dead code.  */
	break;
      }
  return 0;
}

/* Return the maximum number of instructions a cpu can issue.
*/ 13822 13823static int 13824ix86_issue_rate (void) 13825{ 13826 switch (ix86_tune) 13827 { 13828 case PROCESSOR_PENTIUM: 13829 case PROCESSOR_K6: 13830 return 2; 13831 13832 case PROCESSOR_PENTIUMPRO: 13833 case PROCESSOR_PENTIUM4: 13834 case PROCESSOR_ATHLON: 13835 case PROCESSOR_K8: 13836 case PROCESSOR_NOCONA: 13837 case PROCESSOR_GENERIC32: 13838 case PROCESSOR_GENERIC64: 13839 return 3; 13840 13841 case PROCESSOR_CORE2: 13842 return 4; 13843 13844 default: 13845 return 1; 13846 } 13847} 13848 13849/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set 13850 by DEP_INSN and nothing set by DEP_INSN. */ 13851 13852static int 13853ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) 13854{ 13855 rtx set, set2; 13856 13857 /* Simplify the test for uninteresting insns. */ 13858 if (insn_type != TYPE_SETCC 13859 && insn_type != TYPE_ICMOV 13860 && insn_type != TYPE_FCMOV 13861 && insn_type != TYPE_IBR) 13862 return 0; 13863 13864 if ((set = single_set (dep_insn)) != 0) 13865 { 13866 set = SET_DEST (set); 13867 set2 = NULL_RTX; 13868 } 13869 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL 13870 && XVECLEN (PATTERN (dep_insn), 0) == 2 13871 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET 13872 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET) 13873 { 13874 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 13875 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 13876 } 13877 else 13878 return 0; 13879 13880 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG) 13881 return 0; 13882 13883 /* This test is true if the dependent insn reads the flags but 13884 not any other potentially set register. 
*/ 13885 if (!reg_overlap_mentioned_p (set, PATTERN (insn))) 13886 return 0; 13887 13888 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn))) 13889 return 0; 13890 13891 return 1; 13892} 13893 13894/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory 13895 address with operands set by DEP_INSN. */ 13896 13897static int 13898ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) 13899{ 13900 rtx addr; 13901 13902 if (insn_type == TYPE_LEA 13903 && TARGET_PENTIUM) 13904 { 13905 addr = PATTERN (insn); 13906 13907 if (GET_CODE (addr) == PARALLEL) 13908 addr = XVECEXP (addr, 0, 0); 13909 13910 gcc_assert (GET_CODE (addr) == SET); 13911 13912 addr = SET_SRC (addr); 13913 } 13914 else 13915 { 13916 int i; 13917 extract_insn_cached (insn); 13918 for (i = recog_data.n_operands - 1; i >= 0; --i) 13919 if (GET_CODE (recog_data.operand[i]) == MEM) 13920 { 13921 addr = XEXP (recog_data.operand[i], 0); 13922 goto found; 13923 } 13924 return 0; 13925 found:; 13926 } 13927 13928 return modified_in_p (addr, dep_insn); 13929} 13930 13931static int 13932ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) 13933{ 13934 enum attr_type insn_type, dep_insn_type; 13935 enum attr_memory memory; 13936 rtx set, set2; 13937 int dep_insn_code_number; 13938 13939 /* Anti and output dependencies have zero cost on all CPUs. */ 13940 if (REG_NOTE_KIND (link) != 0) 13941 return 0; 13942 13943 dep_insn_code_number = recog_memoized (dep_insn); 13944 13945 /* If we can't recognize the insns, we can't really do anything. */ 13946 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0) 13947 return cost; 13948 13949 insn_type = get_attr_type (insn); 13950 dep_insn_type = get_attr_type (dep_insn); 13951 13952 switch (ix86_tune) 13953 { 13954 case PROCESSOR_PENTIUM: 13955 /* Address Generation Interlock adds a cycle of latency. */ 13956 if (ix86_agi_dependent (insn, dep_insn, insn_type)) 13957 cost += 1; 13958 13959 /* ??? 
Compares pair with jump/setcc. */ 13960 if (ix86_flags_dependent (insn, dep_insn, insn_type)) 13961 cost = 0; 13962 13963 /* Floating point stores require value to be ready one cycle earlier. */ 13964 if (insn_type == TYPE_FMOV 13965 && get_attr_memory (insn) == MEMORY_STORE 13966 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 13967 cost += 1; 13968 break; 13969 13970 case PROCESSOR_PENTIUMPRO: 13971 memory = get_attr_memory (insn); 13972 13973 /* INT->FP conversion is expensive. */ 13974 if (get_attr_fp_int_src (dep_insn)) 13975 cost += 5; 13976 13977 /* There is one cycle extra latency between an FP op and a store. */ 13978 if (insn_type == TYPE_FMOV 13979 && (set = single_set (dep_insn)) != NULL_RTX 13980 && (set2 = single_set (insn)) != NULL_RTX 13981 && rtx_equal_p (SET_DEST (set), SET_SRC (set2)) 13982 && GET_CODE (SET_DEST (set2)) == MEM) 13983 cost += 1; 13984 13985 /* Show ability of reorder buffer to hide latency of load by executing 13986 in parallel with previous instruction in case 13987 previous instruction is not needed to compute the address. */ 13988 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 13989 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 13990 { 13991 /* Claim moves to take one cycle, as core can issue one load 13992 at time and the next load can start cycle later. */ 13993 if (dep_insn_type == TYPE_IMOV 13994 || dep_insn_type == TYPE_FMOV) 13995 cost = 1; 13996 else if (cost > 1) 13997 cost--; 13998 } 13999 break; 14000 14001 case PROCESSOR_K6: 14002 memory = get_attr_memory (insn); 14003 14004 /* The esp dependency is resolved before the instruction is really 14005 finished. */ 14006 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) 14007 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) 14008 return 1; 14009 14010 /* INT->FP conversion is expensive. 
*/ 14011 if (get_attr_fp_int_src (dep_insn)) 14012 cost += 5; 14013 14014 /* Show ability of reorder buffer to hide latency of load by executing 14015 in parallel with previous instruction in case 14016 previous instruction is not needed to compute the address. */ 14017 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 14018 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 14019 { 14020 /* Claim moves to take one cycle, as core can issue one load 14021 at time and the next load can start cycle later. */ 14022 if (dep_insn_type == TYPE_IMOV 14023 || dep_insn_type == TYPE_FMOV) 14024 cost = 1; 14025 else if (cost > 2) 14026 cost -= 2; 14027 else 14028 cost = 1; 14029 } 14030 break; 14031 14032 case PROCESSOR_ATHLON: 14033 case PROCESSOR_K8: 14034 case PROCESSOR_GENERIC32: 14035 case PROCESSOR_GENERIC64: 14036 memory = get_attr_memory (insn); 14037 14038 /* Show ability of reorder buffer to hide latency of load by executing 14039 in parallel with previous instruction in case 14040 previous instruction is not needed to compute the address. */ 14041 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 14042 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 14043 { 14044 enum attr_unit unit = get_attr_unit (insn); 14045 int loadcost = 3; 14046 14047 /* Because of the difference between the length of integer and 14048 floating unit pipeline preparation stages, the memory operands 14049 for floating point are cheaper. 14050 14051 ??? For Athlon it the difference is most probably 2. */ 14052 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN) 14053 loadcost = 3; 14054 else 14055 loadcost = TARGET_ATHLON ? 2 : 0; 14056 14057 if (cost >= loadcost) 14058 cost -= loadcost; 14059 else 14060 cost = 0; 14061 } 14062 14063 default: 14064 break; 14065 } 14066 14067 return cost; 14068} 14069 14070/* How many alternative schedules to try. This should be as wide as the 14071 scheduling freedom in the DFA, but no wider. 
   Making this value too
   large results extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  if (ix86_tune == PROCESSOR_PENTIUM)
    return 2;

  if (ix86_tune == PROCESSOR_PENTIUMPRO
      || ix86_tune == PROCESSOR_K6)
    return 1;

  else
    return 0;
}

/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  /* Long string constants get word alignment (when not optimizing for
     size) so block operations on them are faster.  */
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && !TARGET_NO_ALIGN_LONG_STRINGS
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  int max_align = optimize_size ? BITS_PER_WORD : 256;

  /* Large aggregates get boosted to MAX_ALIGN; the TREE_INT_CST_HIGH
     test catches sizes too big for the low word alone.  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.
 */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is consulted here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (tree type, int align)
{
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.
 */
  if (TARGET_64BIT)
    {
      /* NOTE(review): this threshold is 16 *bytes* (not bits) while the
	 static-variable variant in ix86_data_alignment compares against
	 128; presumably intentional for locals -- confirm against ABI.  */
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.
 */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      /* Byte 0: 0xb9 -- presumably movl $cxt, %ecx; bytes 1-4 hold the
	 static chain value -- confirm against the 32-bit trampoline
	 layout in i386.h.  */
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      /* Byte 5: 0xe9 -- jmp rel32; bytes 6-9 hold DISP.  */
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but kernel does not use
	 trampolines at the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  /* 0x41 0xbb: movl $imm32, %r11d (zero-extends into r11).  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* 0x49 0xbb: movabs $imm64, %r11.  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10.
*/ 14278 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 14279 gen_int_mode (0xba49, HImode)); 14280 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), 14281 cxt); 14282 offset += 10; 14283 /* Jump to the r11 */ 14284 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 14285 gen_int_mode (0xff49, HImode)); 14286 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)), 14287 gen_int_mode (0xe3, QImode)); 14288 offset += 3; 14289 gcc_assert (offset <= TRAMPOLINE_SIZE); 14290 } 14291 14292#ifdef ENABLE_EXECUTE_STACK 14293 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), 14294 LCT_NORMAL, VOIDmode, 1, tramp, Pmode); 14295#endif 14296} 14297 14298/* Codes for all the SSE/MMX builtins. */ 14299enum ix86_builtins 14300{ 14301 IX86_BUILTIN_ADDPS, 14302 IX86_BUILTIN_ADDSS, 14303 IX86_BUILTIN_DIVPS, 14304 IX86_BUILTIN_DIVSS, 14305 IX86_BUILTIN_MULPS, 14306 IX86_BUILTIN_MULSS, 14307 IX86_BUILTIN_SUBPS, 14308 IX86_BUILTIN_SUBSS, 14309 14310 IX86_BUILTIN_CMPEQPS, 14311 IX86_BUILTIN_CMPLTPS, 14312 IX86_BUILTIN_CMPLEPS, 14313 IX86_BUILTIN_CMPGTPS, 14314 IX86_BUILTIN_CMPGEPS, 14315 IX86_BUILTIN_CMPNEQPS, 14316 IX86_BUILTIN_CMPNLTPS, 14317 IX86_BUILTIN_CMPNLEPS, 14318 IX86_BUILTIN_CMPNGTPS, 14319 IX86_BUILTIN_CMPNGEPS, 14320 IX86_BUILTIN_CMPORDPS, 14321 IX86_BUILTIN_CMPUNORDPS, 14322 IX86_BUILTIN_CMPEQSS, 14323 IX86_BUILTIN_CMPLTSS, 14324 IX86_BUILTIN_CMPLESS, 14325 IX86_BUILTIN_CMPNEQSS, 14326 IX86_BUILTIN_CMPNLTSS, 14327 IX86_BUILTIN_CMPNLESS, 14328 IX86_BUILTIN_CMPNGTSS, 14329 IX86_BUILTIN_CMPNGESS, 14330 IX86_BUILTIN_CMPORDSS, 14331 IX86_BUILTIN_CMPUNORDSS, 14332 14333 IX86_BUILTIN_COMIEQSS, 14334 IX86_BUILTIN_COMILTSS, 14335 IX86_BUILTIN_COMILESS, 14336 IX86_BUILTIN_COMIGTSS, 14337 IX86_BUILTIN_COMIGESS, 14338 IX86_BUILTIN_COMINEQSS, 14339 IX86_BUILTIN_UCOMIEQSS, 14340 IX86_BUILTIN_UCOMILTSS, 14341 IX86_BUILTIN_UCOMILESS, 14342 IX86_BUILTIN_UCOMIGTSS, 14343 IX86_BUILTIN_UCOMIGESS, 
14344 IX86_BUILTIN_UCOMINEQSS, 14345 14346 IX86_BUILTIN_CVTPI2PS, 14347 IX86_BUILTIN_CVTPS2PI, 14348 IX86_BUILTIN_CVTSI2SS, 14349 IX86_BUILTIN_CVTSI642SS, 14350 IX86_BUILTIN_CVTSS2SI, 14351 IX86_BUILTIN_CVTSS2SI64, 14352 IX86_BUILTIN_CVTTPS2PI, 14353 IX86_BUILTIN_CVTTSS2SI, 14354 IX86_BUILTIN_CVTTSS2SI64, 14355 14356 IX86_BUILTIN_MAXPS, 14357 IX86_BUILTIN_MAXSS, 14358 IX86_BUILTIN_MINPS, 14359 IX86_BUILTIN_MINSS, 14360 14361 IX86_BUILTIN_LOADUPS, 14362 IX86_BUILTIN_STOREUPS, 14363 IX86_BUILTIN_MOVSS, 14364 14365 IX86_BUILTIN_MOVHLPS, 14366 IX86_BUILTIN_MOVLHPS, 14367 IX86_BUILTIN_LOADHPS, 14368 IX86_BUILTIN_LOADLPS, 14369 IX86_BUILTIN_STOREHPS, 14370 IX86_BUILTIN_STORELPS, 14371 14372 IX86_BUILTIN_MASKMOVQ, 14373 IX86_BUILTIN_MOVMSKPS, 14374 IX86_BUILTIN_PMOVMSKB, 14375 14376 IX86_BUILTIN_MOVNTPS, 14377 IX86_BUILTIN_MOVNTQ, 14378 14379 IX86_BUILTIN_LOADDQU, 14380 IX86_BUILTIN_STOREDQU, 14381 14382 IX86_BUILTIN_PACKSSWB, 14383 IX86_BUILTIN_PACKSSDW, 14384 IX86_BUILTIN_PACKUSWB, 14385 14386 IX86_BUILTIN_PADDB, 14387 IX86_BUILTIN_PADDW, 14388 IX86_BUILTIN_PADDD, 14389 IX86_BUILTIN_PADDQ, 14390 IX86_BUILTIN_PADDSB, 14391 IX86_BUILTIN_PADDSW, 14392 IX86_BUILTIN_PADDUSB, 14393 IX86_BUILTIN_PADDUSW, 14394 IX86_BUILTIN_PSUBB, 14395 IX86_BUILTIN_PSUBW, 14396 IX86_BUILTIN_PSUBD, 14397 IX86_BUILTIN_PSUBQ, 14398 IX86_BUILTIN_PSUBSB, 14399 IX86_BUILTIN_PSUBSW, 14400 IX86_BUILTIN_PSUBUSB, 14401 IX86_BUILTIN_PSUBUSW, 14402 14403 IX86_BUILTIN_PAND, 14404 IX86_BUILTIN_PANDN, 14405 IX86_BUILTIN_POR, 14406 IX86_BUILTIN_PXOR, 14407 14408 IX86_BUILTIN_PAVGB, 14409 IX86_BUILTIN_PAVGW, 14410 14411 IX86_BUILTIN_PCMPEQB, 14412 IX86_BUILTIN_PCMPEQW, 14413 IX86_BUILTIN_PCMPEQD, 14414 IX86_BUILTIN_PCMPGTB, 14415 IX86_BUILTIN_PCMPGTW, 14416 IX86_BUILTIN_PCMPGTD, 14417 14418 IX86_BUILTIN_PMADDWD, 14419 14420 IX86_BUILTIN_PMAXSW, 14421 IX86_BUILTIN_PMAXUB, 14422 IX86_BUILTIN_PMINSW, 14423 IX86_BUILTIN_PMINUB, 14424 14425 IX86_BUILTIN_PMULHUW, 14426 IX86_BUILTIN_PMULHW, 14427 IX86_BUILTIN_PMULLW, 
14428 14429 IX86_BUILTIN_PSADBW, 14430 IX86_BUILTIN_PSHUFW, 14431 14432 IX86_BUILTIN_PSLLW, 14433 IX86_BUILTIN_PSLLD, 14434 IX86_BUILTIN_PSLLQ, 14435 IX86_BUILTIN_PSRAW, 14436 IX86_BUILTIN_PSRAD, 14437 IX86_BUILTIN_PSRLW, 14438 IX86_BUILTIN_PSRLD, 14439 IX86_BUILTIN_PSRLQ, 14440 IX86_BUILTIN_PSLLWI, 14441 IX86_BUILTIN_PSLLDI, 14442 IX86_BUILTIN_PSLLQI, 14443 IX86_BUILTIN_PSRAWI, 14444 IX86_BUILTIN_PSRADI, 14445 IX86_BUILTIN_PSRLWI, 14446 IX86_BUILTIN_PSRLDI, 14447 IX86_BUILTIN_PSRLQI, 14448 14449 IX86_BUILTIN_PUNPCKHBW, 14450 IX86_BUILTIN_PUNPCKHWD, 14451 IX86_BUILTIN_PUNPCKHDQ, 14452 IX86_BUILTIN_PUNPCKLBW, 14453 IX86_BUILTIN_PUNPCKLWD, 14454 IX86_BUILTIN_PUNPCKLDQ, 14455 14456 IX86_BUILTIN_SHUFPS, 14457 14458 IX86_BUILTIN_RCPPS, 14459 IX86_BUILTIN_RCPSS, 14460 IX86_BUILTIN_RSQRTPS, 14461 IX86_BUILTIN_RSQRTSS, 14462 IX86_BUILTIN_SQRTPS, 14463 IX86_BUILTIN_SQRTSS, 14464 14465 IX86_BUILTIN_UNPCKHPS, 14466 IX86_BUILTIN_UNPCKLPS, 14467 14468 IX86_BUILTIN_ANDPS, 14469 IX86_BUILTIN_ANDNPS, 14470 IX86_BUILTIN_ORPS, 14471 IX86_BUILTIN_XORPS, 14472 14473 IX86_BUILTIN_EMMS, 14474 IX86_BUILTIN_LDMXCSR, 14475 IX86_BUILTIN_STMXCSR, 14476 IX86_BUILTIN_SFENCE, 14477 14478 /* 3DNow! Original */ 14479 IX86_BUILTIN_FEMMS, 14480 IX86_BUILTIN_PAVGUSB, 14481 IX86_BUILTIN_PF2ID, 14482 IX86_BUILTIN_PFACC, 14483 IX86_BUILTIN_PFADD, 14484 IX86_BUILTIN_PFCMPEQ, 14485 IX86_BUILTIN_PFCMPGE, 14486 IX86_BUILTIN_PFCMPGT, 14487 IX86_BUILTIN_PFMAX, 14488 IX86_BUILTIN_PFMIN, 14489 IX86_BUILTIN_PFMUL, 14490 IX86_BUILTIN_PFRCP, 14491 IX86_BUILTIN_PFRCPIT1, 14492 IX86_BUILTIN_PFRCPIT2, 14493 IX86_BUILTIN_PFRSQIT1, 14494 IX86_BUILTIN_PFRSQRT, 14495 IX86_BUILTIN_PFSUB, 14496 IX86_BUILTIN_PFSUBR, 14497 IX86_BUILTIN_PI2FD, 14498 IX86_BUILTIN_PMULHRW, 14499 14500 /* 3DNow! 
Athlon Extensions */ 14501 IX86_BUILTIN_PF2IW, 14502 IX86_BUILTIN_PFNACC, 14503 IX86_BUILTIN_PFPNACC, 14504 IX86_BUILTIN_PI2FW, 14505 IX86_BUILTIN_PSWAPDSI, 14506 IX86_BUILTIN_PSWAPDSF, 14507 14508 /* SSE2 */ 14509 IX86_BUILTIN_ADDPD, 14510 IX86_BUILTIN_ADDSD, 14511 IX86_BUILTIN_DIVPD, 14512 IX86_BUILTIN_DIVSD, 14513 IX86_BUILTIN_MULPD, 14514 IX86_BUILTIN_MULSD, 14515 IX86_BUILTIN_SUBPD, 14516 IX86_BUILTIN_SUBSD, 14517 14518 IX86_BUILTIN_CMPEQPD, 14519 IX86_BUILTIN_CMPLTPD, 14520 IX86_BUILTIN_CMPLEPD, 14521 IX86_BUILTIN_CMPGTPD, 14522 IX86_BUILTIN_CMPGEPD, 14523 IX86_BUILTIN_CMPNEQPD, 14524 IX86_BUILTIN_CMPNLTPD, 14525 IX86_BUILTIN_CMPNLEPD, 14526 IX86_BUILTIN_CMPNGTPD, 14527 IX86_BUILTIN_CMPNGEPD, 14528 IX86_BUILTIN_CMPORDPD, 14529 IX86_BUILTIN_CMPUNORDPD, 14530 IX86_BUILTIN_CMPNEPD, 14531 IX86_BUILTIN_CMPEQSD, 14532 IX86_BUILTIN_CMPLTSD, 14533 IX86_BUILTIN_CMPLESD, 14534 IX86_BUILTIN_CMPNEQSD, 14535 IX86_BUILTIN_CMPNLTSD, 14536 IX86_BUILTIN_CMPNLESD, 14537 IX86_BUILTIN_CMPORDSD, 14538 IX86_BUILTIN_CMPUNORDSD, 14539 IX86_BUILTIN_CMPNESD, 14540 14541 IX86_BUILTIN_COMIEQSD, 14542 IX86_BUILTIN_COMILTSD, 14543 IX86_BUILTIN_COMILESD, 14544 IX86_BUILTIN_COMIGTSD, 14545 IX86_BUILTIN_COMIGESD, 14546 IX86_BUILTIN_COMINEQSD, 14547 IX86_BUILTIN_UCOMIEQSD, 14548 IX86_BUILTIN_UCOMILTSD, 14549 IX86_BUILTIN_UCOMILESD, 14550 IX86_BUILTIN_UCOMIGTSD, 14551 IX86_BUILTIN_UCOMIGESD, 14552 IX86_BUILTIN_UCOMINEQSD, 14553 14554 IX86_BUILTIN_MAXPD, 14555 IX86_BUILTIN_MAXSD, 14556 IX86_BUILTIN_MINPD, 14557 IX86_BUILTIN_MINSD, 14558 14559 IX86_BUILTIN_ANDPD, 14560 IX86_BUILTIN_ANDNPD, 14561 IX86_BUILTIN_ORPD, 14562 IX86_BUILTIN_XORPD, 14563 14564 IX86_BUILTIN_SQRTPD, 14565 IX86_BUILTIN_SQRTSD, 14566 14567 IX86_BUILTIN_UNPCKHPD, 14568 IX86_BUILTIN_UNPCKLPD, 14569 14570 IX86_BUILTIN_SHUFPD, 14571 14572 IX86_BUILTIN_LOADUPD, 14573 IX86_BUILTIN_STOREUPD, 14574 IX86_BUILTIN_MOVSD, 14575 14576 IX86_BUILTIN_LOADHPD, 14577 IX86_BUILTIN_LOADLPD, 14578 14579 IX86_BUILTIN_CVTDQ2PD, 14580 
IX86_BUILTIN_CVTDQ2PS, 14581 14582 IX86_BUILTIN_CVTPD2DQ, 14583 IX86_BUILTIN_CVTPD2PI, 14584 IX86_BUILTIN_CVTPD2PS, 14585 IX86_BUILTIN_CVTTPD2DQ, 14586 IX86_BUILTIN_CVTTPD2PI, 14587 14588 IX86_BUILTIN_CVTPI2PD, 14589 IX86_BUILTIN_CVTSI2SD, 14590 IX86_BUILTIN_CVTSI642SD, 14591 14592 IX86_BUILTIN_CVTSD2SI, 14593 IX86_BUILTIN_CVTSD2SI64, 14594 IX86_BUILTIN_CVTSD2SS, 14595 IX86_BUILTIN_CVTSS2SD, 14596 IX86_BUILTIN_CVTTSD2SI, 14597 IX86_BUILTIN_CVTTSD2SI64, 14598 14599 IX86_BUILTIN_CVTPS2DQ, 14600 IX86_BUILTIN_CVTPS2PD, 14601 IX86_BUILTIN_CVTTPS2DQ, 14602 14603 IX86_BUILTIN_MOVNTI, 14604 IX86_BUILTIN_MOVNTPD, 14605 IX86_BUILTIN_MOVNTDQ, 14606 14607 /* SSE2 MMX */ 14608 IX86_BUILTIN_MASKMOVDQU, 14609 IX86_BUILTIN_MOVMSKPD, 14610 IX86_BUILTIN_PMOVMSKB128, 14611 14612 IX86_BUILTIN_PACKSSWB128, 14613 IX86_BUILTIN_PACKSSDW128, 14614 IX86_BUILTIN_PACKUSWB128, 14615 14616 IX86_BUILTIN_PADDB128, 14617 IX86_BUILTIN_PADDW128, 14618 IX86_BUILTIN_PADDD128, 14619 IX86_BUILTIN_PADDQ128, 14620 IX86_BUILTIN_PADDSB128, 14621 IX86_BUILTIN_PADDSW128, 14622 IX86_BUILTIN_PADDUSB128, 14623 IX86_BUILTIN_PADDUSW128, 14624 IX86_BUILTIN_PSUBB128, 14625 IX86_BUILTIN_PSUBW128, 14626 IX86_BUILTIN_PSUBD128, 14627 IX86_BUILTIN_PSUBQ128, 14628 IX86_BUILTIN_PSUBSB128, 14629 IX86_BUILTIN_PSUBSW128, 14630 IX86_BUILTIN_PSUBUSB128, 14631 IX86_BUILTIN_PSUBUSW128, 14632 14633 IX86_BUILTIN_PAND128, 14634 IX86_BUILTIN_PANDN128, 14635 IX86_BUILTIN_POR128, 14636 IX86_BUILTIN_PXOR128, 14637 14638 IX86_BUILTIN_PAVGB128, 14639 IX86_BUILTIN_PAVGW128, 14640 14641 IX86_BUILTIN_PCMPEQB128, 14642 IX86_BUILTIN_PCMPEQW128, 14643 IX86_BUILTIN_PCMPEQD128, 14644 IX86_BUILTIN_PCMPGTB128, 14645 IX86_BUILTIN_PCMPGTW128, 14646 IX86_BUILTIN_PCMPGTD128, 14647 14648 IX86_BUILTIN_PMADDWD128, 14649 14650 IX86_BUILTIN_PMAXSW128, 14651 IX86_BUILTIN_PMAXUB128, 14652 IX86_BUILTIN_PMINSW128, 14653 IX86_BUILTIN_PMINUB128, 14654 14655 IX86_BUILTIN_PMULUDQ, 14656 IX86_BUILTIN_PMULUDQ128, 14657 IX86_BUILTIN_PMULHUW128, 14658 
IX86_BUILTIN_PMULHW128, 14659 IX86_BUILTIN_PMULLW128, 14660 14661 IX86_BUILTIN_PSADBW128, 14662 IX86_BUILTIN_PSHUFHW, 14663 IX86_BUILTIN_PSHUFLW, 14664 IX86_BUILTIN_PSHUFD, 14665 14666 IX86_BUILTIN_PSLLW128, 14667 IX86_BUILTIN_PSLLD128, 14668 IX86_BUILTIN_PSLLQ128, 14669 IX86_BUILTIN_PSRAW128, 14670 IX86_BUILTIN_PSRAD128, 14671 IX86_BUILTIN_PSRLW128, 14672 IX86_BUILTIN_PSRLD128, 14673 IX86_BUILTIN_PSRLQ128, 14674 IX86_BUILTIN_PSLLDQI128, 14675 IX86_BUILTIN_PSLLWI128, 14676 IX86_BUILTIN_PSLLDI128, 14677 IX86_BUILTIN_PSLLQI128, 14678 IX86_BUILTIN_PSRAWI128, 14679 IX86_BUILTIN_PSRADI128, 14680 IX86_BUILTIN_PSRLDQI128, 14681 IX86_BUILTIN_PSRLWI128, 14682 IX86_BUILTIN_PSRLDI128, 14683 IX86_BUILTIN_PSRLQI128, 14684 14685 IX86_BUILTIN_PUNPCKHBW128, 14686 IX86_BUILTIN_PUNPCKHWD128, 14687 IX86_BUILTIN_PUNPCKHDQ128, 14688 IX86_BUILTIN_PUNPCKHQDQ128, 14689 IX86_BUILTIN_PUNPCKLBW128, 14690 IX86_BUILTIN_PUNPCKLWD128, 14691 IX86_BUILTIN_PUNPCKLDQ128, 14692 IX86_BUILTIN_PUNPCKLQDQ128, 14693 14694 IX86_BUILTIN_CLFLUSH, 14695 IX86_BUILTIN_MFENCE, 14696 IX86_BUILTIN_LFENCE, 14697 14698 /* Prescott New Instructions. */ 14699 IX86_BUILTIN_ADDSUBPS, 14700 IX86_BUILTIN_HADDPS, 14701 IX86_BUILTIN_HSUBPS, 14702 IX86_BUILTIN_MOVSHDUP, 14703 IX86_BUILTIN_MOVSLDUP, 14704 IX86_BUILTIN_ADDSUBPD, 14705 IX86_BUILTIN_HADDPD, 14706 IX86_BUILTIN_HSUBPD, 14707 IX86_BUILTIN_LDDQU, 14708 14709 IX86_BUILTIN_MONITOR, 14710 IX86_BUILTIN_MWAIT, 14711 14712 /* SSSE3. 
*/ 14713 IX86_BUILTIN_PHADDW, 14714 IX86_BUILTIN_PHADDD, 14715 IX86_BUILTIN_PHADDSW, 14716 IX86_BUILTIN_PHSUBW, 14717 IX86_BUILTIN_PHSUBD, 14718 IX86_BUILTIN_PHSUBSW, 14719 IX86_BUILTIN_PMADDUBSW, 14720 IX86_BUILTIN_PMULHRSW, 14721 IX86_BUILTIN_PSHUFB, 14722 IX86_BUILTIN_PSIGNB, 14723 IX86_BUILTIN_PSIGNW, 14724 IX86_BUILTIN_PSIGND, 14725 IX86_BUILTIN_PALIGNR, 14726 IX86_BUILTIN_PABSB, 14727 IX86_BUILTIN_PABSW, 14728 IX86_BUILTIN_PABSD, 14729 14730 IX86_BUILTIN_PHADDW128, 14731 IX86_BUILTIN_PHADDD128, 14732 IX86_BUILTIN_PHADDSW128, 14733 IX86_BUILTIN_PHSUBW128, 14734 IX86_BUILTIN_PHSUBD128, 14735 IX86_BUILTIN_PHSUBSW128, 14736 IX86_BUILTIN_PMADDUBSW128, 14737 IX86_BUILTIN_PMULHRSW128, 14738 IX86_BUILTIN_PSHUFB128, 14739 IX86_BUILTIN_PSIGNB128, 14740 IX86_BUILTIN_PSIGNW128, 14741 IX86_BUILTIN_PSIGND128, 14742 IX86_BUILTIN_PALIGNR128, 14743 IX86_BUILTIN_PABSB128, 14744 IX86_BUILTIN_PABSW128, 14745 IX86_BUILTIN_PABSD128, 14746 14747 IX86_BUILTIN_VEC_INIT_V2SI, 14748 IX86_BUILTIN_VEC_INIT_V4HI, 14749 IX86_BUILTIN_VEC_INIT_V8QI, 14750 IX86_BUILTIN_VEC_EXT_V2DF, 14751 IX86_BUILTIN_VEC_EXT_V2DI, 14752 IX86_BUILTIN_VEC_EXT_V4SF, 14753 IX86_BUILTIN_VEC_EXT_V4SI, 14754 IX86_BUILTIN_VEC_EXT_V8HI, 14755 IX86_BUILTIN_VEC_EXT_V16QI, 14756 IX86_BUILTIN_VEC_EXT_V2SI, 14757 IX86_BUILTIN_VEC_EXT_V4HI, 14758 IX86_BUILTIN_VEC_SET_V8HI, 14759 IX86_BUILTIN_VEC_SET_V4HI, 14760 14761 IX86_BUILTIN_MAX 14762}; 14763 14764#define def_builtin(MASK, NAME, TYPE, CODE) \ 14765do { \ 14766 if ((MASK) & target_flags \ 14767 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \ 14768 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \ 14769 NULL, NULL_TREE); \ 14770} while (0) 14771 14772/* Bits for builtin_description.flag. */ 14773 14774/* Set when we don't support the comparison natively, and should 14775 swap_comparison in order to support it. 
*/ 14776#define BUILTIN_DESC_SWAP_OPERANDS 1 14777 14778struct builtin_description 14779{ 14780 const unsigned int mask; 14781 const enum insn_code icode; 14782 const char *const name; 14783 const enum ix86_builtins code; 14784 const enum rtx_code comparison; 14785 const unsigned int flag; 14786}; 14787 14788static const struct builtin_description bdesc_comi[] = 14789{ 14790 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 }, 14791 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 }, 14792 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 }, 14793 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 }, 14794 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 }, 14795 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 }, 14796 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 }, 14797 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 }, 14798 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 }, 14799 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 }, 14800 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 }, 14801 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 }, 14802 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 }, 14803 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 }, 14804 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 }, 14805 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 }, 14806 { MASK_SSE2, CODE_FOR_sse2_comi, 
"__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 }, 14807 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 }, 14808 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 }, 14809 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 }, 14810 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 }, 14811 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 }, 14812 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 }, 14813 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 }, 14814}; 14815 14816static const struct builtin_description bdesc_2arg[] = 14817{ 14818 /* SSE */ 14819 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }, 14820 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, 14821 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, 14822 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, 14823 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, 14824 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, 14825 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, 14826 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, 14827 14828 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, 14829 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, 14830 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, 14831 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", 
IX86_BUILTIN_CMPGTPS, LT, 14832 BUILTIN_DESC_SWAP_OPERANDS }, 14833 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 14834 BUILTIN_DESC_SWAP_OPERANDS }, 14835 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, 14836 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 }, 14837 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 }, 14838 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 }, 14839 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, 14840 BUILTIN_DESC_SWAP_OPERANDS }, 14841 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, 14842 BUILTIN_DESC_SWAP_OPERANDS }, 14843 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 }, 14844 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, 14845 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, 14846 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, 14847 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, 14848 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 }, 14849 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 }, 14850 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 }, 14851 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, 14852 BUILTIN_DESC_SWAP_OPERANDS }, 14853 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, 
UNGT, 14854 BUILTIN_DESC_SWAP_OPERANDS }, 14855 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 }, 14856 14857 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, 14858 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, 14859 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, 14860 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, 14861 14862 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, 14863 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, 14864 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, 14865 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, 14866 14867 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, 14868 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, 14869 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, 14870 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, 14871 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, 14872 14873 /* MMX */ 14874 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, 14875 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 }, 14876 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 }, 14877 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 }, 14878 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 }, 14879 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 }, 14880 { MASK_MMX, 
CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 }, 14881 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 }, 14882 14883 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 }, 14884 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 }, 14885 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 }, 14886 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 }, 14887 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 }, 14888 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 }, 14889 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 }, 14890 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 }, 14891 14892 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, 14893 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, 14894 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, 14895 14896 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 }, 14897 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, 14898 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, 14899 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 }, 14900 14901 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, 14902 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, 14903 14904 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, 14905 { MASK_MMX, 
CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, 14906 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 }, 14907 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 }, 14908 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, 14909 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, 14910 14911 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, 14912 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, 14913 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, 14914 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, 14915 14916 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 }, 14917 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 }, 14918 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 }, 14919 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 }, 14920 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 }, 14921 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 }, 14922 14923 /* Special. 
*/ 14924 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 }, 14925 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 }, 14926 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 }, 14927 14928 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, 14929 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, 14930 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, 14931 14932 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, 14933 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, 14934 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 }, 14935 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 }, 14936 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 }, 14937 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 }, 14938 14939 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 }, 14940 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 }, 14941 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 }, 14942 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 }, 14943 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 }, 14944 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 }, 14945 14946 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 }, 14947 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 }, 14948 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, 14949 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, 14950 14951 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, 14952 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }, 14953 14954 /* SSE2 */ 14955 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 }, 14956 { MASK_SSE2, CODE_FOR_subv2df3, 
"__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 }, 14957 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 }, 14958 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 }, 14959 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 }, 14960 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 }, 14961 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 }, 14962 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 }, 14963 14964 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 }, 14965 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 }, 14966 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 }, 14967 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 14968 BUILTIN_DESC_SWAP_OPERANDS }, 14969 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 14970 BUILTIN_DESC_SWAP_OPERANDS }, 14971 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 }, 14972 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 }, 14973 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 }, 14974 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 }, 14975 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, 14976 BUILTIN_DESC_SWAP_OPERANDS }, 14977 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, 14978 BUILTIN_DESC_SWAP_OPERANDS }, 14979 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", 
IX86_BUILTIN_CMPORDPD, ORDERED, 0 }, 14980 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 }, 14981 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 }, 14982 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 }, 14983 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 }, 14984 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 }, 14985 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 }, 14986 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 }, 14987 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 }, 14988 14989 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 }, 14990 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 }, 14991 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 }, 14992 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 }, 14993 14994 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 }, 14995 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 }, 14996 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 }, 14997 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 }, 14998 14999 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 }, 15000 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 }, 15001 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 }, 15002 15003 /* SSE2 MMX */ 
15004 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 }, 15005 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 }, 15006 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 }, 15007 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 }, 15008 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 }, 15009 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 }, 15010 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 }, 15011 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 }, 15012 15013 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 }, 15014 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 }, 15015 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 }, 15016 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 }, 15017 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 }, 15018 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 }, 15019 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 }, 15020 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 }, 15021 15022 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 }, 15023 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 }, 15024 15025 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 }, 15026 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, 
"__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 }, 15027 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 }, 15028 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 }, 15029 15030 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 }, 15031 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 }, 15032 15033 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 }, 15034 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 }, 15035 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 }, 15036 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 }, 15037 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 }, 15038 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 }, 15039 15040 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 }, 15041 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 }, 15042 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 }, 15043 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 }, 15044 15045 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 }, 15046 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 }, 15047 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 }, 15048 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 }, 15049 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, 
"__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 }, 15050 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 }, 15051 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 }, 15052 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 }, 15053 15054 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 }, 15055 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 }, 15056 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 }, 15057 15058 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 }, 15059 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 }, 15060 15061 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 }, 15062 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 }, 15063 15064 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 }, 15065 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 }, 15066 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 }, 15067 15068 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 }, 15069 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 }, 15070 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 }, 15071 15072 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 }, 15073 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 }, 15074 15075 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 }, 15076 15077 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, 15078 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, 15079 { MASK_SSE2, 
CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 }, 15080 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }, 15081 15082 /* SSE3 MMX */ 15083 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 }, 15084 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 }, 15085 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 }, 15086 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 }, 15087 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 }, 15088 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }, 15089 15090 /* SSSE3 */ 15091 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 }, 15092 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 }, 15093 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 }, 15094 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 }, 15095 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 }, 15096 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 }, 15097 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 }, 15098 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 }, 15099 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 }, 15100 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 }, 15101 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 }, 15102 { MASK_SSSE3, 
CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 }, 15103 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 }, 15104 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 }, 15105 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 }, 15106 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 }, 15107 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 }, 15108 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 }, 15109 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 }, 15110 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 }, 15111 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 }, 15112 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 }, 15113 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 }, 15114 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 } 15115}; 15116 15117static const struct builtin_description bdesc_1arg[] = 15118{ 15119 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 }, 15120 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 }, 15121 15122 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }, 15123 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 }, 15124 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 }, 15125 15126 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, 15127 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, 
15128 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 }, 15129 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, 15130 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }, 15131 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 }, 15132 15133 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 }, 15134 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 }, 15135 15136 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 }, 15137 15138 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 }, 15139 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 }, 15140 15141 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 }, 15142 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 }, 15143 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 }, 15144 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 }, 15145 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 }, 15146 15147 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 }, 15148 15149 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 }, 15150 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 }, 15151 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 }, 15152 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 }, 15153 15154 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 }, 15155 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 }, 15156 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }, 15157 15158 /* SSE3 */ 15159 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 }, 15160 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 }, 15161 15162 /* 
SSSE3 */ 15163 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 }, 15164 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 }, 15165 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 }, 15166 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 }, 15167 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 }, 15168 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 }, 15169}; 15170 15171static void 15172ix86_init_builtins (void) 15173{ 15174 if (TARGET_MMX) 15175 ix86_init_mmx_sse_builtins (); 15176} 15177 15178/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX 15179 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX 15180 builtins. */ 15181static void 15182ix86_init_mmx_sse_builtins (void) 15183{ 15184 const struct builtin_description * d; 15185 size_t i; 15186 15187 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode); 15188 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); 15189 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode); 15190 tree V2DI_type_node 15191 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode); 15192 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode); 15193 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode); 15194 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode); 15195 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); 15196 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode); 15197 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode); 15198 15199 tree pchar_type_node = build_pointer_type (char_type_node); 15200 tree pcchar_type_node = build_pointer_type ( 15201 
build_type_variant (char_type_node, 1, 0)); 15202 tree pfloat_type_node = build_pointer_type (float_type_node); 15203 tree pcfloat_type_node = build_pointer_type ( 15204 build_type_variant (float_type_node, 1, 0)); 15205 tree pv2si_type_node = build_pointer_type (V2SI_type_node); 15206 tree pv2di_type_node = build_pointer_type (V2DI_type_node); 15207 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node); 15208 15209 /* Comparisons. */ 15210 tree int_ftype_v4sf_v4sf 15211 = build_function_type_list (integer_type_node, 15212 V4SF_type_node, V4SF_type_node, NULL_TREE); 15213 tree v4si_ftype_v4sf_v4sf 15214 = build_function_type_list (V4SI_type_node, 15215 V4SF_type_node, V4SF_type_node, NULL_TREE); 15216 /* MMX/SSE/integer conversions. */ 15217 tree int_ftype_v4sf 15218 = build_function_type_list (integer_type_node, 15219 V4SF_type_node, NULL_TREE); 15220 tree int64_ftype_v4sf 15221 = build_function_type_list (long_long_integer_type_node, 15222 V4SF_type_node, NULL_TREE); 15223 tree int_ftype_v8qi 15224 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE); 15225 tree v4sf_ftype_v4sf_int 15226 = build_function_type_list (V4SF_type_node, 15227 V4SF_type_node, integer_type_node, NULL_TREE); 15228 tree v4sf_ftype_v4sf_int64 15229 = build_function_type_list (V4SF_type_node, 15230 V4SF_type_node, long_long_integer_type_node, 15231 NULL_TREE); 15232 tree v4sf_ftype_v4sf_v2si 15233 = build_function_type_list (V4SF_type_node, 15234 V4SF_type_node, V2SI_type_node, NULL_TREE); 15235 15236 /* Miscellaneous. 
*/ 15237 tree v8qi_ftype_v4hi_v4hi 15238 = build_function_type_list (V8QI_type_node, 15239 V4HI_type_node, V4HI_type_node, NULL_TREE); 15240 tree v4hi_ftype_v2si_v2si 15241 = build_function_type_list (V4HI_type_node, 15242 V2SI_type_node, V2SI_type_node, NULL_TREE); 15243 tree v4sf_ftype_v4sf_v4sf_int 15244 = build_function_type_list (V4SF_type_node, 15245 V4SF_type_node, V4SF_type_node, 15246 integer_type_node, NULL_TREE); 15247 tree v2si_ftype_v4hi_v4hi 15248 = build_function_type_list (V2SI_type_node, 15249 V4HI_type_node, V4HI_type_node, NULL_TREE); 15250 tree v4hi_ftype_v4hi_int 15251 = build_function_type_list (V4HI_type_node, 15252 V4HI_type_node, integer_type_node, NULL_TREE); 15253 tree v4hi_ftype_v4hi_di 15254 = build_function_type_list (V4HI_type_node, 15255 V4HI_type_node, long_long_unsigned_type_node, 15256 NULL_TREE); 15257 tree v2si_ftype_v2si_di 15258 = build_function_type_list (V2SI_type_node, 15259 V2SI_type_node, long_long_unsigned_type_node, 15260 NULL_TREE); 15261 tree void_ftype_void 15262 = build_function_type (void_type_node, void_list_node); 15263 tree void_ftype_unsigned 15264 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); 15265 tree void_ftype_unsigned_unsigned 15266 = build_function_type_list (void_type_node, unsigned_type_node, 15267 unsigned_type_node, NULL_TREE); 15268 tree void_ftype_pcvoid_unsigned_unsigned 15269 = build_function_type_list (void_type_node, const_ptr_type_node, 15270 unsigned_type_node, unsigned_type_node, 15271 NULL_TREE); 15272 tree unsigned_ftype_void 15273 = build_function_type (unsigned_type_node, void_list_node); 15274 tree v2si_ftype_v4sf 15275 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE); 15276 /* Loads/stores. 
*/ 15277 tree void_ftype_v8qi_v8qi_pchar 15278 = build_function_type_list (void_type_node, 15279 V8QI_type_node, V8QI_type_node, 15280 pchar_type_node, NULL_TREE); 15281 tree v4sf_ftype_pcfloat 15282 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); 15283 /* @@@ the type is bogus */ 15284 tree v4sf_ftype_v4sf_pv2si 15285 = build_function_type_list (V4SF_type_node, 15286 V4SF_type_node, pv2si_type_node, NULL_TREE); 15287 tree void_ftype_pv2si_v4sf 15288 = build_function_type_list (void_type_node, 15289 pv2si_type_node, V4SF_type_node, NULL_TREE); 15290 tree void_ftype_pfloat_v4sf 15291 = build_function_type_list (void_type_node, 15292 pfloat_type_node, V4SF_type_node, NULL_TREE); 15293 tree void_ftype_pdi_di 15294 = build_function_type_list (void_type_node, 15295 pdi_type_node, long_long_unsigned_type_node, 15296 NULL_TREE); 15297 tree void_ftype_pv2di_v2di 15298 = build_function_type_list (void_type_node, 15299 pv2di_type_node, V2DI_type_node, NULL_TREE); 15300 /* Normal vector unops. */ 15301 tree v4sf_ftype_v4sf 15302 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); 15303 tree v16qi_ftype_v16qi 15304 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE); 15305 tree v8hi_ftype_v8hi 15306 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE); 15307 tree v4si_ftype_v4si 15308 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); 15309 tree v8qi_ftype_v8qi 15310 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE); 15311 tree v4hi_ftype_v4hi 15312 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE); 15313 15314 /* Normal vector binops. 
*/ 15315 tree v4sf_ftype_v4sf_v4sf 15316 = build_function_type_list (V4SF_type_node, 15317 V4SF_type_node, V4SF_type_node, NULL_TREE); 15318 tree v8qi_ftype_v8qi_v8qi 15319 = build_function_type_list (V8QI_type_node, 15320 V8QI_type_node, V8QI_type_node, NULL_TREE); 15321 tree v4hi_ftype_v4hi_v4hi 15322 = build_function_type_list (V4HI_type_node, 15323 V4HI_type_node, V4HI_type_node, NULL_TREE); 15324 tree v2si_ftype_v2si_v2si 15325 = build_function_type_list (V2SI_type_node, 15326 V2SI_type_node, V2SI_type_node, NULL_TREE); 15327 tree di_ftype_di_di 15328 = build_function_type_list (long_long_unsigned_type_node, 15329 long_long_unsigned_type_node, 15330 long_long_unsigned_type_node, NULL_TREE); 15331 15332 tree di_ftype_di_di_int 15333 = build_function_type_list (long_long_unsigned_type_node, 15334 long_long_unsigned_type_node, 15335 long_long_unsigned_type_node, 15336 integer_type_node, NULL_TREE); 15337 15338 tree v2si_ftype_v2sf 15339 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE); 15340 tree v2sf_ftype_v2si 15341 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE); 15342 tree v2si_ftype_v2si 15343 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE); 15344 tree v2sf_ftype_v2sf 15345 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE); 15346 tree v2sf_ftype_v2sf_v2sf 15347 = build_function_type_list (V2SF_type_node, 15348 V2SF_type_node, V2SF_type_node, NULL_TREE); 15349 tree v2si_ftype_v2sf_v2sf 15350 = build_function_type_list (V2SI_type_node, 15351 V2SF_type_node, V2SF_type_node, NULL_TREE); 15352 tree pint_type_node = build_pointer_type (integer_type_node); 15353 tree pdouble_type_node = build_pointer_type (double_type_node); 15354 tree pcdouble_type_node = build_pointer_type ( 15355 build_type_variant (double_type_node, 1, 0)); 15356 tree int_ftype_v2df_v2df 15357 = build_function_type_list (integer_type_node, 15358 V2DF_type_node, V2DF_type_node, NULL_TREE); 15359 15360 tree 
void_ftype_pcvoid 15361 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE); 15362 tree v4sf_ftype_v4si 15363 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE); 15364 tree v4si_ftype_v4sf 15365 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE); 15366 tree v2df_ftype_v4si 15367 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE); 15368 tree v4si_ftype_v2df 15369 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE); 15370 tree v2si_ftype_v2df 15371 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE); 15372 tree v4sf_ftype_v2df 15373 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE); 15374 tree v2df_ftype_v2si 15375 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE); 15376 tree v2df_ftype_v4sf 15377 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE); 15378 tree int_ftype_v2df 15379 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE); 15380 tree int64_ftype_v2df 15381 = build_function_type_list (long_long_integer_type_node, 15382 V2DF_type_node, NULL_TREE); 15383 tree v2df_ftype_v2df_int 15384 = build_function_type_list (V2DF_type_node, 15385 V2DF_type_node, integer_type_node, NULL_TREE); 15386 tree v2df_ftype_v2df_int64 15387 = build_function_type_list (V2DF_type_node, 15388 V2DF_type_node, long_long_integer_type_node, 15389 NULL_TREE); 15390 tree v4sf_ftype_v4sf_v2df 15391 = build_function_type_list (V4SF_type_node, 15392 V4SF_type_node, V2DF_type_node, NULL_TREE); 15393 tree v2df_ftype_v2df_v4sf 15394 = build_function_type_list (V2DF_type_node, 15395 V2DF_type_node, V4SF_type_node, NULL_TREE); 15396 tree v2df_ftype_v2df_v2df_int 15397 = build_function_type_list (V2DF_type_node, 15398 V2DF_type_node, V2DF_type_node, 15399 integer_type_node, 15400 NULL_TREE); 15401 tree v2df_ftype_v2df_pcdouble 15402 = build_function_type_list (V2DF_type_node, 15403 V2DF_type_node, 
pcdouble_type_node, NULL_TREE); 15404 tree void_ftype_pdouble_v2df 15405 = build_function_type_list (void_type_node, 15406 pdouble_type_node, V2DF_type_node, NULL_TREE); 15407 tree void_ftype_pint_int 15408 = build_function_type_list (void_type_node, 15409 pint_type_node, integer_type_node, NULL_TREE); 15410 tree void_ftype_v16qi_v16qi_pchar 15411 = build_function_type_list (void_type_node, 15412 V16QI_type_node, V16QI_type_node, 15413 pchar_type_node, NULL_TREE); 15414 tree v2df_ftype_pcdouble 15415 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE); 15416 tree v2df_ftype_v2df_v2df 15417 = build_function_type_list (V2DF_type_node, 15418 V2DF_type_node, V2DF_type_node, NULL_TREE); 15419 tree v16qi_ftype_v16qi_v16qi 15420 = build_function_type_list (V16QI_type_node, 15421 V16QI_type_node, V16QI_type_node, NULL_TREE); 15422 tree v8hi_ftype_v8hi_v8hi 15423 = build_function_type_list (V8HI_type_node, 15424 V8HI_type_node, V8HI_type_node, NULL_TREE); 15425 tree v4si_ftype_v4si_v4si 15426 = build_function_type_list (V4SI_type_node, 15427 V4SI_type_node, V4SI_type_node, NULL_TREE); 15428 tree v2di_ftype_v2di_v2di 15429 = build_function_type_list (V2DI_type_node, 15430 V2DI_type_node, V2DI_type_node, NULL_TREE); 15431 tree v2di_ftype_v2df_v2df 15432 = build_function_type_list (V2DI_type_node, 15433 V2DF_type_node, V2DF_type_node, NULL_TREE); 15434 tree v2df_ftype_v2df 15435 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); 15436 tree v2di_ftype_v2di_int 15437 = build_function_type_list (V2DI_type_node, 15438 V2DI_type_node, integer_type_node, NULL_TREE); 15439 tree v2di_ftype_v2di_v2di_int 15440 = build_function_type_list (V2DI_type_node, V2DI_type_node, 15441 V2DI_type_node, integer_type_node, NULL_TREE); 15442 tree v4si_ftype_v4si_int 15443 = build_function_type_list (V4SI_type_node, 15444 V4SI_type_node, integer_type_node, NULL_TREE); 15445 tree v8hi_ftype_v8hi_int 15446 = build_function_type_list (V8HI_type_node, 15447 
V8HI_type_node, integer_type_node, NULL_TREE); 15448 tree v4si_ftype_v8hi_v8hi 15449 = build_function_type_list (V4SI_type_node, 15450 V8HI_type_node, V8HI_type_node, NULL_TREE); 15451 tree di_ftype_v8qi_v8qi 15452 = build_function_type_list (long_long_unsigned_type_node, 15453 V8QI_type_node, V8QI_type_node, NULL_TREE); 15454 tree di_ftype_v2si_v2si 15455 = build_function_type_list (long_long_unsigned_type_node, 15456 V2SI_type_node, V2SI_type_node, NULL_TREE); 15457 tree v2di_ftype_v16qi_v16qi 15458 = build_function_type_list (V2DI_type_node, 15459 V16QI_type_node, V16QI_type_node, NULL_TREE); 15460 tree v2di_ftype_v4si_v4si 15461 = build_function_type_list (V2DI_type_node, 15462 V4SI_type_node, V4SI_type_node, NULL_TREE); 15463 tree int_ftype_v16qi 15464 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE); 15465 tree v16qi_ftype_pcchar 15466 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE); 15467 tree void_ftype_pchar_v16qi 15468 = build_function_type_list (void_type_node, 15469 pchar_type_node, V16QI_type_node, NULL_TREE); 15470 15471 tree float80_type; 15472 tree float128_type; 15473 tree ftype; 15474 15475 /* The __float80 type. */ 15476 if (TYPE_MODE (long_double_type_node) == XFmode) 15477 (*lang_hooks.types.register_builtin_type) (long_double_type_node, 15478 "__float80"); 15479 else 15480 { 15481 /* The __float80 type. */ 15482 float80_type = make_node (REAL_TYPE); 15483 TYPE_PRECISION (float80_type) = 80; 15484 layout_type (float80_type); 15485 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80"); 15486 } 15487 15488 if (TARGET_64BIT) 15489 { 15490 float128_type = make_node (REAL_TYPE); 15491 TYPE_PRECISION (float128_type) = 128; 15492 layout_type (float128_type); 15493 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128"); 15494 } 15495 15496 /* Add all builtins that are more or less simple operations on two 15497 operands. 
*/ 15498 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) 15499 { 15500 /* Use one of the operands; the target can have a different mode for 15501 mask-generating compares. */ 15502 enum machine_mode mode; 15503 tree type; 15504 15505 if (d->name == 0) 15506 continue; 15507 mode = insn_data[d->icode].operand[1].mode; 15508 15509 switch (mode) 15510 { 15511 case V16QImode: 15512 type = v16qi_ftype_v16qi_v16qi; 15513 break; 15514 case V8HImode: 15515 type = v8hi_ftype_v8hi_v8hi; 15516 break; 15517 case V4SImode: 15518 type = v4si_ftype_v4si_v4si; 15519 break; 15520 case V2DImode: 15521 type = v2di_ftype_v2di_v2di; 15522 break; 15523 case V2DFmode: 15524 type = v2df_ftype_v2df_v2df; 15525 break; 15526 case V4SFmode: 15527 type = v4sf_ftype_v4sf_v4sf; 15528 break; 15529 case V8QImode: 15530 type = v8qi_ftype_v8qi_v8qi; 15531 break; 15532 case V4HImode: 15533 type = v4hi_ftype_v4hi_v4hi; 15534 break; 15535 case V2SImode: 15536 type = v2si_ftype_v2si_v2si; 15537 break; 15538 case DImode: 15539 type = di_ftype_di_di; 15540 break; 15541 15542 default: 15543 gcc_unreachable (); 15544 } 15545 15546 /* Override for comparisons. */ 15547 if (d->icode == CODE_FOR_sse_maskcmpv4sf3 15548 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3) 15549 type = v4si_ftype_v4sf_v4sf; 15550 15551 if (d->icode == CODE_FOR_sse2_maskcmpv2df3 15552 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3) 15553 type = v2di_ftype_v2df_v2df; 15554 15555 def_builtin (d->mask, d->name, type, d->code); 15556 } 15557 15558 /* Add all builtins that are more or less simple operations on 1 operand. 
*/ 15559 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) 15560 { 15561 enum machine_mode mode; 15562 tree type; 15563 15564 if (d->name == 0) 15565 continue; 15566 mode = insn_data[d->icode].operand[1].mode; 15567 15568 switch (mode) 15569 { 15570 case V16QImode: 15571 type = v16qi_ftype_v16qi; 15572 break; 15573 case V8HImode: 15574 type = v8hi_ftype_v8hi; 15575 break; 15576 case V4SImode: 15577 type = v4si_ftype_v4si; 15578 break; 15579 case V2DFmode: 15580 type = v2df_ftype_v2df; 15581 break; 15582 case V4SFmode: 15583 type = v4sf_ftype_v4sf; 15584 break; 15585 case V8QImode: 15586 type = v8qi_ftype_v8qi; 15587 break; 15588 case V4HImode: 15589 type = v4hi_ftype_v4hi; 15590 break; 15591 case V2SImode: 15592 type = v2si_ftype_v2si; 15593 break; 15594 15595 default: 15596 abort (); 15597 } 15598 15599 def_builtin (d->mask, d->name, type, d->code); 15600 } 15601 15602 /* Add the remaining MMX insns with somewhat more complicated types. */ 15603 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); 15604 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW); 15605 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD); 15606 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ); 15607 15608 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW); 15609 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD); 15610 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ); 15611 15612 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW); 15613 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD); 15614 15615 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW); 15616 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", 
v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD); 15617 15618 /* comi/ucomi insns. */ 15619 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) 15620 if (d->mask == MASK_SSE2) 15621 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code); 15622 else 15623 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); 15624 15625 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB); 15626 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW); 15627 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB); 15628 15629 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); 15630 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); 15631 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); 15632 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); 15633 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); 15634 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS); 15635 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); 15636 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64); 15637 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); 15638 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); 15639 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64); 15640 15641 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); 15642 15643 def_builtin (MASK_SSE, "__builtin_ia32_loadups", 
v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS); 15644 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); 15645 15646 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); 15647 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); 15648 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); 15649 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); 15650 15651 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); 15652 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); 15653 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); 15654 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); 15655 15656 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); 15657 15658 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); 15659 15660 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); 15661 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); 15662 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); 15663 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); 15664 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); 15665 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); 15666 15667 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); 15668 15669 /* Original 3DNow! 
*/ 15670 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS); 15671 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB); 15672 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID); 15673 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC); 15674 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD); 15675 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ); 15676 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE); 15677 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT); 15678 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX); 15679 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN); 15680 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL); 15681 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP); 15682 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1); 15683 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2); 15684 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT); 15685 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1); 15686 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB); 15687 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR); 15688 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD); 15689 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW); 15690 15691 /* 
3DNow! extension as used in the Athlon CPU. */ 15692 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW); 15693 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC); 15694 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC); 15695 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW); 15696 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF); 15697 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI); 15698 15699 /* SSE2 */ 15700 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU); 15701 15702 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD); 15703 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD); 15704 15705 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD); 15706 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD); 15707 15708 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD); 15709 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128); 15710 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI); 15711 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD); 15712 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ); 15713 15714 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD); 15715 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW); 15716 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", 
v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW); 15717 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128); 15718 15719 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD); 15720 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD); 15721 15722 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD); 15723 15724 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD); 15725 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS); 15726 15727 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ); 15728 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI); 15729 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS); 15730 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ); 15731 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI); 15732 15733 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD); 15734 15735 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI); 15736 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI); 15737 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64); 15738 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64); 15739 15740 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ); 15741 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD); 15742 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ); 
15743 15744 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD); 15745 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD); 15746 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS); 15747 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD); 15748 15749 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH); 15750 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE); 15751 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); 15752 15753 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU); 15754 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU); 15755 15756 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ); 15757 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128); 15758 15759 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128); 15760 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128); 15761 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128); 15762 15763 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128); 15764 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128); 15765 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128); 15766 15767 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128); 15768 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128); 15769 
15770 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128); 15771 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128); 15772 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128); 15773 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128); 15774 15775 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128); 15776 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128); 15777 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128); 15778 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128); 15779 15780 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128); 15781 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128); 15782 15783 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128); 15784 15785 /* Prescott New Instructions. */ 15786 def_builtin (MASK_SSE3, "__builtin_ia32_monitor", 15787 void_ftype_pcvoid_unsigned_unsigned, 15788 IX86_BUILTIN_MONITOR); 15789 def_builtin (MASK_SSE3, "__builtin_ia32_mwait", 15790 void_ftype_unsigned_unsigned, 15791 IX86_BUILTIN_MWAIT); 15792 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup", 15793 v4sf_ftype_v4sf, 15794 IX86_BUILTIN_MOVSHDUP); 15795 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup", 15796 v4sf_ftype_v4sf, 15797 IX86_BUILTIN_MOVSLDUP); 15798 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu", 15799 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU); 15800 15801 /* SSSE3. 
*/ 15802 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128", 15803 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128); 15804 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, 15805 IX86_BUILTIN_PALIGNR); 15806 15807 /* Access to the vec_init patterns. */ 15808 ftype = build_function_type_list (V2SI_type_node, integer_type_node, 15809 integer_type_node, NULL_TREE); 15810 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si", 15811 ftype, IX86_BUILTIN_VEC_INIT_V2SI); 15812 15813 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node, 15814 short_integer_type_node, 15815 short_integer_type_node, 15816 short_integer_type_node, NULL_TREE); 15817 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi", 15818 ftype, IX86_BUILTIN_VEC_INIT_V4HI); 15819 15820 ftype = build_function_type_list (V8QI_type_node, char_type_node, 15821 char_type_node, char_type_node, 15822 char_type_node, char_type_node, 15823 char_type_node, char_type_node, 15824 char_type_node, NULL_TREE); 15825 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi", 15826 ftype, IX86_BUILTIN_VEC_INIT_V8QI); 15827 15828 /* Access to the vec_extract patterns. 
*/ 15829 ftype = build_function_type_list (double_type_node, V2DF_type_node, 15830 integer_type_node, NULL_TREE); 15831 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df", 15832 ftype, IX86_BUILTIN_VEC_EXT_V2DF); 15833 15834 ftype = build_function_type_list (long_long_integer_type_node, 15835 V2DI_type_node, integer_type_node, 15836 NULL_TREE); 15837 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di", 15838 ftype, IX86_BUILTIN_VEC_EXT_V2DI); 15839 15840 ftype = build_function_type_list (float_type_node, V4SF_type_node, 15841 integer_type_node, NULL_TREE); 15842 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf", 15843 ftype, IX86_BUILTIN_VEC_EXT_V4SF); 15844 15845 ftype = build_function_type_list (intSI_type_node, V4SI_type_node, 15846 integer_type_node, NULL_TREE); 15847 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si", 15848 ftype, IX86_BUILTIN_VEC_EXT_V4SI); 15849 15850 ftype = build_function_type_list (intHI_type_node, V8HI_type_node, 15851 integer_type_node, NULL_TREE); 15852 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi", 15853 ftype, IX86_BUILTIN_VEC_EXT_V8HI); 15854 15855 ftype = build_function_type_list (intHI_type_node, V4HI_type_node, 15856 integer_type_node, NULL_TREE); 15857 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", 15858 ftype, IX86_BUILTIN_VEC_EXT_V4HI); 15859 15860 ftype = build_function_type_list (intSI_type_node, V2SI_type_node, 15861 integer_type_node, NULL_TREE); 15862 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si", 15863 ftype, IX86_BUILTIN_VEC_EXT_V2SI); 15864 15865 ftype = build_function_type_list (intQI_type_node, V16QI_type_node, 15866 integer_type_node, NULL_TREE); 15867 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI); 15868 15869 /* Access to the vec_set patterns. 
*/
  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
	       ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.

   ICODE is the insn code of the two-operand pattern to emit, ARGLIST
   holds the two builtin arguments, and TARGET (possibly NULL) is a
   suggested destination.  Returns the rtx holding the result, or 0 if
   the pattern failed to generate.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* An SImode operand where the pattern wants TImode (e.g. vector
     shift counts) is loaded into a V4SI register via sse2_loadd and
     then viewed as TImode.  */
  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result.  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of stores.

   The first builtin argument is the destination address, the second is
   the value to store.  Always returns 0 (store builtins have no
   value).  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.

   If DO_LOAD is nonzero the single argument is an address and the
   operand is a memory reference at that address; otherwise the
   argument is the operand value itself.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
			  rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.
*/

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  /* These patterns take two input operands; feed the single builtin
     argument to both.  */
  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.

   D describes the builtin, including the insn code and the comparison
   code to embed in the pattern; the result is a full-width mask in the
   pattern's output mode.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
			 rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.

   These set the flags register; the scalar result is materialized by a
   setcc into the low byte of a zeroed SImode pseudo, which is what we
   return.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
		      rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  /* Result is the QImode low part of a zero-initialized SImode pseudo;
     the caller's TARGET suggestion is ignored.  */
  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  /* Set the low byte from the comparison of the flags result against 0.  */
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}

/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  Returns 0 after
   diagnosing an out-of-range or non-constant selector.  */

static int
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.
Instead
   we have builtins invoked by mmintrin.h that gives us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));

  /* One argument per vector element, each narrowed to the element mode.  */
  for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
    {
      rtx x = expand_normal (TREE_VALUE (arglist));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  /* The argument list must supply exactly N_ELT values.  */
  gcc_assert (arglist == NULL);

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree arglist, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree arglist)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.
*/

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2, mode3;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* Builtins needing special expansion are handled in the switch; the
     rest fall through to the table-driven loops at the bottom.  */
  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_mmx_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sse_sfence ());
      return 0;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
	       : CODE_FOR_sse2_loadlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = force_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || !register_operand (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
	       : CODE_FOR_sse_storelps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      op1 = force_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return const0_rtx;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (TREE_VALUE (arglist));
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if ((optimize && !register_operand (op1, mode1))
	  || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* Vector shifts by an immediate count.  */
    case IX86_BUILTIN_PSLLWI128:
      icode = CODE_FOR_ashlv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSLLDI128:
      icode = CODE_FOR_ashlv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSLLQI128:
      icode = CODE_FOR_ashlv2di3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRAWI128:
      icode = CODE_FOR_ashrv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRADI128:
      icode = CODE_FOR_ashrv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLWI128:
      icode = CODE_FOR_lshrv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLDI128:
      icode = CODE_FOR_lshrv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLQI128:
      icode = CODE_FOR_lshrv2di3;
      goto do_pshifti;
    do_pshifti:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);

      if (GET_CODE (op1) != CONST_INT)
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      /* Out-of-range counts are clamped to 255, like the hardware
	 behavior of shifting everything out.  */
      if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
	op1 = GEN_INT (255);

      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_reg (op0);

      target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;

    /* Vector shifts by a count held in a vector register.  */
    case IX86_BUILTIN_PSLLW128:
      icode = CODE_FOR_ashlv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLD128:
      icode = CODE_FOR_ashlv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLQ128:
      icode = CODE_FOR_ashlv2di3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAW128:
      icode = CODE_FOR_ashrv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAD128:
      icode = CODE_FOR_ashrv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLW128:
      icode = CODE_FOR_lshrv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLD128:
      icode = CODE_FOR_lshrv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLQ128:
      icode = CODE_FOR_lshrv2di3;
      goto do_pshift;
    do_pshift:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);

      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_reg (op0);

      /* The count operand is viewed as TImode for the pattern.  */
      op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
	op1 = copy_to_reg (op1);

      target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
			     op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_mmx_femms ());
      return NULL_RTX;

    /* 3DNow! builtins, mapped onto the corresponding MMX patterns.  */
    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);

    case IX86_BUILTIN_MONITOR:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (Pmode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      if (!TARGET_64BIT)
	emit_insn (gen_sse3_monitor (op0, op1, op2));
      else
	emit_insn (gen_sse3_monitor64 (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
				       target, 1);

    case IX86_BUILTIN_PALIGNR:
    case IX86_BUILTIN_PALIGNR128:
      if (fcode == IX86_BUILTIN_PALIGNR)
	{
	  icode = CODE_FOR_ssse3_palignrdi;
	  mode = DImode;
	}
      else
	{
	  icode = CODE_FOR_ssse3_palignrti;
	  mode = V2DImode;
	}
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  op1 = copy_to_reg (op1);
	  op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
	}
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (mode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
			     op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
      return ix86_expand_vec_ext_builtin (arglist, target);

    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
      return ix86_expand_vec_set_builtin (arglist);

    default:
      break;
    }

  /* Not a special case: search the generic two-operand, one-operand
     and comi descriptor tables.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_sse_maskcmpv4sf3
	    || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_sse2_maskcmpv2df3
	    || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  gcc_unreachable ();
}

/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (TARGET_RED_ZONE)
    {
      /* With a red zone, store below the stack pointer without
	 adjusting it.  */
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      /* No red zone in 64-bit mode: push the value (widened to DImode)
	 with a pre-decrement store.  */
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (DImode,
						gen_rtx_PRE_DEC (DImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      /* 32-bit mode: push in SImode pieces.  */
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[1]));
	    emit_insn
( 16920 gen_rtx_SET (VOIDmode, 16921 gen_rtx_MEM (SImode, 16922 gen_rtx_PRE_DEC (Pmode, 16923 stack_pointer_rtx)), 16924 operands[0])); 16925 } 16926 break; 16927 case HImode: 16928 /* Store HImodes as SImodes. */ 16929 operand = gen_lowpart (SImode, operand); 16930 /* FALLTHRU */ 16931 case SImode: 16932 emit_insn ( 16933 gen_rtx_SET (VOIDmode, 16934 gen_rtx_MEM (GET_MODE (operand), 16935 gen_rtx_PRE_DEC (SImode, 16936 stack_pointer_rtx)), 16937 operand)); 16938 break; 16939 default: 16940 gcc_unreachable (); 16941 } 16942 result = gen_rtx_MEM (mode, stack_pointer_rtx); 16943 } 16944 return result; 16945} 16946 16947/* Free operand from the memory. */ 16948void 16949ix86_free_from_memory (enum machine_mode mode) 16950{ 16951 if (!TARGET_RED_ZONE) 16952 { 16953 int size; 16954 16955 if (mode == DImode || TARGET_64BIT) 16956 size = 8; 16957 else 16958 size = 4; 16959 /* Use LEA to deallocate stack space. In peephole2 it will be converted 16960 to pop or add instruction if registers are available. */ 16961 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, 16962 gen_rtx_PLUS (Pmode, stack_pointer_rtx, 16963 GEN_INT (size)))); 16964 } 16965} 16966 16967/* Put float CONST_DOUBLE in the constant pool instead of fp regs. 16968 QImode must go into class Q_REGS. 16969 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and 16970 movdf to do mem-to-mem moves through integer regs. */ 16971enum reg_class 16972ix86_preferred_reload_class (rtx x, enum reg_class class) 16973{ 16974 enum machine_mode mode = GET_MODE (x); 16975 16976 /* We're only allowed to return a subclass of CLASS. Many of the 16977 following checks fail for NO_REGS, so eliminate that early. */ 16978 if (class == NO_REGS) 16979 return NO_REGS; 16980 16981 /* All classes can load zeros. */ 16982 if (x == CONST0_RTX (mode)) 16983 return class; 16984 16985 /* Force constants into memory if we are loading a (nonzero) constant into 16986 an MMX or SSE register. 
This is because there are no MMX/SSE instructions 16987 to load from a constant. */ 16988 if (CONSTANT_P (x) 16989 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))) 16990 return NO_REGS; 16991 16992 /* Prefer SSE regs only, if we can use them for math. */ 16993 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode)) 16994 return SSE_CLASS_P (class) ? class : NO_REGS; 16995 16996 /* Floating-point constants need more complex checks. */ 16997 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode) 16998 { 16999 /* General regs can load everything. */ 17000 if (reg_class_subset_p (class, GENERAL_REGS)) 17001 return class; 17002 17003 /* Floats can load 0 and 1 plus some others. Note that we eliminated 17004 zero above. We only want to wind up preferring 80387 registers if 17005 we plan on doing computation with them. */ 17006 if (TARGET_80387 17007 && standard_80387_constant_p (x)) 17008 { 17009 /* Limit class to non-sse. */ 17010 if (class == FLOAT_SSE_REGS) 17011 return FLOAT_REGS; 17012 if (class == FP_TOP_SSE_REGS) 17013 return FP_TOP_REG; 17014 if (class == FP_SECOND_SSE_REGS) 17015 return FP_SECOND_REG; 17016 if (class == FLOAT_INT_REGS || class == FLOAT_REGS) 17017 return class; 17018 } 17019 17020 return NO_REGS; 17021 } 17022 17023 /* Generally when we see PLUS here, it's the function invariant 17024 (plus soft-fp const_int). Which can only be computed into general 17025 regs. */ 17026 if (GET_CODE (x) == PLUS) 17027 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS; 17028 17029 /* QImode constants are easy to load, but non-constant QImode data 17030 must go into Q_REGS. 
*/ 17031 if (GET_MODE (x) == QImode && !CONSTANT_P (x)) 17032 { 17033 if (reg_class_subset_p (class, Q_REGS)) 17034 return class; 17035 if (reg_class_subset_p (Q_REGS, class)) 17036 return Q_REGS; 17037 return NO_REGS; 17038 } 17039 17040 return class; 17041} 17042 17043/* Discourage putting floating-point values in SSE registers unless 17044 SSE math is being used, and likewise for the 387 registers. */ 17045enum reg_class 17046ix86_preferred_output_reload_class (rtx x, enum reg_class class) 17047{ 17048 enum machine_mode mode = GET_MODE (x); 17049 17050 /* Restrict the output reload class to the register bank that we are doing 17051 math on. If we would like not to return a subset of CLASS, reject this 17052 alternative: if reload cannot do this, it will still use its choice. */ 17053 mode = GET_MODE (x); 17054 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) 17055 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS; 17056 17057 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode)) 17058 { 17059 if (class == FP_TOP_SSE_REGS) 17060 return FP_TOP_REG; 17061 else if (class == FP_SECOND_SSE_REGS) 17062 return FP_SECOND_REG; 17063 else 17064 return FLOAT_CLASS_P (class) ? class : NO_REGS; 17065 } 17066 17067 return class; 17068} 17069 17070/* If we are copying between general and FP registers, we need a memory 17071 location. The same is true for SSE and MMX registers. 17072 17073 The macro can't work reliably when one of the CLASSES is class containing 17074 registers from multiple units (SSE, MMX, integer). We avoid this by never 17075 combining those units in single alternative in the machine description. 17076 Ensure that this constraint holds to avoid unexpected surprises. 17077 17078 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not 17079 enforce these sanity checks. 
*/ 17080 17081int 17082ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2, 17083 enum machine_mode mode, int strict) 17084{ 17085 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) 17086 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) 17087 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) 17088 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2) 17089 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1) 17090 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)) 17091 { 17092 gcc_assert (!strict); 17093 return true; 17094 } 17095 17096 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)) 17097 return true; 17098 17099 /* ??? This is a lie. We do have moves between mmx/general, and for 17100 mmx/sse2. But by saying we need secondary memory we discourage the 17101 register allocator from using the mmx registers unless needed. */ 17102 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) 17103 return true; 17104 17105 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) 17106 { 17107 /* SSE1 doesn't have any direct moves from other classes. */ 17108 if (!TARGET_SSE2) 17109 return true; 17110 17111 /* If the target says that inter-unit moves are more expensive 17112 than moving through memory, then don't generate them. */ 17113 if (!TARGET_INTER_UNIT_MOVES && !optimize_size) 17114 return true; 17115 17116 /* Between SSE and general, we have moves no larger than word size. */ 17117 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) 17118 return true; 17119 17120 /* ??? For the cost of one register reformat penalty, we could use 17121 the same instructions to move SFmode and DFmode data, but the 17122 relevant move patterns don't support those alternatives. */ 17123 if (mode == SFmode || mode == DFmode) 17124 return true; 17125 } 17126 17127 return false; 17128} 17129 17130/* Return true if the registers in CLASS cannot represent the change from 17131 modes FROM to TO. 
*/ 17132 17133bool 17134ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to, 17135 enum reg_class class) 17136{ 17137 if (from == to) 17138 return false; 17139 17140 /* x87 registers can't do subreg at all, as all values are reformatted 17141 to extended precision. */ 17142 if (MAYBE_FLOAT_CLASS_P (class)) 17143 return true; 17144 17145 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class)) 17146 { 17147 /* Vector registers do not support QI or HImode loads. If we don't 17148 disallow a change to these modes, reload will assume it's ok to 17149 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects 17150 the vec_dupv4hi pattern. */ 17151 if (GET_MODE_SIZE (from) < 4) 17152 return true; 17153 17154 /* Vector registers do not support subreg with nonzero offsets, which 17155 are otherwise valid for integer registers. Since we can't see 17156 whether we have a nonzero offset from here, prohibit all 17157 nonparadoxical subregs changing size. */ 17158 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from)) 17159 return true; 17160 } 17161 17162 return false; 17163} 17164 17165/* Return the cost of moving data from a register in class CLASS1 to 17166 one in class CLASS2. 17167 17168 It is not required that the cost always equal 2 when FROM is the same as TO; 17169 on some machines it is expensive to move between registers if they are not 17170 general registers. */ 17171 17172int 17173ix86_register_move_cost (enum machine_mode mode, enum reg_class class1, 17174 enum reg_class class2) 17175{ 17176 /* In case we require secondary memory, compute cost of the store followed 17177 by load. In order to avoid bad register allocation choices, we need 17178 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. 
*/ 17179 17180 if (ix86_secondary_memory_needed (class1, class2, mode, 0)) 17181 { 17182 int cost = 1; 17183 17184 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0), 17185 MEMORY_MOVE_COST (mode, class1, 1)); 17186 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0), 17187 MEMORY_MOVE_COST (mode, class2, 1)); 17188 17189 /* In case of copying from general_purpose_register we may emit multiple 17190 stores followed by single load causing memory size mismatch stall. 17191 Count this as arbitrarily high cost of 20. */ 17192 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode)) 17193 cost += 20; 17194 17195 /* In the case of FP/MMX moves, the registers actually overlap, and we 17196 have to switch modes in order to treat them differently. */ 17197 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2)) 17198 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1))) 17199 cost += 20; 17200 17201 return cost; 17202 } 17203 17204 /* Moves between SSE/MMX and integer unit are expensive. */ 17205 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2) 17206 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) 17207 return ix86_cost->mmxsse_to_integer; 17208 if (MAYBE_FLOAT_CLASS_P (class1)) 17209 return ix86_cost->fp_move; 17210 if (MAYBE_SSE_CLASS_P (class1)) 17211 return ix86_cost->sse_move; 17212 if (MAYBE_MMX_CLASS_P (class1)) 17213 return ix86_cost->mmx_move; 17214 return 2; 17215} 17216 17217/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */ 17218 17219bool 17220ix86_hard_regno_mode_ok (int regno, enum machine_mode mode) 17221{ 17222 /* Flags and only flags can only hold CCmode values. 
*/ 17223 if (CC_REGNO_P (regno)) 17224 return GET_MODE_CLASS (mode) == MODE_CC; 17225 if (GET_MODE_CLASS (mode) == MODE_CC 17226 || GET_MODE_CLASS (mode) == MODE_RANDOM 17227 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) 17228 return 0; 17229 if (FP_REGNO_P (regno)) 17230 return VALID_FP_MODE_P (mode); 17231 if (SSE_REGNO_P (regno)) 17232 { 17233 /* We implement the move patterns for all vector modes into and 17234 out of SSE registers, even when no operation instructions 17235 are available. */ 17236 return (VALID_SSE_REG_MODE (mode) 17237 || VALID_SSE2_REG_MODE (mode) 17238 || VALID_MMX_REG_MODE (mode) 17239 || VALID_MMX_REG_MODE_3DNOW (mode)); 17240 } 17241 if (MMX_REGNO_P (regno)) 17242 { 17243 /* We implement the move patterns for 3DNOW modes even in MMX mode, 17244 so if the register is available at all, then we can move data of 17245 the given mode into or out of it. */ 17246 return (VALID_MMX_REG_MODE (mode) 17247 || VALID_MMX_REG_MODE_3DNOW (mode)); 17248 } 17249 17250 if (mode == QImode) 17251 { 17252 /* Take care for QImode values - they can be in non-QI regs, 17253 but then they do cause partial register stalls. */ 17254 if (regno < 4 || TARGET_64BIT) 17255 return 1; 17256 if (!TARGET_PARTIAL_REG_STALL) 17257 return 1; 17258 return reload_in_progress || reload_completed; 17259 } 17260 /* We handle both integer and floats in the general purpose registers. */ 17261 else if (VALID_INT_MODE_P (mode)) 17262 return 1; 17263 else if (VALID_FP_MODE_P (mode)) 17264 return 1; 17265 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go 17266 on to use that value in smaller contexts, this can easily force a 17267 pseudo to be allocated to GENERAL_REGS. Since this is no worse than 17268 supporting DImode, allow it. */ 17269 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode)) 17270 return 1; 17271 17272 return 0; 17273} 17274 17275/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a 17276 tieable integer mode. 
*/ 17277 17278static bool 17279ix86_tieable_integer_mode_p (enum machine_mode mode) 17280{ 17281 switch (mode) 17282 { 17283 case HImode: 17284 case SImode: 17285 return true; 17286 17287 case QImode: 17288 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL; 17289 17290 case DImode: 17291 return TARGET_64BIT; 17292 17293 default: 17294 return false; 17295 } 17296} 17297 17298/* Return true if MODE1 is accessible in a register that can hold MODE2 17299 without copying. That is, all register classes that can hold MODE2 17300 can also hold MODE1. */ 17301 17302bool 17303ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) 17304{ 17305 if (mode1 == mode2) 17306 return true; 17307 17308 if (ix86_tieable_integer_mode_p (mode1) 17309 && ix86_tieable_integer_mode_p (mode2)) 17310 return true; 17311 17312 /* MODE2 being XFmode implies fp stack or general regs, which means we 17313 can tie any smaller floating point modes to it. Note that we do not 17314 tie this with TFmode. */ 17315 if (mode2 == XFmode) 17316 return mode1 == SFmode || mode1 == DFmode; 17317 17318 /* MODE2 being DFmode implies fp stack, general or sse regs, which means 17319 that we can tie it with SFmode. */ 17320 if (mode2 == DFmode) 17321 return mode1 == SFmode; 17322 17323 /* If MODE2 is only appropriate for an SSE register, then tie with 17324 any other mode acceptable to SSE registers. */ 17325 if (GET_MODE_SIZE (mode2) >= 8 17326 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) 17327 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1); 17328 17329 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie 17330 with any other mode acceptable to MMX registers. */ 17331 if (GET_MODE_SIZE (mode2) == 8 17332 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2)) 17333 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1); 17334 17335 return false; 17336} 17337 17338/* Return the cost of moving data of mode M between a 17339 register and memory. 
A value of 2 is the default; this cost is 17340 relative to those in `REGISTER_MOVE_COST'. 17341 17342 If moving between registers and memory is more expensive than 17343 between two registers, you should define this macro to express the 17344 relative cost. 17345 17346 Model also increased moving costs of QImode registers in non 17347 Q_REGS classes. 17348 */ 17349int 17350ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in) 17351{ 17352 if (FLOAT_CLASS_P (class)) 17353 { 17354 int index; 17355 switch (mode) 17356 { 17357 case SFmode: 17358 index = 0; 17359 break; 17360 case DFmode: 17361 index = 1; 17362 break; 17363 case XFmode: 17364 index = 2; 17365 break; 17366 default: 17367 return 100; 17368 } 17369 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index]; 17370 } 17371 if (SSE_CLASS_P (class)) 17372 { 17373 int index; 17374 switch (GET_MODE_SIZE (mode)) 17375 { 17376 case 4: 17377 index = 0; 17378 break; 17379 case 8: 17380 index = 1; 17381 break; 17382 case 16: 17383 index = 2; 17384 break; 17385 default: 17386 return 100; 17387 } 17388 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index]; 17389 } 17390 if (MMX_CLASS_P (class)) 17391 { 17392 int index; 17393 switch (GET_MODE_SIZE (mode)) 17394 { 17395 case 4: 17396 index = 0; 17397 break; 17398 case 8: 17399 index = 1; 17400 break; 17401 default: 17402 return 100; 17403 } 17404 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index]; 17405 } 17406 switch (GET_MODE_SIZE (mode)) 17407 { 17408 case 1: 17409 if (in) 17410 return (Q_CLASS_P (class) ? ix86_cost->int_load[0] 17411 : ix86_cost->movzbl_load); 17412 else 17413 return (Q_CLASS_P (class) ? ix86_cost->int_store[0] 17414 : ix86_cost->int_store[0] + 4); 17415 break; 17416 case 2: 17417 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1]; 17418 default: 17419 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. 
*/ 17420 if (mode == TFmode) 17421 mode = XFmode; 17422 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2]) 17423 * (((int) GET_MODE_SIZE (mode) 17424 + UNITS_PER_WORD - 1) / UNITS_PER_WORD)); 17425 } 17426} 17427 17428/* Compute a (partial) cost for rtx X. Return true if the complete 17429 cost has been computed, and false if subexpressions should be 17430 scanned. In either case, *TOTAL contains the cost result. */ 17431 17432static bool 17433ix86_rtx_costs (rtx x, int code, int outer_code, int *total) 17434{ 17435 enum machine_mode mode = GET_MODE (x); 17436 17437 switch (code) 17438 { 17439 case CONST_INT: 17440 case CONST: 17441 case LABEL_REF: 17442 case SYMBOL_REF: 17443 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode)) 17444 *total = 3; 17445 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode)) 17446 *total = 2; 17447 else if (flag_pic && SYMBOLIC_CONST (x) 17448 && (!TARGET_64BIT 17449 || (!GET_CODE (x) != LABEL_REF 17450 && (GET_CODE (x) != SYMBOL_REF 17451 || !SYMBOL_REF_LOCAL_P (x))))) 17452 *total = 1; 17453 else 17454 *total = 0; 17455 return true; 17456 17457 case CONST_DOUBLE: 17458 if (mode == VOIDmode) 17459 *total = 0; 17460 else 17461 switch (standard_80387_constant_p (x)) 17462 { 17463 case 1: /* 0.0 */ 17464 *total = 1; 17465 break; 17466 default: /* Other constants */ 17467 *total = 2; 17468 break; 17469 case 0: 17470 case -1: 17471 /* Start with (MEM (SYMBOL_REF)), since that's where 17472 it'll probably end up. Add a penalty for size. */ 17473 *total = (COSTS_N_INSNS (1) 17474 + (flag_pic != 0 && !TARGET_64BIT) 17475 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2)); 17476 break; 17477 } 17478 return true; 17479 17480 case ZERO_EXTEND: 17481 /* The zero extensions is often completely free on x86_64, so make 17482 it as cheap as possible. 
*/ 17483 if (TARGET_64BIT && mode == DImode 17484 && GET_MODE (XEXP (x, 0)) == SImode) 17485 *total = 1; 17486 else if (TARGET_ZERO_EXTEND_WITH_AND) 17487 *total = ix86_cost->add; 17488 else 17489 *total = ix86_cost->movzx; 17490 return false; 17491 17492 case SIGN_EXTEND: 17493 *total = ix86_cost->movsx; 17494 return false; 17495 17496 case ASHIFT: 17497 if (GET_CODE (XEXP (x, 1)) == CONST_INT 17498 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT)) 17499 { 17500 HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 17501 if (value == 1) 17502 { 17503 *total = ix86_cost->add; 17504 return false; 17505 } 17506 if ((value == 2 || value == 3) 17507 && ix86_cost->lea <= ix86_cost->shift_const) 17508 { 17509 *total = ix86_cost->lea; 17510 return false; 17511 } 17512 } 17513 /* FALLTHRU */ 17514 17515 case ROTATE: 17516 case ASHIFTRT: 17517 case LSHIFTRT: 17518 case ROTATERT: 17519 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode) 17520 { 17521 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 17522 { 17523 if (INTVAL (XEXP (x, 1)) > 32) 17524 *total = ix86_cost->shift_const + COSTS_N_INSNS (2); 17525 else 17526 *total = ix86_cost->shift_const * 2; 17527 } 17528 else 17529 { 17530 if (GET_CODE (XEXP (x, 1)) == AND) 17531 *total = ix86_cost->shift_var * 2; 17532 else 17533 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2); 17534 } 17535 } 17536 else 17537 { 17538 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 17539 *total = ix86_cost->shift_const; 17540 else 17541 *total = ix86_cost->shift_var; 17542 } 17543 return false; 17544 17545 case MULT: 17546 if (FLOAT_MODE_P (mode)) 17547 { 17548 *total = ix86_cost->fmul; 17549 return false; 17550 } 17551 else 17552 { 17553 rtx op0 = XEXP (x, 0); 17554 rtx op1 = XEXP (x, 1); 17555 int nbits; 17556 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 17557 { 17558 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 17559 for (nbits = 0; value != 0; value &= value - 1) 17560 nbits++; 17561 } 17562 else 17563 /* This is arbitrary. 
*/ 17564 nbits = 7; 17565 17566 /* Compute costs correctly for widening multiplication. */ 17567 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND) 17568 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2 17569 == GET_MODE_SIZE (mode)) 17570 { 17571 int is_mulwiden = 0; 17572 enum machine_mode inner_mode = GET_MODE (op0); 17573 17574 if (GET_CODE (op0) == GET_CODE (op1)) 17575 is_mulwiden = 1, op1 = XEXP (op1, 0); 17576 else if (GET_CODE (op1) == CONST_INT) 17577 { 17578 if (GET_CODE (op0) == SIGN_EXTEND) 17579 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode) 17580 == INTVAL (op1); 17581 else 17582 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode)); 17583 } 17584 17585 if (is_mulwiden) 17586 op0 = XEXP (op0, 0), mode = GET_MODE (op0); 17587 } 17588 17589 *total = (ix86_cost->mult_init[MODE_INDEX (mode)] 17590 + nbits * ix86_cost->mult_bit 17591 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code)); 17592 17593 return true; 17594 } 17595 17596 case DIV: 17597 case UDIV: 17598 case MOD: 17599 case UMOD: 17600 if (FLOAT_MODE_P (mode)) 17601 *total = ix86_cost->fdiv; 17602 else 17603 *total = ix86_cost->divide[MODE_INDEX (mode)]; 17604 return false; 17605 17606 case PLUS: 17607 if (FLOAT_MODE_P (mode)) 17608 *total = ix86_cost->fadd; 17609 else if (GET_MODE_CLASS (mode) == MODE_INT 17610 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode)) 17611 { 17612 if (GET_CODE (XEXP (x, 0)) == PLUS 17613 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 17614 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT 17615 && CONSTANT_P (XEXP (x, 1))) 17616 { 17617 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); 17618 if (val == 2 || val == 4 || val == 8) 17619 { 17620 *total = ix86_cost->lea; 17621 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code); 17622 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), 17623 outer_code); 17624 *total += rtx_cost (XEXP (x, 1), outer_code); 17625 return true; 17626 } 17627 } 17628 else if 
(GET_CODE (XEXP (x, 0)) == MULT 17629 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT) 17630 { 17631 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); 17632 if (val == 2 || val == 4 || val == 8) 17633 { 17634 *total = ix86_cost->lea; 17635 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code); 17636 *total += rtx_cost (XEXP (x, 1), outer_code); 17637 return true; 17638 } 17639 } 17640 else if (GET_CODE (XEXP (x, 0)) == PLUS) 17641 { 17642 *total = ix86_cost->lea; 17643 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code); 17644 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code); 17645 *total += rtx_cost (XEXP (x, 1), outer_code); 17646 return true; 17647 } 17648 } 17649 /* FALLTHRU */ 17650 17651 case MINUS: 17652 if (FLOAT_MODE_P (mode)) 17653 { 17654 *total = ix86_cost->fadd; 17655 return false; 17656 } 17657 /* FALLTHRU */ 17658 17659 case AND: 17660 case IOR: 17661 case XOR: 17662 if (!TARGET_64BIT && mode == DImode) 17663 { 17664 *total = (ix86_cost->add * 2 17665 + (rtx_cost (XEXP (x, 0), outer_code) 17666 << (GET_MODE (XEXP (x, 0)) != DImode)) 17667 + (rtx_cost (XEXP (x, 1), outer_code) 17668 << (GET_MODE (XEXP (x, 1)) != DImode))); 17669 return true; 17670 } 17671 /* FALLTHRU */ 17672 17673 case NEG: 17674 if (FLOAT_MODE_P (mode)) 17675 { 17676 *total = ix86_cost->fchs; 17677 return false; 17678 } 17679 /* FALLTHRU */ 17680 17681 case NOT: 17682 if (!TARGET_64BIT && mode == DImode) 17683 *total = ix86_cost->add * 2; 17684 else 17685 *total = ix86_cost->add; 17686 return false; 17687 17688 case COMPARE: 17689 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT 17690 && XEXP (XEXP (x, 0), 1) == const1_rtx 17691 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT 17692 && XEXP (x, 1) == const0_rtx) 17693 { 17694 /* This kind of construct is implemented using test[bwl]. 17695 Treat it as if we had an AND. 
*/ 17696 *total = (ix86_cost->add 17697 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code) 17698 + rtx_cost (const1_rtx, outer_code)); 17699 return true; 17700 } 17701 return false; 17702 17703 case FLOAT_EXTEND: 17704 if (!TARGET_SSE_MATH 17705 || mode == XFmode 17706 || (mode == DFmode && !TARGET_SSE2)) 17707 /* For standard 80387 constants, raise the cost to prevent 17708 compress_float_constant() to generate load from memory. */ 17709 switch (standard_80387_constant_p (XEXP (x, 0))) 17710 { 17711 case -1: 17712 case 0: 17713 *total = 0; 17714 break; 17715 case 1: /* 0.0 */ 17716 *total = 1; 17717 break; 17718 default: 17719 *total = (x86_ext_80387_constants & TUNEMASK 17720 || optimize_size 17721 ? 1 : 0); 17722 } 17723 return false; 17724 17725 case ABS: 17726 if (FLOAT_MODE_P (mode)) 17727 *total = ix86_cost->fabs; 17728 return false; 17729 17730 case SQRT: 17731 if (FLOAT_MODE_P (mode)) 17732 *total = ix86_cost->fsqrt; 17733 return false; 17734 17735 case UNSPEC: 17736 if (XINT (x, 1) == UNSPEC_TP) 17737 *total = 0; 17738 return false; 17739 17740 default: 17741 return false; 17742 } 17743} 17744 17745#if TARGET_MACHO 17746 17747static int current_machopic_label_num; 17748 17749/* Given a symbol name and its associated stub, write out the 17750 definition of the stub. */ 17751 17752void 17753machopic_output_stub (FILE *file, const char *symb, const char *stub) 17754{ 17755 unsigned int length; 17756 char *binder_name, *symbol_name, lazy_ptr_name[32]; 17757 int label = ++current_machopic_label_num; 17758 17759 /* For 64-bit we shouldn't get here. */ 17760 gcc_assert (!TARGET_64BIT); 17761 17762 /* Lose our funky encoding stuff so it doesn't contaminate the stub. 
*/ 17763 symb = (*targetm.strip_name_encoding) (symb); 17764 17765 length = strlen (stub); 17766 binder_name = alloca (length + 32); 17767 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length); 17768 17769 length = strlen (symb); 17770 symbol_name = alloca (length + 32); 17771 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); 17772 17773 sprintf (lazy_ptr_name, "L%d$lz", label); 17774 17775 if (MACHOPIC_PURE) 17776 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]); 17777 else 17778 switch_to_section (darwin_sections[machopic_symbol_stub_section]); 17779 17780 fprintf (file, "%s:\n", stub); 17781 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 17782 17783 if (MACHOPIC_PURE) 17784 { 17785 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label); 17786 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label); 17787 fprintf (file, "\tjmp\t*%%edx\n"); 17788 } 17789 else 17790 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name); 17791 17792 fprintf (file, "%s:\n", binder_name); 17793 17794 if (MACHOPIC_PURE) 17795 { 17796 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label); 17797 fprintf (file, "\tpushl\t%%eax\n"); 17798 } 17799 else 17800 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name); 17801 17802 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n"); 17803 17804 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]); 17805 fprintf (file, "%s:\n", lazy_ptr_name); 17806 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 17807 fprintf (file, "\t.long %s\n", binder_name); 17808} 17809 17810void 17811darwin_x86_file_end (void) 17812{ 17813 darwin_file_end (); 17814 ix86_file_end (); 17815} 17816#endif /* TARGET_MACHO */ 17817 17818/* Order the registers for register allocator. */ 17819 17820void 17821x86_order_regs_for_local_alloc (void) 17822{ 17823 int pos = 0; 17824 int i; 17825 17826 /* First allocate the local general purpose registers. 
*/ 17827 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 17828 if (GENERAL_REGNO_P (i) && call_used_regs[i]) 17829 reg_alloc_order [pos++] = i; 17830 17831 /* Global general purpose registers. */ 17832 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 17833 if (GENERAL_REGNO_P (i) && !call_used_regs[i]) 17834 reg_alloc_order [pos++] = i; 17835 17836 /* x87 registers come first in case we are doing FP math 17837 using them. */ 17838 if (!TARGET_SSE_MATH) 17839 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) 17840 reg_alloc_order [pos++] = i; 17841 17842 /* SSE registers. */ 17843 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) 17844 reg_alloc_order [pos++] = i; 17845 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) 17846 reg_alloc_order [pos++] = i; 17847 17848 /* x87 registers. */ 17849 if (TARGET_SSE_MATH) 17850 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) 17851 reg_alloc_order [pos++] = i; 17852 17853 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) 17854 reg_alloc_order [pos++] = i; 17855 17856 /* Initialize the rest of array as we do not allocate some registers 17857 at all. */ 17858 while (pos < FIRST_PSEUDO_REGISTER) 17859 reg_alloc_order [pos++] = 0; 17860} 17861 17862/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in 17863 struct attribute_spec.handler. 
*/ 17864static tree 17865ix86_handle_struct_attribute (tree *node, tree name, 17866 tree args ATTRIBUTE_UNUSED, 17867 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 17868{ 17869 tree *type = NULL; 17870 if (DECL_P (*node)) 17871 { 17872 if (TREE_CODE (*node) == TYPE_DECL) 17873 type = &TREE_TYPE (*node); 17874 } 17875 else 17876 type = node; 17877 17878 if (!(type && (TREE_CODE (*type) == RECORD_TYPE 17879 || TREE_CODE (*type) == UNION_TYPE))) 17880 { 17881 warning (OPT_Wattributes, "%qs attribute ignored", 17882 IDENTIFIER_POINTER (name)); 17883 *no_add_attrs = true; 17884 } 17885 17886 else if ((is_attribute_p ("ms_struct", name) 17887 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) 17888 || ((is_attribute_p ("gcc_struct", name) 17889 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) 17890 { 17891 warning (OPT_Wattributes, "%qs incompatible attribute ignored", 17892 IDENTIFIER_POINTER (name)); 17893 *no_add_attrs = true; 17894 } 17895 17896 return NULL_TREE; 17897} 17898 17899static bool 17900ix86_ms_bitfield_layout_p (tree record_type) 17901{ 17902 return (TARGET_MS_BITFIELD_LAYOUT && 17903 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) 17904 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)); 17905} 17906 17907/* Returns an expression indicating where the this parameter is 17908 located on entry to the FUNCTION. */ 17909 17910static rtx 17911x86_this_parameter (tree function) 17912{ 17913 tree type = TREE_TYPE (function); 17914 17915 if (TARGET_64BIT) 17916 { 17917 int n = aggregate_value_p (TREE_TYPE (type), type) != 0; 17918 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]); 17919 } 17920 17921 if (ix86_function_regparm (type, function) > 0) 17922 { 17923 tree parm; 17924 17925 parm = TYPE_ARG_TYPES (type); 17926 /* Figure out whether or not the function has a variable number of 17927 arguments. 
*/ 17928 for (; parm; parm = TREE_CHAIN (parm)) 17929 if (TREE_VALUE (parm) == void_type_node) 17930 break; 17931 /* If not, the this parameter is in the first argument. */ 17932 if (parm) 17933 { 17934 int regno = 0; 17935 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) 17936 regno = 2; 17937 return gen_rtx_REG (SImode, regno); 17938 } 17939 } 17940 17941 if (aggregate_value_p (TREE_TYPE (type), type)) 17942 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8)); 17943 else 17944 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4)); 17945} 17946 17947/* Determine whether x86_output_mi_thunk can succeed. */ 17948 17949static bool 17950x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED, 17951 HOST_WIDE_INT delta ATTRIBUTE_UNUSED, 17952 HOST_WIDE_INT vcall_offset, tree function) 17953{ 17954 /* 64-bit can handle anything. */ 17955 if (TARGET_64BIT) 17956 return true; 17957 17958 /* For 32-bit, everything's fine if we have one free register. */ 17959 if (ix86_function_regparm (TREE_TYPE (function), function) < 3) 17960 return true; 17961 17962 /* Need a free register for vcall_offset. */ 17963 if (vcall_offset) 17964 return false; 17965 17966 /* Need a free register for GOT references. */ 17967 if (flag_pic && !(*targetm.binds_local_p) (function)) 17968 return false; 17969 17970 /* Otherwise ok. */ 17971 return true; 17972} 17973 17974/* Output the assembler code for a thunk function. THUNK_DECL is the 17975 declaration for the thunk function itself, FUNCTION is the decl for 17976 the target function. DELTA is an immediate constant offset to be 17977 added to THIS. If VCALL_OFFSET is nonzero, the word at 17978 *(*this + vcall_offset) should be added to THIS. 
*/

static void
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (xops[0], DImode))
	    {
	      /* DELTA does not fit a sign-extended 32-bit immediate;
		 materialize it in R10 first.  */
	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
	      xops[1] = tmp;
	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
	      xops[0] = tmp;
	      xops[1] = this;
	    }
	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
	}
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
	{
	  /* Pick a scratch register that is not carrying an argument:
	     ECX normally, EAX for fastcall (where ECX holds "this").  */
	  int tmp_regno = 2 /* ECX */;
	  if (lookup_attribute ("fastcall",
				TYPE_ATTRIBUTES (TREE_TYPE (function))))
	    tmp_regno = 0 /* EAX */;
	  tmp = gen_rtx_REG (SImode, tmp_regno);
	}

      /* Load the vtable pointer (*this) into TMP.  */
      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
	{
	  /* VCALL_OFFSET overflowed the displacement field: build the
	     address in R11 instead.  */
	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	  xops[0] = GEN_INT (vcall_offset);
	  xops[1] = tmp2;
	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
	}
      xops[1] = this_reg;
      if (TARGET_64BIT)
	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Tail-jump to the real target function.  */
  xops[0] = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
	{
	  /* Non-local PIC target: jump through its GOTPCREL slot.  */
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  tmp = gen_rtx_MEM (QImode, tmp);
	  xops[0] = tmp;
	  output_asm_insn ("jmp\t%A0", xops);
	}
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
	if (TARGET_MACHO)
	  {
	    rtx sym_ref = XEXP (DECL_RTL (function), 0);
	    tmp = (gen_rtx_SYMBOL_REF
		   (Pmode,
		    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
	    tmp = gen_rtx_MEM (QImode, tmp);
	    xops[0] = tmp;
	    output_asm_insn ("jmp\t%0", xops);
	  }
	else
#endif /* TARGET_MACHO */
	  {
	    /* 32-bit non-local PIC: load the GOT base into ECX and jump
	       through the function's GOT entry.  */
	    tmp = gen_rtx_REG (SImode, 2 /* ECX */);
	    output_set_got (tmp, NULL_RTX);

	    xops[1] = tmp;
	    output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
	    output_asm_insn ("jmp\t{*}%1", xops);
	  }
    }
}

/* Emit target-specific directives at the start of the assembly file.  */
static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax\n", asm_out_file);
}

/* Adjust field alignment on ia32: cap double/double-complex and
   integer-class fields at 32 bits unless -malign-double is given.
   COMPUTED is the alignment determined so far, in bits.  */
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
		    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}

/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    if (flag_pic)
      {
#ifndef NO_PROFILE_COUNTERS
	fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
#ifndef NO_PROFILE_COUNTERS
	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
	       PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}

/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emitted and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (GET_CODE (insn) == JUMP_INSN
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (GET_CODE (insn) == CALL_INSN
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  if (get_attr_length (insn) <= 1)
    return 1;

  /* For normal instructions we may rely on the sizes of addresses
     and the presence of symbol to require 4 bytes of encoding.
     This is not the case for jumps where references are PC relative.  */
  if (GET_CODE (insn) != JUMP_INSN)
    {
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  /* One byte for the opcode plus the address, or 2 bytes minimum.  */
  if (l)
    return 1+l;
  else
    return 2;
}

/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
     NOTE(review): the code below computes 15 - NBYTES + sizeof (INSN);
     confirm which constant matches the intent.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {

      nbytes += min_insn_size (insn);
      if (dump_file)
        fprintf(dump_file, "Insn %i estimated to %i bytes\n",
		INSN_UID (insn), min_insn_size (insn));
      if ((GET_CODE (insn) == JUMP_INSN
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || GET_CODE (insn) == CALL_INSN)
	njumps++;
      else
	continue;

      /* Shrink the interval from the front until it holds at most
	 four branch/call insns.  */
      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((GET_CODE (start) == JUMP_INSN
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || GET_CODE (start) == CALL_INSN)
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
	}
    }
}

/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.
*/
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  /* Walk every block that exits the function and inspect its RET.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
	  || !maybe_hot_bb_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
	  break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
	{
	  edge e;
	  edge_iterator ei;

	  /* The RET is a branch target: pad when any predecessor reaches
	     it via a taken (non-fallthru) edge.  */
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      replace = true;
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
		  || GET_CODE (prev) == CALL_INSN))
	    replace = true;
	  /* Empty functions get branch mispredict even when the jump destination
	     is not visible to us.  */
	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
	    replace = true;
	}
      if (replace)
	{
	  /* Swap the short RET for the two-byte encoding (REP RET).  */
	  emit_insn_before (gen_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}

/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  if (TARGET_PAD_RETURNS && optimize && !optimize_size)
    ix86_pad_returns ();
  if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
    ix86_avoid_jump_misspredicts ();
}

/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  /* Register numbers 4 and up are QImode registers needing a REX prefix
     when accessed as a byte (SPL/BPL/SIL/DIL and the R8-R15 family).  */
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
	&& REGNO (recog_data.operand[i]) >= 4)
      return true;
  return false;
}

/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}

/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  /* Input with the sign bit clear: a plain signed conversion is exact.  */
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* Sign bit set: halve the value (ORing the shifted-out low bit back in
     to preserve rounding), convert, then double the result.  */
  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.
*/

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  enum machine_mode smode, wsmode, wvmode;
  rtx x;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These modes have a direct VEC_DUPLICATE pattern.  */
      val = force_reg (GET_MODE_INNER (mode), val);
      x = gen_rtx_VEC_DUPLICATE (mode, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      else
	{
	  smode = HImode;
	  wsmode = SImode;
	  wvmode = V2SImode;
	  goto widen;
	}

    case V8QImode:
      if (!mmx_ok)
	return false;
      smode = QImode;
      wsmode = HImode;
      wvmode = V4HImode;
      goto widen;
    case V8HImode:
      if (TARGET_SSE2)
	{
	  rtx tmp1, tmp2;
	  /* Extend HImode to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
	  /* Insert the SImode value as low element of V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
				    CONST0_RTX (V4SImode),
				    const1_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
	  /* Cast the V4SImode vector back to a V8HImode vector.  */
	  tmp1 = gen_reg_rtx (V8HImode);
	  emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
	  /* Duplicate the low short through the whole low SImode word.  */
	  emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
	  /* Cast the V8HImode vector back to a V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
	  /* Replicate the low element of the V4SImode vector.  */
	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
	  /* Cast the V4SImode back to V8HImode, and store in target.  */
	  emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
	  return true;
	}
      smode = HImode;
      wsmode = SImode;
      wvmode = V4SImode;
      goto widen;
    case V16QImode:
      if (TARGET_SSE2)
	{
	  rtx tmp1, tmp2;
	  /* Extend QImode to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
	  /* Insert the SImode value as low element of V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
				    CONST0_RTX (V4SImode),
				    const1_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
	  /* Cast the V4SImode vector back to a V16QImode vector.  */
	  tmp1 = gen_reg_rtx (V16QImode);
	  emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
	  /* Duplicate the low byte through the whole low SImode word.  */
	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
	  /* Cast the V16QImode vector back to a V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
	  /* Replicate the low element of the V4SImode vector.  */
	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
	  /* Cast the V4SImode back to V16QImode, and store in target.  */
	  emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
	  return true;
	}
      smode = QImode;
      wsmode = HImode;
      wvmode = V8HImode;
      goto widen;
    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
			       GEN_INT (GET_MODE_BITSIZE (smode)),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
	gcc_unreachable ();
      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
	return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      /* Work in a fresh pseudo when TARGET is not a pseudo register,
	 since we shuffle it in place below.  */
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
	new_target = gen_reg_rtx (mode);
      else
	new_target = target;
      /* Broadcast VAR and merge with zero, leaving VAR in element 0.  */
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
	{
	  /* We need to shuffle the value to the correct position, so
	     create a new pseudo to store the intermediate result.  */

	  /* With SSE2, we can use the integer shuffle insns.  */
	  if (mode != V4SFmode && TARGET_SSE2)
	    {
	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
					    GEN_INT (1),
					    GEN_INT (one_var == 1 ? 0 : 1),
					    GEN_INT (one_var == 2 ? 0 : 1),
					    GEN_INT (one_var == 3 ? 0 : 1)));
	      if (target != new_target)
		emit_move_insn (target, new_target);
	      return true;
	    }

	  /* Otherwise convert the intermediate result to V4SFmode and
	     use the SSE1 shuffle instructions.  */
	  if (mode != V4SFmode)
	    {
	      tmp = gen_reg_rtx (V4SFmode);
	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
	    }
	  else
	    tmp = new_target;

	  emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
				       GEN_INT (1),
				       GEN_INT (one_var == 1 ? 0 : 1),
				       GEN_INT (one_var == 2 ? 0+4 : 1+4),
				       GEN_INT (one_var == 3 ? 0+4 : 1+4)));

	  if (mode != V4SFmode)
	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
	  else if (tmp != target)
	    emit_move_insn (target, tmp);
	}
      else if (target != new_target)
	emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
	return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
	return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
						var, one_var))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  /* Build the pool constant: VALS with a zero in the variable slot.  */
  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  /* The variable byte is the high half of the HImode pair.  */
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  /* The variable byte is the low half; shift the constant up.  */
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  /* Load the constant vector from the pool, then overwrite the one
     variable element.  */
  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}

/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
{
  enum machine_mode half_mode = GET_MODE_INNER (mode);
  rtx op0 = NULL, op1 = NULL;
  bool use_vec_concat = false;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      /* For the two element vectors, we always implement VEC_CONCAT.  */
      op0 = XVECEXP (vals, 0, 0);
      op1 = XVECEXP (vals, 0, 1);
      use_vec_concat = true;
      break;

    case V4SFmode:
      half_mode = V2SFmode;
      goto half;
    case V4SImode:
      half_mode = V2SImode;
      goto half;
    half:
      {
	rtvec v;

	/* For V4SF and V4SI, we implement a concat of two V2 vectors.
	   Recurse to load the two halves.  */

	op0 = gen_reg_rtx (half_mode);
	v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
	ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

	op1 = gen_reg_rtx (half_mode);
	v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
	ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));

	use_vec_concat = true;
      }
      break;

    case V8HImode:
    case V16QImode:
    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

  if (use_vec_concat)
    {
      if (!register_operand (op0, half_mode))
	op0 = force_reg (half_mode, op0);
      if (!register_operand (op1, half_mode))
	op1 = force_reg (half_mode, op1);

      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
    }
  else
    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      /* Pack the elements into integer words, highest element of each
	 word shifted in first.  */
      for (i = 0; i < n_words; ++i)
	{
	  rtx word = NULL_RTX;

	  for (j = 0; j < n_elt_per_word; ++j)
	    {
	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	      elt = convert_modes (word_mode, inner_mode, elt, true);

	      if (j == 0)
		word = elt;
	      else
		{
		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					      word, 1, OPTAB_LIB_WIDEN);
		  word = expand_simple_binop (word_mode, IOR, word, elt,
					      word, 1, OPTAB_LIB_WIDEN);
		}
	    }

	  words[i] = word;
	}

      /* Assemble the packed words into the vector register.  */
      if (n_words == 1)
	emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
	{
	  rtx tmp = gen_reg_rtx (mode);
	  /* The CLOBBER tells the register allocator the subsequent
	     partial writes cover the whole register.  */
	  emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	  emit_move_insn (target, tmp);
	}
      else if (n_words == 4)
	{
	  rtx tmp = gen_reg_rtx (V4SImode);
	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	  emit_move_insn (target, gen_lowpart (mode, tmp));
	}
      else
	gcc_unreachable ();
    }
}

/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  /* Classify the elements: count non-constants (remembering the last
     one seen) and track the all-zero / all-identical properties.  */
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, one_var),
						  one_var))
	return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
	return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}

/* Store scalar VAL into element ELT of vector TARGET.  Suppress MMX
   instructions unless MMX_OK is true.  */
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;
  rtx tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  /* Extract the element being kept and re-concatenate it with
	     the new value in the right order.  */
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt == 0)
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DFmode:
    case V2DImode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_sse_unpcklps (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (1), GEN_INT (0),
				       GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B C X */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;

    case V4SImode:
      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.
*/ 19041 19042 rtx order[4]; 19043 19044 order[0] = GEN_INT (elt); 19045 order[1] = const1_rtx; 19046 order[2] = const2_rtx; 19047 order[3] = GEN_INT (3); 19048 order[elt] = const0_rtx; 19049 19050 emit_insn (gen_sse2_pshufd_1 (target, target, order[0], 19051 order[1], order[2], order[3])); 19052 19053 ix86_expand_vector_set (false, target, val, 0); 19054 19055 emit_insn (gen_sse2_pshufd_1 (target, target, order[0], 19056 order[1], order[2], order[3])); 19057 } 19058 else 19059 { 19060 /* For SSE1, we have to reuse the V4SF code. */ 19061 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target), 19062 gen_lowpart (SFmode, val), elt); 19063 } 19064 return; 19065 19066 case V8HImode: 19067 use_vec_merge = TARGET_SSE2; 19068 break; 19069 case V4HImode: 19070 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); 19071 break; 19072 19073 case V16QImode: 19074 case V8QImode: 19075 default: 19076 break; 19077 } 19078 19079 if (use_vec_merge) 19080 { 19081 tmp = gen_rtx_VEC_DUPLICATE (mode, val); 19082 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt)); 19083 emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); 19084 } 19085 else 19086 { 19087 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false); 19088 19089 emit_move_insn (mem, target); 19090 19091 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode)); 19092 emit_move_insn (tmp, val); 19093 19094 emit_move_insn (target, mem); 19095 } 19096} 19097 19098void 19099ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) 19100{ 19101 enum machine_mode mode = GET_MODE (vec); 19102 enum machine_mode inner_mode = GET_MODE_INNER (mode); 19103 bool use_vec_extr = false; 19104 rtx tmp; 19105 19106 switch (mode) 19107 { 19108 case V2SImode: 19109 case V2SFmode: 19110 if (!mmx_ok) 19111 break; 19112 /* FALLTHRU */ 19113 19114 case V2DFmode: 19115 case V2DImode: 19116 use_vec_extr = true; 19117 break; 19118 19119 case V4SFmode: 19120 switch (elt) 19121 { 19122 case 0: 19123 
	  tmp = vec;
	  break;

	case 1:
	case 3:
	  /* Broadcast element ELT to every slot with shufps, then
	     extract slot 0 below.  */
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
				       GEN_INT (elt), GEN_INT (elt),
				       GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;

	case 2:
	  /* unpckhps moves the high pair down; element 2 lands in
	     slot 0.  */
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));
	  break;

	default:
	  gcc_unreachable ();
	}
      /* The shuffles above moved the wanted element into slot 0.  */
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      if (TARGET_SSE2)
	{
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;

	    case 1:
	    case 3:
	      /* Broadcast element ELT with pshufd, then extract
		 slot 0 below.  */
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;

	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      /* Fallback: spill VEC to a stack temporary and load the one
	 element from memory.  */
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}

/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
   pattern to reduce; DEST is the destination; IN is the input vector.
   Uses movhlps to fold the high half onto the low half, applies FN,
   then folds the remaining pair the same way.  */

void
ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx tmp1, tmp2, tmp3;

  tmp1 = gen_reg_rtx (V4SFmode);
  tmp2 = gen_reg_rtx (V4SFmode);
  tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_sse_movhlps (tmp1, in, in));
  emit_insn (fn (tmp2, tmp1, in));

  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
			       GEN_INT (1), GEN_INT (1),
			       GEN_INT (1+4), GEN_INT (1+4)));
  emit_insn (fn (dest, tmp2, tmp3));
}

/* Target hook for scalar_mode_supported_p.  Accepts the decimal float
   modes in addition to the default set.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Implements target hook vector_mode_supported_p.  A vector mode is
   supported when the corresponding ISA extension is enabled.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}

/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

/* Every asm statement implicitly clobbers the flags, the FP status
   word, and the direction flag; add those three registers to the
   user-supplied CLOBBERS list.  */
static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
		      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
			clobbers);
  return clobbers;
}

/* Return true if EXP goes in large data/bss (the .ldata/.lbss sections
   used by the medium code model).  */

static bool
ix86_in_large_data_p (tree exp)
{
  /* Only the medium code models place anything in large data.  */
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      /* An explicit section attribute decides directly.  */
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}

/* Target hook for encode_section_info: mark large-data variables with
   SYMBOL_FLAG_FAR_ADDR on top of the default processing.  */
static void
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}

/* Worker function for REVERSE_CONDITION.  FP comparisons need the
   maybe-unordered reversal; integer CC modes use the plain one.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}

/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  Uses a popping store (fstp / ffreep) when the
   source register dies in this insn.  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    {
      if (REGNO (operands[0]) == FIRST_STACK_REG)
	return output_387_ffreep (operands, 0);
      return "fstp\t%y0";
    }
  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";
  return "fst\t%y0";
}

/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  /* Read the FP status word into REG.  */
  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
      /* sahf copies the status into EFLAGS; test for unordered.  */
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      /* Otherwise test the C2 bit (0x04) of the status word directly.  */
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}

/* Output code to perform a log1p XFmode calculation.
   Computes OP0 = log(1 + OP1) on the x87, choosing between fyl2xp1
   (accurate for small |OP1|) and fyl2x on 1 + OP1 otherwise.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  /* The threshold constant is 1 - sqrt(2)/2; presumably the |x| range
     within which fyl2xp1 is specified to be accurate.  */
  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  /* |OP1| small: OP0 = fldln2 * log2(1 + OP1) via fyl2xp1.  */
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  /* |OP1| large: OP0 = fldln2 * log2(OP1 + 1) via fyl2x.  */
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}

/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}

/* Return the mangling of TYPE if it is an extended fundamental type,
   or NULL to use the default mangling.  */

static const char *
ix86_mangle_fundamental_type (tree type)
{
  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}

/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}

/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      /* PIC: pc-relative encodings; 4-byte data unless the 64-bit
	 large-ish models need 8 bytes.  */
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}

#include "gt-i386.h"