1/* Subroutines used for code generation on IA-32. 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify 8it under the terms of the GNU General Public License as published by 9the Free Software Foundation; either version 2, or (at your option) 10any later version. 11 12GCC is distributed in the hope that it will be useful, 13but WITHOUT ANY WARRANTY; without even the implied warranty of 14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15GNU General Public License for more details. 16 17You should have received a copy of the GNU General Public License 18along with GCC; see the file COPYING. If not, write to 19the Free Software Foundation, 51 Franklin Street, Fifth Floor, 20Boston, MA 02110-1301, USA. */ 21 22/* $FreeBSD$ */ 23 24#include "config.h" 25#include "system.h" 26#include "coretypes.h" 27#include "tm.h" 28#include "rtl.h" 29#include "tree.h" 30#include "tm_p.h" 31#include "regs.h" 32#include "hard-reg-set.h" 33#include "real.h" 34#include "insn-config.h" 35#include "conditions.h" 36#include "output.h" 37#include "insn-codes.h" 38#include "insn-attr.h" 39#include "flags.h" 40#include "except.h" 41#include "function.h" 42#include "recog.h" 43#include "expr.h" 44#include "optabs.h" 45#include "toplev.h" 46#include "basic-block.h" 47#include "ggc.h" 48#include "target.h" 49#include "target-def.h" 50#include "langhooks.h" 51#include "cgraph.h" 52#include "tree-gimple.h" 53#include "dwarf2.h" 54#include "tm-constrs.h" 55 56#ifndef CHECK_STACK_LIMIT 57#define CHECK_STACK_LIMIT (-1) 58#endif 59 60/* Return index of given mode in mult and division cost tables. */ 61#define MODE_INDEX(mode) \ 62 ((mode) == QImode ? 0 \ 63 : (mode) == HImode ? 1 \ 64 : (mode) == SImode ? 2 \ 65 : (mode) == DImode ? 
3 \ 66 : 4) 67 68/* Processor costs (relative to an add) */ 69/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */ 70#define COSTS_N_BYTES(N) ((N) * 2) 71 72static const 73struct processor_costs size_cost = { /* costs for tuning for size */ 74 COSTS_N_BYTES (2), /* cost of an add instruction */ 75 COSTS_N_BYTES (3), /* cost of a lea instruction */ 76 COSTS_N_BYTES (2), /* variable shift costs */ 77 COSTS_N_BYTES (3), /* constant shift costs */ 78 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */ 79 COSTS_N_BYTES (3), /* HI */ 80 COSTS_N_BYTES (3), /* SI */ 81 COSTS_N_BYTES (3), /* DI */ 82 COSTS_N_BYTES (5)}, /* other */ 83 0, /* cost of multiply per each bit set */ 84 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */ 85 COSTS_N_BYTES (3), /* HI */ 86 COSTS_N_BYTES (3), /* SI */ 87 COSTS_N_BYTES (3), /* DI */ 88 COSTS_N_BYTES (5)}, /* other */ 89 COSTS_N_BYTES (3), /* cost of movsx */ 90 COSTS_N_BYTES (3), /* cost of movzx */ 91 0, /* "large" insn */ 92 2, /* MOVE_RATIO */ 93 2, /* cost for loading QImode using movzbl */ 94 {2, 2, 2}, /* cost of loading integer registers 95 in QImode, HImode and SImode. 96 Relative to reg-reg move (2). 
*/ 97 {2, 2, 2}, /* cost of storing integer registers */ 98 2, /* cost of reg,reg fld/fst */ 99 {2, 2, 2}, /* cost of loading fp registers 100 in SFmode, DFmode and XFmode */ 101 {2, 2, 2}, /* cost of storing fp registers 102 in SFmode, DFmode and XFmode */ 103 3, /* cost of moving MMX register */ 104 {3, 3}, /* cost of loading MMX registers 105 in SImode and DImode */ 106 {3, 3}, /* cost of storing MMX registers 107 in SImode and DImode */ 108 3, /* cost of moving SSE register */ 109 {3, 3, 3}, /* cost of loading SSE registers 110 in SImode, DImode and TImode */ 111 {3, 3, 3}, /* cost of storing SSE registers 112 in SImode, DImode and TImode */ 113 3, /* MMX or SSE register to integer */ 114 0, /* size of prefetch block */ 115 0, /* number of parallel prefetches */ 116 2, /* Branch cost */ 117 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */ 118 COSTS_N_BYTES (2), /* cost of FMUL instruction. */ 119 COSTS_N_BYTES (2), /* cost of FDIV instruction. */ 120 COSTS_N_BYTES (2), /* cost of FABS instruction. */ 121 COSTS_N_BYTES (2), /* cost of FCHS instruction. */ 122 COSTS_N_BYTES (2), /* cost of FSQRT instruction. 
*/ 123}; 124 125/* Processor costs (relative to an add) */ 126static const 127struct processor_costs i386_cost = { /* 386 specific costs */ 128 COSTS_N_INSNS (1), /* cost of an add instruction */ 129 COSTS_N_INSNS (1), /* cost of a lea instruction */ 130 COSTS_N_INSNS (3), /* variable shift costs */ 131 COSTS_N_INSNS (2), /* constant shift costs */ 132 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */ 133 COSTS_N_INSNS (6), /* HI */ 134 COSTS_N_INSNS (6), /* SI */ 135 COSTS_N_INSNS (6), /* DI */ 136 COSTS_N_INSNS (6)}, /* other */ 137 COSTS_N_INSNS (1), /* cost of multiply per each bit set */ 138 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */ 139 COSTS_N_INSNS (23), /* HI */ 140 COSTS_N_INSNS (23), /* SI */ 141 COSTS_N_INSNS (23), /* DI */ 142 COSTS_N_INSNS (23)}, /* other */ 143 COSTS_N_INSNS (3), /* cost of movsx */ 144 COSTS_N_INSNS (2), /* cost of movzx */ 145 15, /* "large" insn */ 146 3, /* MOVE_RATIO */ 147 4, /* cost for loading QImode using movzbl */ 148 {2, 4, 2}, /* cost of loading integer registers 149 in QImode, HImode and SImode. 150 Relative to reg-reg move (2). */ 151 {2, 4, 2}, /* cost of storing integer registers */ 152 2, /* cost of reg,reg fld/fst */ 153 {8, 8, 8}, /* cost of loading fp registers 154 in SFmode, DFmode and XFmode */ 155 {8, 8, 8}, /* cost of storing fp registers 156 in SFmode, DFmode and XFmode */ 157 2, /* cost of moving MMX register */ 158 {4, 8}, /* cost of loading MMX registers 159 in SImode and DImode */ 160 {4, 8}, /* cost of storing MMX registers 161 in SImode and DImode */ 162 2, /* cost of moving SSE register */ 163 {4, 8, 16}, /* cost of loading SSE registers 164 in SImode, DImode and TImode */ 165 {4, 8, 16}, /* cost of storing SSE registers 166 in SImode, DImode and TImode */ 167 3, /* MMX or SSE register to integer */ 168 0, /* size of prefetch block */ 169 0, /* number of parallel prefetches */ 170 1, /* Branch cost */ 171 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. 
*/ 172 COSTS_N_INSNS (27), /* cost of FMUL instruction. */ 173 COSTS_N_INSNS (88), /* cost of FDIV instruction. */ 174 COSTS_N_INSNS (22), /* cost of FABS instruction. */ 175 COSTS_N_INSNS (24), /* cost of FCHS instruction. */ 176 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */ 177}; 178 179static const 180struct processor_costs i486_cost = { /* 486 specific costs */ 181 COSTS_N_INSNS (1), /* cost of an add instruction */ 182 COSTS_N_INSNS (1), /* cost of a lea instruction */ 183 COSTS_N_INSNS (3), /* variable shift costs */ 184 COSTS_N_INSNS (2), /* constant shift costs */ 185 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */ 186 COSTS_N_INSNS (12), /* HI */ 187 COSTS_N_INSNS (12), /* SI */ 188 COSTS_N_INSNS (12), /* DI */ 189 COSTS_N_INSNS (12)}, /* other */ 190 1, /* cost of multiply per each bit set */ 191 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */ 192 COSTS_N_INSNS (40), /* HI */ 193 COSTS_N_INSNS (40), /* SI */ 194 COSTS_N_INSNS (40), /* DI */ 195 COSTS_N_INSNS (40)}, /* other */ 196 COSTS_N_INSNS (3), /* cost of movsx */ 197 COSTS_N_INSNS (2), /* cost of movzx */ 198 15, /* "large" insn */ 199 3, /* MOVE_RATIO */ 200 4, /* cost for loading QImode using movzbl */ 201 {2, 4, 2}, /* cost of loading integer registers 202 in QImode, HImode and SImode. 203 Relative to reg-reg move (2). 
*/ 204 {2, 4, 2}, /* cost of storing integer registers */ 205 2, /* cost of reg,reg fld/fst */ 206 {8, 8, 8}, /* cost of loading fp registers 207 in SFmode, DFmode and XFmode */ 208 {8, 8, 8}, /* cost of storing fp registers 209 in SFmode, DFmode and XFmode */ 210 2, /* cost of moving MMX register */ 211 {4, 8}, /* cost of loading MMX registers 212 in SImode and DImode */ 213 {4, 8}, /* cost of storing MMX registers 214 in SImode and DImode */ 215 2, /* cost of moving SSE register */ 216 {4, 8, 16}, /* cost of loading SSE registers 217 in SImode, DImode and TImode */ 218 {4, 8, 16}, /* cost of storing SSE registers 219 in SImode, DImode and TImode */ 220 3, /* MMX or SSE register to integer */ 221 0, /* size of prefetch block */ 222 0, /* number of parallel prefetches */ 223 1, /* Branch cost */ 224 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ 225 COSTS_N_INSNS (16), /* cost of FMUL instruction. */ 226 COSTS_N_INSNS (73), /* cost of FDIV instruction. */ 227 COSTS_N_INSNS (3), /* cost of FABS instruction. */ 228 COSTS_N_INSNS (3), /* cost of FCHS instruction. */ 229 COSTS_N_INSNS (83), /* cost of FSQRT instruction. 
*/ 230}; 231 232static const 233struct processor_costs pentium_cost = { 234 COSTS_N_INSNS (1), /* cost of an add instruction */ 235 COSTS_N_INSNS (1), /* cost of a lea instruction */ 236 COSTS_N_INSNS (4), /* variable shift costs */ 237 COSTS_N_INSNS (1), /* constant shift costs */ 238 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */ 239 COSTS_N_INSNS (11), /* HI */ 240 COSTS_N_INSNS (11), /* SI */ 241 COSTS_N_INSNS (11), /* DI */ 242 COSTS_N_INSNS (11)}, /* other */ 243 0, /* cost of multiply per each bit set */ 244 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */ 245 COSTS_N_INSNS (25), /* HI */ 246 COSTS_N_INSNS (25), /* SI */ 247 COSTS_N_INSNS (25), /* DI */ 248 COSTS_N_INSNS (25)}, /* other */ 249 COSTS_N_INSNS (3), /* cost of movsx */ 250 COSTS_N_INSNS (2), /* cost of movzx */ 251 8, /* "large" insn */ 252 6, /* MOVE_RATIO */ 253 6, /* cost for loading QImode using movzbl */ 254 {2, 4, 2}, /* cost of loading integer registers 255 in QImode, HImode and SImode. 256 Relative to reg-reg move (2). */ 257 {2, 4, 2}, /* cost of storing integer registers */ 258 2, /* cost of reg,reg fld/fst */ 259 {2, 2, 6}, /* cost of loading fp registers 260 in SFmode, DFmode and XFmode */ 261 {4, 4, 6}, /* cost of storing fp registers 262 in SFmode, DFmode and XFmode */ 263 8, /* cost of moving MMX register */ 264 {8, 8}, /* cost of loading MMX registers 265 in SImode and DImode */ 266 {8, 8}, /* cost of storing MMX registers 267 in SImode and DImode */ 268 2, /* cost of moving SSE register */ 269 {4, 8, 16}, /* cost of loading SSE registers 270 in SImode, DImode and TImode */ 271 {4, 8, 16}, /* cost of storing SSE registers 272 in SImode, DImode and TImode */ 273 3, /* MMX or SSE register to integer */ 274 0, /* size of prefetch block */ 275 0, /* number of parallel prefetches */ 276 2, /* Branch cost */ 277 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ 278 COSTS_N_INSNS (3), /* cost of FMUL instruction. 
*/ 279 COSTS_N_INSNS (39), /* cost of FDIV instruction. */ 280 COSTS_N_INSNS (1), /* cost of FABS instruction. */ 281 COSTS_N_INSNS (1), /* cost of FCHS instruction. */ 282 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ 283}; 284 285static const 286struct processor_costs pentiumpro_cost = { 287 COSTS_N_INSNS (1), /* cost of an add instruction */ 288 COSTS_N_INSNS (1), /* cost of a lea instruction */ 289 COSTS_N_INSNS (1), /* variable shift costs */ 290 COSTS_N_INSNS (1), /* constant shift costs */ 291 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ 292 COSTS_N_INSNS (4), /* HI */ 293 COSTS_N_INSNS (4), /* SI */ 294 COSTS_N_INSNS (4), /* DI */ 295 COSTS_N_INSNS (4)}, /* other */ 296 0, /* cost of multiply per each bit set */ 297 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */ 298 COSTS_N_INSNS (17), /* HI */ 299 COSTS_N_INSNS (17), /* SI */ 300 COSTS_N_INSNS (17), /* DI */ 301 COSTS_N_INSNS (17)}, /* other */ 302 COSTS_N_INSNS (1), /* cost of movsx */ 303 COSTS_N_INSNS (1), /* cost of movzx */ 304 8, /* "large" insn */ 305 6, /* MOVE_RATIO */ 306 2, /* cost for loading QImode using movzbl */ 307 {4, 4, 4}, /* cost of loading integer registers 308 in QImode, HImode and SImode. 309 Relative to reg-reg move (2). 
*/ 310 {2, 2, 2}, /* cost of storing integer registers */ 311 2, /* cost of reg,reg fld/fst */ 312 {2, 2, 6}, /* cost of loading fp registers 313 in SFmode, DFmode and XFmode */ 314 {4, 4, 6}, /* cost of storing fp registers 315 in SFmode, DFmode and XFmode */ 316 2, /* cost of moving MMX register */ 317 {2, 2}, /* cost of loading MMX registers 318 in SImode and DImode */ 319 {2, 2}, /* cost of storing MMX registers 320 in SImode and DImode */ 321 2, /* cost of moving SSE register */ 322 {2, 2, 8}, /* cost of loading SSE registers 323 in SImode, DImode and TImode */ 324 {2, 2, 8}, /* cost of storing SSE registers 325 in SImode, DImode and TImode */ 326 3, /* MMX or SSE register to integer */ 327 32, /* size of prefetch block */ 328 6, /* number of parallel prefetches */ 329 2, /* Branch cost */ 330 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ 331 COSTS_N_INSNS (5), /* cost of FMUL instruction. */ 332 COSTS_N_INSNS (56), /* cost of FDIV instruction. */ 333 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 334 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 335 COSTS_N_INSNS (56), /* cost of FSQRT instruction. 
*/ 336}; 337 338static const 339struct processor_costs geode_cost = { 340 COSTS_N_INSNS (1), /* cost of an add instruction */ 341 COSTS_N_INSNS (1), /* cost of a lea instruction */ 342 COSTS_N_INSNS (2), /* variable shift costs */ 343 COSTS_N_INSNS (1), /* constant shift costs */ 344 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 345 COSTS_N_INSNS (4), /* HI */ 346 COSTS_N_INSNS (7), /* SI */ 347 COSTS_N_INSNS (7), /* DI */ 348 COSTS_N_INSNS (7)}, /* other */ 349 0, /* cost of multiply per each bit set */ 350 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */ 351 COSTS_N_INSNS (23), /* HI */ 352 COSTS_N_INSNS (39), /* SI */ 353 COSTS_N_INSNS (39), /* DI */ 354 COSTS_N_INSNS (39)}, /* other */ 355 COSTS_N_INSNS (1), /* cost of movsx */ 356 COSTS_N_INSNS (1), /* cost of movzx */ 357 8, /* "large" insn */ 358 4, /* MOVE_RATIO */ 359 1, /* cost for loading QImode using movzbl */ 360 {1, 1, 1}, /* cost of loading integer registers 361 in QImode, HImode and SImode. 362 Relative to reg-reg move (2). */ 363 {1, 1, 1}, /* cost of storing integer registers */ 364 1, /* cost of reg,reg fld/fst */ 365 {1, 1, 1}, /* cost of loading fp registers 366 in SFmode, DFmode and XFmode */ 367 {4, 6, 6}, /* cost of storing fp registers 368 in SFmode, DFmode and XFmode */ 369 370 1, /* cost of moving MMX register */ 371 {1, 1}, /* cost of loading MMX registers 372 in SImode and DImode */ 373 {1, 1}, /* cost of storing MMX registers 374 in SImode and DImode */ 375 1, /* cost of moving SSE register */ 376 {1, 1, 1}, /* cost of loading SSE registers 377 in SImode, DImode and TImode */ 378 {1, 1, 1}, /* cost of storing SSE registers 379 in SImode, DImode and TImode */ 380 1, /* MMX or SSE register to integer */ 381 32, /* size of prefetch block */ 382 1, /* number of parallel prefetches */ 383 1, /* Branch cost */ 384 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ 385 COSTS_N_INSNS (11), /* cost of FMUL instruction. 
*/ 386 COSTS_N_INSNS (47), /* cost of FDIV instruction. */ 387 COSTS_N_INSNS (1), /* cost of FABS instruction. */ 388 COSTS_N_INSNS (1), /* cost of FCHS instruction. */ 389 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */ 390}; 391 392static const 393struct processor_costs k6_cost = { 394 COSTS_N_INSNS (1), /* cost of an add instruction */ 395 COSTS_N_INSNS (2), /* cost of a lea instruction */ 396 COSTS_N_INSNS (1), /* variable shift costs */ 397 COSTS_N_INSNS (1), /* constant shift costs */ 398 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 399 COSTS_N_INSNS (3), /* HI */ 400 COSTS_N_INSNS (3), /* SI */ 401 COSTS_N_INSNS (3), /* DI */ 402 COSTS_N_INSNS (3)}, /* other */ 403 0, /* cost of multiply per each bit set */ 404 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 405 COSTS_N_INSNS (18), /* HI */ 406 COSTS_N_INSNS (18), /* SI */ 407 COSTS_N_INSNS (18), /* DI */ 408 COSTS_N_INSNS (18)}, /* other */ 409 COSTS_N_INSNS (2), /* cost of movsx */ 410 COSTS_N_INSNS (2), /* cost of movzx */ 411 8, /* "large" insn */ 412 4, /* MOVE_RATIO */ 413 3, /* cost for loading QImode using movzbl */ 414 {4, 5, 4}, /* cost of loading integer registers 415 in QImode, HImode and SImode. 416 Relative to reg-reg move (2). 
*/ 417 {2, 3, 2}, /* cost of storing integer registers */ 418 4, /* cost of reg,reg fld/fst */ 419 {6, 6, 6}, /* cost of loading fp registers 420 in SFmode, DFmode and XFmode */ 421 {4, 4, 4}, /* cost of storing fp registers 422 in SFmode, DFmode and XFmode */ 423 2, /* cost of moving MMX register */ 424 {2, 2}, /* cost of loading MMX registers 425 in SImode and DImode */ 426 {2, 2}, /* cost of storing MMX registers 427 in SImode and DImode */ 428 2, /* cost of moving SSE register */ 429 {2, 2, 8}, /* cost of loading SSE registers 430 in SImode, DImode and TImode */ 431 {2, 2, 8}, /* cost of storing SSE registers 432 in SImode, DImode and TImode */ 433 6, /* MMX or SSE register to integer */ 434 32, /* size of prefetch block */ 435 1, /* number of parallel prefetches */ 436 1, /* Branch cost */ 437 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */ 438 COSTS_N_INSNS (2), /* cost of FMUL instruction. */ 439 COSTS_N_INSNS (56), /* cost of FDIV instruction. */ 440 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 441 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 442 COSTS_N_INSNS (56), /* cost of FSQRT instruction. 
*/ 443}; 444 445static const 446struct processor_costs athlon_cost = { 447 COSTS_N_INSNS (1), /* cost of an add instruction */ 448 COSTS_N_INSNS (2), /* cost of a lea instruction */ 449 COSTS_N_INSNS (1), /* variable shift costs */ 450 COSTS_N_INSNS (1), /* constant shift costs */ 451 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */ 452 COSTS_N_INSNS (5), /* HI */ 453 COSTS_N_INSNS (5), /* SI */ 454 COSTS_N_INSNS (5), /* DI */ 455 COSTS_N_INSNS (5)}, /* other */ 456 0, /* cost of multiply per each bit set */ 457 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 458 COSTS_N_INSNS (26), /* HI */ 459 COSTS_N_INSNS (42), /* SI */ 460 COSTS_N_INSNS (74), /* DI */ 461 COSTS_N_INSNS (74)}, /* other */ 462 COSTS_N_INSNS (1), /* cost of movsx */ 463 COSTS_N_INSNS (1), /* cost of movzx */ 464 8, /* "large" insn */ 465 9, /* MOVE_RATIO */ 466 4, /* cost for loading QImode using movzbl */ 467 {3, 4, 3}, /* cost of loading integer registers 468 in QImode, HImode and SImode. 469 Relative to reg-reg move (2). */ 470 {3, 4, 3}, /* cost of storing integer registers */ 471 4, /* cost of reg,reg fld/fst */ 472 {4, 4, 12}, /* cost of loading fp registers 473 in SFmode, DFmode and XFmode */ 474 {6, 6, 8}, /* cost of storing fp registers 475 in SFmode, DFmode and XFmode */ 476 2, /* cost of moving MMX register */ 477 {4, 4}, /* cost of loading MMX registers 478 in SImode and DImode */ 479 {4, 4}, /* cost of storing MMX registers 480 in SImode and DImode */ 481 2, /* cost of moving SSE register */ 482 {4, 4, 6}, /* cost of loading SSE registers 483 in SImode, DImode and TImode */ 484 {4, 4, 5}, /* cost of storing SSE registers 485 in SImode, DImode and TImode */ 486 5, /* MMX or SSE register to integer */ 487 64, /* size of prefetch block */ 488 6, /* number of parallel prefetches */ 489 5, /* Branch cost */ 490 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ 491 COSTS_N_INSNS (4), /* cost of FMUL instruction. */ 492 COSTS_N_INSNS (24), /* cost of FDIV instruction. 
*/ 493 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 494 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 495 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ 496}; 497 498static const 499struct processor_costs k8_cost = { 500 COSTS_N_INSNS (1), /* cost of an add instruction */ 501 COSTS_N_INSNS (2), /* cost of a lea instruction */ 502 COSTS_N_INSNS (1), /* variable shift costs */ 503 COSTS_N_INSNS (1), /* constant shift costs */ 504 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 505 COSTS_N_INSNS (4), /* HI */ 506 COSTS_N_INSNS (3), /* SI */ 507 COSTS_N_INSNS (4), /* DI */ 508 COSTS_N_INSNS (5)}, /* other */ 509 0, /* cost of multiply per each bit set */ 510 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 511 COSTS_N_INSNS (26), /* HI */ 512 COSTS_N_INSNS (42), /* SI */ 513 COSTS_N_INSNS (74), /* DI */ 514 COSTS_N_INSNS (74)}, /* other */ 515 COSTS_N_INSNS (1), /* cost of movsx */ 516 COSTS_N_INSNS (1), /* cost of movzx */ 517 8, /* "large" insn */ 518 9, /* MOVE_RATIO */ 519 4, /* cost for loading QImode using movzbl */ 520 {3, 4, 3}, /* cost of loading integer registers 521 in QImode, HImode and SImode. 522 Relative to reg-reg move (2). 
*/ 523 {3, 4, 3}, /* cost of storing integer registers */ 524 4, /* cost of reg,reg fld/fst */ 525 {4, 4, 12}, /* cost of loading fp registers 526 in SFmode, DFmode and XFmode */ 527 {6, 6, 8}, /* cost of storing fp registers 528 in SFmode, DFmode and XFmode */ 529 2, /* cost of moving MMX register */ 530 {3, 3}, /* cost of loading MMX registers 531 in SImode and DImode */ 532 {4, 4}, /* cost of storing MMX registers 533 in SImode and DImode */ 534 2, /* cost of moving SSE register */ 535 {4, 3, 6}, /* cost of loading SSE registers 536 in SImode, DImode and TImode */ 537 {4, 4, 5}, /* cost of storing SSE registers 538 in SImode, DImode and TImode */ 539 5, /* MMX or SSE register to integer */ 540 64, /* size of prefetch block */ 541 6, /* number of parallel prefetches */ 542 5, /* Branch cost */ 543 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ 544 COSTS_N_INSNS (4), /* cost of FMUL instruction. */ 545 COSTS_N_INSNS (19), /* cost of FDIV instruction. */ 546 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 547 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 548 COSTS_N_INSNS (35), /* cost of FSQRT instruction. 
*/ 549}; 550 551struct processor_costs amdfam10_cost = { 552 COSTS_N_INSNS (1), /* cost of an add instruction */ 553 COSTS_N_INSNS (2), /* cost of a lea instruction */ 554 COSTS_N_INSNS (1), /* variable shift costs */ 555 COSTS_N_INSNS (1), /* constant shift costs */ 556 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 557 COSTS_N_INSNS (4), /* HI */ 558 COSTS_N_INSNS (3), /* SI */ 559 COSTS_N_INSNS (4), /* DI */ 560 COSTS_N_INSNS (5)}, /* other */ 561 0, /* cost of multiply per each bit set */ 562 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ 563 COSTS_N_INSNS (35), /* HI */ 564 COSTS_N_INSNS (51), /* SI */ 565 COSTS_N_INSNS (83), /* DI */ 566 COSTS_N_INSNS (83)}, /* other */ 567 COSTS_N_INSNS (1), /* cost of movsx */ 568 COSTS_N_INSNS (1), /* cost of movzx */ 569 8, /* "large" insn */ 570 9, /* MOVE_RATIO */ 571 4, /* cost for loading QImode using movzbl */ 572 {3, 4, 3}, /* cost of loading integer registers 573 in QImode, HImode and SImode. 574 Relative to reg-reg move (2). 
*/ 575 {3, 4, 3}, /* cost of storing integer registers */ 576 4, /* cost of reg,reg fld/fst */ 577 {4, 4, 12}, /* cost of loading fp registers 578 in SFmode, DFmode and XFmode */ 579 {6, 6, 8}, /* cost of storing fp registers 580 in SFmode, DFmode and XFmode */ 581 2, /* cost of moving MMX register */ 582 {3, 3}, /* cost of loading MMX registers 583 in SImode and DImode */ 584 {4, 4}, /* cost of storing MMX registers 585 in SImode and DImode */ 586 2, /* cost of moving SSE register */ 587 {4, 4, 3}, /* cost of loading SSE registers 588 in SImode, DImode and TImode */ 589 {4, 4, 5}, /* cost of storing SSE registers 590 in SImode, DImode and TImode */ 591 3, /* MMX or SSE register to integer */ 592 /* On K8 593 MOVD reg64, xmmreg Double FSTORE 4 594 MOVD reg32, xmmreg Double FSTORE 4 595 On AMDFAM10 596 MOVD reg64, xmmreg Double FADD 3 597 1/1 1/1 598 MOVD reg32, xmmreg Double FADD 3 599 1/1 1/1 */ 600 64, /* size of prefetch block */ 601 /* New AMD processors never drop prefetches; if they cannot be performed 602 immediately, they are queued. We set number of simultaneous prefetches 603 to a large constant to reflect this (it probably is not a good idea not 604 to limit number of prefetches at all, as their execution also takes some 605 time). */ 606 100, /* number of parallel prefetches */ 607 5, /* Branch cost */ 608 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ 609 COSTS_N_INSNS (4), /* cost of FMUL instruction. */ 610 COSTS_N_INSNS (19), /* cost of FDIV instruction. */ 611 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 612 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 613 COSTS_N_INSNS (35), /* cost of FSQRT instruction. 
*/ 614}; 615 616static const 617struct processor_costs pentium4_cost = { 618 COSTS_N_INSNS (1), /* cost of an add instruction */ 619 COSTS_N_INSNS (3), /* cost of a lea instruction */ 620 COSTS_N_INSNS (4), /* variable shift costs */ 621 COSTS_N_INSNS (4), /* constant shift costs */ 622 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */ 623 COSTS_N_INSNS (15), /* HI */ 624 COSTS_N_INSNS (15), /* SI */ 625 COSTS_N_INSNS (15), /* DI */ 626 COSTS_N_INSNS (15)}, /* other */ 627 0, /* cost of multiply per each bit set */ 628 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */ 629 COSTS_N_INSNS (56), /* HI */ 630 COSTS_N_INSNS (56), /* SI */ 631 COSTS_N_INSNS (56), /* DI */ 632 COSTS_N_INSNS (56)}, /* other */ 633 COSTS_N_INSNS (1), /* cost of movsx */ 634 COSTS_N_INSNS (1), /* cost of movzx */ 635 16, /* "large" insn */ 636 6, /* MOVE_RATIO */ 637 2, /* cost for loading QImode using movzbl */ 638 {4, 5, 4}, /* cost of loading integer registers 639 in QImode, HImode and SImode. 640 Relative to reg-reg move (2). */ 641 {2, 3, 2}, /* cost of storing integer registers */ 642 2, /* cost of reg,reg fld/fst */ 643 {2, 2, 6}, /* cost of loading fp registers 644 in SFmode, DFmode and XFmode */ 645 {4, 4, 6}, /* cost of storing fp registers 646 in SFmode, DFmode and XFmode */ 647 2, /* cost of moving MMX register */ 648 {2, 2}, /* cost of loading MMX registers 649 in SImode and DImode */ 650 {2, 2}, /* cost of storing MMX registers 651 in SImode and DImode */ 652 12, /* cost of moving SSE register */ 653 {12, 12, 12}, /* cost of loading SSE registers 654 in SImode, DImode and TImode */ 655 {2, 2, 8}, /* cost of storing SSE registers 656 in SImode, DImode and TImode */ 657 10, /* MMX or SSE register to integer */ 658 64, /* size of prefetch block */ 659 6, /* number of parallel prefetches */ 660 2, /* Branch cost */ 661 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ 662 COSTS_N_INSNS (7), /* cost of FMUL instruction. 
*/ 663 COSTS_N_INSNS (43), /* cost of FDIV instruction. */ 664 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 665 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 666 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */ 667}; 668 669static const 670struct processor_costs nocona_cost = { 671 COSTS_N_INSNS (1), /* cost of an add instruction */ 672 COSTS_N_INSNS (1), /* cost of a lea instruction */ 673 COSTS_N_INSNS (1), /* variable shift costs */ 674 COSTS_N_INSNS (1), /* constant shift costs */ 675 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */ 676 COSTS_N_INSNS (10), /* HI */ 677 COSTS_N_INSNS (10), /* SI */ 678 COSTS_N_INSNS (10), /* DI */ 679 COSTS_N_INSNS (10)}, /* other */ 680 0, /* cost of multiply per each bit set */ 681 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */ 682 COSTS_N_INSNS (66), /* HI */ 683 COSTS_N_INSNS (66), /* SI */ 684 COSTS_N_INSNS (66), /* DI */ 685 COSTS_N_INSNS (66)}, /* other */ 686 COSTS_N_INSNS (1), /* cost of movsx */ 687 COSTS_N_INSNS (1), /* cost of movzx */ 688 16, /* "large" insn */ 689 17, /* MOVE_RATIO */ 690 4, /* cost for loading QImode using movzbl */ 691 {4, 4, 4}, /* cost of loading integer registers 692 in QImode, HImode and SImode. 693 Relative to reg-reg move (2). 
*/ 694 {4, 4, 4}, /* cost of storing integer registers */ 695 3, /* cost of reg,reg fld/fst */ 696 {12, 12, 12}, /* cost of loading fp registers 697 in SFmode, DFmode and XFmode */ 698 {4, 4, 4}, /* cost of storing fp registers 699 in SFmode, DFmode and XFmode */ 700 6, /* cost of moving MMX register */ 701 {12, 12}, /* cost of loading MMX registers 702 in SImode and DImode */ 703 {12, 12}, /* cost of storing MMX registers 704 in SImode and DImode */ 705 6, /* cost of moving SSE register */ 706 {12, 12, 12}, /* cost of loading SSE registers 707 in SImode, DImode and TImode */ 708 {12, 12, 12}, /* cost of storing SSE registers 709 in SImode, DImode and TImode */ 710 8, /* MMX or SSE register to integer */ 711 128, /* size of prefetch block */ 712 8, /* number of parallel prefetches */ 713 1, /* Branch cost */ 714 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ 715 COSTS_N_INSNS (8), /* cost of FMUL instruction. */ 716 COSTS_N_INSNS (40), /* cost of FDIV instruction. */ 717 COSTS_N_INSNS (3), /* cost of FABS instruction. */ 718 COSTS_N_INSNS (3), /* cost of FCHS instruction. */ 719 COSTS_N_INSNS (44), /* cost of FSQRT instruction. 
 */
};

/* Costs for tuning for Core 2, relative to an add (COSTS_N_INSNS (1)).  */
static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (3),		/*				 DI */
   COSTS_N_INSNS (3)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),		/*			    HI */
   COSTS_N_INSNS (22),		/*			    SI */
   COSTS_N_INSNS (22),		/*			    DI */
   COSTS_N_INSNS (22)},		/*			    other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  16,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {6, 6, 6},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {6, 6, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {6, 6},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {6, 6, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 4},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  2,				/* MMX or SSE register to integer */
  128,				/* size of prefetch block */
  8,				/* number of parallel prefetches */
  3,				/* Branch cost */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),		/* cost of FSQRT instruction.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (2)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/*			    HI */
   COSTS_N_INSNS (42),		/*			    SI */
   COSTS_N_INSNS (74),		/*			    DI */
   COSTS_N_INSNS (74)},		/*			    other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  17,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
     is increased to perhaps more appropriate value of 5.  */
  3,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
};

/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8.
 */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (2)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/*			    HI */
   COSTS_N_INSNS (42),		/*			    SI */
   COSTS_N_INSNS (74),		/*			    DI */
   COSTS_N_INSNS (74)},		/*			    other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  17,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  3,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
};

/* Cost table selected for the processor being tuned for; starts out as
   pentium_cost and is switched by override_options.  */
const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  One bit per PROCESSOR_* value.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_GEODE (1<<PROCESSOR_GEODE)
/* m_K6 is defined just below; that is fine because macro expansion is
   deferred until the point of use.  */
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */

/* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
   Generic64 seems like good code size tradeoff.  We can't enable it for 32bit
   generic because it is not working well with PPro base chips.
 */
const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
			  | m_GENERIC64;
const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
			    | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_zero_extend_with_and = m_486 | m_PENT;
/* Enable to zero extend integer registers to avoid partial dependencies */
const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
		     | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
			      | m_K6 | m_CORE2 | m_GENERIC;
const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
		      | m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
			    | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
/* Branch hints were put in P4 based on simulation result.  But
   after P4 was made, no performance benefit was observed with
   branch hints.  It also increases the code size.  As the result,
   icc never generates branch hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
			 /*m_GENERIC | m_ATHLON_K8 ? */
/* We probably ought to watch for partial register stalls on Generic32
   compilation setting as well.  However in current implementation the
   partial register stalls are not eliminated very well - they can
   be introduced via subregs synthesized by combine and can happen
   in caller/callee saving sequences.
   Because this option pays back little on PPro based chips and is in conflict
   with partial reg. dependencies used by Athlon/P4 based chips, it is better
   to leave it off for generic32 for now.  */
const int x86_partial_reg_stall = m_PPRO;
const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
				  | m_CORE2 | m_GENERIC);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
			       | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
			       /* m_PENT4 ? */
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
/* On PPro this flag is meant to avoid partial register stalls.  Just like
   the x86_partial_reg_stall this option might be considered for Generic32
   if our scheme for avoiding partial stalls was more effective.  */
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
/* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
			  | m_CORE2 | m_GENERIC;
const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
			  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
			  | m_CORE2 | m_GENERIC;
const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
			  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
/* Enable if integer moves are preferred for DFmode copies */
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
				       | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
				       | m_CORE2 | m_GENERIC;
const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
				      | m_CORE2 | m_GENERIC;
/* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
   for outgoing arguments will be computed and placed into the variable
   `current_function_outgoing_args_size'.  No space will be pushed onto the stack
   for each call; instead, the function prologue should increase the stack frame
   size by this amount.  Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
   not proper.  */
const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
					 | m_NOCONA | m_PPRO | m_CORE2
					 | m_GENERIC;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
					   | m_ATHLON_K8_AMDFAM10 | m_PENT4
					   | m_NOCONA | m_CORE2 | m_GENERIC;
/* In Generic model we have an conflict here in between PPro/Pentium4 based chips
   that thread 128bit SSE registers as single units versus K8 based chips that
   divide SSE registers to two 64bit halves.
   x86_sse_partial_reg_dependency promote all store destinations to be 128bit
   to allow register renaming on 128bit SSE units, but usually results in one
   extra microop on 64bit SSE units.  Experimental results shows that disabling
   this option on P4 brings over 20% SPECfp regression, while enabling it on
   K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
   of moves.  */
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
					   | m_GENERIC | m_AMDFAM10;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just
   lower part of scalar values in proper format leaving the upper part
   undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
         xorps  reg, reg
         movlps mem, reg
         movhps mem+8, reg
       }
     else
       {
         movlps mem, reg
         movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
         movlpd mem, reg
         movhpd mem+8, reg
       }
     else
       {
         movsd  mem, reg
         movhpd mem+8, reg
       }
 */
const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2;
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);

/* ??? Allowing interunit moves makes it all too easy for the compiler to put
   integer data in xmm registers.  Which results in pretty abysmal code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;

const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
				    | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
				| m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
			 | m_CORE2 | m_GENERIC;
const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
/* Compare and exchange was added for 80486.  */
const int x86_cmpxchg = ~m_386;
/* Compare and exchange 8 bytes was added for pentium.  */
const int x86_cmpxchg8b = ~(m_386 | m_486);
/* Exchange and add was added for 80486.  */
const int x86_xadd = ~m_386;
/* Byteswap was added for 80486.  */
const int x86_bswap = ~m_386;
const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.
 */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* extended integer registers (r8-r15) */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* extended SSE registers (xmm8-xmm15) */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

/* GCC register numbers of the x86-64 integer argument registers, in
   argument-passing order: rdi, rsi, rdx, rcx, r8, r9.  */
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

/* GCC register numbers of the integer registers used for returning
   values.  NOTE(review): the original annotated gcc regno 1 as RDI,
   but per the parameter table above regno 1 is RDX and regno 5 is
   RDI; comments corrected accordingly.  */
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.
 */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)

/* Define the structure for the machine field in struct function.  */

/* One cached stack-local slot; slots are chained per function.  */
struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
		        )
   [va_arg registers]  (
		        > to_allocate	      <- FRAME_POINTER
   [frame]	       (
		        )
   [padding2]          /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};

/* Code model option.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialects.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.
 */
int x86_prefetch_sse;

/* true if cmpxchg16b is supported.  */
int x86_cmpxchg16b;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */

int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Forward declarations for the static helpers defined later in this
   file.  */
static bool ix86_handle_option (size_t, const char *, int);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
						   enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int ix86_flags_dependent (rtx, rtx, enum attr_type);
static int ix86_agi_dependent (rtx, rtx, enum attr_type);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);
static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ix86_scalar_mode_supported_p (enum machine_mode);
static bool ix86_vector_mode_supported_p (enum machine_mode);

static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;

struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
				 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
				    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode, tree, tree);
static bool contains_128bit_aligned_vector_p (tree);
static rtx ix86_struct_value_rtx (tree, int);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				    tree, bool);
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static const char *ix86_mangle_fundamental_type (tree);
static tree ix86_stack_protect_fail (void);
static rtx ix86_internal_arg_pointer (void);
static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);

/* This function is only used on Solaris.  */
static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
     ATTRIBUTE_UNUSED;

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).
 */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };
/* Debug names for the classes above, indexed by enum value.  */
static const char * const x86_64_reg_class_name[] = {
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};

#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
static section *x86_64_elf_select_section (tree decl, int reloc,
					   unsigned HOST_WIDE_INT align)
					   ATTRIBUTE_UNUSED;

/* Initialize the GCC target structure.
 */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS	\
  (TARGET_DEFAULT			\
   | TARGET_64BIT_DEFAULT		\
   | TARGET_SUBTARGET_DEFAULT		\
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

struct gcc_target targetm = TARGET_INITIALIZER;


/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.
*/ 1638#ifndef DEFAULT_PCC_STRUCT_RETURN 1639#define DEFAULT_PCC_STRUCT_RETURN 1 1640#endif 1641 1642/* Implement TARGET_HANDLE_OPTION. */ 1643 1644static bool 1645ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) 1646{ 1647 switch (code) 1648 { 1649 case OPT_m3dnow: 1650 if (!value) 1651 { 1652 target_flags &= ~MASK_3DNOW_A; 1653 target_flags_explicit |= MASK_3DNOW_A; 1654 } 1655 return true; 1656 1657 case OPT_mmmx: 1658 if (!value) 1659 { 1660 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A); 1661 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A; 1662 } 1663 return true; 1664 1665 case OPT_msse: 1666 if (!value) 1667 { 1668 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A); 1669 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A; 1670 } 1671 return true; 1672 1673 case OPT_msse2: 1674 if (!value) 1675 { 1676 target_flags &= ~(MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A); 1677 target_flags_explicit |= MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A; 1678 } 1679 return true; 1680 1681 case OPT_msse3: 1682 if (!value) 1683 { 1684 target_flags &= ~(MASK_SSSE3 | MASK_SSE4A); 1685 target_flags_explicit |= MASK_SSSE3 | MASK_SSE4A; 1686 } 1687 return true; 1688 1689 case OPT_maes: 1690 if (!value) 1691 { 1692 target_flags &= ~MASK_AES; 1693 target_flags_explicit |= MASK_AES; 1694 } 1695 return true; 1696 1697 default: 1698 return true; 1699 } 1700} 1701 1702/* Sometimes certain combinations of command options do not make 1703 sense on a particular target machine. You can define a macro 1704 `OVERRIDE_OPTIONS' to take account of this. This macro, if 1705 defined, is executed once just after all the command options have 1706 been parsed. 1707 1708 Don't use this macro to turn on various extra optimizations for 1709 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ 1710 1711void 1712override_options (void) 1713{ 1714 int i; 1715 int ix86_tune_defaulted = 0; 1716 1717 /* Comes from final.c -- no real reason to change it. 
*/ 1718#define MAX_CODE_ALIGN 16 1719 1720 static struct ptt 1721 { 1722 const struct processor_costs *cost; /* Processor costs */ 1723 const int target_enable; /* Target flags to enable. */ 1724 const int target_disable; /* Target flags to disable. */ 1725 const int align_loop; /* Default alignments. */ 1726 const int align_loop_max_skip; 1727 const int align_jump; 1728 const int align_jump_max_skip; 1729 const int align_func; 1730 } 1731 const processor_target_table[PROCESSOR_max] = 1732 { 1733 {&i386_cost, 0, 0, 4, 3, 4, 3, 4}, 1734 {&i486_cost, 0, 0, 16, 15, 16, 15, 16}, 1735 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16}, 1736 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16}, 1737 {&geode_cost, 0, 0, 0, 0, 0, 0, 0}, 1738 {&k6_cost, 0, 0, 32, 7, 32, 7, 32}, 1739 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16}, 1740 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0}, 1741 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}, 1742 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}, 1743 {&core2_cost, 0, 0, 16, 7, 16, 7, 16}, 1744 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16}, 1745 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}, 1746 {&amdfam10_cost, 0, 0, 32, 24, 32, 7, 32} 1747 }; 1748 1749 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES; 1750 static struct pta 1751 { 1752 const char *const name; /* processor name or nickname. 
*/ 1753 const enum processor_type processor; 1754 const enum pta_flags 1755 { 1756 PTA_SSE = 1, 1757 PTA_SSE2 = 2, 1758 PTA_SSE3 = 4, 1759 PTA_MMX = 8, 1760 PTA_PREFETCH_SSE = 16, 1761 PTA_3DNOW = 32, 1762 PTA_3DNOW_A = 64, 1763 PTA_64BIT = 128, 1764 PTA_SSSE3 = 256, 1765 PTA_CX16 = 512, 1766 PTA_POPCNT = 1024, 1767 PTA_ABM = 2048, 1768 PTA_SSE4A = 4096 1769 } flags; 1770 } 1771 const processor_alias_table[] = 1772 { 1773 {"i386", PROCESSOR_I386, 0}, 1774 {"i486", PROCESSOR_I486, 0}, 1775 {"i586", PROCESSOR_PENTIUM, 0}, 1776 {"pentium", PROCESSOR_PENTIUM, 0}, 1777 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX}, 1778 {"winchip-c6", PROCESSOR_I486, PTA_MMX}, 1779 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, 1780 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, 1781 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE}, 1782 {"i686", PROCESSOR_PENTIUMPRO, 0}, 1783 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0}, 1784 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX}, 1785 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, 1786 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, 1787 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2}, 1788 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 1789 | PTA_MMX | PTA_PREFETCH_SSE}, 1790 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 1791 | PTA_MMX | PTA_PREFETCH_SSE}, 1792 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 1793 | PTA_MMX | PTA_PREFETCH_SSE}, 1794 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT 1795 | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16}, 1796 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 1797 | PTA_64BIT | PTA_MMX 1798 | PTA_PREFETCH_SSE | PTA_CX16}, 1799 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1800 | PTA_3DNOW_A}, 1801 {"k6", PROCESSOR_K6, PTA_MMX}, 1802 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 1803 {"k6-3", PROCESSOR_K6, PTA_MMX | 
PTA_3DNOW}, 1804 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1805 | PTA_3DNOW_A}, 1806 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE 1807 | PTA_3DNOW | PTA_3DNOW_A}, 1808 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1809 | PTA_3DNOW_A | PTA_SSE}, 1810 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1811 | PTA_3DNOW_A | PTA_SSE}, 1812 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1813 | PTA_3DNOW_A | PTA_SSE}, 1814 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT 1815 | PTA_SSE | PTA_SSE2 }, 1816 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1817 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1818 {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1819 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 1820 | PTA_SSE3 }, 1821 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1822 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1823 {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1824 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 1825 | PTA_SSE3 }, 1826 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1827 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1828 {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1829 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 1830 | PTA_SSE3 }, 1831 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1832 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1833 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1834 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE 1835 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT 1836 | PTA_ABM | PTA_SSE4A | PTA_CX16}, 1837 {"barcelona", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1838 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE 1839 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT 1840 | PTA_ABM | PTA_SSE4A | PTA_CX16}, 1841 {"generic32", PROCESSOR_GENERIC32, 0 /* flags 
are only used for -march switch. */ }, 1842 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ }, 1843 }; 1844 1845 int const pta_size = ARRAY_SIZE (processor_alias_table); 1846 1847#ifdef SUBTARGET_OVERRIDE_OPTIONS 1848 SUBTARGET_OVERRIDE_OPTIONS; 1849#endif 1850 1851#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS 1852 SUBSUBTARGET_OVERRIDE_OPTIONS; 1853#endif 1854 1855 /* -fPIC is the default for x86_64. */ 1856 if (TARGET_MACHO && TARGET_64BIT) 1857 flag_pic = 2; 1858 1859 /* Set the default values for switches whose default depends on TARGET_64BIT 1860 in case they weren't overwritten by command line options. */ 1861 if (TARGET_64BIT) 1862 { 1863 /* Mach-O doesn't support omitting the frame pointer for now. */ 1864 if (flag_omit_frame_pointer == 2) 1865 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1); 1866 if (flag_asynchronous_unwind_tables == 2) 1867 flag_asynchronous_unwind_tables = 1; 1868 if (flag_pcc_struct_return == 2) 1869 flag_pcc_struct_return = 0; 1870 } 1871 else 1872 { 1873 if (flag_omit_frame_pointer == 2) 1874 flag_omit_frame_pointer = 0; 1875 if (flag_asynchronous_unwind_tables == 2) 1876 flag_asynchronous_unwind_tables = 0; 1877 if (flag_pcc_struct_return == 2) 1878 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; 1879 } 1880 1881 /* Need to check -mtune=generic first. */ 1882 if (ix86_tune_string) 1883 { 1884 if (!strcmp (ix86_tune_string, "generic") 1885 || !strcmp (ix86_tune_string, "i686") 1886 /* As special support for cross compilers we read -mtune=native 1887 as -mtune=generic. With native compilers we won't see the 1888 -mtune=native, as it was changed by the driver. 
*/ 1889 || !strcmp (ix86_tune_string, "native")) 1890 { 1891 if (TARGET_64BIT) 1892 ix86_tune_string = "generic64"; 1893 else 1894 ix86_tune_string = "generic32"; 1895 } 1896 else if (!strncmp (ix86_tune_string, "generic", 7)) 1897 error ("bad value (%s) for -mtune= switch", ix86_tune_string); 1898 } 1899 else 1900 { 1901 if (ix86_arch_string) 1902 ix86_tune_string = ix86_arch_string; 1903 if (!ix86_tune_string) 1904 { 1905 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT]; 1906 ix86_tune_defaulted = 1; 1907 } 1908 1909 /* ix86_tune_string is set to ix86_arch_string or defaulted. We 1910 need to use a sensible tune option. */ 1911 if (!strcmp (ix86_tune_string, "generic") 1912 || !strcmp (ix86_tune_string, "x86-64") 1913 || !strcmp (ix86_tune_string, "i686")) 1914 { 1915 if (TARGET_64BIT) 1916 ix86_tune_string = "generic64"; 1917 else 1918 ix86_tune_string = "generic32"; 1919 } 1920 } 1921 if (!strcmp (ix86_tune_string, "x86-64")) 1922 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or " 1923 "-mtune=generic instead as appropriate."); 1924 1925 if (!ix86_arch_string) 1926 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486"; 1927 if (!strcmp (ix86_arch_string, "generic")) 1928 error ("generic CPU can be used only for -mtune= switch"); 1929 if (!strncmp (ix86_arch_string, "generic", 7)) 1930 error ("bad value (%s) for -march= switch", ix86_arch_string); 1931 1932 if (ix86_cmodel_string != 0) 1933 { 1934 if (!strcmp (ix86_cmodel_string, "small")) 1935 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 1936 else if (!strcmp (ix86_cmodel_string, "medium")) 1937 ix86_cmodel = flag_pic ? 
CM_MEDIUM_PIC : CM_MEDIUM; 1938 else if (flag_pic) 1939 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string); 1940 else if (!strcmp (ix86_cmodel_string, "32")) 1941 ix86_cmodel = CM_32; 1942 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic) 1943 ix86_cmodel = CM_KERNEL; 1944 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic) 1945 ix86_cmodel = CM_LARGE; 1946 else 1947 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string); 1948 } 1949 else 1950 { 1951 ix86_cmodel = CM_32; 1952 if (TARGET_64BIT) 1953 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 1954 } 1955 if (ix86_asm_string != 0) 1956 { 1957 if (! TARGET_MACHO 1958 && !strcmp (ix86_asm_string, "intel")) 1959 ix86_asm_dialect = ASM_INTEL; 1960 else if (!strcmp (ix86_asm_string, "att")) 1961 ix86_asm_dialect = ASM_ATT; 1962 else 1963 error ("bad value (%s) for -masm= switch", ix86_asm_string); 1964 } 1965 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32)) 1966 error ("code model %qs not supported in the %s bit mode", 1967 ix86_cmodel_string, TARGET_64BIT ? "64" : "32"); 1968 if (ix86_cmodel == CM_LARGE) 1969 sorry ("code model %<large%> not supported yet"); 1970 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0)) 1971 sorry ("%i-bit mode not compiled in", 1972 (target_flags & MASK_64BIT) ? 64 : 32); 1973 1974 for (i = 0; i < pta_size; i++) 1975 if (! strcmp (ix86_arch_string, processor_alias_table[i].name)) 1976 { 1977 ix86_arch = processor_alias_table[i].processor; 1978 /* Default cpu tuning to the architecture. 
*/ 1979 ix86_tune = ix86_arch; 1980 if (processor_alias_table[i].flags & PTA_MMX 1981 && !(target_flags_explicit & MASK_MMX)) 1982 target_flags |= MASK_MMX; 1983 if (processor_alias_table[i].flags & PTA_3DNOW 1984 && !(target_flags_explicit & MASK_3DNOW)) 1985 target_flags |= MASK_3DNOW; 1986 if (processor_alias_table[i].flags & PTA_3DNOW_A 1987 && !(target_flags_explicit & MASK_3DNOW_A)) 1988 target_flags |= MASK_3DNOW_A; 1989 if (processor_alias_table[i].flags & PTA_SSE 1990 && !(target_flags_explicit & MASK_SSE)) 1991 target_flags |= MASK_SSE; 1992 if (processor_alias_table[i].flags & PTA_SSE2 1993 && !(target_flags_explicit & MASK_SSE2)) 1994 target_flags |= MASK_SSE2; 1995 if (processor_alias_table[i].flags & PTA_SSE3 1996 && !(target_flags_explicit & MASK_SSE3)) 1997 target_flags |= MASK_SSE3; 1998 if (processor_alias_table[i].flags & PTA_SSSE3 1999 && !(target_flags_explicit & MASK_SSSE3)) 2000 target_flags |= MASK_SSSE3; 2001 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) 2002 x86_prefetch_sse = true; 2003 if (processor_alias_table[i].flags & PTA_CX16) 2004 x86_cmpxchg16b = true; 2005 if (processor_alias_table[i].flags & PTA_POPCNT 2006 && !(target_flags_explicit & MASK_POPCNT)) 2007 target_flags |= MASK_POPCNT; 2008 if (processor_alias_table[i].flags & PTA_ABM 2009 && !(target_flags_explicit & MASK_ABM)) 2010 target_flags |= MASK_ABM; 2011 if (processor_alias_table[i].flags & PTA_SSE4A 2012 && !(target_flags_explicit & MASK_SSE4A)) 2013 target_flags |= MASK_SSE4A; 2014 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) 2015 error ("CPU you selected does not support x86-64 " 2016 "instruction set"); 2017 break; 2018 } 2019 2020 if (i == pta_size) 2021 error ("bad value (%s) for -march= switch", ix86_arch_string); 2022 2023 for (i = 0; i < pta_size; i++) 2024 if (! 
strcmp (ix86_tune_string, processor_alias_table[i].name)) 2025 { 2026 ix86_tune = processor_alias_table[i].processor; 2027 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) 2028 { 2029 if (ix86_tune_defaulted) 2030 { 2031 ix86_tune_string = "x86-64"; 2032 for (i = 0; i < pta_size; i++) 2033 if (! strcmp (ix86_tune_string, 2034 processor_alias_table[i].name)) 2035 break; 2036 ix86_tune = processor_alias_table[i].processor; 2037 } 2038 else 2039 error ("CPU you selected does not support x86-64 " 2040 "instruction set"); 2041 } 2042 /* Intel CPUs have always interpreted SSE prefetch instructions as 2043 NOPs; so, we can enable SSE prefetch instructions even when 2044 -mtune (rather than -march) points us to a processor that has them. 2045 However, the VIA C3 gives a SIGILL, so we only do that for i686 and 2046 higher processors. */ 2047 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE)) 2048 x86_prefetch_sse = true; 2049 break; 2050 } 2051 if (i == pta_size) 2052 error ("bad value (%s) for -mtune= switch", ix86_tune_string); 2053 2054 if (optimize_size) 2055 ix86_cost = &size_cost; 2056 else 2057 ix86_cost = processor_target_table[ix86_tune].cost; 2058 target_flags |= processor_target_table[ix86_tune].target_enable; 2059 target_flags &= ~processor_target_table[ix86_tune].target_disable; 2060 2061 /* Arrange to set up i386_stack_locals for all functions. */ 2062 init_machine_status = ix86_init_machine_status; 2063 2064 /* Validate -mregparm= value. */ 2065 if (ix86_regparm_string) 2066 { 2067 i = atoi (ix86_regparm_string); 2068 if (i < 0 || i > REGPARM_MAX) 2069 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX); 2070 else 2071 ix86_regparm = i; 2072 } 2073 else 2074 if (TARGET_64BIT) 2075 ix86_regparm = REGPARM_MAX; 2076 2077 /* If the user has provided any of the -malign-* options, 2078 warn and use that value only if -falign-* is not set. 2079 Remove this code in GCC 3.2 or later. 
*/ 2080 if (ix86_align_loops_string) 2081 { 2082 warning (0, "-malign-loops is obsolete, use -falign-loops"); 2083 if (align_loops == 0) 2084 { 2085 i = atoi (ix86_align_loops_string); 2086 if (i < 0 || i > MAX_CODE_ALIGN) 2087 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 2088 else 2089 align_loops = 1 << i; 2090 } 2091 } 2092 2093 if (ix86_align_jumps_string) 2094 { 2095 warning (0, "-malign-jumps is obsolete, use -falign-jumps"); 2096 if (align_jumps == 0) 2097 { 2098 i = atoi (ix86_align_jumps_string); 2099 if (i < 0 || i > MAX_CODE_ALIGN) 2100 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 2101 else 2102 align_jumps = 1 << i; 2103 } 2104 } 2105 2106 if (ix86_align_funcs_string) 2107 { 2108 warning (0, "-malign-functions is obsolete, use -falign-functions"); 2109 if (align_functions == 0) 2110 { 2111 i = atoi (ix86_align_funcs_string); 2112 if (i < 0 || i > MAX_CODE_ALIGN) 2113 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 2114 else 2115 align_functions = 1 << i; 2116 } 2117 } 2118 2119 /* Default align_* from the processor table. */ 2120 if (align_loops == 0) 2121 { 2122 align_loops = processor_target_table[ix86_tune].align_loop; 2123 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip; 2124 } 2125 if (align_jumps == 0) 2126 { 2127 align_jumps = processor_target_table[ix86_tune].align_jump; 2128 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip; 2129 } 2130 if (align_functions == 0) 2131 { 2132 align_functions = processor_target_table[ix86_tune].align_func; 2133 } 2134 2135 /* Validate -mbranch-cost= value, or provide default. 
*/ 2136 ix86_branch_cost = ix86_cost->branch_cost; 2137 if (ix86_branch_cost_string) 2138 { 2139 i = atoi (ix86_branch_cost_string); 2140 if (i < 0 || i > 5) 2141 error ("-mbranch-cost=%d is not between 0 and 5", i); 2142 else 2143 ix86_branch_cost = i; 2144 } 2145 if (ix86_section_threshold_string) 2146 { 2147 i = atoi (ix86_section_threshold_string); 2148 if (i < 0) 2149 error ("-mlarge-data-threshold=%d is negative", i); 2150 else 2151 ix86_section_threshold = i; 2152 } 2153 2154 if (ix86_tls_dialect_string) 2155 { 2156 if (strcmp (ix86_tls_dialect_string, "gnu") == 0) 2157 ix86_tls_dialect = TLS_DIALECT_GNU; 2158 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0) 2159 ix86_tls_dialect = TLS_DIALECT_GNU2; 2160 else if (strcmp (ix86_tls_dialect_string, "sun") == 0) 2161 ix86_tls_dialect = TLS_DIALECT_SUN; 2162 else 2163 error ("bad value (%s) for -mtls-dialect= switch", 2164 ix86_tls_dialect_string); 2165 } 2166 2167 /* Keep nonleaf frame pointers. */ 2168 if (flag_omit_frame_pointer) 2169 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; 2170 else if (TARGET_OMIT_LEAF_FRAME_POINTER) 2171 flag_omit_frame_pointer = 1; 2172 2173 /* If we're doing fast math, we don't care about comparison order 2174 wrt NaNs. This lets us use a shorter comparison sequence. */ 2175 if (flag_finite_math_only) 2176 target_flags &= ~MASK_IEEE_FP; 2177 2178 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, 2179 since the insns won't need emulation. */ 2180 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch)) 2181 target_flags &= ~MASK_NO_FANCY_MATH_387; 2182 2183 /* Likewise, if the target doesn't have a 387, or we've specified 2184 software floating point, don't use 387 inline intrinsics. */ 2185 if (!TARGET_80387) 2186 target_flags |= MASK_NO_FANCY_MATH_387; 2187 2188 /* Turn on SSE3 builtins for -mssse3. */ 2189 if (TARGET_SSSE3) 2190 target_flags |= MASK_SSE3; 2191 2192 /* Turn on SSE3 builtins for -msse4a. 
*/ 2193 if (TARGET_SSE4A) 2194 target_flags |= MASK_SSE3; 2195 2196 /* Turn on SSE2 builtins for -msse3. */ 2197 if (TARGET_SSE3) 2198 target_flags |= MASK_SSE2; 2199 2200 /* Turn on SSE2 builtins for -maes. */ 2201 if (TARGET_AES) 2202 target_flags |= MASK_SSE2; 2203 2204 /* Turn on SSE builtins for -msse2. */ 2205 if (TARGET_SSE2) 2206 target_flags |= MASK_SSE; 2207 2208 /* Turn on MMX builtins for -msse. */ 2209 if (TARGET_SSE) 2210 { 2211 target_flags |= MASK_MMX & ~target_flags_explicit; 2212 x86_prefetch_sse = true; 2213 } 2214 2215 /* Turn on MMX builtins for 3Dnow. */ 2216 if (TARGET_3DNOW) 2217 target_flags |= MASK_MMX; 2218 2219 /* Turn on POPCNT builtins for -mabm. */ 2220 if (TARGET_ABM) 2221 target_flags |= MASK_POPCNT; 2222 2223 if (TARGET_64BIT) 2224 { 2225 if (TARGET_ALIGN_DOUBLE) 2226 error ("-malign-double makes no sense in the 64bit mode"); 2227 if (TARGET_RTD) 2228 error ("-mrtd calling convention not supported in the 64bit mode"); 2229 2230 /* Enable by default the SSE and MMX builtins. Do allow the user to 2231 explicitly disable any of these. In particular, disabling SSE and 2232 MMX for kernel code is extremely useful. */ 2233 target_flags 2234 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE) 2235 & ~target_flags_explicit); 2236 } 2237 else 2238 { 2239 /* i386 ABI does not specify red zone. It still makes sense to use it 2240 when programmer takes care to stack from being destroyed. */ 2241 if (!(target_flags_explicit & MASK_NO_RED_ZONE)) 2242 target_flags |= MASK_NO_RED_ZONE; 2243 } 2244 2245 /* Validate -mpreferred-stack-boundary= value, or provide default. 2246 The default of 128 bits is for Pentium III's SSE __m128. We can't 2247 change it because of optimize_size. Otherwise, we can't mix object 2248 files compiled with -Os and -On. */ 2249 ix86_preferred_stack_boundary = 128; 2250 if (ix86_preferred_stack_boundary_string) 2251 { 2252 i = atoi (ix86_preferred_stack_boundary_string); 2253 if (i < (TARGET_64BIT ? 
4 : 2) || i > 12) 2254 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i, 2255 TARGET_64BIT ? 4 : 2); 2256 else 2257 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT; 2258 } 2259 2260 /* Accept -msseregparm only if at least SSE support is enabled. */ 2261 if (TARGET_SSEREGPARM 2262 && ! TARGET_SSE) 2263 error ("-msseregparm used without SSE enabled"); 2264 2265 ix86_fpmath = TARGET_FPMATH_DEFAULT; 2266 2267 if (ix86_fpmath_string != 0) 2268 { 2269 if (! strcmp (ix86_fpmath_string, "387")) 2270 ix86_fpmath = FPMATH_387; 2271 else if (! strcmp (ix86_fpmath_string, "sse")) 2272 { 2273 if (!TARGET_SSE) 2274 { 2275 warning (0, "SSE instruction set disabled, using 387 arithmetics"); 2276 ix86_fpmath = FPMATH_387; 2277 } 2278 else 2279 ix86_fpmath = FPMATH_SSE; 2280 } 2281 else if (! strcmp (ix86_fpmath_string, "387,sse") 2282 || ! strcmp (ix86_fpmath_string, "sse,387")) 2283 { 2284 if (!TARGET_SSE) 2285 { 2286 warning (0, "SSE instruction set disabled, using 387 arithmetics"); 2287 ix86_fpmath = FPMATH_387; 2288 } 2289 else if (!TARGET_80387) 2290 { 2291 warning (0, "387 instruction set disabled, using SSE arithmetics"); 2292 ix86_fpmath = FPMATH_SSE; 2293 } 2294 else 2295 ix86_fpmath = FPMATH_SSE | FPMATH_387; 2296 } 2297 else 2298 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string); 2299 } 2300 2301 /* If the i387 is disabled, then do not return values in it. */ 2302 if (!TARGET_80387) 2303 target_flags &= ~MASK_FLOAT_RETURNS; 2304 2305 if ((x86_accumulate_outgoing_args & TUNEMASK) 2306 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 2307 && !optimize_size) 2308 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 2309 2310 /* ??? Unwind info is not correct around the CFG unless either a frame 2311 pointer is present or M_A_O_A is set. Fixing this requires rewriting 2312 unwind info generation to be aware of the CFG and propagating states 2313 around edges. 
*/ 2314 if ((flag_unwind_tables || flag_asynchronous_unwind_tables 2315 || flag_exceptions || flag_non_call_exceptions) 2316 && flag_omit_frame_pointer 2317 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) 2318 { 2319 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 2320 warning (0, "unwind tables currently require either a frame pointer " 2321 "or -maccumulate-outgoing-args for correctness"); 2322 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 2323 } 2324 2325 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ 2326 { 2327 char *p; 2328 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); 2329 p = strchr (internal_label_prefix, 'X'); 2330 internal_label_prefix_len = p - internal_label_prefix; 2331 *p = '\0'; 2332 } 2333 2334 /* When scheduling description is not available, disable scheduler pass 2335 so it won't slow down the compilation and make x87 code slower. */ 2336 if (!TARGET_SCHEDULE) 2337 flag_schedule_insns_after_reload = flag_schedule_insns = 0; 2338} 2339 2340/* switch to the appropriate section for output of DECL. 2341 DECL is either a `VAR_DECL' node or a constant of some sort. 2342 RELOC indicates whether forming the initial value of DECL requires 2343 link-time relocations. 
*/ 2344 2345static section * 2346x86_64_elf_select_section (tree decl, int reloc, 2347 unsigned HOST_WIDE_INT align) 2348{ 2349 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) 2350 && ix86_in_large_data_p (decl)) 2351 { 2352 const char *sname = NULL; 2353 unsigned int flags = SECTION_WRITE; 2354 switch (categorize_decl_for_section (decl, reloc)) 2355 { 2356 case SECCAT_DATA: 2357 sname = ".ldata"; 2358 break; 2359 case SECCAT_DATA_REL: 2360 sname = ".ldata.rel"; 2361 break; 2362 case SECCAT_DATA_REL_LOCAL: 2363 sname = ".ldata.rel.local"; 2364 break; 2365 case SECCAT_DATA_REL_RO: 2366 sname = ".ldata.rel.ro"; 2367 break; 2368 case SECCAT_DATA_REL_RO_LOCAL: 2369 sname = ".ldata.rel.ro.local"; 2370 break; 2371 case SECCAT_BSS: 2372 sname = ".lbss"; 2373 flags |= SECTION_BSS; 2374 break; 2375 case SECCAT_RODATA: 2376 case SECCAT_RODATA_MERGE_STR: 2377 case SECCAT_RODATA_MERGE_STR_INIT: 2378 case SECCAT_RODATA_MERGE_CONST: 2379 sname = ".lrodata"; 2380 flags = 0; 2381 break; 2382 case SECCAT_SRODATA: 2383 case SECCAT_SDATA: 2384 case SECCAT_SBSS: 2385 gcc_unreachable (); 2386 case SECCAT_TEXT: 2387 case SECCAT_TDATA: 2388 case SECCAT_TBSS: 2389 /* We don't split these for medium model. Place them into 2390 default sections and hope for best. */ 2391 break; 2392 } 2393 if (sname) 2394 { 2395 /* We might get called with string constants, but get_named_section 2396 doesn't like them as they are not DECLs. Also, we need to set 2397 flags in that case. */ 2398 if (!DECL_P (decl)) 2399 return get_section (sname, flags, NULL); 2400 return get_named_section (decl, sname, reloc); 2401 } 2402 } 2403 return default_elf_select_section (decl, reloc, align); 2404} 2405 2406/* Build up a unique section name, expressed as a 2407 STRING_CST node, and assign it to DECL_SECTION_NAME (decl). 2408 RELOC indicates whether the initial value of EXP requires 2409 link-time relocations. 
*/ 2410 2411static void 2412x86_64_elf_unique_section (tree decl, int reloc) 2413{ 2414 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) 2415 && ix86_in_large_data_p (decl)) 2416 { 2417 const char *prefix = NULL; 2418 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */ 2419 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP; 2420 2421 switch (categorize_decl_for_section (decl, reloc)) 2422 { 2423 case SECCAT_DATA: 2424 case SECCAT_DATA_REL: 2425 case SECCAT_DATA_REL_LOCAL: 2426 case SECCAT_DATA_REL_RO: 2427 case SECCAT_DATA_REL_RO_LOCAL: 2428 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata."; 2429 break; 2430 case SECCAT_BSS: 2431 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss."; 2432 break; 2433 case SECCAT_RODATA: 2434 case SECCAT_RODATA_MERGE_STR: 2435 case SECCAT_RODATA_MERGE_STR_INIT: 2436 case SECCAT_RODATA_MERGE_CONST: 2437 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata."; 2438 break; 2439 case SECCAT_SRODATA: 2440 case SECCAT_SDATA: 2441 case SECCAT_SBSS: 2442 gcc_unreachable (); 2443 case SECCAT_TEXT: 2444 case SECCAT_TDATA: 2445 case SECCAT_TBSS: 2446 /* We don't split these for medium model. Place them into 2447 default sections and hope for best. */ 2448 break; 2449 } 2450 if (prefix) 2451 { 2452 const char *name; 2453 size_t nlen, plen; 2454 char *string; 2455 plen = strlen (prefix); 2456 2457 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); 2458 name = targetm.strip_name_encoding (name); 2459 nlen = strlen (name); 2460 2461 string = alloca (nlen + plen + 1); 2462 memcpy (string, prefix, plen); 2463 memcpy (string + plen, name, nlen + 1); 2464 2465 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string); 2466 return; 2467 } 2468 } 2469 default_unique_section (decl, reloc); 2470} 2471 2472#ifdef COMMON_ASM_OP 2473/* This says how to output assembler code to declare an 2474 uninitialized external linkage data object. 
   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  /* Objects above the -mlarge-data-threshold use .largecomm under the
     medium code models; otherwise the ordinary COMMON_ASM_OP.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fprintf (file, ".largecomm\t");
  else
    fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	   size, align / BITS_PER_UNIT);
}

/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  /* NOTE(review): DECL is in fact used below (section lookup and
     ASM_DECLARE_OBJECT_NAME); the ATTRIBUTE_UNUSED marker is stale
     but harmless.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
#endif

/* Adjust default optimization flags for optimization level LEVEL.
   SIZE (the -Os flag) is unused on this target.  */
void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  if (TARGET_MACHO)
    /* The Darwin libraries never set errno, so we might as well
       avoid calling them when that's the only reason we would.  */
    flag_errno_math = 0;

  /* The default values of these switches depend on the TARGET_64BIT
     that is not known at this moment.  Mark these values with 2 and
     let the user override these.  In case there is no command line option
     specifying them, we will set the defaults in override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
  SUBTARGET_OPTIMIZATION_OPTIONS;
#endif
}

/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true,  ix86_handle_cconv_attribute },
  /* force_align_arg_pointer says this function realigns the stack at entry.
 */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* Sentinel terminating the table.  */
  { NULL,        0, 0, false, false, false, NULL }
};

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree func;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  */
  if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* Find the function being called: the decl if we have one, otherwise
     the function type reached through the call expression's pointer.  */
  if (decl)
    func = decl;
  else
    {
      func = TREE_TYPE (TREE_OPERAND (exp, 0));
      if (POINTER_TYPE_P (func))
	func = TREE_TYPE (func);
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), func, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  /* If this call is indirect, we'll need to be able to use a call-clobbered
     register for the address of the target function.  Make sure that all
     such registers are not used for passing parameters.  */
  if (!decl && !TARGET_64BIT)
    {
      tree type;

      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = TREE_OPERAND (exp, 0);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */

      if (ix86_function_regparm (type, NULL) >= 3)
	{
	  /* ??? Need to count the actual number of registers to be used,
	     not the possible number of registers.  Fix later.  */
	  return false;
	}
    }

#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* Dllimport'd functions are also called indirectly.  */
  if (decl && DECL_DLLIMPORT_P (decl)
      && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
    return false;
#endif

  /* If we forced aligned the stack, then sibcalling would unalign the
     stack, which may break the called function.  */
  if (cfun->machine->force_align_arg_pointer)
    return false;

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}

/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
   calling convention attributes;
   arguments as in struct attribute_spec.handler.
 */

static tree
ix86_handle_cconv_attribute (tree *node, tree name,
			     tree args,
			     int flags ATTRIBUTE_UNUSED,
			     bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qs attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("regparm", name))
    {
      tree cst;

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qs attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}

      /* NOTE(review): CST is passed to compare_tree_int below even when
	 the INTEGER_CST check above failed (only a warning was issued) --
	 confirm a non-constant regparm argument cannot reach this path
	 when the force_align attribute is also present.  */
      if (!TARGET_64BIT
	  && lookup_attribute (ix86_force_align_arg_pointer_string,
			       TYPE_ATTRIBUTES (*node))
	  && compare_tree_int (cst, REGPARM_MAX-1))
	{
	  error ("%s functions limited to %d register parameters",
		 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
	}

      return NULL_TREE;
    }

  if (TARGET_64BIT)
    {
      /* The remaining attributes only affect the 32-bit ABI; ignore
	 them with a warning when compiling for 64-bit.  */
      warning (OPT_Wattributes, "%qs attribute ignored",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
  if (is_attribute_p ("fastcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and stdcall attributes are not compatible");
	}
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}
    }

  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and fastcall attributes are not compatible");
	}
    }

  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and cdecl attributes are not compatible");
	}
    }

  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (tree type1, tree type2)
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched fastcall/regparm types.
 */
  if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
       != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
      || (ix86_function_regparm (type1, NULL)
	  != ix86_function_regparm (type2, NULL)))
    return 0;

  /* Check for mismatched sseregparm types.  */
  if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
      != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
    return 0;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;

  return 1;
}

/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (tree type, tree decl)
{
  tree attr;
  int regparm = ix86_regparm;
  bool user_convention = false;

  if (!TARGET_64BIT)
    {
      /* An explicit regparm attribute overrides the -mregparm default.  */
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  user_convention = true;
	}

      /* fastcall implies two register parameters (ecx, edx).  */
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
	{
	  regparm = 2;
	  user_convention = true;
	}

      /* Use register calling convention for local functions when possible.  */
      if (!TARGET_64BIT && !user_convention && decl
	  && flag_unit_at_a_time && !profile_flag)
	{
	  struct cgraph_local_info *i = cgraph_local_info (decl);
	  if (i && i->local)
	    {
	      int local_regparm, globals = 0, regno;

	      /* Make sure no regparm register is taken by a global register
		 variable.  */
	      for (local_regparm = 0; local_regparm < 3; local_regparm++)
		if (global_regs[local_regparm])
		  break;
	      /* We can't use regparm(3) for nested functions as these use
		 static chain pointer in third argument.  */
	      if (local_regparm == 3
		  && decl_function_context (decl)
		  && !DECL_NO_STATIC_CHAIN (decl))
		local_regparm = 2;
	      /* If the function realigns its stack pointer, the
		 prologue will clobber %ecx.  If we've already
		 generated code for the callee, the callee
		 DECL_STRUCT_FUNCTION is gone, so we fall back to
		 scanning the attributes for the self-realigning
		 property.  */
	      if ((DECL_STRUCT_FUNCTION (decl)
		   && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
		  || (!DECL_STRUCT_FUNCTION (decl)
		      && lookup_attribute (ix86_force_align_arg_pointer_string,
					   TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
		local_regparm = 2;
	      /* Each global register variable increases register pressure,
		 so the more global reg vars there are, the smaller regparm
		 optimization use, unless requested by the user explicitly.  */
	      for (regno = 0; regno < 6; regno++)
		if (global_regs[regno])
		  globals++;
	      local_regparm
		= globals < local_regparm ? local_regparm - globals : 0;

	      if (local_regparm > regparm)
		regparm = local_regparm;
	    }
	}
    }
  return regparm;
}

/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (tree type, tree decl)
{
  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.
 */
  /* Honor -msseregparm or an explicit sseregparm attribute; both
     require SSE support to be meaningful.  */
  if (TARGET_SSEREGPARM
      || (type
	  && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (decl)
	    error ("Calling %qD with attribute sseregparm without "
		   "SSE/SSE2 enabled", decl);
	  else
	    error ("Calling %qT with attribute sseregparm without "
		   "SSE/SSE2 enabled", type);
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers,
     even for 32-bit targets.  */
  if (!TARGET_64BIT && decl
      && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
    {
      struct cgraph_local_info *i = cgraph_local_info (decl);
      if (i && i->local)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}

/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
}

/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.
   On the 80386, the RTD insn may be used to pop them if the number
     of args is fixed, but if the number is variable then the caller
     must pop them all.  RTD can't be used for library calls now
     because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall and fastcall functions will pop the stack if not
       variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
        || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    /* The callee pops everything only when the argument list is
       fixed (no trailing ellipsis).  */
    if (rtd
        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !TARGET_64BIT
      && !KEEP_AGGREGATE_RETURN_POINTER)
    {
      int nregs = ix86_function_regparm (funtype, fundecl);

      if (!nregs)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}

/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.
 */
bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
        return (regno < REGPARM_MAX
                || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
        return (regno < REGPARM_MAX
	        || (TARGET_MMX && MMX_REGNO_P (regno)
		    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
	        || (TARGET_SSE && SSE_REGNO_P (regno)
		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  /* 64-bit: accept SSE argument registers, then RAX (see below), then
     the integer parameter registers.  */
  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
        return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
          && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
        return true;
    }
  /* RAX is used as hidden argument to va_arg functions.  */
  if (!regno)
    return true;
  for (i = 0; i < REGPARM_MAX; i++)
    if (regno == x86_64_int_parameter_registers[i])
      return true;
  return false;
}

/* Return true if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.
 */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl)
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (TARGET_SSE)
    cum->sse_nregs = SSE_REGPARM_MAX;
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_sse = true;
  cum->warn_mmx = true;
  cum->maybe_vaarg = false;

  /* Use ecx and edx registers if function has fastcall attribute,
     else look for regparm information.  */
  if (fntype && !TARGET_64BIT)
    {
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
	{
	  cum->nregs = 2;
	  cum->fastcall = 1;
	}
      else
	cum->nregs = ix86_function_regparm (fntype, fndecl);
    }

  /* Set up the number of SSE registers used for passing SFmode
     and DFmode arguments.  Warn for mismatching ABI.  */
  cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers in 32-bit mode.  */

  if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    {
	      if (!TARGET_64BIT)
		{
		  cum->nregs = 0;
		  cum->sse_nregs = 0;
		  cum->mmx_nregs = 0;
		  cum->warn_sse = 0;
		  cum->warn_mmx = 0;
		  cum->fastcall = 0;
		  cum->float_in_sse = 0;
		}
	      cum->maybe_vaarg = true;
	    }
	}
    }
  /* No prototype at all also means possibly-variadic.  */
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = true;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}

/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.  */

static enum machine_mode
type_natural_mode (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.
 */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      return mode;

	  gcc_unreachable ();
	}
    }

  return mode;
}

/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      /* Wrap the MODE register in a one-element PARALLEL at offset 0
	 so the BLKmode value is described correctly.  */
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}

/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.
 */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}

/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.
*/

static int
classify_argument (enum machine_mode mode, tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  /* Number of 8-byte words occupied, including the sub-word bit offset.  */
  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.
*/ 3277 if (bytes < 0) 3278 return 0; 3279 3280 if (mode != VOIDmode 3281 && targetm.calls.must_pass_in_stack (mode, type)) 3282 return 0; 3283 3284 if (type && AGGREGATE_TYPE_P (type)) 3285 { 3286 int i; 3287 tree field; 3288 enum x86_64_reg_class subclasses[MAX_CLASSES]; 3289 3290 /* On x86-64 we pass structures larger than 16 bytes on the stack. */ 3291 if (bytes > 16) 3292 return 0; 3293 3294 for (i = 0; i < words; i++) 3295 classes[i] = X86_64_NO_CLASS; 3296 3297 /* Zero sized arrays or structures are NO_CLASS. We return 0 to 3298 signalize memory class, so handle it as special case. */ 3299 if (!words) 3300 { 3301 classes[0] = X86_64_NO_CLASS; 3302 return 1; 3303 } 3304 3305 /* Classify each field of record and merge classes. */ 3306 switch (TREE_CODE (type)) 3307 { 3308 case RECORD_TYPE: 3309 /* For classes first merge in the field of the subclasses. */ 3310 if (TYPE_BINFO (type)) 3311 { 3312 tree binfo, base_binfo; 3313 int basenum; 3314 3315 for (binfo = TYPE_BINFO (type), basenum = 0; 3316 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++) 3317 { 3318 int num; 3319 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8; 3320 tree type = BINFO_TYPE (base_binfo); 3321 3322 num = classify_argument (TYPE_MODE (type), 3323 type, subclasses, 3324 (offset + bit_offset) % 256); 3325 if (!num) 3326 return 0; 3327 for (i = 0; i < num; i++) 3328 { 3329 int pos = (offset + (bit_offset % 64)) / 8 / 8; 3330 classes[i + pos] = 3331 merge_classes (subclasses[i], classes[i + pos]); 3332 } 3333 } 3334 } 3335 /* And now merge the fields of structure. */ 3336 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 3337 { 3338 if (TREE_CODE (field) == FIELD_DECL) 3339 { 3340 int num; 3341 3342 if (TREE_TYPE (field) == error_mark_node) 3343 continue; 3344 3345 /* Bitfields are always classified as integer. Handle them 3346 early, since later code would consider them to be 3347 misaligned integers. 
*/ 3348 if (DECL_BIT_FIELD (field)) 3349 { 3350 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; 3351 i < ((int_bit_position (field) + (bit_offset % 64)) 3352 + tree_low_cst (DECL_SIZE (field), 0) 3353 + 63) / 8 / 8; i++) 3354 classes[i] = 3355 merge_classes (X86_64_INTEGER_CLASS, 3356 classes[i]); 3357 } 3358 else 3359 { 3360 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 3361 TREE_TYPE (field), subclasses, 3362 (int_bit_position (field) 3363 + bit_offset) % 256); 3364 if (!num) 3365 return 0; 3366 for (i = 0; i < num; i++) 3367 { 3368 int pos = 3369 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; 3370 classes[i + pos] = 3371 merge_classes (subclasses[i], classes[i + pos]); 3372 } 3373 } 3374 } 3375 } 3376 break; 3377 3378 case ARRAY_TYPE: 3379 /* Arrays are handled as small records. */ 3380 { 3381 int num; 3382 num = classify_argument (TYPE_MODE (TREE_TYPE (type)), 3383 TREE_TYPE (type), subclasses, bit_offset); 3384 if (!num) 3385 return 0; 3386 3387 /* The partial classes are now full classes. */ 3388 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) 3389 subclasses[0] = X86_64_SSE_CLASS; 3390 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4) 3391 subclasses[0] = X86_64_INTEGER_CLASS; 3392 3393 for (i = 0; i < words; i++) 3394 classes[i] = subclasses[i % num]; 3395 3396 break; 3397 } 3398 case UNION_TYPE: 3399 case QUAL_UNION_TYPE: 3400 /* Unions are similar to RECORD_TYPE but offset is always 0. 3401 */ 3402 3403 /* Unions are not derived. 
*/ 3404 gcc_assert (!TYPE_BINFO (type) 3405 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type))); 3406 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 3407 { 3408 if (TREE_CODE (field) == FIELD_DECL) 3409 { 3410 int num; 3411 3412 if (TREE_TYPE (field) == error_mark_node) 3413 continue; 3414 3415 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 3416 TREE_TYPE (field), subclasses, 3417 bit_offset); 3418 if (!num) 3419 return 0; 3420 for (i = 0; i < num; i++) 3421 classes[i] = merge_classes (subclasses[i], classes[i]); 3422 } 3423 } 3424 break; 3425 3426 default: 3427 gcc_unreachable (); 3428 } 3429 3430 /* Final merger cleanup. */ 3431 for (i = 0; i < words; i++) 3432 { 3433 /* If one class is MEMORY, everything should be passed in 3434 memory. */ 3435 if (classes[i] == X86_64_MEMORY_CLASS) 3436 return 0; 3437 3438 /* The X86_64_SSEUP_CLASS should be always preceded by 3439 X86_64_SSE_CLASS. */ 3440 if (classes[i] == X86_64_SSEUP_CLASS 3441 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS)) 3442 classes[i] = X86_64_SSE_CLASS; 3443 3444 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */ 3445 if (classes[i] == X86_64_X87UP_CLASS 3446 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS)) 3447 classes[i] = X86_64_SSE_CLASS; 3448 } 3449 return words; 3450 } 3451 3452 /* Compute alignment needed. We align all types to natural boundaries with 3453 exception of XFmode that is aligned to 64bits. */ 3454 if (mode != VOIDmode && mode != BLKmode) 3455 { 3456 int mode_alignment = GET_MODE_BITSIZE (mode); 3457 3458 if (mode == XFmode) 3459 mode_alignment = 128; 3460 else if (mode == XCmode) 3461 mode_alignment = 256; 3462 if (COMPLEX_MODE_P (mode)) 3463 mode_alignment /= 2; 3464 /* Misaligned fields are always returned in memory. 
*/ 3465 if (bit_offset % mode_alignment) 3466 return 0; 3467 } 3468 3469 /* for V1xx modes, just use the base mode */ 3470 if (VECTOR_MODE_P (mode) 3471 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes) 3472 mode = GET_MODE_INNER (mode); 3473 3474 /* Classification of atomic types. */ 3475 switch (mode) 3476 { 3477 case SDmode: 3478 case DDmode: 3479 classes[0] = X86_64_SSE_CLASS; 3480 return 1; 3481 case TDmode: 3482 classes[0] = X86_64_SSE_CLASS; 3483 classes[1] = X86_64_SSEUP_CLASS; 3484 return 2; 3485 case DImode: 3486 case SImode: 3487 case HImode: 3488 case QImode: 3489 case CSImode: 3490 case CHImode: 3491 case CQImode: 3492 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) 3493 classes[0] = X86_64_INTEGERSI_CLASS; 3494 else 3495 classes[0] = X86_64_INTEGER_CLASS; 3496 return 1; 3497 case CDImode: 3498 case TImode: 3499 classes[0] = classes[1] = X86_64_INTEGER_CLASS; 3500 return 2; 3501 case CTImode: 3502 return 0; 3503 case SFmode: 3504 if (!(bit_offset % 64)) 3505 classes[0] = X86_64_SSESF_CLASS; 3506 else 3507 classes[0] = X86_64_SSE_CLASS; 3508 return 1; 3509 case DFmode: 3510 classes[0] = X86_64_SSEDF_CLASS; 3511 return 1; 3512 case XFmode: 3513 classes[0] = X86_64_X87_CLASS; 3514 classes[1] = X86_64_X87UP_CLASS; 3515 return 2; 3516 case TFmode: 3517 classes[0] = X86_64_SSE_CLASS; 3518 classes[1] = X86_64_SSEUP_CLASS; 3519 return 2; 3520 case SCmode: 3521 classes[0] = X86_64_SSE_CLASS; 3522 return 1; 3523 case DCmode: 3524 classes[0] = X86_64_SSEDF_CLASS; 3525 classes[1] = X86_64_SSEDF_CLASS; 3526 return 2; 3527 case XCmode: 3528 classes[0] = X86_64_COMPLEX_X87_CLASS; 3529 return 1; 3530 case TCmode: 3531 /* This modes is larger than 16 bytes. 
 */
      return 0;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      /* 16-byte SSE vectors occupy an SSE word plus an SSEUP word.  */
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;
    default:
      /* Remaining vector modes: only integer-element vectors up to 16
	 bytes are expected here; they are classified as one or two
	 general-purpose (INTEGER) words.  */
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}

/* Examine the argument and return set number of register required in each
   class.  Return 0 iff parameter should be passed in memory.
   On success *INT_NREGS and *SSE_NREGS receive the number of
   general-purpose and SSE registers the argument consumes.  */
static int
examine_argument (enum machine_mode mode, tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class class[MAX_CLASSES];
  int n = classify_argument (mode, type, class, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (class[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	/* x87 classes are only usable for return values; as an
	   argument the value must go in memory.  */
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }
  return 1;
}

/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.
 */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
		     tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.
     Being static, each diagnostic below is emitted at most once per
     compilation.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
	fprintf (stderr, "Memory class\n");
      else
	{
	  fprintf (stderr, "Classes:");
	  for (i = 0; i < n; i++)
	    {
	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
	    }
	  fprintf (stderr, "\n");
	}
    }
  /* NULL means: pass in memory.  */
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
			 &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_80387 && in_return)
    for (i = 0; i < n; i++)
      if (class[i] == X86_64_X87_CLASS
	  || class[i] == X86_64_X87UP_CLASS
	  || class[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL: one EXPR_LIST
     (reg, byte-offset) pair per classified eightbyte.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (class[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode, *intreg),
					     GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (SFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (DFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  /* A following SSEUP word means this eightbyte and the next
	     form one 16-byte TImode register; skip the SSEUP entry.  */
	  if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
	    tmpmode = TImode;
	  else
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  if (tmpmode == TImode)
	    i++;
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
		      tree type, int named)
{
  int bytes =
    (mode == BLKmode) ?
 int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type);

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
	     "mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, cum->sse_nregs,
	     GET_MODE_NAME (mode), named);

  if (TARGET_64BIT)
    {
      int int_nregs, sse_nregs;
      /* Either the whole argument fits in the remaining registers, or
	 it goes entirely on the stack (advancing CUM->words).  */
      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
	cum->words += words;
      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
	{
	  cum->nregs -= int_nregs;
	  cum->sse_nregs -= sse_nregs;
	  cum->regno += int_nregs;
	  cum->sse_regno += sse_nregs;
	}
      else
	cum->words += words;
    }
  else
    {
      switch (mode)
	{
	default:
	  break;

	case BLKmode:
	  if (bytes < 0)
	    break;
	  /* FALLTHRU */

	case DImode:
	case SImode:
	case HImode:
	case QImode:
	  cum->words += words;
	  cum->nregs -= words;
	  cum->regno += words;

	  if (cum->nregs <= 0)
	    {
	      cum->nregs = 0;
	      cum->regno = 0;
	    }
	  break;

	/* DFmode deliberately falls through to SFmode and then to the
	   vector cases: float_in_sse >= 2 passes DFmode in SSE regs,
	   float_in_sse >= 1 passes SFmode in SSE regs.  */
	case DFmode:
	  if (cum->float_in_sse < 2)
	    break;
	case SFmode:
	  if (cum->float_in_sse < 1)
	    break;
	  /* FALLTHRU */

	case TImode:
	case V16QImode:
	case V8HImode:
	case V4SImode:
	case V2DImode:
	case V4SFmode:
	case V2DFmode:
	  if (!type || !AGGREGATE_TYPE_P (type))
	    {
	      cum->sse_words += words;
	      cum->sse_nregs -= 1;
	      cum->sse_regno += 1;
	      if (cum->sse_nregs <= 0)
		{
		  cum->sse_nregs = 0;
		  cum->sse_regno = 0;
		}
	    }
	  break;

	case V8QImode:
	case V4HImode:
	case V2SImode:
	case V2SFmode:
	  if (!type || !AGGREGATE_TYPE_P (type))
	    {
	      cum->mmx_words += words;
	      cum->mmx_nregs -= 1;
	      cum->mmx_regno += 1;
	      if (cum->mmx_nregs <= 0)
		{
		  cum->mmx_nregs = 0;
		  cum->mmx_regno = 0;
		}
	    }
	  break;
	}
    }
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
	      tree type, int named)
{
  enum machine_mode mode = orig_mode;
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  static bool warnedsse, warnedmmx;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type);

  /* Handle a hidden AL argument containing number of registers for varargs
     x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
     any AL settings.  */
  if (mode == VOIDmode)
    {
      if (TARGET_64BIT)
	return GEN_INT (cum->maybe_vaarg
			? (cum->sse_nregs < 0
			   ? SSE_REGPARM_MAX
			   : cum->sse_regno)
			: -1);
      else
	return constm1_rtx;
    }
  if (TARGET_64BIT)
    ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
			       cum->sse_nregs,
			       &x86_64_int_parameter_registers [cum->regno],
			       cum->sse_regno);
  else
    switch (mode)
      {
	/* For now, pass fp/complex values on the stack.
 */
      default:
	break;

      case BLKmode:
	if (bytes < 0)
	  break;
	/* FALLTHRU */
      case DImode:
      case SImode:
      case HImode:
      case QImode:
	if (words <= cum->nregs)
	  {
	    int regno = cum->regno;

	    /* Fastcall allocates the first two DWORD (SImode) or
	       smaller arguments to ECX and EDX.  */
	    if (cum->fastcall)
	      {
		if (mode == BLKmode || mode == DImode)
		  break;

		/* ECX not EAX is the first allocated register.  */
		if (regno == 0)
		  regno = 2;
	      }
	    ret = gen_rtx_REG (mode, regno);
	  }
	break;
      /* Fall-through chain mirrors function_arg_advance: DFmode and
	 SFmode join the SSE cases only when float_in_sse allows it.  */
      case DFmode:
	if (cum->float_in_sse < 2)
	  break;
      case SFmode:
	if (cum->float_in_sse < 1)
	  break;
	/* FALLTHRU */
      case TImode:
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
      case V4SFmode:
      case V2DFmode:
	if (!type || !AGGREGATE_TYPE_P (type))
	  {
	    if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	      {
		warnedsse = true;
		warning (0, "SSE vector argument without SSE enabled "
			 "changes the ABI");
	      }
	    if (cum->sse_nregs)
	      ret = gen_reg_or_parallel (mode, orig_mode,
					 cum->sse_regno + FIRST_SSE_REG);
	  }
	break;
      case V8QImode:
      case V4HImode:
      case V2SImode:
      case V2SFmode:
	if (!type || !AGGREGATE_TYPE_P (type))
	  {
	    if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	      {
		warnedmmx = true;
		warning (0, "MMX vector argument without MMX enabled "
			 "changes the ABI");
	      }
	    if (cum->mmx_nregs)
	      ret = gen_reg_or_parallel (mode, orig_mode,
					 cum->mmx_regno + FIRST_MMX_REG);
	  }
	break;
      }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	print_simple_rtl (stderr, ret);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}

/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			tree type, bool named ATTRIBUTE_UNUSED)
{
  if (!TARGET_64BIT)
    return 0;

  /* int_size_in_bytes returning -1 means the type's size is not a
     compile-time constant; such arguments go by reference.  */
  if (type && int_size_in_bytes (type) == -1)
    {
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference\n");
      return 1;
    }

  return 0;
}

/* Return true when TYPE should be 128bit aligned for 32bit argument passing
   ABI.  Only called if TARGET_SSE.  */
static bool
contains_128bit_aligned_vector_p (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (SSE_REG_MODE_P (mode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* First check the C++ base classes, if any.  */
	    if (TYPE_BINFO (type))
	      {
		tree binfo, base_binfo;
		int i;

		for (binfo = TYPE_BINFO (type), i = 0;
		     BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
		  if (contains_128bit_aligned_vector_p
		      (BINFO_TYPE (base_binfo)))
		    return true;
	      }
	    /* And now merge the fields of structure.
 */
	    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}

/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  The result is clamped to at most 128 bits.  */

int
ix86_function_arg_boundary (enum machine_mode mode, tree type)
{
  int align;
  if (type)
    align = TYPE_ALIGN (type);
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  if (!TARGET_64BIT)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!TARGET_SSE)
	align = PARM_BOUNDARY;
      else if (!type)
	{
	  if (!SSE_REG_MODE_P (mode))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!contains_128bit_aligned_vector_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > 128)
    align = 128;
  return align;
}

/* Return true if N is a possible register number of function value.
 */
bool
ix86_function_value_regno_p (int regno)
{
  /* Register 0 is %eax/%rax throughout; FIRST_FLOAT_REG and
     FIRST_SSE_REG are conditional on the FP-return and SSE settings.  */
  if (TARGET_MACHO)
    {
      if (!TARGET_64BIT)
	{
	  return ((regno) == 0
		  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
		  || ((regno) == FIRST_SSE_REG && TARGET_SSE));
	}
      return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
	      || ((regno) == FIRST_SSE_REG && TARGET_SSE)
	      || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
    }
  else
    {
      if (regno == 0
	  || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
	  || (regno == FIRST_SSE_REG && TARGET_SSE))
	return true;

      /* MM0 can hold return values only in 32-bit mode.  */
      if (!TARGET_64BIT
	  && (regno == FIRST_MMX_REG && TARGET_MMX))
	return true;

      return false;
    }
}

/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */
rtx
ix86_function_value (tree valtype, tree fntype_or_decl,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode natmode = type_natural_mode (valtype);

  if (TARGET_64BIT)
    {
      rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
				     1, REGPARM_MAX, SSE_REGPARM_MAX,
				     x86_64_int_return_registers, 0);
      /* For zero sized structures, construct_container return NULL, but we
	 need to keep rest of compiler happy by returning meaningful value.  */
      if (!ret)
	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
      return ret;
    }
  else
    {
      /* FNTYPE_OR_DECL may be either a FUNCTION_DECL or a function
	 type; normalize to a decl (if any) plus its type.  */
      tree fn = NULL_TREE, fntype;
      if (fntype_or_decl
	  && DECL_P (fntype_or_decl))
	fn = fntype_or_decl;
      fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
      return gen_rtx_REG (TYPE_MODE (valtype),
			  ix86_value_regno (natmode, fn, fntype));
    }
}

/* Return true iff type is returned in memory.
 */
int
ix86_return_in_memory (tree type)
{
  int needed_intregs, needed_sseregs, size;
  enum machine_mode mode = type_natural_mode (type);

  /* On x86-64 the ABI classification decides: memory iff
     examine_argument says the value cannot go in registers.  */
  if (TARGET_64BIT)
    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);

  if (mode == BLKmode)
    return 1;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return 0;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return 0;

      /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exists.  */
      if (size == 8)
	return (TARGET_MMX ? 0 : 1);

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return (TARGET_SSE ? 0 : 1);
    }

  if (mode == XFmode)
    return 0;

  if (mode == TDmode)
    return 1;

  if (size > 12)
    return 1;
  return 0;
}

/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  /* Warn at most once per compilation for each case.  */
  static bool warnedsse, warnedmmx;

  if (type)
    {
      /* Look at the return type of the function, not the function type.
 */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
	{
	  if (mode == TImode
	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
	    {
	      warnedsse = true;
	      warning (0, "SSE vector return without SSE enabled "
		       "changes the ABI");
	    }
	}

      if (!TARGET_MMX && !warnedmmx)
	{
	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector return without MMX enabled "
		       "changes the ABI");
	    }
	}
    }

  /* This hook only warns; it never supplies a struct-value rtx.  */
  return NULL;
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */
rtx
ix86_libcall_value (enum machine_mode mode)
{
  if (TARGET_64BIT)
    {
      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	case TFmode:
	case SDmode:
	case DDmode:
	case TDmode:
	  return gen_rtx_REG (mode, FIRST_SSE_REG);
	case XFmode:
	case XCmode:
	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
	case TCmode:
	  return NULL;
	default:
	  return gen_rtx_REG (mode, 0);
	}
    }
  else
    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
}

/* Given a mode, return the register to use for a return value.  */

static int
ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
{
  gcc_assert (!TARGET_64BIT);

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    return TARGET_MMX ? FIRST_MMX_REG : 0;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.
     However some ABIs
     may require the result to be returned like integer TImode.  */
  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    return TARGET_SSE ? FIRST_SSE_REG : 0;

  /* Decimal floating point values can go in %eax, unlike other float modes.  */
  if (DECIMAL_FLOAT_MODE_P (mode))
    return 0;

  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
  if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
    return 0;

  /* Floating point return values in %st(0), except for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((func || fntype)
      && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, func);
      if ((sse_level >= 1 && mode == SFmode)
	  || (sse_level == 2 && mode == DFmode))
	return FIRST_SSE_REG;
    }

  return FIRST_FLOAT_REG;
}

/* Create the va_list data type.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.
 */
  if (!TARGET_64BIT)
    return build_pointer_type (char_type_node);

  /* Build the x86-64 __va_list_tag record:
       { unsigned gp_offset; unsigned fp_offset;
	 void *overflow_arg_area; void *reg_save_area; }  */
  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}

/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  tree fntype;
  int stdarg_p;
  int i;

  /* Only x86-64 needs a register save area.  */
  if (!TARGET_64BIT)
    return;

  /* Nothing to do when va_list is never used for either kind of
     register.  */
  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
    return;

  /* Indicate to allocate space on the stack for varargs save area.
 */
  ix86_save_varrargs_registers = 1;

  cfun->stack_alignment_needed = 128;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  if (!no_rtl)
    save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  /* Save the unconsumed integer argument registers, but only as many
     as va_list_gpr_size says will actually be read back.  */
  for (i = next_cum.regno;
       i < ix86_regparm
       && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
       i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
					x86_64_int_parameter_registers[i]));
    }

  if (next_cum.sse_nregs && cfun->va_list_fpr_size)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function.  We use
	 sse_prologue_save insn template that produces computed jump across
	 SSE saves.  We need some preparation work to get this working.
 */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to :
	 label - 5*eax + nnamed_sse_arguments*5
	 (each save instruction in the template is a fixed-size unit, so
	 the landing point skips the saves for registers already used by
	 named arguments).  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      gen_rtx_MULT (Pmode, nsse_reg,
					    GEN_INT (4))));
      if (next_cum.sse_regno)
	emit_move_insn
	  (nsse_reg,
	   gen_rtx_CONST (DImode,
			  gen_rtx_PLUS (DImode,
					label_ref,
					GEN_INT (next_cum.sse_regno * 4))));
      else
	emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use pointer
	 pointing 127 bytes after first byte to store - this is needed to keep
	 instruction size limited by 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      plus_constant (save_area,
					     8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
					GEN_INT (next_cum.sse_regno), label));
    }

}

/* Implement va_start.  */

void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;

  /* Only 64bit target needs something special.
 */
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  /* Pick apart the __va_list_tag fields built by
     ix86_build_builtin_va_list; their chain order fixes which field
     is which.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
	     (int) words, (int) n_gpr, (int) n_fpr);

  if (cfun->va_list_gpr_size)
    {
      /* gp_offset = n_gpr * 8: byte offset of the first unused GP
	 register slot in the register save area.  */
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type, gpr,
		  build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (cfun->va_list_fpr_size)
    {
      /* fp_offset starts past all REGPARM_MAX 8-byte GP slots; each
	 SSE slot is 16 bytes.  */
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.
 */
  type = TREE_TYPE (ovf);
  t = make_tree (type, virtual_incoming_args_rtx);
  if (words != 0)
    t = build2 (PLUS_EXPR, type, t,
		build_int_cst (type, words * UNITS_PER_WORD));
  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
    {
      /* Find the register save area.
	 Prologue of the function save it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}

/* Implement va_arg.  */

tree
ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
{
  /* The six x86-64 integer argument registers, in parameter order.  */
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;

  /* Only 64bit target needs something special.
*/ 4638 if (!TARGET_64BIT) 4639 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); 4640 4641 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); 4642 f_fpr = TREE_CHAIN (f_gpr); 4643 f_ovf = TREE_CHAIN (f_fpr); 4644 f_sav = TREE_CHAIN (f_ovf); 4645 4646 valist = build_va_arg_indirect_ref (valist); 4647 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE); 4648 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); 4649 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); 4650 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); 4651 4652 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); 4653 if (indirect_p) 4654 type = build_pointer_type (type); 4655 size = int_size_in_bytes (type); 4656 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 4657 4658 nat_mode = type_natural_mode (type); 4659 container = construct_container (nat_mode, TYPE_MODE (type), type, 0, 4660 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0); 4661 4662 /* Pull the value out of the saved registers. */ 4663 4664 addr = create_tmp_var (ptr_type_node, "addr"); 4665 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set (); 4666 4667 if (container) 4668 { 4669 int needed_intregs, needed_sseregs; 4670 bool need_temp; 4671 tree int_addr, sse_addr; 4672 4673 lab_false = create_artificial_label (); 4674 lab_over = create_artificial_label (); 4675 4676 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs); 4677 4678 need_temp = (!REG_P (container) 4679 && ((needed_intregs && TYPE_ALIGN (type) > 64) 4680 || TYPE_ALIGN (type) > 128)); 4681 4682 /* In case we are passing structure, verify that it is consecutive block 4683 on the register save area. If not we need to do moves. 
*/ 4684 if (!need_temp && !REG_P (container)) 4685 { 4686 /* Verify that all registers are strictly consecutive */ 4687 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0)))) 4688 { 4689 int i; 4690 4691 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) 4692 { 4693 rtx slot = XVECEXP (container, 0, i); 4694 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i 4695 || INTVAL (XEXP (slot, 1)) != i * 16) 4696 need_temp = 1; 4697 } 4698 } 4699 else 4700 { 4701 int i; 4702 4703 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) 4704 { 4705 rtx slot = XVECEXP (container, 0, i); 4706 if (REGNO (XEXP (slot, 0)) != (unsigned int) i 4707 || INTVAL (XEXP (slot, 1)) != i * 8) 4708 need_temp = 1; 4709 } 4710 } 4711 } 4712 if (!need_temp) 4713 { 4714 int_addr = addr; 4715 sse_addr = addr; 4716 } 4717 else 4718 { 4719 int_addr = create_tmp_var (ptr_type_node, "int_addr"); 4720 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set (); 4721 sse_addr = create_tmp_var (ptr_type_node, "sse_addr"); 4722 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set (); 4723 } 4724 4725 /* First ensure that we fit completely in registers. */ 4726 if (needed_intregs) 4727 { 4728 t = build_int_cst (TREE_TYPE (gpr), 4729 (REGPARM_MAX - needed_intregs + 1) * 8); 4730 t = build2 (GE_EXPR, boolean_type_node, gpr, t); 4731 t2 = build1 (GOTO_EXPR, void_type_node, lab_false); 4732 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); 4733 gimplify_and_add (t, pre_p); 4734 } 4735 if (needed_sseregs) 4736 { 4737 t = build_int_cst (TREE_TYPE (fpr), 4738 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16 4739 + REGPARM_MAX * 8); 4740 t = build2 (GE_EXPR, boolean_type_node, fpr, t); 4741 t2 = build1 (GOTO_EXPR, void_type_node, lab_false); 4742 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); 4743 gimplify_and_add (t, pre_p); 4744 } 4745 4746 /* Compute index to start of area used for integer regs. 
*/ 4747 if (needed_intregs) 4748 { 4749 /* int_addr = gpr + sav; */ 4750 t = fold_convert (ptr_type_node, gpr); 4751 t = build2 (PLUS_EXPR, ptr_type_node, sav, t); 4752 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t); 4753 gimplify_and_add (t, pre_p); 4754 } 4755 if (needed_sseregs) 4756 { 4757 /* sse_addr = fpr + sav; */ 4758 t = fold_convert (ptr_type_node, fpr); 4759 t = build2 (PLUS_EXPR, ptr_type_node, sav, t); 4760 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t); 4761 gimplify_and_add (t, pre_p); 4762 } 4763 if (need_temp) 4764 { 4765 int i; 4766 tree temp = create_tmp_var (type, "va_arg_tmp"); 4767 4768 /* addr = &temp; */ 4769 t = build1 (ADDR_EXPR, build_pointer_type (type), temp); 4770 t = build2 (MODIFY_EXPR, void_type_node, addr, t); 4771 gimplify_and_add (t, pre_p); 4772 4773 for (i = 0; i < XVECLEN (container, 0); i++) 4774 { 4775 rtx slot = XVECEXP (container, 0, i); 4776 rtx reg = XEXP (slot, 0); 4777 enum machine_mode mode = GET_MODE (reg); 4778 tree piece_type = lang_hooks.types.type_for_mode (mode, 1); 4779 tree addr_type = build_pointer_type (piece_type); 4780 tree src_addr, src; 4781 int src_offset; 4782 tree dest_addr, dest; 4783 4784 if (SSE_REGNO_P (REGNO (reg))) 4785 { 4786 src_addr = sse_addr; 4787 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16; 4788 } 4789 else 4790 { 4791 src_addr = int_addr; 4792 src_offset = REGNO (reg) * 8; 4793 } 4794 src_addr = fold_convert (addr_type, src_addr); 4795 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr, 4796 size_int (src_offset))); 4797 src = build_va_arg_indirect_ref (src_addr); 4798 4799 dest_addr = fold_convert (addr_type, addr); 4800 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr, 4801 size_int (INTVAL (XEXP (slot, 1))))); 4802 dest = build_va_arg_indirect_ref (dest_addr); 4803 4804 t = build2 (MODIFY_EXPR, void_type_node, dest, src); 4805 gimplify_and_add (t, pre_p); 4806 } 4807 } 4808 4809 if (needed_intregs) 4810 { 4811 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr, 
4812 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8)); 4813 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t); 4814 gimplify_and_add (t, pre_p); 4815 } 4816 if (needed_sseregs) 4817 { 4818 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr, 4819 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16)); 4820 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t); 4821 gimplify_and_add (t, pre_p); 4822 } 4823 4824 t = build1 (GOTO_EXPR, void_type_node, lab_over); 4825 gimplify_and_add (t, pre_p); 4826 4827 t = build1 (LABEL_EXPR, void_type_node, lab_false); 4828 append_to_statement_list (t, pre_p); 4829 } 4830 4831 /* ... otherwise out of the overflow area. */ 4832 4833 /* Care for on-stack alignment if needed. */ 4834 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64 4835 || integer_zerop (TYPE_SIZE (type))) 4836 t = ovf; 4837 else 4838 { 4839 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8; 4840 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf, 4841 build_int_cst (TREE_TYPE (ovf), align - 1)); 4842 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, 4843 build_int_cst (TREE_TYPE (t), -align)); 4844 } 4845 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); 4846 4847 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t); 4848 gimplify_and_add (t2, pre_p); 4849 4850 t = build2 (PLUS_EXPR, TREE_TYPE (t), t, 4851 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD)); 4852 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t); 4853 gimplify_and_add (t, pre_p); 4854 4855 if (container) 4856 { 4857 t = build1 (LABEL_EXPR, void_type_node, lab_over); 4858 append_to_statement_list (t, pre_p); 4859 } 4860 4861 ptrtype = build_pointer_type (type); 4862 addr = fold_convert (ptrtype, addr); 4863 4864 if (indirect_p) 4865 addr = build_va_arg_indirect_ref (addr); 4866 return build_va_arg_indirect_ref (addr); 4867} 4868 4869/* Return nonzero if OPNUM's MEM should be matched 4870 in movabs* patterns. 
 */

/* INSN is a (possibly PARALLEL-wrapped) SET; OPNUM selects its source
   or destination operand.  Strips SUBREGs, asserts the operand really
   is a MEM, and accepts it unless it is volatile while volatile
   memory operands are currently disallowed (volatile_ok clear).  */
int
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (GET_CODE (mem) == MEM);
  return (volatile_ok || !MEM_VOLATILE_P (mem));
}

/* Initialize the table of extra 80387 mathematical constants.  */

/* Parses the decimal strings below into ext_80387_constants_table and
   marks the table initialized.  Idempotent; called lazily by the
   standard_80387_constant_* routines.  */
static void
init_ext_80387_constants (void)
{
  /* Table order must match the idx - 3 indexing used by
     standard_80387_constant_p/_rtx and the opcodes in
     standard_80387_constant_opcode.  */
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi  */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}

/* Return true if the constant is something that can be loaded with
   a special instruction.  */

/* Classify X for 80387 load instructions.  Returns:
     -1  X is not a floating-point CONST_DOUBLE;
      0  no special instruction loads X;
      1  zero (fldz);
      2  one (fld1);
    3-7  one of the extended constants (index + 3 into
	 ext_80387_constants_table), considered only for XFmode and
	 only when optimizing for size or the CPU is tuned for them.  */
int
standard_80387_constant_p (rtx x)
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;

  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (GET_MODE (x) == XFmode
      && (optimize_size || x86_ext_80387_constants & TUNEMASK))
    {
      REAL_VALUE_TYPE r;
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

/* X must satisfy standard_80387_constant_p () > 0; the case values
   mirror that function's return codes.  Aborts otherwise.  */
const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    default:
      gcc_unreachable ();
    }
}

/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.
   Only the extended-constant codes 3..7 are valid here; fldz/fld1
   (codes 1 and 2) have no table entry and abort.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
				       XFmode);
}

/* Return 1 if mode is a valid mode for sse.  */
static int
standard_sse_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      return 1;

    default:
      return 0;
    }
}

/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
 */
/* Classify constant X for register-only SSE materialization:
      1  all-zeros (loadable with a self-xor);
      2  all-ones in a valid SSE vector mode, needs SSE2 (pcmpeqd);
     -1  all-ones but SSE2 unavailable;
      0  anything else (must come from memory).  */
int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode)
      && standard_sse_mode_p (mode))
    return TARGET_SSE2 ? 2 : -1;

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

/* X must have standard_sse_constant_p () > 0.  For zeros the xor
   flavor is chosen from INSN's mode attribute so the instruction
   matches the operand's vector domain; aborts for other constants.  */
const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      if (get_attr_mode (insn) == MODE_V4SF)
	return "xorps\t%0, %0";
      else if (get_attr_mode (insn) == MODE_V2DF)
	return "xorpd\t%0, %0";
      else
	return "pxor\t%0, %0";
    case 2:
      return "pcmpeqd\t%0, %0";
    }
  gcc_unreachable ();
}

/* Returns 1 if OP contains a symbol reference */

/* Recursively walks OP's rtx structure (both 'e' sub-expressions and
   'E' vectors) looking for a SYMBOL_REF or LABEL_REF anywhere inside;
   returns 0 when none is found.  */
int
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.  */

int
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32 pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  /* A bare `ret' is only possible when nothing was allocated and no
     registers need restoring.  */
  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}

/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return 1;

  /* Profiling also forces a frame pointer.  */
  if (current_function_profile)
    return 1;

  return 0;
}

/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

/* Hidden COMDAT pc-thunk functions are emitted only when the assembler
   supports .hidden and one-only sections (or on Mach-O).  */
#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Bitmask of register numbers whose get-pc thunk has been referenced;
   ix86_file_end emits a thunk body for every bit set here.  */
static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.
 */

/* NAME receives either the shared "__i686.get_pc_thunk.<reg>" symbol
   (hidden-linkonce case) or a file-local "LPR<regno>" label.
   Thunks are a 32-bit-only mechanism.  */
static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}


/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

/* TARGET_ASM_FILE_END hook: emit the body of every pc thunk recorded
   in pic_labels_used (mov (%esp), %reg; ret), choosing a Mach-O
   weak-definition, a hidden one-only section, or plain text section
   depending on the target, then the exec-stack marker if needed.  */
void
ix86_file_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      /* Only registers whose thunk was actually referenced.  */
      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  switch_to_section (darwin_sections[text_coal_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n", asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  tree decl;

	  /* Build a dummy FUNCTION_DECL so the thunk can live in its
	     own one-only section and be merged across object files.  */
	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      /* The thunk body: load the caller's return address (at the top
	 of the stack) into REGNO and return.  */
      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}

/* Emit code for the SET_GOT patterns.  */

/* Returns the assembler template that loads the GOT base address into
   DEST.  Without deep branch prediction (or without PIC) it uses an
   inline call-next-insn/pop sequence; otherwise it calls the per-register
   pc thunk (recording its use in pic_labels_used) and adds the
   _GLOBAL_OFFSET_TABLE_ displacement.  LABEL, when non-NULL, is the
   label to bind to the load point.  */
const char *
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
{
  rtx xops[3];

  xops[0] = dest;
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

      if (!flag_pic)
	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
	/* call to the immediately following label pushes the pc.  */
	output_asm_insn ("call\t%a2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif

      (*targetm.asm_out.internal_label) (asm_out_file, "L",
				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      if (flag_pic)
	output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referenced by the Mach-O PIC subsystem.  */
#if TARGET_MACHO
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
      else
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));
#endif
    }

  if (TARGET_MACHO)
    return "";

  /* Turn the loaded pc into the GOT base address.  */
  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);

  return "";
}

/* Generate an "push" pattern for input ARG.  */

/* Builds the SET rtx (mem:(pre_dec sp)) <- ARG, i.e. a push, without
   emitting it.  */
static rtx
gen_push (rtx arg)
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (Pmode,
				   gen_rtx_PRE_DEC (Pmode,
						    stack_pointer_rtx)),
		      arg);
}

/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

/* Scans eax/ecx/edx (regs 0-2, highest first) for one never used in
   the function; only safe in leaf, non-profiled functions that don't
   call a TLS descriptor.  Returns INVALID_REGNUM when none qualifies.  */
static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (current_function_is_leaf && !current_function_profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i;
      for (i = 2; i >= 0; --i)
	if (!regs_ever_live[i])
	  return i;
    }

  return INVALID_REGNUM;
}

/* Return 1 if we need to save REGNO.
 */
/* Decide whether REGNO must be saved in the prologue.  The PIC
   register is saved when it is live for PIC purposes and no alternate
   scratch register can carry it; EH data registers are saved when
   MAYBE_EH_RETURN and the function calls __builtin_eh_return; the
   forced-alignment argument pointer is always saved; otherwise the
   usual live/call-saved/non-fixed test applies (excluding the hard
   frame pointer when it is in use as such).  */
static int
ix86_save_reg (unsigned int regno, int maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile
	  || current_function_calls_eh_return
	  || current_function_uses_const_pool))
    {
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
	return 0;
      return 1;
    }

  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return 1;
	}
    }

  if (cfun->machine->force_align_arg_pointer
      && regno == REGNO (cfun->machine->force_align_arg_pointer))
    return 1;

  return (regs_ever_live[regno]
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}

/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  Valid (FROM, TO) pairs
   are arg/frame pointer to hard frame pointer or stack pointer; all
   offsets are read from the freshly computed frame layout.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}

/* Fill structure ix86_frame about frame of currently computed function.
   Layout, from the incoming args downward: return address (and saved
   ebp if any), register save area, varargs save area, padding1, local
   frame, outgoing args, padding2; the red zone, when usable, is carved
   off the amount the prologue must actually allocate.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  HOST_WIDE_INT total_size;
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  total_size = size;

  stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
  if (!optimize_size
      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;

      cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
        cfun->machine->use_fast_prologue_epilogue = false;
      else
        cfun->machine->use_fast_prologue_epilogue
	   = !expensive_function_p (count);
    }
  if (TARGET_PROLOGUE_USING_MOVE
      && cfun->machine->use_fast_prologue_epilogue)
    frame->save_regs_using_mov = true;
  else
    frame->save_regs_using_mov = false;


  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since i386 port is the only using those features
     that may break easily.  */

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (stack_alignment_needed
	      <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
		     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last current_function_outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || current_function_calls_alloca
	  || ix86_current_function_calls_tls_descriptor))
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || current_function_calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    frame->padding2 = ((offset + preferred_alignment - 1)
		       & -preferred_alignment) - offset;
  else
    frame->padding2 = 0;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  /* Moves aren't worthwhile for tiny frames, and on 64-bit they can't
     address offsets that don't fit in a signed 32-bit displacement.  */
  if ((!frame->to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
	   frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}

/* Emit code to save registers in the prologue.  */

/* Pushes every register ix86_save_reg selects, highest regno first,
   marking each push frame-related for dwarf2 unwind info.  */
static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
    if (ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to save registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  Saves the same set of
   registers as ix86_emit_save_regs but with stores at increasing
   word offsets (lowest regno first), each marked frame-related.  */
static void
ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
{
  unsigned int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
					       Pmode, offset),
			       gen_rtx_REG (Pmode, regno));
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
      }
}

/* Expand prologue or epilogue stack adjustment.
   The pattern exist to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
{
  rtx insn;

  if (! TARGET_64BIT)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
  else
    {
      /* Offset doesn't fit a sign-extended 32-bit immediate: stage it
	 through %r11, which STYLE guarantees is free here.  */
      rtx r11;
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  ATM indirect sibcall
	 shouldn't be used together with huge frame sizes in one
	 function because of the frame_size check in sibcall.c.  */
      gcc_assert (style);
      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
      if (style < 0)
	RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
							       offset));
    }
  if (style < 0)
    RTX_FRAME_RELATED_P (insn) = 1;
}

/* Handle the TARGET_INTERNAL_ARG_POINTER hook.
   Returns the rtx used to reference incoming arguments.  When stack
   realignment is requested (force-alignment of main, -mstackrealign,
   or the per-function attribute), a pseudo copied from %ecx is used
   instead of the virtual incoming-args pointer; nested functions
   cannot realign and get a warning/error plus the default pointer.  */

static rtx
ix86_internal_arg_pointer (void)
{
  bool has_force_align_arg_pointer =
    (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
			    TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
  if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
       && DECL_NAME (current_function_decl)
       && MAIN_NAME_P (DECL_NAME (current_function_decl))
       && DECL_FILE_SCOPE_P (current_function_decl))
      || ix86_force_align_arg_pointer
      || has_force_align_arg_pointer)
    {
      /* Nested functions can't realign the stack due to a register
	 conflict.  */
      if (DECL_CONTEXT (current_function_decl)
	  && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
	{
	  if (ix86_force_align_arg_pointer)
	    warning (0, "-mstackrealign ignored for nested functions");
	  if (has_force_align_arg_pointer)
	    error ("%s not supported for nested functions",
		   ix86_force_align_arg_pointer_string);
	  return virtual_incoming_args_rtx;
	}
      cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
      return copy_to_reg (cfun->machine->force_align_arg_pointer);
    }
  else
    return virtual_incoming_args_rtx;
}

/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
   This is called from dwarf2out.c to emit call frame instructions
   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs.
 */
/* PATTERN is a SET whose source is an UNSPEC; INDEX is the unspec
   number.  Dispatches to the dwarf2out routine that records either a
   register save (UNSPEC_REG_SAVE) or a CFA definition (UNSPEC_DEF_CFA)
   at position LABEL; any other unspec is a bug.  */
static void
ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

  switch (index)
    {
    case UNSPEC_REG_SAVE:
      dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
			      SET_DEST (pattern));
      break;
    case UNSPEC_DEF_CFA:
      dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
			 INTVAL (XVECEXP (unspec, 0, 0)));
      break;
    default:
      gcc_unreachable ();
    }
}

/* Expand the prologue into a bunch of separate insns.
   (NOTE: this function continues beyond the end of this source view.)  */

void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  if (cfun->machine->force_align_arg_pointer)
    {
      rtx x, y;

      /* Grab the argument pointer.  */
      x = plus_constant (stack_pointer_rtx, 4);
      y = cfun->machine->force_align_arg_pointer;
      insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* The unwind info consists of two parts: install the fafp as the cfa,
	 and record the fafp as the "save register" of the stack pointer.
	 The later is there in order that the unwinder can see where it
	 should restore the stack pointer across the and insn.  */
      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
      x = gen_rtx_SET (VOIDmode, y, x);
      RTX_FRAME_RELATED_P (x) = 1;
      y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
			  UNSPEC_REG_SAVE);
      y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
      RTX_FRAME_RELATED_P (y) = 1;
      x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
      REG_NOTES (insn) = x;

      /* Align the stack.
*/ 5743 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx, 5744 GEN_INT (-16))); 5745 5746 /* And here we cheat like madmen with the unwind info. We force the 5747 cfa register back to sp+4, which is exactly what it was at the 5748 start of the function. Re-pushing the return address results in 5749 the return at the same spot relative to the cfa, and thus is 5750 correct wrt the unwind info. */ 5751 x = cfun->machine->force_align_arg_pointer; 5752 x = gen_frame_mem (Pmode, plus_constant (x, -4)); 5753 insn = emit_insn (gen_push (x)); 5754 RTX_FRAME_RELATED_P (insn) = 1; 5755 5756 x = GEN_INT (4); 5757 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA); 5758 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x); 5759 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL); 5760 REG_NOTES (insn) = x; 5761 } 5762 5763 /* Note: AT&T enter does NOT have reversed args. Enter is probably 5764 slower on all targets. Also sdb doesn't like it. */ 5765 5766 if (frame_pointer_needed) 5767 { 5768 insn = emit_insn (gen_push (hard_frame_pointer_rtx)); 5769 RTX_FRAME_RELATED_P (insn) = 1; 5770 5771 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); 5772 RTX_FRAME_RELATED_P (insn) = 1; 5773 } 5774 5775 allocate = frame.to_allocate; 5776 5777 if (!frame.save_regs_using_mov) 5778 ix86_emit_save_regs (); 5779 else 5780 allocate += frame.nregs * UNITS_PER_WORD; 5781 5782 /* When using red zone we may start register saving before allocating 5783 the stack frame saving one cycle of the prologue. */ 5784 if (TARGET_RED_ZONE && frame.save_regs_using_mov) 5785 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx 5786 : stack_pointer_rtx, 5787 -frame.nregs * UNITS_PER_WORD); 5788 5789 if (allocate == 0) 5790 ; 5791 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT) 5792 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 5793 GEN_INT (-allocate), -1); 5794 else 5795 { 5796 /* Only valid for Win32. 
*/ 5797 rtx eax = gen_rtx_REG (SImode, 0); 5798 bool eax_live = ix86_eax_live_at_start_p (); 5799 rtx t; 5800 5801 gcc_assert (!TARGET_64BIT); 5802 5803 if (eax_live) 5804 { 5805 emit_insn (gen_push (eax)); 5806 allocate -= 4; 5807 } 5808 5809 emit_move_insn (eax, GEN_INT (allocate)); 5810 5811 insn = emit_insn (gen_allocate_stack_worker (eax)); 5812 RTX_FRAME_RELATED_P (insn) = 1; 5813 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate)); 5814 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t); 5815 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, 5816 t, REG_NOTES (insn)); 5817 5818 if (eax_live) 5819 { 5820 if (frame_pointer_needed) 5821 t = plus_constant (hard_frame_pointer_rtx, 5822 allocate 5823 - frame.to_allocate 5824 - frame.nregs * UNITS_PER_WORD); 5825 else 5826 t = plus_constant (stack_pointer_rtx, allocate); 5827 emit_move_insn (eax, gen_rtx_MEM (SImode, t)); 5828 } 5829 } 5830 5831 if (frame.save_regs_using_mov && !TARGET_RED_ZONE) 5832 { 5833 if (!frame_pointer_needed || !frame.to_allocate) 5834 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate); 5835 else 5836 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx, 5837 -frame.nregs * UNITS_PER_WORD); 5838 } 5839 5840 pic_reg_used = false; 5841 if (pic_offset_table_rtx 5842 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM] 5843 || current_function_profile)) 5844 { 5845 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum (); 5846 5847 if (alt_pic_reg_used != INVALID_REGNUM) 5848 REGNO (pic_offset_table_rtx) = alt_pic_reg_used; 5849 5850 pic_reg_used = true; 5851 } 5852 5853 if (pic_reg_used) 5854 { 5855 if (TARGET_64BIT) 5856 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); 5857 else 5858 insn = emit_insn (gen_set_got (pic_offset_table_rtx)); 5859 5860 /* Even with accurate pre-reload life analysis, we can wind up 5861 deleting all references to the pic register after reload. 
5862 Consider if cross-jumping unifies two sides of a branch 5863 controlled by a comparison vs the only read from a global. 5864 In which case, allow the set_got to be deleted, though we're 5865 too late to do anything about the ebx save in the prologue. */ 5866 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL); 5867 } 5868 5869 /* Prevent function calls from be scheduled before the call to mcount. 5870 In the pic_reg_used case, make sure that the got load isn't deleted. */ 5871 if (current_function_profile) 5872 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx)); 5873} 5874 5875/* Emit code to restore saved registers using MOV insns. First register 5876 is restored from POINTER + OFFSET. */ 5877static void 5878ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, 5879 int maybe_eh_return) 5880{ 5881 int regno; 5882 rtx base_address = gen_rtx_MEM (Pmode, pointer); 5883 5884 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 5885 if (ix86_save_reg (regno, maybe_eh_return)) 5886 { 5887 /* Ensure that adjust_address won't be forced to produce pointer 5888 out of range allowed by x86-64 instruction set. */ 5889 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode)) 5890 { 5891 rtx r11; 5892 5893 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */); 5894 emit_move_insn (r11, GEN_INT (offset)); 5895 emit_insn (gen_adddi3 (r11, r11, pointer)); 5896 base_address = gen_rtx_MEM (Pmode, r11); 5897 offset = 0; 5898 } 5899 emit_move_insn (gen_rtx_REG (Pmode, regno), 5900 adjust_address (base_address, Pmode, offset)); 5901 offset += UNITS_PER_WORD; 5902 } 5903} 5904 5905/* Restore function stack, frame, and registers. 
 */

/* Expand the epilogue.  STYLE selects the flavor: 0 means a sibcall
   epilogue (no return insn is emitted), 2 means an eh_return epilogue
   (the stack pointer is additionally adjusted by EH_RETURN_STACKADJ_RTX),
   any other value is a normal return.  */
void
ix86_expand_epilogue (int style)
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style);
	    }
	  else
	    {
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate
					    + frame.nregs * UNITS_PER_WORD),
				   style);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
	       || !cfun->machine->use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style);
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  gcc_assert (frame_pointer_needed);
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     GEN_INT (offset), style);
	}
      else if (frame.to_allocate)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate), style);

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Undo the prologue's stack realignment by restoring %esp from the
     saved fake argument pointer (minus the re-pushed return address).  */
  if (cfun->machine->force_align_arg_pointer)
    {
      emit_insn (gen_addsi3 (stack_pointer_rtx,
			     cfun->machine->force_align_arg_pointer,
			     GEN_INT (-4)));
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in 64bit ABI.  */
	  gcc_assert (!TARGET_64BIT);

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}

/* Reset from the function's potential modifications.  Undoes any
   retargeting of the PIC register done by ix86_expand_prologue.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx insn = get_last_insn ();
    while (insn
	   && NOTE_P (insn)
	   && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
      insn = PREV_INSN (insn);
    if (insn
	&& (LABEL_P (insn)
	    || (NOTE_P (insn)
		&& NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
  }
#endif

}

/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.
 */

/* Decompose ADDR into base, index, displacement, scale, and segment
   parts, storing the result in *OUT.  Returns 1 for a structurally
   valid address, 0 on rejection, and -1 when ADDR contains ASHIFT
   (acceptable only for lea length computation, per the comment above).  */
int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      /* Flatten a left-leaning tree of at most four PLUS operands,
	 then classify each addend.  */
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case UNSPEC:
	      /* UNSPEC_TP marks a thread-pointer reference, encoded as
		 an %fs/%gs segment override.  */
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
	      else
		return 0;
	      break;

	    case REG:
	    case SUBREG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Allow arg pointer and stack pointer as index if there is no scaling;
     they cannot be encoded in the index slot, so swap them into base.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
	  || index_reg == frame_pointer_rtx
	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      rtx tmp;
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base_reg == hard_frame_pointer_rtx
       || base_reg == frame_pointer_rtx
       || base_reg == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base_reg && !index_reg && !disp
      && REG_P (base_reg)
      && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}

/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;
  if (parts.seg != SEG_DEFAULT)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case  may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}

/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      /* Peel CONST (PLUS (UNSPEC_GOTPCREL [sym], const_int)) down to
	 the symbol or label it wraps.  */
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || XINT (term, 1) != UNSPEC_GOTPCREL)
	return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
	  && GET_CODE (term) != LABEL_REF)
	return x;

      return term;
    }

  term = ix86_delegitimize_address (x);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}

/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   use.
 */

static bool
darwin_local_data_pic (rtx disp)
{
  if (GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
	  || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
	  {
	    /* The Darwin pic base is spelled with this magic symbol name.  */
	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
	    if (! strcmp (sym_name, "<pic base>"))
	      return true;
	  }
    }

  return false;
}

/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

bool
legitimate_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
	{
	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
	return true;
      if (GET_CODE (x) != SYMBOL_REF)
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
	  && x != CONST0_RTX (TImode)
	  && !TARGET_64BIT)
	return false;
      break;

    case CONST_VECTOR:
      if (x == CONST0_RTX (GET_MODE (x)))
	return true;
      return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}

/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !legitimate_constant_p (x);
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
}

/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && GET_CODE (XEXP (inner, 1)) == CONST_INT)
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    x = XVECEXP (inner, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}

/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
	{
	case LABEL_REF:
	  return true;

	case CONST:
	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
	    break;
	  op0 = XEXP (XEXP (disp, 0), 0);
	  op1 = XEXP (XEXP (disp, 0), 1);
	  /* Keep symbol+offset within +/- 16MB so the final address
	     stays representable.  */
	  if (GET_CODE (op1) != CONST_INT
	      || INTVAL (op1) >= 16*1024*1024
	      || INTVAL (op1) < -16*1024*1024)
	    break;
	  if (GET_CODE (op0) == LABEL_REF)
	    return true;
	  if (GET_CODE (op0) != SYMBOL_REF)
	    break;
	  /* FALLTHRU */

	case SYMBOL_REF:
	  /* TLS references should always be enclosed in UNSPEC.  */
	  if (SYMBOL_REF_TLS_MODEL (op0))
	    return false;
	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
	    return true;
	  break;

	default:
	  break;
	}
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions; that would exceed the
	 limit allowed distance of GOT tables.  We should not need these
	 anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
	      && XINT (disp, 1) != UNSPEC_GOTOFF))
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return 1;

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI specifies also a 32bit relocation, we don't produce
	 it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	  && !TARGET_64BIT)
	return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return 0;
}

/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.
   The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

/* Returns TRUE if ADDR is a valid memory address for MODE, validating
   each component produced by ix86_decompose_address in turn.  STRICT
   nonzero means hard-register constraints are enforced (post-reload
   checking); on any failure we jump to report_error with a reason.  */
int
legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's that span more than a word here.  It can lead to spill
     failures when the base is one word out of a two word structure, which is
     represented internally as a DImode int.  */

  if (base)
    {
      rtx reg;
      reason_rtx = base;

      if (REG_P (base))
	reg = base;
      else if (GET_CODE (base) == SUBREG
	       && REG_P (SUBREG_REG (base))
	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
		  <= UNITS_PER_WORD)
	reg = SUBREG_REG (base);
      else
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's that span more than a word here -- same as above.  */

  if (index)
    {
      rtx reg;
      reason_rtx = index;

      if (REG_P (index))
	reg = index;
      else if (GET_CODE (index) == SUBREG
	       && REG_P (SUBREG_REG (index))
	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
		  <= UNITS_PER_WORD)
	reg = SUBREG_REG (index);
      else
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
	     used.  While the ABI specifies also 32bit relocations, we don't
	     produce them at all and use IP relative instead.  */
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    if (!TARGET_64BIT)
	      goto is_legitimate_pic;
	    reason = "64bit address unspec";
	    goto report_error;

	  case UNSPEC_GOTPCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  default:
	    reason = "invalid address unspec";
	    goto report_error;
	  }

      else if (SYMBOLIC_CONST (disp)
	       && (flag_pic
		   || (TARGET_MACHO
#if TARGET_MACHO
		       && MACHOPIC_INDIRECT
		       && !machopic_operand_p (disp)
#endif
	       )))
	{

	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		{
		  reason = "non-constant pic memory reference";
		  goto report_error;
		}
	    }
	  else if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
		 return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && GET_CODE (disp) != CONST_INT
	       && (GET_CODE (disp) != CONST
		   || !legitimate_constant_p (disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !legitimate_constant_p (disp)))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	{
	  reason = "displacement is out of range";
	  goto report_error;
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}

/* Return a unique alias set for the GOT.  Lazily allocated on first use.  */

static HOST_WIDE_INT
ix86_GOT_alias_set (void)
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.
*/ 6921 6922static rtx 6923legitimize_pic_address (rtx orig, rtx reg) 6924{ 6925 rtx addr = orig; 6926 rtx new = orig; 6927 rtx base; 6928 6929#if TARGET_MACHO 6930 if (TARGET_MACHO && !TARGET_64BIT) 6931 { 6932 if (reg == 0) 6933 reg = gen_reg_rtx (Pmode); 6934 /* Use the generic Mach-O PIC machinery. */ 6935 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg); 6936 } 6937#endif 6938 6939 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr)) 6940 new = addr; 6941 else if (TARGET_64BIT 6942 && ix86_cmodel != CM_SMALL_PIC 6943 && local_symbolic_operand (addr, Pmode)) 6944 { 6945 rtx tmpreg; 6946 /* This symbol may be referenced via a displacement from the PIC 6947 base address (@GOTOFF). */ 6948 6949 if (reload_in_progress) 6950 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; 6951 if (GET_CODE (addr) == CONST) 6952 addr = XEXP (addr, 0); 6953 if (GET_CODE (addr) == PLUS) 6954 { 6955 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF); 6956 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1)); 6957 } 6958 else 6959 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); 6960 new = gen_rtx_CONST (Pmode, new); 6961 if (!reg) 6962 tmpreg = gen_reg_rtx (Pmode); 6963 else 6964 tmpreg = reg; 6965 emit_move_insn (tmpreg, new); 6966 6967 if (reg != 0) 6968 { 6969 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx, 6970 tmpreg, 1, OPTAB_DIRECT); 6971 new = reg; 6972 } 6973 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg); 6974 } 6975 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode)) 6976 { 6977 /* This symbol may be referenced via a displacement from the PIC 6978 base address (@GOTOFF). 
*/ 6979 6980 if (reload_in_progress) 6981 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; 6982 if (GET_CODE (addr) == CONST) 6983 addr = XEXP (addr, 0); 6984 if (GET_CODE (addr) == PLUS) 6985 { 6986 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF); 6987 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1)); 6988 } 6989 else 6990 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); 6991 new = gen_rtx_CONST (Pmode, new); 6992 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); 6993 6994 if (reg != 0) 6995 { 6996 emit_move_insn (reg, new); 6997 new = reg; 6998 } 6999 } 7000 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0) 7001 { 7002 if (TARGET_64BIT) 7003 { 7004 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL); 7005 new = gen_rtx_CONST (Pmode, new); 7006 new = gen_const_mem (Pmode, new); 7007 set_mem_alias_set (new, ix86_GOT_alias_set ()); 7008 7009 if (reg == 0) 7010 reg = gen_reg_rtx (Pmode); 7011 /* Use directly gen_movsi, otherwise the address is loaded 7012 into register for CSE. We don't want to CSE this addresses, 7013 instead we CSE addresses from the GOT table, so skip this. */ 7014 emit_insn (gen_movsi (reg, new)); 7015 new = reg; 7016 } 7017 else 7018 { 7019 /* This symbol must be referenced via a load from the 7020 Global Offset Table (@GOT). 
*/ 7021 7022 if (reload_in_progress) 7023 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; 7024 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); 7025 new = gen_rtx_CONST (Pmode, new); 7026 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); 7027 new = gen_const_mem (Pmode, new); 7028 set_mem_alias_set (new, ix86_GOT_alias_set ()); 7029 7030 if (reg == 0) 7031 reg = gen_reg_rtx (Pmode); 7032 emit_move_insn (reg, new); 7033 new = reg; 7034 } 7035 } 7036 else 7037 { 7038 if (GET_CODE (addr) == CONST_INT 7039 && !x86_64_immediate_operand (addr, VOIDmode)) 7040 { 7041 if (reg) 7042 { 7043 emit_move_insn (reg, addr); 7044 new = reg; 7045 } 7046 else 7047 new = force_reg (Pmode, addr); 7048 } 7049 else if (GET_CODE (addr) == CONST) 7050 { 7051 addr = XEXP (addr, 0); 7052 7053 /* We must match stuff we generate before. Assume the only 7054 unspecs that can get here are ours. Not that we could do 7055 anything with them anyway.... */ 7056 if (GET_CODE (addr) == UNSPEC 7057 || (GET_CODE (addr) == PLUS 7058 && GET_CODE (XEXP (addr, 0)) == UNSPEC)) 7059 return orig; 7060 gcc_assert (GET_CODE (addr) == PLUS); 7061 } 7062 if (GET_CODE (addr) == PLUS) 7063 { 7064 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); 7065 7066 /* Check first to see if this is a constant offset from a @GOTOFF 7067 symbol reference. 
*/ 7068 if (local_symbolic_operand (op0, Pmode) 7069 && GET_CODE (op1) == CONST_INT) 7070 { 7071 if (!TARGET_64BIT) 7072 { 7073 if (reload_in_progress) 7074 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; 7075 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7076 UNSPEC_GOTOFF); 7077 new = gen_rtx_PLUS (Pmode, new, op1); 7078 new = gen_rtx_CONST (Pmode, new); 7079 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); 7080 7081 if (reg != 0) 7082 { 7083 emit_move_insn (reg, new); 7084 new = reg; 7085 } 7086 } 7087 else 7088 { 7089 if (INTVAL (op1) < -16*1024*1024 7090 || INTVAL (op1) >= 16*1024*1024) 7091 { 7092 if (!x86_64_immediate_operand (op1, Pmode)) 7093 op1 = force_reg (Pmode, op1); 7094 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1); 7095 } 7096 } 7097 } 7098 else 7099 { 7100 base = legitimize_pic_address (XEXP (addr, 0), reg); 7101 new = legitimize_pic_address (XEXP (addr, 1), 7102 base == reg ? NULL_RTX : reg); 7103 7104 if (GET_CODE (new) == CONST_INT) 7105 new = plus_constant (base, INTVAL (new)); 7106 else 7107 { 7108 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1))) 7109 { 7110 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0)); 7111 new = XEXP (new, 1); 7112 } 7113 new = gen_rtx_PLUS (Pmode, base, new); 7114 } 7115 } 7116 } 7117 } 7118 return new; 7119} 7120 7121/* Load the thread pointer. If TO_REG is true, force it into a register. */ 7122 7123static rtx 7124get_thread_pointer (int to_reg) 7125{ 7126 rtx tp, reg, insn; 7127 7128 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP); 7129 if (!to_reg) 7130 return tp; 7131 7132 reg = gen_reg_rtx (Pmode); 7133 insn = gen_rtx_SET (VOIDmode, reg, tp); 7134 insn = emit_insn (insn); 7135 7136 return reg; 7137} 7138 7139/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is 7140 false if we expect this to be used for a memory address and true if 7141 we expect to load the address into a register. 
*/ 7142 7143static rtx 7144legitimize_tls_address (rtx x, enum tls_model model, int for_mov) 7145{ 7146 rtx dest, base, off, pic, tp; 7147 int type; 7148 7149 switch (model) 7150 { 7151 case TLS_MODEL_GLOBAL_DYNAMIC: 7152 dest = gen_reg_rtx (Pmode); 7153 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0; 7154 7155 if (TARGET_64BIT && ! TARGET_GNU2_TLS) 7156 { 7157 rtx rax = gen_rtx_REG (Pmode, 0), insns; 7158 7159 start_sequence (); 7160 emit_call_insn (gen_tls_global_dynamic_64 (rax, x)); 7161 insns = get_insns (); 7162 end_sequence (); 7163 7164 emit_libcall_block (insns, dest, rax, x); 7165 } 7166 else if (TARGET_64BIT && TARGET_GNU2_TLS) 7167 emit_insn (gen_tls_global_dynamic_64 (dest, x)); 7168 else 7169 emit_insn (gen_tls_global_dynamic_32 (dest, x)); 7170 7171 if (TARGET_GNU2_TLS) 7172 { 7173 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest)); 7174 7175 set_unique_reg_note (get_last_insn (), REG_EQUIV, x); 7176 } 7177 break; 7178 7179 case TLS_MODEL_LOCAL_DYNAMIC: 7180 base = gen_reg_rtx (Pmode); 7181 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0; 7182 7183 if (TARGET_64BIT && ! 
TARGET_GNU2_TLS) 7184 { 7185 rtx rax = gen_rtx_REG (Pmode, 0), insns, note; 7186 7187 start_sequence (); 7188 emit_call_insn (gen_tls_local_dynamic_base_64 (rax)); 7189 insns = get_insns (); 7190 end_sequence (); 7191 7192 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL); 7193 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note); 7194 emit_libcall_block (insns, base, rax, note); 7195 } 7196 else if (TARGET_64BIT && TARGET_GNU2_TLS) 7197 emit_insn (gen_tls_local_dynamic_base_64 (base)); 7198 else 7199 emit_insn (gen_tls_local_dynamic_base_32 (base)); 7200 7201 if (TARGET_GNU2_TLS) 7202 { 7203 rtx x = ix86_tls_module_base (); 7204 7205 set_unique_reg_note (get_last_insn (), REG_EQUIV, 7206 gen_rtx_MINUS (Pmode, x, tp)); 7207 } 7208 7209 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF); 7210 off = gen_rtx_CONST (Pmode, off); 7211 7212 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off)); 7213 7214 if (TARGET_GNU2_TLS) 7215 { 7216 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp)); 7217 7218 set_unique_reg_note (get_last_insn (), REG_EQUIV, x); 7219 } 7220 7221 break; 7222 7223 case TLS_MODEL_INITIAL_EXEC: 7224 if (TARGET_64BIT) 7225 { 7226 pic = NULL; 7227 type = UNSPEC_GOTNTPOFF; 7228 } 7229 else if (flag_pic) 7230 { 7231 if (reload_in_progress) 7232 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; 7233 pic = pic_offset_table_rtx; 7234 type = TARGET_ANY_GNU_TLS ? 
UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF; 7235 } 7236 else if (!TARGET_ANY_GNU_TLS) 7237 { 7238 pic = gen_reg_rtx (Pmode); 7239 emit_insn (gen_set_got (pic)); 7240 type = UNSPEC_GOTTPOFF; 7241 } 7242 else 7243 { 7244 pic = NULL; 7245 type = UNSPEC_INDNTPOFF; 7246 } 7247 7248 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type); 7249 off = gen_rtx_CONST (Pmode, off); 7250 if (pic) 7251 off = gen_rtx_PLUS (Pmode, pic, off); 7252 off = gen_const_mem (Pmode, off); 7253 set_mem_alias_set (off, ix86_GOT_alias_set ()); 7254 7255 if (TARGET_64BIT || TARGET_ANY_GNU_TLS) 7256 { 7257 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS); 7258 off = force_reg (Pmode, off); 7259 return gen_rtx_PLUS (Pmode, base, off); 7260 } 7261 else 7262 { 7263 base = get_thread_pointer (true); 7264 dest = gen_reg_rtx (Pmode); 7265 emit_insn (gen_subsi3 (dest, base, off)); 7266 } 7267 break; 7268 7269 case TLS_MODEL_LOCAL_EXEC: 7270 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), 7271 (TARGET_64BIT || TARGET_ANY_GNU_TLS) 7272 ? UNSPEC_NTPOFF : UNSPEC_TPOFF); 7273 off = gen_rtx_CONST (Pmode, off); 7274 7275 if (TARGET_64BIT || TARGET_ANY_GNU_TLS) 7276 { 7277 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS); 7278 return gen_rtx_PLUS (Pmode, base, off); 7279 } 7280 else 7281 { 7282 base = get_thread_pointer (true); 7283 dest = gen_reg_rtx (Pmode); 7284 emit_insn (gen_subsi3 (dest, base, off)); 7285 } 7286 break; 7287 7288 default: 7289 gcc_unreachable (); 7290 } 7291 7292 return dest; 7293} 7294 7295/* Try machine-dependent ways of modifying an illegitimate address 7296 to be legitimate. If we find one, return the new, valid address. 7297 This macro is used in only one place: `memory_address' in explow.c. 7298 7299 OLDX is the address as it was before break_out_memory_refs was called. 7300 In some cases it is useful to look at this to decide what needs to be done. 7301 7302 MODE and WIN are passed so that this macro can use 7303 GO_IF_LEGITIMATE_ADDRESS. 
7304 7305 It is always safe for this macro to do nothing. It exists to recognize 7306 opportunities to optimize the output. 7307 7308 For the 80386, we handle X+REG by loading X into a register R and 7309 using R+REG. R will go in a general reg and indexing will be used. 7310 However, if REG is a broken-out memory address or multiplication, 7311 nothing needs to be done because REG can certainly go in a general reg. 7312 7313 When -fpic is used, special handling is needed for symbolic references. 7314 See comments by legitimize_pic_address in i386.c for details. */ 7315 7316rtx 7317legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode) 7318{ 7319 int changed = 0; 7320 unsigned log; 7321 7322 if (TARGET_DEBUG_ADDR) 7323 { 7324 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n", 7325 GET_MODE_NAME (mode)); 7326 debug_rtx (x); 7327 } 7328 7329 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0; 7330 if (log) 7331 return legitimize_tls_address (x, log, false); 7332 if (GET_CODE (x) == CONST 7333 && GET_CODE (XEXP (x, 0)) == PLUS 7334 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF 7335 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)))) 7336 { 7337 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false); 7338 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1)); 7339 } 7340 7341 if (flag_pic && SYMBOLIC_CONST (x)) 7342 return legitimize_pic_address (x, 0); 7343 7344 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */ 7345 if (GET_CODE (x) == ASHIFT 7346 && GET_CODE (XEXP (x, 1)) == CONST_INT 7347 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4) 7348 { 7349 changed = 1; 7350 log = INTVAL (XEXP (x, 1)); 7351 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), 7352 GEN_INT (1 << log)); 7353 } 7354 7355 if (GET_CODE (x) == PLUS) 7356 { 7357 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. 
*/ 7358 7359 if (GET_CODE (XEXP (x, 0)) == ASHIFT 7360 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT 7361 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4) 7362 { 7363 changed = 1; 7364 log = INTVAL (XEXP (XEXP (x, 0), 1)); 7365 XEXP (x, 0) = gen_rtx_MULT (Pmode, 7366 force_reg (Pmode, XEXP (XEXP (x, 0), 0)), 7367 GEN_INT (1 << log)); 7368 } 7369 7370 if (GET_CODE (XEXP (x, 1)) == ASHIFT 7371 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT 7372 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4) 7373 { 7374 changed = 1; 7375 log = INTVAL (XEXP (XEXP (x, 1), 1)); 7376 XEXP (x, 1) = gen_rtx_MULT (Pmode, 7377 force_reg (Pmode, XEXP (XEXP (x, 1), 0)), 7378 GEN_INT (1 << log)); 7379 } 7380 7381 /* Put multiply first if it isn't already. */ 7382 if (GET_CODE (XEXP (x, 1)) == MULT) 7383 { 7384 rtx tmp = XEXP (x, 0); 7385 XEXP (x, 0) = XEXP (x, 1); 7386 XEXP (x, 1) = tmp; 7387 changed = 1; 7388 } 7389 7390 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const))) 7391 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be 7392 created by virtual register instantiation, register elimination, and 7393 similar optimizations. */ 7394 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS) 7395 { 7396 changed = 1; 7397 x = gen_rtx_PLUS (Pmode, 7398 gen_rtx_PLUS (Pmode, XEXP (x, 0), 7399 XEXP (XEXP (x, 1), 0)), 7400 XEXP (XEXP (x, 1), 1)); 7401 } 7402 7403 /* Canonicalize 7404 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const) 7405 into (plus (plus (mult (reg) (const)) (reg)) (const)). 
*/ 7406 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS 7407 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 7408 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS 7409 && CONSTANT_P (XEXP (x, 1))) 7410 { 7411 rtx constant; 7412 rtx other = NULL_RTX; 7413 7414 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 7415 { 7416 constant = XEXP (x, 1); 7417 other = XEXP (XEXP (XEXP (x, 0), 1), 1); 7418 } 7419 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT) 7420 { 7421 constant = XEXP (XEXP (XEXP (x, 0), 1), 1); 7422 other = XEXP (x, 1); 7423 } 7424 else 7425 constant = 0; 7426 7427 if (constant) 7428 { 7429 changed = 1; 7430 x = gen_rtx_PLUS (Pmode, 7431 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0), 7432 XEXP (XEXP (XEXP (x, 0), 1), 0)), 7433 plus_constant (other, INTVAL (constant))); 7434 } 7435 } 7436 7437 if (changed && legitimate_address_p (mode, x, FALSE)) 7438 return x; 7439 7440 if (GET_CODE (XEXP (x, 0)) == MULT) 7441 { 7442 changed = 1; 7443 XEXP (x, 0) = force_operand (XEXP (x, 0), 0); 7444 } 7445 7446 if (GET_CODE (XEXP (x, 1)) == MULT) 7447 { 7448 changed = 1; 7449 XEXP (x, 1) = force_operand (XEXP (x, 1), 0); 7450 } 7451 7452 if (changed 7453 && GET_CODE (XEXP (x, 1)) == REG 7454 && GET_CODE (XEXP (x, 0)) == REG) 7455 return x; 7456 7457 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1))) 7458 { 7459 changed = 1; 7460 x = legitimize_pic_address (x, 0); 7461 } 7462 7463 if (changed && legitimate_address_p (mode, x, FALSE)) 7464 return x; 7465 7466 if (GET_CODE (XEXP (x, 0)) == REG) 7467 { 7468 rtx temp = gen_reg_rtx (Pmode); 7469 rtx val = force_operand (XEXP (x, 1), temp); 7470 if (val != temp) 7471 emit_move_insn (temp, val); 7472 7473 XEXP (x, 1) = temp; 7474 return x; 7475 } 7476 7477 else if (GET_CODE (XEXP (x, 1)) == REG) 7478 { 7479 rtx temp = gen_reg_rtx (Pmode); 7480 rtx val = force_operand (XEXP (x, 0), temp); 7481 if (val != temp) 7482 emit_move_insn (temp, val); 7483 7484 XEXP (x, 0) = temp; 7485 return x; 7486 } 7487 } 7488 7489 return x; 
7490} 7491 7492/* Print an integer constant expression in assembler syntax. Addition 7493 and subtraction are the only arithmetic that may appear in these 7494 expressions. FILE is the stdio stream to write to, X is the rtx, and 7495 CODE is the operand print code from the output string. */ 7496 7497static void 7498output_pic_addr_const (FILE *file, rtx x, int code) 7499{ 7500 char buf[256]; 7501 7502 switch (GET_CODE (x)) 7503 { 7504 case PC: 7505 gcc_assert (flag_pic); 7506 putc ('.', file); 7507 break; 7508 7509 case SYMBOL_REF: 7510 if (! TARGET_MACHO || TARGET_64BIT) 7511 output_addr_const (file, x); 7512 else 7513 { 7514 const char *name = XSTR (x, 0); 7515 7516 /* Mark the decl as referenced so that cgraph will output the function. */ 7517 if (SYMBOL_REF_DECL (x)) 7518 mark_decl_referenced (SYMBOL_REF_DECL (x)); 7519 7520#if TARGET_MACHO 7521 if (MACHOPIC_INDIRECT 7522 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) 7523 name = machopic_indirection_name (x, /*stub_p=*/true); 7524#endif 7525 assemble_name (file, name); 7526 } 7527 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x)) 7528 fputs ("@PLT", file); 7529 break; 7530 7531 case LABEL_REF: 7532 x = XEXP (x, 0); 7533 /* FALLTHRU */ 7534 case CODE_LABEL: 7535 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); 7536 assemble_name (asm_out_file, buf); 7537 break; 7538 7539 case CONST_INT: 7540 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 7541 break; 7542 7543 case CONST: 7544 /* This used to output parentheses around the expression, 7545 but that does not work on the 386 (either ATT or BSD assembler). */ 7546 output_pic_addr_const (file, XEXP (x, 0), code); 7547 break; 7548 7549 case CONST_DOUBLE: 7550 if (GET_MODE (x) == VOIDmode) 7551 { 7552 /* We can use %d if the number is <32 bits and positive. 
*/ 7553 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0) 7554 fprintf (file, "0x%lx%08lx", 7555 (unsigned long) CONST_DOUBLE_HIGH (x), 7556 (unsigned long) CONST_DOUBLE_LOW (x)); 7557 else 7558 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x)); 7559 } 7560 else 7561 /* We can't handle floating point constants; 7562 PRINT_OPERAND must handle them. */ 7563 output_operand_lossage ("floating constant misused"); 7564 break; 7565 7566 case PLUS: 7567 /* Some assemblers need integer constants to appear first. */ 7568 if (GET_CODE (XEXP (x, 0)) == CONST_INT) 7569 { 7570 output_pic_addr_const (file, XEXP (x, 0), code); 7571 putc ('+', file); 7572 output_pic_addr_const (file, XEXP (x, 1), code); 7573 } 7574 else 7575 { 7576 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT); 7577 output_pic_addr_const (file, XEXP (x, 1), code); 7578 putc ('+', file); 7579 output_pic_addr_const (file, XEXP (x, 0), code); 7580 } 7581 break; 7582 7583 case MINUS: 7584 if (!TARGET_MACHO) 7585 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file); 7586 output_pic_addr_const (file, XEXP (x, 0), code); 7587 putc ('-', file); 7588 output_pic_addr_const (file, XEXP (x, 1), code); 7589 if (!TARGET_MACHO) 7590 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file); 7591 break; 7592 7593 case UNSPEC: 7594 gcc_assert (XVECLEN (x, 0) == 1); 7595 output_pic_addr_const (file, XVECEXP (x, 0, 0), code); 7596 switch (XINT (x, 1)) 7597 { 7598 case UNSPEC_GOT: 7599 fputs ("@GOT", file); 7600 break; 7601 case UNSPEC_GOTOFF: 7602 fputs ("@GOTOFF", file); 7603 break; 7604 case UNSPEC_GOTPCREL: 7605 fputs ("@GOTPCREL(%rip)", file); 7606 break; 7607 case UNSPEC_GOTTPOFF: 7608 /* FIXME: This might be @TPOFF in Sun ld too. 
*/ 7609 fputs ("@GOTTPOFF", file); 7610 break; 7611 case UNSPEC_TPOFF: 7612 fputs ("@TPOFF", file); 7613 break; 7614 case UNSPEC_NTPOFF: 7615 if (TARGET_64BIT) 7616 fputs ("@TPOFF", file); 7617 else 7618 fputs ("@NTPOFF", file); 7619 break; 7620 case UNSPEC_DTPOFF: 7621 fputs ("@DTPOFF", file); 7622 break; 7623 case UNSPEC_GOTNTPOFF: 7624 if (TARGET_64BIT) 7625 fputs ("@GOTTPOFF(%rip)", file); 7626 else 7627 fputs ("@GOTNTPOFF", file); 7628 break; 7629 case UNSPEC_INDNTPOFF: 7630 fputs ("@INDNTPOFF", file); 7631 break; 7632 default: 7633 output_operand_lossage ("invalid UNSPEC as operand"); 7634 break; 7635 } 7636 break; 7637 7638 default: 7639 output_operand_lossage ("invalid expression as operand"); 7640 } 7641} 7642 7643/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. 7644 We need to emit DTP-relative relocations. */ 7645 7646static void 7647i386_output_dwarf_dtprel (FILE *file, int size, rtx x) 7648{ 7649 fputs (ASM_LONG, file); 7650 output_addr_const (file, x); 7651 fputs ("@DTPOFF", file); 7652 switch (size) 7653 { 7654 case 4: 7655 break; 7656 case 8: 7657 fputs (", 0", file); 7658 break; 7659 default: 7660 gcc_unreachable (); 7661 } 7662} 7663 7664/* In the name of slightly smaller debug output, and to cater to 7665 general assembler lossage, recognize PIC+GOTOFF and turn it back 7666 into a direct symbol reference. 7667 7668 On Darwin, this is necessary to avoid a crash, because Darwin 7669 has a different PIC label for each routine but the DWARF debugging 7670 information is not associated with any particular routine, so it's 7671 necessary to remove references to the PIC label from RTL stored by 7672 the DWARF output code. */ 7673 7674static rtx 7675ix86_delegitimize_address (rtx orig_x) 7676{ 7677 rtx x = orig_x; 7678 /* reg_addend is NULL or a multiple of some register. */ 7679 rtx reg_addend = NULL_RTX; 7680 /* const_addend is NULL or a const_int. */ 7681 rtx const_addend = NULL_RTX; 7682 /* This is the result, or NULL. 
*/ 7683 rtx result = NULL_RTX; 7684 7685 if (GET_CODE (x) == MEM) 7686 x = XEXP (x, 0); 7687 7688 if (TARGET_64BIT) 7689 { 7690 if (GET_CODE (x) != CONST 7691 || GET_CODE (XEXP (x, 0)) != UNSPEC 7692 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL 7693 || GET_CODE (orig_x) != MEM) 7694 return orig_x; 7695 return XVECEXP (XEXP (x, 0), 0, 0); 7696 } 7697 7698 if (GET_CODE (x) != PLUS 7699 || GET_CODE (XEXP (x, 1)) != CONST) 7700 return orig_x; 7701 7702 if (GET_CODE (XEXP (x, 0)) == REG 7703 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM) 7704 /* %ebx + GOT/GOTOFF */ 7705 ; 7706 else if (GET_CODE (XEXP (x, 0)) == PLUS) 7707 { 7708 /* %ebx + %reg * scale + GOT/GOTOFF */ 7709 reg_addend = XEXP (x, 0); 7710 if (GET_CODE (XEXP (reg_addend, 0)) == REG 7711 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM) 7712 reg_addend = XEXP (reg_addend, 1); 7713 else if (GET_CODE (XEXP (reg_addend, 1)) == REG 7714 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM) 7715 reg_addend = XEXP (reg_addend, 0); 7716 else 7717 return orig_x; 7718 if (GET_CODE (reg_addend) != REG 7719 && GET_CODE (reg_addend) != MULT 7720 && GET_CODE (reg_addend) != ASHIFT) 7721 return orig_x; 7722 } 7723 else 7724 return orig_x; 7725 7726 x = XEXP (XEXP (x, 1), 0); 7727 if (GET_CODE (x) == PLUS 7728 && GET_CODE (XEXP (x, 1)) == CONST_INT) 7729 { 7730 const_addend = XEXP (x, 1); 7731 x = XEXP (x, 0); 7732 } 7733 7734 if (GET_CODE (x) == UNSPEC 7735 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM) 7736 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM))) 7737 result = XVECEXP (x, 0, 0); 7738 7739 if (TARGET_MACHO && darwin_local_data_pic (x) 7740 && GET_CODE (orig_x) != MEM) 7741 result = XEXP (x, 0); 7742 7743 if (! 
result) 7744 return orig_x; 7745 7746 if (const_addend) 7747 result = gen_rtx_PLUS (Pmode, result, const_addend); 7748 if (reg_addend) 7749 result = gen_rtx_PLUS (Pmode, reg_addend, result); 7750 return result; 7751} 7752 7753static void 7754put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse, 7755 int fp, FILE *file) 7756{ 7757 const char *suffix; 7758 7759 if (mode == CCFPmode || mode == CCFPUmode) 7760 { 7761 enum rtx_code second_code, bypass_code; 7762 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 7763 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN); 7764 code = ix86_fp_compare_code_to_integer (code); 7765 mode = CCmode; 7766 } 7767 if (reverse) 7768 code = reverse_condition (code); 7769 7770 switch (code) 7771 { 7772 case EQ: 7773 suffix = "e"; 7774 break; 7775 case NE: 7776 suffix = "ne"; 7777 break; 7778 case GT: 7779 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode); 7780 suffix = "g"; 7781 break; 7782 case GTU: 7783 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers. 7784 Those same assemblers have the same but opposite lossage on cmov. */ 7785 gcc_assert (mode == CCmode); 7786 suffix = fp ? "nbe" : "a"; 7787 break; 7788 case LT: 7789 switch (mode) 7790 { 7791 case CCNOmode: 7792 case CCGOCmode: 7793 suffix = "s"; 7794 break; 7795 7796 case CCmode: 7797 case CCGCmode: 7798 suffix = "l"; 7799 break; 7800 7801 default: 7802 gcc_unreachable (); 7803 } 7804 break; 7805 case LTU: 7806 gcc_assert (mode == CCmode); 7807 suffix = "b"; 7808 break; 7809 case GE: 7810 switch (mode) 7811 { 7812 case CCNOmode: 7813 case CCGOCmode: 7814 suffix = "ns"; 7815 break; 7816 7817 case CCmode: 7818 case CCGCmode: 7819 suffix = "ge"; 7820 break; 7821 7822 default: 7823 gcc_unreachable (); 7824 } 7825 break; 7826 case GEU: 7827 /* ??? As above. */ 7828 gcc_assert (mode == CCmode); 7829 suffix = fp ? 
"nb" : "ae"; 7830 break; 7831 case LE: 7832 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode); 7833 suffix = "le"; 7834 break; 7835 case LEU: 7836 gcc_assert (mode == CCmode); 7837 suffix = "be"; 7838 break; 7839 case UNORDERED: 7840 suffix = fp ? "u" : "p"; 7841 break; 7842 case ORDERED: 7843 suffix = fp ? "nu" : "np"; 7844 break; 7845 default: 7846 gcc_unreachable (); 7847 } 7848 fputs (suffix, file); 7849} 7850 7851/* Print the name of register X to FILE based on its machine mode and number. 7852 If CODE is 'w', pretend the mode is HImode. 7853 If CODE is 'b', pretend the mode is QImode. 7854 If CODE is 'k', pretend the mode is SImode. 7855 If CODE is 'q', pretend the mode is DImode. 7856 If CODE is 'h', pretend the reg is the 'high' byte register. 7857 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */ 7858 7859void 7860print_reg (rtx x, int code, FILE *file) 7861{ 7862 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM 7863 && REGNO (x) != FRAME_POINTER_REGNUM 7864 && REGNO (x) != FLAGS_REG 7865 && REGNO (x) != FPSR_REG); 7866 7867 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0) 7868 putc ('%', file); 7869 7870 if (code == 'w' || MMX_REG_P (x)) 7871 code = 2; 7872 else if (code == 'b') 7873 code = 1; 7874 else if (code == 'k') 7875 code = 4; 7876 else if (code == 'q') 7877 code = 8; 7878 else if (code == 'y') 7879 code = 3; 7880 else if (code == 'h') 7881 code = 0; 7882 else 7883 code = GET_MODE_SIZE (GET_MODE (x)); 7884 7885 /* Irritatingly, AMD extended registers use different naming convention 7886 from the normal registers. 
*/ 7887 if (REX_INT_REG_P (x)) 7888 { 7889 gcc_assert (TARGET_64BIT); 7890 switch (code) 7891 { 7892 case 0: 7893 error ("extended registers have no high halves"); 7894 break; 7895 case 1: 7896 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8); 7897 break; 7898 case 2: 7899 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8); 7900 break; 7901 case 4: 7902 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8); 7903 break; 7904 case 8: 7905 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8); 7906 break; 7907 default: 7908 error ("unsupported operand size for extended register"); 7909 break; 7910 } 7911 return; 7912 } 7913 switch (code) 7914 { 7915 case 3: 7916 if (STACK_TOP_P (x)) 7917 { 7918 fputs ("st(0)", file); 7919 break; 7920 } 7921 /* FALLTHRU */ 7922 case 8: 7923 case 4: 7924 case 12: 7925 if (! ANY_FP_REG_P (x)) 7926 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file); 7927 /* FALLTHRU */ 7928 case 16: 7929 case 2: 7930 normal: 7931 fputs (hi_reg_name[REGNO (x)], file); 7932 break; 7933 case 1: 7934 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name)) 7935 goto normal; 7936 fputs (qi_reg_name[REGNO (x)], file); 7937 break; 7938 case 0: 7939 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name)) 7940 goto normal; 7941 fputs (qi_high_reg_name[REGNO (x)], file); 7942 break; 7943 default: 7944 gcc_unreachable (); 7945 } 7946} 7947 7948/* Locate some local-dynamic symbol still in use by this function 7949 so that we can print its name in some tls_local_dynamic_base 7950 pattern. 
*/ 7951 7952static const char * 7953get_some_local_dynamic_name (void) 7954{ 7955 rtx insn; 7956 7957 if (cfun->machine->some_ld_name) 7958 return cfun->machine->some_ld_name; 7959 7960 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) 7961 if (INSN_P (insn) 7962 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) 7963 return cfun->machine->some_ld_name; 7964 7965 gcc_unreachable (); 7966} 7967 7968static int 7969get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED) 7970{ 7971 rtx x = *px; 7972 7973 if (GET_CODE (x) == SYMBOL_REF 7974 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC) 7975 { 7976 cfun->machine->some_ld_name = XSTR (x, 0); 7977 return 1; 7978 } 7979 7980 return 0; 7981} 7982 7983/* Meaning of CODE: 7984 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. 7985 C -- print opcode suffix for set/cmov insn. 7986 c -- like C, but print reversed condition 7987 F,f -- likewise, but for floating-point. 7988 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", 7989 otherwise nothing 7990 R -- print the prefix for register names. 7991 z -- print the opcode suffix for the size of the current operand. 7992 * -- print a star (in certain assembler syntax) 7993 A -- print an absolute memory reference. 7994 w -- print the operand as if it's a "word" (HImode) even if it isn't. 7995 s -- print a shift double count, followed by the assemblers argument 7996 delimiter. 7997 b -- print the QImode name of the register for the indicated operand. 7998 %b0 would print %al if operands[0] is reg 0. 7999 w -- likewise, print the HImode name of the register. 8000 k -- likewise, print the SImode name of the register. 8001 q -- likewise, print the DImode name of the register. 8002 h -- print the QImode name for a "high" register, either ah, bh, ch or dh. 8003 y -- print "st(0)" instead of "st" as a register. 8004 D -- print condition for SSE cmp instruction. 8005 P -- if PIC, print an @PLT suffix. 
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
 */

void
print_operand (FILE *file, rtx x, int code)
{
  if (code)
    {
      switch (code)
        {
        case '*':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('*', file);
          return;

        case '&':
          assemble_name (file, get_some_local_dynamic_name ());
          return;

        case 'A':
          switch (ASSEMBLER_DIALECT)
            {
            case ASM_ATT:
              putc ('*', file);
              break;

            case ASM_INTEL:
              /* Intel syntax. For absolute addresses, registers should not
                 be surrounded by braces.  */
              if (GET_CODE (x) != REG)
                {
                  putc ('[', file);
                  PRINT_OPERAND (file, x, 0);
                  putc (']', file);
                  return;
                }
              break;

            default:
              gcc_unreachable ();
            }

          PRINT_OPERAND (file, x, 0);
          return;


        case 'L':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('l', file);
          return;

        case 'W':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('w', file);
          return;

        case 'B':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('b', file);
          return;

        case 'Q':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('l', file);
          return;

        case 'S':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('s', file);
          return;

        case 'T':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('t', file);
          return;

        case 'z':
          /* 387 opcodes don't get size suffixes if the operands are
             registers.  */
          if (STACK_REG_P (x))
            return;

          /* Likewise if using Intel opcodes.  */
          if (ASSEMBLER_DIALECT == ASM_INTEL)
            return;

          /* This is the size of op from size of operand.  */
          switch (GET_MODE_SIZE (GET_MODE (x)))
            {
            case 2:
#ifdef HAVE_GAS_FILDS_FISTS
              putc ('s', file);
#endif
              return;

            case 4:
              if (GET_MODE (x) == SFmode)
                {
                  putc ('s', file);
                  return;
                }
              else
                putc ('l', file);
              return;

            case 12:
            case 16:
              putc ('t', file);
              return;

            case 8:
              if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
                {
#ifdef GAS_MNEMONICS
                  putc ('q', file);
#else
                  /* Assemblers without the 'q' mnemonic suffix take
                     "ll" instead.  */
                  putc ('l', file);
                  putc ('l', file);
#endif
                }
              else
                putc ('l', file);
              return;

            default:
              gcc_unreachable ();
            }

        /* These codes affect how the operand itself is printed below,
           after the switch (register sizing / memory size override).  */
        case 'b':
        case 'w':
        case 'k':
        case 'q':
        case 'h':
        case 'y':
        case 'X':
        case 'P':
          break;

        case 's':
          if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
            {
              PRINT_OPERAND (file, x, 0);
              putc (',', file);
            }
          return;

        case 'D':
          /* Little bit of braindamage here.  The SSE compare instructions
             does use completely different names for the comparisons that the
             fp conditional moves.  */
          switch (GET_CODE (x))
            {
            case EQ:
            case UNEQ:
              fputs ("eq", file);
              break;
            case LT:
            case UNLT:
              fputs ("lt", file);
              break;
            case LE:
            case UNLE:
              fputs ("le", file);
              break;
            case UNORDERED:
              fputs ("unord", file);
              break;
            case NE:
            case LTGT:
              fputs ("neq", file);
              break;
            case UNGE:
            case GE:
              fputs ("nlt", file);
              break;
            case UNGT:
            case GT:
              fputs ("nle", file);
              break;
            case ORDERED:
              fputs ("ord", file);
              break;
            default:
              gcc_unreachable ();
            }
          return;
        case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            {
              switch (GET_MODE (x))
                {
                case HImode: putc ('w', file); break;
                case SImode:
                case SFmode: putc ('l', file); break;
                case DImode:
                case DFmode: putc ('q', file); break;
                default: gcc_unreachable ();
                }
              putc ('.', file);
            }
#endif
          return;
        case 'C':
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
          return;
        case 'F':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('.', file);
#endif
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
          return;

          /* Like above, but reverse condition */
        case 'c':
          /* Check to see if argument to %c is really a constant
             and not a condition code which needs to be reversed.  */
          if (!COMPARISON_P (x))
            {
              output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
              return;
            }
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
          return;
        case 'f':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('.', file);
#endif
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
          return;

        case 'H':
          /* It doesn't actually matter what mode we use here, as we're
             only going to use this for printing.  */
          x = adjust_address_nv (x, DImode, 8);
          break;

        case '+':
          {
            /* Local X shadows the operand; only the branch-probability
               note of the insn being output is inspected here.  */
            rtx x;

            if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
              return;

            x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
            if (x)
              {
                int pred_val = INTVAL (XEXP (x, 0));

                if (pred_val < REG_BR_PROB_BASE * 45 / 100
                    || pred_val > REG_BR_PROB_BASE * 55 / 100)
                  {
                    int taken = pred_val > REG_BR_PROB_BASE / 2;
                    int cputaken = final_forward_branch_p (current_output_insn) == 0;

                    /* Emit hints only in the case default branch prediction
                       heuristics would fail.  */
                    if (taken != cputaken)
                      {
                        /* We use 3e (DS) prefix for taken branches and
                           2e (CS) prefix for not taken branches.  */
                        if (taken)
                          fputs ("ds ; ", file);
                        else
                          fputs ("cs ; ", file);
                      }
                  }
              }
            return;
          }
        default:
          output_operand_lossage ("invalid operand code '%c'", code);
        }
    }

  if (GET_CODE (x) == REG)
    print_reg (x, code, file);

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
        {
          const char * size;
          switch (GET_MODE_SIZE (GET_MODE (x)))
            {
            case 1: size = "BYTE"; break;
            case 2: size = "WORD"; break;
            case 4: size = "DWORD"; break;
            case 8: size = "QWORD"; break;
            case 12: size = "XWORD"; break;
            case 16: size = "XMMWORD"; break;
            default:
              gcc_unreachable ();
            }

          /* Check for explicit size override (codes 'b', 'w' and 'k') */
          if (code == 'b')
            size = "BYTE";
          else if (code == 'w')
            size = "WORD";
          else if (code == 'k')
            size = "DWORD";

          fputs (size, file);
          fputs (" PTR ", file);
        }

      x = XEXP (x, 0);
      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
          && GET_CODE (x) != CONST_INT)
        output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
        output_operand_lossage ("invalid constraints for operand");
      else
        output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
        putc ('$', file);
      fprintf (file, "0x%08lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
           && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
         In 64-bit mode, we should probably support all 8-byte vectors,
         since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
        {
          /* Only an all-zero vector is expected here.  */
          gcc_assert (x == CONST0_RTX (GET_MODE (x)));
          x = const0_rtx;
        }

      if (code != 'P')
        {
          if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
            {
              if (ASSEMBLER_DIALECT == ASM_ATT)
                putc ('$', file);
            }
          else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
                   || GET_CODE (x) == LABEL_REF)
            {
              if (ASSEMBLER_DIALECT == ASM_ATT)
                putc ('$', file);
              else
                fputs ("OFFSET FLAT:", file);
            }
        }
      if (GET_CODE (x) == CONST_INT)
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
        output_pic_addr_const (file, x, code);
      else
        output_addr_const (file, x);
    }
}

/* Print a memory operand whose address is ADDR.  */

void
print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok = ix86_decompose_address (addr, &parts);

  /* Addresses reaching final output must already be legitimate.  */
  gcc_assert (ok);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (USER_LABEL_PREFIX[0] == 0)
        putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      gcc_unreachable ();
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.
       */

      if (GET_CODE (disp) == CONST_INT)
        {
          if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
            {
              if (USER_LABEL_PREFIX[0] == 0)
                putc ('%', file);
              fputs ("ds:", file);
            }
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
        }
      else if (flag_pic)
        output_pic_addr_const (file, disp, 0);
      else
        output_addr_const (file, disp);

      /* Use one byte shorter RIP relative addressing for 64bit mode.  */
      if (TARGET_64BIT)
        {
          /* Strip a constant addend to look at the underlying symbol.  */
          if (GET_CODE (disp) == CONST
              && GET_CODE (XEXP (disp, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
            disp = XEXP (XEXP (disp, 0), 0);
          if (GET_CODE (disp) == LABEL_REF
              || (GET_CODE (disp) == SYMBOL_REF
                  && SYMBOL_REF_TLS_MODEL (disp) == 0))
            fputs ("(%rip)", file);
        }
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
        {
          /* AT&T syntax: disp(base,index,scale).  */
          if (disp)
            {
              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (GET_CODE (disp) == LABEL_REF)
                output_asm_label (disp);
              else
                output_addr_const (file, disp);
            }

          putc ('(', file);
          if (base)
            print_reg (base, 0, file);
          if (index)
            {
              putc (',', file);
              print_reg (index, 0, file);
              if (scale != 1)
                fprintf (file, ",%d", scale);
            }
          putc (')', file);
        }
      else
        {
          /* Intel syntax: [base+index*scale+offset].  */
          rtx offset = NULL_RTX;

          if (disp)
            {
              /* Pull out the offset of a symbol; print any symbol itself.
               */
              if (GET_CODE (disp) == CONST
                  && GET_CODE (XEXP (disp, 0)) == PLUS
                  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
                {
                  offset = XEXP (XEXP (disp, 0), 1);
                  disp = gen_rtx_CONST (VOIDmode,
                                        XEXP (XEXP (disp, 0), 0));
                }

              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (GET_CODE (disp) == LABEL_REF)
                output_asm_label (disp);
              else if (GET_CODE (disp) == CONST_INT)
                offset = disp;
              else
                output_addr_const (file, disp);
            }

          putc ('[', file);
          if (base)
            {
              print_reg (base, 0, file);
              if (offset)
                {
                  if (INTVAL (offset) >= 0)
                    putc ('+', file);
                  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
                }
            }
          else if (offset)
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
          else
            /* Intel syntax needs an explicit zero displacement.  */
            putc ('0', file);

          if (index)
            {
              putc ('+', file);
              print_reg (index, 0, file);
              if (scale != 1)
                fprintf (file, "*%d", scale);
            }
          putc (']', file);
        }
    }
}

/* Handle UNSPEC address components (TLS relocations) that
   output_addr_const cannot print itself; return true if handled.  */

bool
output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.
       */
      fputs ("@GOTTPOFF", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@TPOFF", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
        fputs ("@TPOFF", file);
      else
        fputs ("@NTPOFF", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@DTPOFF", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
        fputs ("@GOTTPOFF(%rip)", file);
      else
        fputs ("@GOTNTPOFF", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@INDNTPOFF", file);
      break;

    default:
      return false;
    }

  return true;
}

/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuse to split volatile memory addresses,
         but we still have to handle it.  */
      if (GET_CODE (op) == MEM)
        {
          lo_half[num] = adjust_address (op, SImode, 0);
          hi_half[num] = adjust_address (op, SImode, 4);
        }
      else
        {
          /* Constants have VOIDmode; treat them as DImode.  */
          lo_half[num] = simplify_gen_subreg (SImode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? DImode : GET_MODE (op), 0);
          hi_half[num] = simplify_gen_subreg (SImode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? DImode : GET_MODE (op), 4);
        }
    }
}
/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.
   "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuse to split volatile memory addresses, but we
         still have to handle it.  */
      if (GET_CODE (op) == MEM)
        {
          lo_half[num] = adjust_address (op, DImode, 0);
          hi_half[num] = adjust_address (op, DImode, 8);
        }
      else
        {
          lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
          hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
        }
    }
}

/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  /* NOTE: returned template lives in this static buffer; callers must
     consume it before the next call.  */
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.
     */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
           && REGNO (operands[0]) == REGNO (operands[1])
           && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
          || (REG_P (operands[2])
              && REGNO (operands[0]) == REGNO (operands[2])
              && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    gcc_assert (is_sse);
#endif

  /* Select the base mnemonic; the "fi" forms take an integer memory
     operand.  SSEP is the SSE scalar mnemonic stem.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fiadd";
      else
        p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fisub";
      else
        p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fimul";
      else
        p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fidiv";
      else
        p = "fdiv";
      ssep = "div";
      break;

    default:
      gcc_unreachable ();
    }

  if (is_sse)
    {
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
        strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
        strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative: canonicalize so the register equal to the
         destination is operands[1].  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
        {
          rtx temp = operands[2];
          operands[2] = operands[1];
          operands[1] = temp;
        }

      /* know operands[0] == operands[1].
         */

      if (GET_CODE (operands[2]) == MEM)
        {
          p = "%z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
          if (STACK_TOP_P (operands[0]))
            /* How is it that we are storing to a dead operand[2]?
               Well, presumably operands[1] is dead too.  We can't
               store the result to st(0) as st(0) gets popped on this
               instruction.  Instead store to operands[2] (which I
               think has to be st(1)).  st(1) will be popped later.
               gcc <= 2.8.1 didn't have this check and generated
               assembly code that the Unixware assembler rejected.  */
            p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
          break;
        }

      if (STACK_TOP_P (operands[0]))
        p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
        p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative: the "r" (reversed) forms are needed when the
         memory or popped operand is on the left.  */
      if (GET_CODE (operands[1]) == MEM)
        {
          p = "r%z1\t%1";
          break;
        }

      if (GET_CODE (operands[2]) == MEM)
        {
          p = "%z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
#if SYSV386_COMPAT
          /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
             derived assemblers, confusingly reverse the direction of
             the operation for fsub{r} and fdiv{r} when the
             destination register is not st(0).  The Intel assembler
             doesn't have this brain damage.  Read !SYSV386_COMPAT to
             figure out what the hardware really does.  */
          if (STACK_TOP_P (operands[0]))
            p = "{p\t%0, %2|rp\t%2, %0}";
          else
            p = "{rp\t%2, %0|p\t%0, %2}";
#else
          if (STACK_TOP_P (operands[0]))
            /* As above for fmul/fadd, we can't store to st(0).
             */
            p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        {
#if SYSV386_COMPAT
          if (STACK_TOP_P (operands[0]))
            p = "{rp\t%0, %1|p\t%1, %0}";
          else
            p = "{p\t%1, %0|rp\t%0, %1}";
#else
          if (STACK_TOP_P (operands[0]))
            p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
          else
            p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
          break;
        }

      if (STACK_TOP_P (operands[0]))
        {
          if (STACK_TOP_P (operands[1]))
            p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
          else
            p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
          break;
        }
      else if (STACK_TOP_P (operands[1]))
        {
#if SYSV386_COMPAT
          p = "{\t%1, %0|r\t%0, %1}";
#else
          p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
        }
      else
        {
#if SYSV386_COMPAT
          p = "{r\t%2, %0|\t%0, %2}";
#else
          p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
        }
      break;

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}

/* Return needed mode for entity in optimize_mode_switching pass.  */

int
ix86_mode_needed (int entity, rtx insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specify that function
     has no requirements on the control word and make no changes in the
     bits we are interested in.
     */

  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
          && (asm_noperands (PATTERN (insn)) >= 0
              || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  /* An insn's i387_cw attribute is only relevant to the entity it
     names; for every other entity it imposes no requirement.  */
  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
        return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
        return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
        return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
        return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}

/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */

void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  int slot;

  rtx reg = gen_reg_rtx (HImode);

  /* Save the current control word, then derive the modified copy.  */
  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, stored_mode);

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
    {
      /* Full 16-bit and/or sequences; the rounding-control field is
         bits 10-11 (mask 0x0c00) of the x87 control word.  */
      switch (mode)
        {
        case I387_CW_TRUNC:
          /* round toward zero (truncate) */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
          slot = SLOT_CW_TRUNC;
          break;

        case I387_CW_FLOOR:
          /* round down toward -oo */
          emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
          slot = SLOT_CW_FLOOR;
          break;

        case I387_CW_CEIL:
          /* round up toward +oo */
          emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
          slot = SLOT_CW_CEIL;
          break;

        case
I387_CW_MASK_PM:
          /* mask precision exception for nearbyint() */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
          slot = SLOT_CW_MASK_PM;
          break;

        default:
          gcc_unreachable ();
        }
    }
  else
    {
      /* Use an insert into the rounding-control bits instead of
         and/or, to avoid a partial-register stall.  */
      switch (mode)
        {
        case I387_CW_TRUNC:
          /* round toward zero (truncate) */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
          slot = SLOT_CW_TRUNC;
          break;

        case I387_CW_FLOOR:
          /* round down toward -oo */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
          slot = SLOT_CW_FLOOR;
          break;

        case I387_CW_CEIL:
          /* round up toward +oo */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
          slot = SLOT_CW_CEIL;
          break;

        case I387_CW_MASK_PM:
          /* mask precision exception for nearbyint() */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
          slot = SLOT_CW_MASK_PM;
          break;

        default:
          gcc_unreachable ();
        }
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}

/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx insn, rtx *operands, int fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.
     */
  /* Duplicate st(0) first when a popping instruction must be used but
     the value is still live afterwards.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (GET_CODE (operands[0]) == MEM);

  if (fisttp)
      output_asm_insn ("fisttp%z0\t%0", operands);
  else
    {
      /* Without fisttp, temporarily switch the rounding mode via the
         control-word copies in operands[3] (new) and operands[2] (saved).  */
      if (round_mode != I387_CW_ANY)
        output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
        output_asm_insn ("fistp%z0\t%0", operands);
      else
        output_asm_insn ("fist%z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
        output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}

/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#if HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    /* Assembler lacks ffreep: emit its raw encoding (0xdf 0xc0+i).  */
    switch (REGNO (operands[opno]))
      {
      case FIRST_STACK_REG + 0: return ".word\t0xc0df";
      case FIRST_STACK_REG + 1: return ".word\t0xc1df";
      case FIRST_STACK_REG + 2: return ".word\t0xc2df";
      case FIRST_STACK_REG + 3: return ".word\t0xc3df";
      case FIRST_STACK_REG + 4: return ".word\t0xc4df";
      case FIRST_STACK_REG + 5: return ".word\t0xc5df";
      case FIRST_STACK_REG + 6: return ".word\t0xc6df";
      case FIRST_STACK_REG + 7: return ".word\t0xc7df";
      }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}


/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.
 */

const char *
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  /* For fcomi the compared operands are 0/1; for fnstsw-style compares
     operand 0 is the status-word destination and 1/2 are compared.  */
  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
        if (unordered_p)
          return "ucomiss\t{%1, %0|%0, %1}";
        else
          return "comiss\t{%1, %0|%0, %1}";
      else
        if (unordered_p)
          return "ucomisd\t{%1, %0|%0, %1}";
        else
          return "comisd\t{%1, %0|%0, %1}";
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  /* Comparison against zero can use ftst.  */
  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
        {
          output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
          return output_387_ffreep (operands, 1);
        }
      else
        return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
         is also a stack register that dies, then this must be a
         `fcompp' float compare */

      if (eflags_p)
        {
          /* There is no double popping fcomi variant.  Fortunately,
             eflags is immune from the fstp's cc clobbering.
             */
          if (unordered_p)
            output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
          else
            output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
          return output_387_ffreep (operands, 0);
        }
      else
        {
          if (unordered_p)
            return "fucompp\n\tfnstsw\t%0";
          else
            return "fcompp\n\tfnstsw\t%0";
        }
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
        "fcom%z2\t%y2\n\tfnstsw\t%0",
        "fcomp%z2\t%y2\n\tfnstsw\t%0",
        "fucom%z2\t%y2\n\tfnstsw\t%0",
        "fucomp%z2\t%y2\n\tfnstsw\t%0",

        "ficom%z2\t%y2\n\tfnstsw\t%0",
        "ficomp%z2\t%y2\n\tfnstsw\t%0",
        NULL,
        NULL,

        "fcomi\t{%y1, %0|%0, %y1}",
        "fcomip\t{%y1, %0|%0, %y1}",
        "fucomi\t{%y1, %0|%0, %y1}",
        "fucomip\t{%y1, %0|%0, %y1}",

        NULL,
        NULL,
        NULL,
        NULL
      };

      int mask;
      const char *ret;

      mask = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      /* NULL slots are combinations no pattern generates.  */
      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}

/* Output one element of a jump-table as an absolute address:
   ASM_LONG (or ASM_QUAD on 64-bit) followed by the local label.  */

void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

/* Output one element of a jump-table as a difference of labels,
   in a form valid for the current PIC flavor.  */

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-%s%d\n",
             ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
      machopic_output_function_base_name (file);
      fprintf(file, "\n");
    }
#endif
  else
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
                 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}

/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      /* xor clobbers the flags register (hard reg 17).  */
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}

/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.
 */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}

/* Expand a scalar move of MODE: operands[0] = operands[1].  Rewrites
   TLS references, PIC symbolic operands, mem-to-mem moves, pushes of
   non-general operands, large 64-bit immediates, and floating point
   constants into forms the move patterns accept, then emits the SET.  */

void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  /* Nonzero when we are in or past reload and may not create new
     pseudos or rewrite operands freely.  */
  int strict = (reload_in_progress || reload_completed);
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  /* A bare TLS symbol: legitimize it for its access model.  */
  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
	{
	  op1 = legitimize_tls_address (op1, model, true);
	  op1 = force_operand (op1, op0);
	  if (op1 == op0)
	    return;
	}
    }
  /* A TLS symbol plus a constant addend: legitimize the symbol and
     re-add the addend.  */
  else if (GET_CODE (op1) == CONST
	   && GET_CODE (XEXP (op1, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
      if (model)
	{
	  rtx addend = XEXP (XEXP (op1, 0), 1);
	  op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
	  op1 = force_operand (op1, NULL);
	  op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
				     op0, 1, OPTAB_DIRECT);
	  if (op1 == op0)
	    return;
	}
    }

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
	{
#if TARGET_MACHO
	  if (MACHOPIC_PURE)
	    {
	      rtx temp = ((reload_in_progress
			   || ((op0 && GET_CODE (op0) == REG)
			       && mode == Pmode))
			  ? op0 : gen_reg_rtx (Pmode));
	      op1 = machopic_indirect_data_reference (op1, temp);
	      op1 = machopic_legitimize_pic_address (op1, mode,
						     temp == op1 ? 0 : temp);
	    }
	  else if (MACHOPIC_INDIRECT)
	    op1 = machopic_indirect_data_reference (op1, 0);
	  if (op0 == op1)
	    return;
#endif
	}
      else
	{
	  if (GET_CODE (op0) == MEM)
	    op1 = force_reg (Pmode, op1);
	  else
	    /* NOTE(review): op1 is passed as both X and OLDX here --
	       looks intentional but confirm against the contract of
	       legitimize_address.  */
	    op1 = legitimize_address (op1, op1, Pmode);
	}
    }
  else
    {
      /* mem = mem is not a valid x86 move unless it is a push; force
	 the source into a register.  */
      if (GET_CODE (op0) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && GET_CODE (op1) == MEM)
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (op1, mode)
	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
	  && !register_operand (op0, mode)
	  && optimize && !reload_completed && !reload_in_progress)
	op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (op1) == CONST_DOUBLE)
	    {
	      op1 = validize_mem (force_const_mem (mode, op1));
	      if (!register_operand (op0, mode))
		{
		  rtx temp = gen_reg_rtx (mode);
		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
		  emit_move_insn (op0, temp);
		  return;
		}
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}

void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];

  /* Force constants other than zero into memory.
     We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (op0, mode)
      && CONSTANT_P (op1)
      && standard_sse_constant_p (op1) <= 0)
    op1 = validize_mem (force_const_mem (mode, op1));

  /* Make operand1 a register if it isn't already.  */
  if (!no_new_pseudos
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}

/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  /* Unaligned load from memory.  */
  if (MEM_P (op1))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  rtx zero;

	  if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
	    {
	      op0 = gen_lowpart (V2DFmode, op0);
	      op1 = gen_lowpart (V2DFmode, op1);
	      emit_insn (gen_sse2_movupd (op0, op1));
	      return;
	    }

	  /* When SSE registers are split into halves, we can avoid
	     writing to the top half twice.  */
	  if (TARGET_SSE_SPLIT_REGS)
	    {
	      emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
	      zero = op0;
	    }
	  else
	    {
	      /* ??? Not sure about the best option for the Intel chips.
		 The following would seem to satisfy; the register is
		 entirely cleared, breaking the dependency chain.  We
		 then store to the upper half, with a dependency depth
		 of one.  A rumor has it that Intel recommends two movsd
		 followed by an unpacklpd, but this is unconfirmed.  And
		 given that the dependency depth of the unpacklpd would
		 still be one, I'm not sure why this would be better.  */
	      zero = CONST0_RTX (V2DFmode);
	    }

	  /* Load the two DF halves separately.  */
	  m = adjust_address (op1, DFmode, 0);
	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
	  m = adjust_address (op1, DFmode, 8);
	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
	}
      else
	{
	  if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      op1 = gen_lowpart (V4SFmode, op1);
	      emit_insn (gen_sse_movups (op0, op1));
	      return;
	    }

	  /* Break the dependency on the previous value of the register,
	     either by clearing it or by clobbering it.  */
	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
	    emit_move_insn (op0, CONST0_RTX (mode));
	  else
	    emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));

	  /* Load the two SF-pair halves separately.  */
	  if (mode != V4SFmode)
	    op0 = gen_lowpart (V4SFmode, op0);
	  m = adjust_address (op1, V2SFmode, 0);
	  emit_insn (gen_sse_loadlps (op0, op0, m));
	  m = adjust_address (op1, V2SFmode, 8);
	  emit_insn (gen_sse_loadhps (op0, op0, m));
	}
    }
  /* Unaligned store to memory.  */
  else if (MEM_P (op0))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? Similar to above, only less clear because of quote
	 typeless stores unquote.  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  m = adjust_address (op0, DFmode, 0);
	  emit_insn (gen_sse2_storelpd (m, op1));
	  m = adjust_address (op0, DFmode, 8);
	  emit_insn (gen_sse2_storehpd (m, op1));
	}
      else
	{
	  if (mode != V4SFmode)
	    op1 = gen_lowpart (V4SFmode, op1);
	  m = adjust_address (op0, V2SFmode, 0);
	  emit_insn (gen_sse_storelps (m, op1));
	  m = adjust_address (op0, V2SFmode, 8);
	  emit_insn (gen_sse_storehps (m, op1));
	}
    }
  else
    /* Register-to-register misaligned moves make no sense.  */
    gcc_unreachable ();
}

/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  /* Decrement the stack pointer by the mode's size...  */
  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
			     GEN_INT (-GET_MODE_SIZE (mode)),
			     stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  /* ...and store the value at the new top of stack.  */
  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
  emit_move_insn (tmp, x);
}

/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.
 */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src1, src2, dst;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  matching_memory records which
     source (1 or 2) matches the memory destination, if any.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    src1 = force_reg (mode, src1);

  /* Write the possibly-adjusted sources back; DST may differ from
     operands[0], in which case the caller must emit a copy.  */
  src1 = operands[1] = src1;
  src2 = operands[2] = src2;
  return dst;
}

/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}

/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (enum rtx_code code,
			 enum machine_mode mode ATTRIBUTE_UNUSED,
			 rtx operands[3])
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
	    || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
		&& rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and the source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}

/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  NOT does not clobber flags, so it
	 is the only unary operator allowed here during reload.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.
 */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}

/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
   Create a mask for the sign bit in MODE for an SSE register.  If VECT is
   true, then replicate the mask for all elements of the vector register.
   If INVERT is true, then create a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;		/* sign-bit position for DFmode */
  rtvec v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  if (mode == SFmode)
    lo = 0x80000000, hi = lo < 0;
  else if (HOST_BITS_PER_WIDE_INT >= 64)
    lo = (HOST_WIDE_INT)1 << shift, hi = -1;
  else
    /* 32-bit host wide ints: the DF sign bit lives in the high word.  */
    lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
  mask = gen_lowpart (mode, mask);

  if (mode == SFmode)
    {
      if (vect)
	v = gen_rtvec (4, mask, mask, mask, mask);
      else
	v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
      vec_mode = V4SFmode;
    }
  else
    {
      if (vect)
	v = gen_rtvec (2, mask, mask);
      else
	v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
      vec_mode = V2DFmode;
    }

  return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
}

/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
				rtx operands[])
{
  rtx mask, set, use, clob, dst, src;
  bool matching_memory;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode elt_mode = mode;

  if (vector_mode)
    {
      elt_mode = GET_MODE_INNER (mode);
      use_sse = true;
    }
  else if (TARGET_SSE_MATH)
    use_sse = SSE_FLOAT_MODE_P (mode);

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we don't have matching source
     operands or we're using the x87, do things in registers.  */
  matching_memory = false;
  if (MEM_P (dst))
    {
      if (use_sse && rtx_equal_p (dst, src))
	matching_memory = true;
      else
	dst = gen_reg_rtx (mode);
    }
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  if (vector_mode)
    {
      /* NEG flips the sign bit (XOR); ABS clears it (AND with the
	 inverted mask built above).  */
      set = gen_rtx_fmt_ee (code == NEG ?
			    XOR : AND, mode, src, mask);
      set = gen_rtx_SET (VOIDmode, dst, set);
      emit_insn (set);
    }
  else
    {
      set = gen_rtx_fmt_e (code, mode, src);
      set = gen_rtx_SET (VOIDmode, dst, set);
      if (mask)
	{
	  /* Scalar SSE form: attach the mask as a USE and note the
	     flags clobber.  */
	  use = gen_rtx_USE (VOIDmode, mask);
	  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
	  emit_insn (gen_rtx_PARALLEL (VOIDmode,
				       gen_rtvec (3, set, use, clob)));
	}
      else
	emit_insn (set);
    }

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);
  vmode = mode == SFmode ? V4SFmode : V2DFmode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtvec v;

      /* Only the magnitude of a constant op0 matters; strip its sign.  */
      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
	op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (op0 == CONST0_RTX (mode))
	op0 = CONST0_RTX (vmode);
      else
	{
	  /* Widen the scalar constant to a vector with zero upper
	     elements.  */
	  if (mode == SFmode)
	    v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
			   CONST0_RTX (SFmode), CONST0_RTX (SFmode));
	  else
	    v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
	  op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
	}

      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
	emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
      else
	emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
    }
  else
    {
      nmask = ix86_build_signbit_mask (mode, 0, 1);
      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
	emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
      else
	emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, x;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  /* dest = (op1 & signbit-mask) | magnitude-of-op0.  */
  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
	 we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  /* The branches below correspond to the register alternatives of the
     copysign*3_var insn patterns; each computes
     dest = (op1 & mask) | (op0 & ~mask) with different register reuse.  */
  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
	{
	  x = gen_rtx_AND (vmode, scratch, mask);
	}
      else						/* alternative 2,4 */
	{
	  gcc_assert (REGNO (mask) == REGNO (scratch));
	  op1 = simplify_gen_subreg (vmode, op1, mode, 0);
	  x = gen_rtx_AND (vmode, scratch, op1);
	}
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
	{
	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, nmask);
	}
      else						/* alternative 3,4 */
	{
	  gcc_assert (REGNO (nmask) == REGNO (dest));
	  dest = nmask;
	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, op0);
	}
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.
 */

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

/* Return the CC mode needed to compare OP0 against OP1 with code CODE.  */

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
	 mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}

/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCZmode:
	  /* Two different integer CC modes: fall back to full CCmode.  */
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}

/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
{
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
	  || (ix86_fp_comparison_cost (swapped_code)
	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.
*/ 10241 10242static enum rtx_code 10243ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1) 10244{ 10245 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code); 10246 rtx op0 = *pop0, op1 = *pop1; 10247 enum machine_mode op_mode = GET_MODE (op0); 10248 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode); 10249 10250 /* All of the unordered compare instructions only work on registers. 10251 The same is true of the fcomi compare instructions. The XFmode 10252 compare instructions require registers except when comparing 10253 against zero or when converting operand 1 from fixed point to 10254 floating point. */ 10255 10256 if (!is_sse 10257 && (fpcmp_mode == CCFPUmode 10258 || (op_mode == XFmode 10259 && ! (standard_80387_constant_p (op0) == 1 10260 || standard_80387_constant_p (op1) == 1) 10261 && GET_CODE (op1) != FLOAT) 10262 || ix86_use_fcomi_compare (code))) 10263 { 10264 op0 = force_reg (op_mode, op0); 10265 op1 = force_reg (op_mode, op1); 10266 } 10267 else 10268 { 10269 /* %%% We only allow op1 in memory; op0 must be st(0). So swap 10270 things around if they appear profitable, otherwise force op0 10271 into a register. */ 10272 10273 if (standard_80387_constant_p (op0) == 0 10274 || (GET_CODE (op0) == MEM 10275 && ! (standard_80387_constant_p (op1) == 0 10276 || GET_CODE (op1) == MEM))) 10277 { 10278 rtx tmp; 10279 tmp = op0, op0 = op1, op1 = tmp; 10280 code = swap_condition (code); 10281 } 10282 10283 if (GET_CODE (op0) != REG) 10284 op0 = force_reg (op_mode, op0); 10285 10286 if (CONSTANT_P (op1)) 10287 { 10288 int tmp = standard_80387_constant_p (op1); 10289 if (tmp == 0) 10290 op1 = validize_mem (force_const_mem (op_mode, op1)); 10291 else if (tmp == 1) 10292 { 10293 if (TARGET_CMOVE) 10294 op1 = force_reg (op_mode, op1); 10295 } 10296 else 10297 op1 = force_reg (op_mode, op1); 10298 } 10299 } 10300 10301 /* Try to rearrange the comparison to make it cheaper. 
*/ 10302 if (ix86_fp_comparison_cost (code) 10303 > ix86_fp_comparison_cost (swap_condition (code)) 10304 && (GET_CODE (op1) == REG || !no_new_pseudos)) 10305 { 10306 rtx tmp; 10307 tmp = op0, op0 = op1, op1 = tmp; 10308 code = swap_condition (code); 10309 if (GET_CODE (op0) != REG) 10310 op0 = force_reg (op_mode, op0); 10311 } 10312 10313 *pop0 = op0; 10314 *pop1 = op1; 10315 return code; 10316} 10317 10318/* Convert comparison codes we use to represent FP comparison to integer 10319 code that will result in proper branch. Return UNKNOWN if no such code 10320 is available. */ 10321 10322enum rtx_code 10323ix86_fp_compare_code_to_integer (enum rtx_code code) 10324{ 10325 switch (code) 10326 { 10327 case GT: 10328 return GTU; 10329 case GE: 10330 return GEU; 10331 case ORDERED: 10332 case UNORDERED: 10333 return code; 10334 break; 10335 case UNEQ: 10336 return EQ; 10337 break; 10338 case UNLT: 10339 return LTU; 10340 break; 10341 case UNLE: 10342 return LEU; 10343 break; 10344 case LTGT: 10345 return NE; 10346 break; 10347 default: 10348 return UNKNOWN; 10349 } 10350} 10351 10352/* Split comparison code CODE into comparisons we can do using branch 10353 instructions. BYPASS_CODE is comparison code for branch that will 10354 branch around FIRST_CODE and SECOND_CODE. If some of branches 10355 is not required, set value to UNKNOWN. 10356 We never require more than two branches. 
*/ 10357 10358void 10359ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code, 10360 enum rtx_code *first_code, 10361 enum rtx_code *second_code) 10362{ 10363 *first_code = code; 10364 *bypass_code = UNKNOWN; 10365 *second_code = UNKNOWN; 10366 10367 /* The fcomi comparison sets flags as follows: 10368 10369 cmp ZF PF CF 10370 > 0 0 0 10371 < 0 0 1 10372 = 1 0 0 10373 un 1 1 1 */ 10374 10375 switch (code) 10376 { 10377 case GT: /* GTU - CF=0 & ZF=0 */ 10378 case GE: /* GEU - CF=0 */ 10379 case ORDERED: /* PF=0 */ 10380 case UNORDERED: /* PF=1 */ 10381 case UNEQ: /* EQ - ZF=1 */ 10382 case UNLT: /* LTU - CF=1 */ 10383 case UNLE: /* LEU - CF=1 | ZF=1 */ 10384 case LTGT: /* EQ - ZF=0 */ 10385 break; 10386 case LT: /* LTU - CF=1 - fails on unordered */ 10387 *first_code = UNLT; 10388 *bypass_code = UNORDERED; 10389 break; 10390 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */ 10391 *first_code = UNLE; 10392 *bypass_code = UNORDERED; 10393 break; 10394 case EQ: /* EQ - ZF=1 - fails on unordered */ 10395 *first_code = UNEQ; 10396 *bypass_code = UNORDERED; 10397 break; 10398 case NE: /* NE - ZF=0 - fails on unordered */ 10399 *first_code = LTGT; 10400 *second_code = UNORDERED; 10401 break; 10402 case UNGE: /* GEU - CF=0 - fails on unordered */ 10403 *first_code = GE; 10404 *second_code = UNORDERED; 10405 break; 10406 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */ 10407 *first_code = GT; 10408 *second_code = UNORDERED; 10409 break; 10410 default: 10411 gcc_unreachable (); 10412 } 10413 if (!TARGET_IEEE_FP) 10414 { 10415 *second_code = UNKNOWN; 10416 *bypass_code = UNKNOWN; 10417 } 10418} 10419 10420/* Return cost of comparison done fcom + arithmetics operations on AX. 10421 All following functions do use number of instructions as a cost metrics. 10422 In future this should be tweaked to compute bytes for optimize_size and 10423 take into account performance of various instructions on various CPUs. 
*/ 10424static int 10425ix86_fp_comparison_arithmetics_cost (enum rtx_code code) 10426{ 10427 if (!TARGET_IEEE_FP) 10428 return 4; 10429 /* The cost of code output by ix86_expand_fp_compare. */ 10430 switch (code) 10431 { 10432 case UNLE: 10433 case UNLT: 10434 case LTGT: 10435 case GT: 10436 case GE: 10437 case UNORDERED: 10438 case ORDERED: 10439 case UNEQ: 10440 return 4; 10441 break; 10442 case LT: 10443 case NE: 10444 case EQ: 10445 case UNGE: 10446 return 5; 10447 break; 10448 case LE: 10449 case UNGT: 10450 return 6; 10451 break; 10452 default: 10453 gcc_unreachable (); 10454 } 10455} 10456 10457/* Return cost of comparison done using fcomi operation. 10458 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 10459static int 10460ix86_fp_comparison_fcomi_cost (enum rtx_code code) 10461{ 10462 enum rtx_code bypass_code, first_code, second_code; 10463 /* Return arbitrarily high cost when instruction is not supported - this 10464 prevents gcc from using it. */ 10465 if (!TARGET_CMOVE) 10466 return 1024; 10467 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 10468 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2; 10469} 10470 10471/* Return cost of comparison done using sahf operation. 10472 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 10473static int 10474ix86_fp_comparison_sahf_cost (enum rtx_code code) 10475{ 10476 enum rtx_code bypass_code, first_code, second_code; 10477 /* Return arbitrarily high cost when instruction is not preferred - this 10478 avoids gcc from using it. */ 10479 if (!TARGET_USE_SAHF && !optimize_size) 10480 return 1024; 10481 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 10482 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3; 10483} 10484 10485/* Compute cost of the comparison done using any method. 10486 See ix86_fp_comparison_arithmetics_cost for the metrics. 
 */
static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
  int min;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  /* NOTE(review): the 1024 initializer of arithmetics_cost is dead; it is
     overwritten unconditionally below.  */
  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
    min = sahf_cost;
  if (min > fcomi_cost)
    min = fcomi_cost;
  return min;
}

/* Generate insn patterns to do a floating point compare of OPERANDS.

   SCRATCH may be NULL_RTX, in which case a fresh HImode pseudo is
   allocated when the fnstsw result needs a home.  When non-NULL,
   *SECOND_TEST / *BYPASS_TEST receive any extra flags test needed to
   complete the comparison (see ix86_fp_comparison_codes).  Returns the
   flags-register test to put into the bcc/scc/cmov user.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
			rtx *second_test, rtx *bypass_test)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  /* Cost must be computed before ix86_prepare_fp_compare_args may
     change CODE.  */
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  The extra branches are
     only usable when the caller supplied somewhere to put them.  */
  if ((bypass_code == UNKNOWN || bypass_test)
      && (second_code == UNKNOWN || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  /* fcomi: compare straight into the flags register.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  /* fnstsw into SCRATCH, then sahf to move AH into the flags.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
	  if (!scratch)
	    scratch = gen_reg_rtx (HImode);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != UNKNOWN)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != UNKNOWN)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.

	 The masks below select x87 status-word bits as seen in AH:
	 presumably C0 = 0x01, C2 = 0x04, C3 = 0x40, so 0x45 = C0|C2|C3
	 (verify against the Intel SDM x87 status-word layout).  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      /* NOTE(review): this inner break makes the outer one below
		 unreachable; harmless.  */
	      break;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}

/* Expand a comparison of ix86_compare_op0 with ix86_compare_op1 using
   CODE, dispatching to the FP or integer expander as appropriate, and
   return the flags test.  If ix86_compare_emitted is set, a compare has
   already been emitted and only the flags test is built (and the marker
   is cleared).  */

rtx
ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (ix86_compare_emitted)
    {
      ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
      ix86_compare_emitted = NULL_RTX;
    }
  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
				  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if the CODE will result in nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != UNKNOWN || second_code != UNKNOWN;
}

/* Expand a conditional jump to LABEL, taken when ix86_compare_op0 CODE
   ix86_compare_op1 holds.  Wide integer modes are split into multiple
   compare+branch sequences.  */

void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  /* If we have emitted a compare insn, go straight to simple.
     ix86_expand_compare won't emit anything if ix86_compare_emitted
     is non NULL.  */
  if (ix86_compare_emitted)
    goto simple;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
      {
	rtvec vec;
	int use_fcomi;
	enum rtx_code bypass_code, first_code, second_code;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand jump early.  Otherwise delay expansion by
	   creating compound insn to not confuse optimizers.  */
	if (bypass_code == UNKNOWN && second_code == UNKNOWN
	    && TARGET_CMOVE)
	  {
	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx, NULL_RTX, NULL_RTX);
	  }
	else
	  {
	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
				  ix86_compare_op0, ix86_compare_op1);
	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx);
	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	    /* Clobber the condition registers; without fcomi a scratch
	       for the fnstsw result is clobbered too.  NOTE(review):
	       hard regs 18 and 17 are presumably FPSR and FLAGS — verify
	       against the register numbering in i386.h.  */
	    use_fcomi = ix86_use_fcomi_compare (code);
	    vec = rtvec_alloc (3 + !use_fcomi);
	    RTVEC_ELT (vec, 0) = tmp;
	    RTVEC_ELT (vec, 1)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	    RTVEC_ELT (vec, 2)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	    if (! use_fcomi)
	      RTVEC_ELT (vec, 3)
		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	  }
	return;
      }

    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* FALLTHRU: on 32-bit targets DImode takes the split path.  */
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;
	enum machine_mode submode;

	/* Canonicalize a constant into the second operand.  */
	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	if (GET_MODE (ix86_compare_op0) == DImode)
	  {
	    split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	    split_di (&ix86_compare_op1, 1, lo+1, hi+1);
	    submode = SImode;
	  }
	else
	  {
	    split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
	    split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
	    submode = DImode;
	  }

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse as an equality test of the OR result against zero.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
	  case NE:   code2 = UNKNOWN; break;

	  default:
	    gcc_unreachable ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != UNKNOWN)
	  ix86_expand_branch (code1, label);
	if (code2 != UNKNOWN)
	  ix86_expand_branch (code2, label2);

	/* Low-word comparison is always unsigned.  */
	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != UNKNOWN)
	  emit_label (label2);
	return;
      }

    default:
      gcc_unreachable ();
    }
}

/* Split branch based on floating point condition.

   Emits the compare of OP1 with OP2 under CODE and up to three jumps:
   an optional bypass jump (around the main test), the main conditional
   jump between TARGET1/TARGET2, and an optional second jump.  TMP is a
   scratch passed through to ix86_expand_fp_compare; PUSHED, if set, is
   an operand previously spilled to the stack that is freed here.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  /* Canonicalize so that the fall-through (TARGET2) is the pc.  */
  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume the BYPASS and SECOND to be always test
	 for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 target2)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}

/* Expand a setcc of ix86_compare_op0 against ix86_compare_op1 under
   CODE into DEST, which must be a QImode register.  Return 1 on
   success, 0 when the comparison cannot be handled here (double-word
   integer compares).  */
int
ix86_expand_setcc (enum rtx_code code, rtx dest)
{
  rtx ret, tmp, tmpreg, equiv;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
    return 0; /* FAIL */

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      /* Combine the extra flags test into DEST: AND for a bypass
	 (reversed) test, IOR for a second test.  */
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  gcc_assert (!second_test);
	  test = bypass_test;
	  bypass = 1;
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  /* Attach a REG_EQUAL note describing the comparison result.  */
  if (ix86_compare_op0 && ix86_compare_op1)
    {
      equiv = simplify_gen_relational (code, QImode,
				       GET_MODE (ix86_compare_op0),
				       ix86_compare_op0, ix86_compare_op1);
      set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
    }

  return 1; /* DONE */
}

/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle DImode compares that go through special path.  Also we can't
     deal with FP compares yet.  This is possible to add.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;
  if (FLOAT_MODE_P (mode))
    {
      rtx second_test = NULL, bypass_test = NULL;
      rtx compare_op, compare_seq;

      /* Shortcut:  following common codes never translate into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
	  || code == ORDERED || code == UNORDERED)
	return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
	  && !TARGET_IEEE_FP)
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = swap_condition (code);
	}

      /* Try to expand the comparison and verify that we end up with carry flag
	 based comparison.  This fails to be true only when we decide to expand
	 comparison using arithmetic that is not too common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
					   &second_test, &bypass_test);
      compare_seq = get_insns ();
      end_sequence ();

      if (second_test || bypass_test)
	return false;
      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
	code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
	code = GET_CODE (compare_op);
      if (code != LTU && code != GEU)
	return false;
      /* Only now is it safe to commit the trial sequence.  */
      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }
  if (!INTEGRAL_MODE_P (mode))
    return false;
  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
	return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (GET_CODE (op1) == CONST_INT)
	{
	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We still can swap operands but that
	     would force loading of the constant into register.  */
	  if (op1 == const0_rtx
	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
	    return false;
	  code = (code == GTU ? GEU : LTU);
	}
      else
	{
	  rtx tmp = op1;
	  op1 = op0;
	  op0 = tmp;
	  code = (code == GTU ? LTU : GEU);
	}
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (no_new_pseudos)
	return false;
      op0 = force_reg (mode, op0);
    }
  ix86_compare_op0 = op0;
  ix86_compare_op1 = op1;
  *pop = ix86_expand_compare (code, NULL, NULL);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}

/* Expand an integer conditional move (OPERANDS[0] = OPERANDS[1]
   ? OPERANDS[2] : OPERANDS[3]).  Return 1 when expansion succeeded.  */
int
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);
  /* NOTE(review): doubled semicolon below is a stray but harmless token.  */
  bool sign_bit_compare_p = false;;

  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  /* x<0 / x>=0 and x>-1 / x<=-1 are sign-bit tests.  */
  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than we do by using
	 sbb.  */
      if (sign_bit_compare_p
	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
					     ix86_compare_op1, &compare_op))
	{
	  /* Detect overlap between destination and compare sources.
*/ 11230 rtx tmp = out; 11231 11232 if (!sign_bit_compare_p) 11233 { 11234 bool fpcmp = false; 11235 11236 compare_code = GET_CODE (compare_op); 11237 11238 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode 11239 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) 11240 { 11241 fpcmp = true; 11242 compare_code = ix86_fp_compare_code_to_integer (compare_code); 11243 } 11244 11245 /* To simplify rest of code, restrict to the GEU case. */ 11246 if (compare_code == LTU) 11247 { 11248 HOST_WIDE_INT tmp = ct; 11249 ct = cf; 11250 cf = tmp; 11251 compare_code = reverse_condition (compare_code); 11252 code = reverse_condition (code); 11253 } 11254 else 11255 { 11256 if (fpcmp) 11257 PUT_CODE (compare_op, 11258 reverse_condition_maybe_unordered 11259 (GET_CODE (compare_op))); 11260 else 11261 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); 11262 } 11263 diff = ct - cf; 11264 11265 if (reg_overlap_mentioned_p (out, ix86_compare_op0) 11266 || reg_overlap_mentioned_p (out, ix86_compare_op1)) 11267 tmp = gen_reg_rtx (mode); 11268 11269 if (mode == DImode) 11270 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op)); 11271 else 11272 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op)); 11273 } 11274 else 11275 { 11276 if (code == GT || code == GE) 11277 code = reverse_condition (code); 11278 else 11279 { 11280 HOST_WIDE_INT tmp = ct; 11281 ct = cf; 11282 cf = tmp; 11283 diff = ct - cf; 11284 } 11285 tmp = emit_store_flag (tmp, code, ix86_compare_op0, 11286 ix86_compare_op1, VOIDmode, 0, -1); 11287 } 11288 11289 if (diff == 1) 11290 { 11291 /* 11292 * cmpl op0,op1 11293 * sbbl dest,dest 11294 * [addl dest, ct] 11295 * 11296 * Size 5 - 8. 11297 */ 11298 if (ct) 11299 tmp = expand_simple_binop (mode, PLUS, 11300 tmp, GEN_INT (ct), 11301 copy_rtx (tmp), 1, OPTAB_DIRECT); 11302 } 11303 else if (cf == -1) 11304 { 11305 /* 11306 * cmpl op0,op1 11307 * sbbl dest,dest 11308 * orl $ct, dest 11309 * 11310 * Size 8. 
11311 */ 11312 tmp = expand_simple_binop (mode, IOR, 11313 tmp, GEN_INT (ct), 11314 copy_rtx (tmp), 1, OPTAB_DIRECT); 11315 } 11316 else if (diff == -1 && ct) 11317 { 11318 /* 11319 * cmpl op0,op1 11320 * sbbl dest,dest 11321 * notl dest 11322 * [addl dest, cf] 11323 * 11324 * Size 8 - 11. 11325 */ 11326 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); 11327 if (cf) 11328 tmp = expand_simple_binop (mode, PLUS, 11329 copy_rtx (tmp), GEN_INT (cf), 11330 copy_rtx (tmp), 1, OPTAB_DIRECT); 11331 } 11332 else 11333 { 11334 /* 11335 * cmpl op0,op1 11336 * sbbl dest,dest 11337 * [notl dest] 11338 * andl cf - ct, dest 11339 * [addl dest, ct] 11340 * 11341 * Size 8 - 11. 11342 */ 11343 11344 if (cf == 0) 11345 { 11346 cf = ct; 11347 ct = 0; 11348 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); 11349 } 11350 11351 tmp = expand_simple_binop (mode, AND, 11352 copy_rtx (tmp), 11353 gen_int_mode (cf - ct, mode), 11354 copy_rtx (tmp), 1, OPTAB_DIRECT); 11355 if (ct) 11356 tmp = expand_simple_binop (mode, PLUS, 11357 copy_rtx (tmp), GEN_INT (ct), 11358 copy_rtx (tmp), 1, OPTAB_DIRECT); 11359 } 11360 11361 if (!rtx_equal_p (tmp, out)) 11362 emit_move_insn (copy_rtx (out), copy_rtx (tmp)); 11363 11364 return 1; /* DONE */ 11365 } 11366 11367 if (diff < 0) 11368 { 11369 HOST_WIDE_INT tmp; 11370 tmp = ct, ct = cf, cf = tmp; 11371 diff = -diff; 11372 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0))) 11373 { 11374 /* We may be reversing unordered compare to normal compare, that 11375 is not valid in general (we may convert non-trapping condition 11376 to trapping one), however on i386 we currently emit all 11377 comparisons unordered. 
*/ 11378 compare_code = reverse_condition_maybe_unordered (compare_code); 11379 code = reverse_condition_maybe_unordered (code); 11380 } 11381 else 11382 { 11383 compare_code = reverse_condition (compare_code); 11384 code = reverse_condition (code); 11385 } 11386 } 11387 11388 compare_code = UNKNOWN; 11389 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT 11390 && GET_CODE (ix86_compare_op1) == CONST_INT) 11391 { 11392 if (ix86_compare_op1 == const0_rtx 11393 && (code == LT || code == GE)) 11394 compare_code = code; 11395 else if (ix86_compare_op1 == constm1_rtx) 11396 { 11397 if (code == LE) 11398 compare_code = LT; 11399 else if (code == GT) 11400 compare_code = GE; 11401 } 11402 } 11403 11404 /* Optimize dest = (op0 < 0) ? -1 : cf. */ 11405 if (compare_code != UNKNOWN 11406 && GET_MODE (ix86_compare_op0) == GET_MODE (out) 11407 && (cf == -1 || ct == -1)) 11408 { 11409 /* If lea code below could be used, only optimize 11410 if it results in a 2 insn sequence. */ 11411 11412 if (! 
(diff == 1 || diff == 2 || diff == 4 || diff == 8 11413 || diff == 3 || diff == 5 || diff == 9) 11414 || (compare_code == LT && ct == -1) 11415 || (compare_code == GE && cf == -1)) 11416 { 11417 /* 11418 * notl op1 (if necessary) 11419 * sarl $31, op1 11420 * orl cf, op1 11421 */ 11422 if (ct != -1) 11423 { 11424 cf = ct; 11425 ct = -1; 11426 code = reverse_condition (code); 11427 } 11428 11429 out = emit_store_flag (out, code, ix86_compare_op0, 11430 ix86_compare_op1, VOIDmode, 0, -1); 11431 11432 out = expand_simple_binop (mode, IOR, 11433 out, GEN_INT (cf), 11434 out, 1, OPTAB_DIRECT); 11435 if (out != operands[0]) 11436 emit_move_insn (operands[0], out); 11437 11438 return 1; /* DONE */ 11439 } 11440 } 11441 11442 11443 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8 11444 || diff == 3 || diff == 5 || diff == 9) 11445 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL) 11446 && (mode != DImode 11447 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode))) 11448 { 11449 /* 11450 * xorl dest,dest 11451 * cmpl op1,op2 11452 * setcc dest 11453 * lea cf(dest*(ct-cf)),dest 11454 * 11455 * Size 14. 11456 * 11457 * This also catches the degenerate setcc-only case. 11458 */ 11459 11460 rtx tmp; 11461 int nops; 11462 11463 out = emit_store_flag (out, code, ix86_compare_op0, 11464 ix86_compare_op1, VOIDmode, 0, 1); 11465 11466 nops = 0; 11467 /* On x86_64 the lea instruction operates on Pmode, so we need 11468 to get arithmetics done in proper mode to match. 
*/ 11469 if (diff == 1) 11470 tmp = copy_rtx (out); 11471 else 11472 { 11473 rtx out1; 11474 out1 = copy_rtx (out); 11475 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); 11476 nops++; 11477 if (diff & 1) 11478 { 11479 tmp = gen_rtx_PLUS (mode, tmp, out1); 11480 nops++; 11481 } 11482 } 11483 if (cf != 0) 11484 { 11485 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf)); 11486 nops++; 11487 } 11488 if (!rtx_equal_p (tmp, out)) 11489 { 11490 if (nops == 1) 11491 out = force_operand (tmp, copy_rtx (out)); 11492 else 11493 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp))); 11494 } 11495 if (!rtx_equal_p (out, operands[0])) 11496 emit_move_insn (operands[0], copy_rtx (out)); 11497 11498 return 1; /* DONE */ 11499 } 11500 11501 /* 11502 * General case: Jumpful: 11503 * xorl dest,dest cmpl op1, op2 11504 * cmpl op1, op2 movl ct, dest 11505 * setcc dest jcc 1f 11506 * decl dest movl cf, dest 11507 * andl (cf-ct),dest 1: 11508 * addl ct,dest 11509 * 11510 * Size 20. Size 14. 11511 * 11512 * This is reasonably steep, but branch mispredict costs are 11513 * high on modern cpus, so consider failing only if optimizing 11514 * for space. 11515 */ 11516 11517 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) 11518 && BRANCH_COST >= 2) 11519 { 11520 if (cf == 0) 11521 { 11522 cf = ct; 11523 ct = 0; 11524 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0))) 11525 /* We may be reversing unordered compare to normal compare, 11526 that is not valid in general (we may convert non-trapping 11527 condition to trapping one), however on i386 we currently 11528 emit all comparisons unordered. 
*/ 11529 code = reverse_condition_maybe_unordered (code); 11530 else 11531 { 11532 code = reverse_condition (code); 11533 if (compare_code != UNKNOWN) 11534 compare_code = reverse_condition (compare_code); 11535 } 11536 } 11537 11538 if (compare_code != UNKNOWN) 11539 { 11540 /* notl op1 (if needed) 11541 sarl $31, op1 11542 andl (cf-ct), op1 11543 addl ct, op1 11544 11545 For x < 0 (resp. x <= -1) there will be no notl, 11546 so if possible swap the constants to get rid of the 11547 complement. 11548 True/false will be -1/0 while code below (store flag 11549 followed by decrement) is 0/-1, so the constants need 11550 to be exchanged once more. */ 11551 11552 if (compare_code == GE || !cf) 11553 { 11554 code = reverse_condition (code); 11555 compare_code = LT; 11556 } 11557 else 11558 { 11559 HOST_WIDE_INT tmp = cf; 11560 cf = ct; 11561 ct = tmp; 11562 } 11563 11564 out = emit_store_flag (out, code, ix86_compare_op0, 11565 ix86_compare_op1, VOIDmode, 0, -1); 11566 } 11567 else 11568 { 11569 out = emit_store_flag (out, code, ix86_compare_op0, 11570 ix86_compare_op1, VOIDmode, 0, 1); 11571 11572 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx, 11573 copy_rtx (out), 1, OPTAB_DIRECT); 11574 } 11575 11576 out = expand_simple_binop (mode, AND, copy_rtx (out), 11577 gen_int_mode (cf - ct, mode), 11578 copy_rtx (out), 1, OPTAB_DIRECT); 11579 if (ct) 11580 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct), 11581 copy_rtx (out), 1, OPTAB_DIRECT); 11582 if (!rtx_equal_p (out, operands[0])) 11583 emit_move_insn (operands[0], copy_rtx (out)); 11584 11585 return 1; /* DONE */ 11586 } 11587 } 11588 11589 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) 11590 { 11591 /* Try a few things more with specific constants and a variable. 
*/ 11592 11593 optab op; 11594 rtx var, orig_out, out, tmp; 11595 11596 if (BRANCH_COST <= 2) 11597 return 0; /* FAIL */ 11598 11599 /* If one of the two operands is an interesting constant, load a 11600 constant with the above and mask it in with a logical operation. */ 11601 11602 if (GET_CODE (operands[2]) == CONST_INT) 11603 { 11604 var = operands[3]; 11605 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx) 11606 operands[3] = constm1_rtx, op = and_optab; 11607 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx) 11608 operands[3] = const0_rtx, op = ior_optab; 11609 else 11610 return 0; /* FAIL */ 11611 } 11612 else if (GET_CODE (operands[3]) == CONST_INT) 11613 { 11614 var = operands[2]; 11615 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx) 11616 operands[2] = constm1_rtx, op = and_optab; 11617 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx) 11618 operands[2] = const0_rtx, op = ior_optab; 11619 else 11620 return 0; /* FAIL */ 11621 } 11622 else 11623 return 0; /* FAIL */ 11624 11625 orig_out = operands[0]; 11626 tmp = gen_reg_rtx (mode); 11627 operands[0] = tmp; 11628 11629 /* Recurse to get the constant loaded. */ 11630 if (ix86_expand_int_movcc (operands) == 0) 11631 return 0; /* FAIL */ 11632 11633 /* Mask in the interesting variable. */ 11634 out = expand_binop (mode, op, var, tmp, orig_out, 0, 11635 OPTAB_WIDEN); 11636 if (!rtx_equal_p (out, orig_out)) 11637 emit_move_insn (copy_rtx (orig_out), copy_rtx (out)); 11638 11639 return 1; /* DONE */ 11640 } 11641 11642 /* 11643 * For comparison with above, 11644 * 11645 * movl cf,dest 11646 * movl ct,tmp 11647 * cmpl op1,op2 11648 * cmovcc tmp,dest 11649 * 11650 * Size 15. 11651 */ 11652 11653 if (! nonimmediate_operand (operands[2], mode)) 11654 operands[2] = force_reg (mode, operands[2]); 11655 if (! 
nonimmediate_operand (operands[3], mode)) 11656 operands[3] = force_reg (mode, operands[3]); 11657 11658 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) 11659 { 11660 rtx tmp = gen_reg_rtx (mode); 11661 emit_move_insn (tmp, operands[3]); 11662 operands[3] = tmp; 11663 } 11664 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) 11665 { 11666 rtx tmp = gen_reg_rtx (mode); 11667 emit_move_insn (tmp, operands[2]); 11668 operands[2] = tmp; 11669 } 11670 11671 if (! register_operand (operands[2], VOIDmode) 11672 && (mode == QImode 11673 || ! register_operand (operands[3], VOIDmode))) 11674 operands[2] = force_reg (mode, operands[2]); 11675 11676 if (mode == QImode 11677 && ! register_operand (operands[3], VOIDmode)) 11678 operands[3] = force_reg (mode, operands[3]); 11679 11680 emit_insn (compare_seq); 11681 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 11682 gen_rtx_IF_THEN_ELSE (mode, 11683 compare_op, operands[2], 11684 operands[3]))); 11685 if (bypass_test) 11686 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), 11687 gen_rtx_IF_THEN_ELSE (mode, 11688 bypass_test, 11689 copy_rtx (operands[3]), 11690 copy_rtx (operands[0])))); 11691 if (second_test) 11692 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), 11693 gen_rtx_IF_THEN_ELSE (mode, 11694 second_test, 11695 copy_rtx (operands[2]), 11696 copy_rtx (operands[0])))); 11697 11698 return 1; /* DONE */ 11699} 11700 11701/* Swap, force into registers, or otherwise massage the two operands 11702 to an sse comparison with a mask result. Thus we differ a bit from 11703 ix86_prepare_fp_compare_args which expects to produce a flags result. 11704 11705 The DEST operand exists to help determine whether to commute commutative 11706 operators. The POP0/POP1 operands are updated in place. The new 11707 comparison code is returned, or UNKNOWN if not implementable. 
*/ 11708 11709static enum rtx_code 11710ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, 11711 rtx *pop0, rtx *pop1) 11712{ 11713 rtx tmp; 11714 11715 switch (code) 11716 { 11717 case LTGT: 11718 case UNEQ: 11719 /* We have no LTGT as an operator. We could implement it with 11720 NE & ORDERED, but this requires an extra temporary. It's 11721 not clear that it's worth it. */ 11722 return UNKNOWN; 11723 11724 case LT: 11725 case LE: 11726 case UNGT: 11727 case UNGE: 11728 /* These are supported directly. */ 11729 break; 11730 11731 case EQ: 11732 case NE: 11733 case UNORDERED: 11734 case ORDERED: 11735 /* For commutative operators, try to canonicalize the destination 11736 operand to be first in the comparison - this helps reload to 11737 avoid extra moves. */ 11738 if (!dest || !rtx_equal_p (dest, *pop1)) 11739 break; 11740 /* FALLTHRU */ 11741 11742 case GE: 11743 case GT: 11744 case UNLE: 11745 case UNLT: 11746 /* These are not supported directly. Swap the comparison operands 11747 to transform into something that is supported. */ 11748 tmp = *pop0; 11749 *pop0 = *pop1; 11750 *pop1 = tmp; 11751 code = swap_condition (code); 11752 break; 11753 11754 default: 11755 gcc_unreachable (); 11756 } 11757 11758 return code; 11759} 11760 11761/* Detect conditional moves that exactly match min/max operational 11762 semantics. Note that this is IEEE safe, as long as we don't 11763 interchange the operands. 11764 11765 Returns FALSE if this conditional move doesn't match a MIN/MAX, 11766 and TRUE if the operation is successful and instructions are emitted. 
 */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
                           rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  /* Only LT selects are handled directly; UNGE is the IEEE inverse of LT,
     so exchanging the two arms reduces it to the LT case.  Anything else
     is not a min/max shape.  */
  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  /* x < y ? x : y is MIN; x < y ? y : x is MAX.  Operands must match the
     comparison operands exactly (no interchange) to stay IEEE safe.  */
  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      /* Strict IEEE semantics: wrap the operation in an UNSPEC so the
         optimizers cannot commute the operands.  */
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      /* NOTE(review): only if_true is forced into a register here —
         presumably the UNSPEC patterns allow a memory second operand;
         confirm against the corresponding sse.md patterns.  */
      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      /* Relaxed math: a plain SMIN/SMAX rtx is sufficient.  */
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}

/* Expand an sse vector comparison.  Return the register with the result.
 */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
                     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx x;

  /* First comparison operand must be a register; the second may stay in
     memory if the predicate allows it.  */
  cmp_op0 = force_reg (mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, mode))
    cmp_op1 = force_reg (mode, cmp_op1);

  /* Use a fresh pseudo when DEST overlaps either select arm (the caller
     still needs to read them after the compare), or when optimizing,
     where fresh pseudos are preferred.  */
  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}

/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.
   CMP is assumed to be a full-width all-zeros/all-ones mask, as produced
   by an SSE comparison; the expansion computes
   (CMP & OP_TRUE) | (~CMP & OP_FALSE).  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx t2, t3, x;

  if (op_false == CONST0_RTX (mode))
    {
      /* False arm is zero: DEST = CMP & OP_TRUE.  */
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      /* True arm is zero: DEST = ~CMP & OP_FALSE.  */
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      /* General case: DEST = (OP_TRUE & CMP) | (OP_FALSE & ~CMP).  */
      op_true = force_reg (mode, op_true);
      op_false = force_reg (mode, op_false);

      t2 = gen_reg_rtx (mode);
      /* When not optimizing, reuse DEST for the second temporary to
         avoid an extra pseudo.  */
      if (optimize)
        t3 = gen_reg_rtx (mode);
      else
        t3 = dest;

      x = gen_rtx_AND (mode, op_true, cmp);
      emit_insn (gen_rtx_SET (VOIDmode, t2, x));

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, t3, x));

      x = gen_rtx_IOR (mode, t3, t2);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}

/* Expand a floating-point conditional move.  Return true if successful.  */

int
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op, second_test, bypass_test;

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
         allocation just to gain access to it.  Deny movcc when the
         comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (ix86_compare_op0);
      if (cmode == VOIDmode)
        cmode = GET_MODE (ix86_compare_op1);
      if (cmode != mode)
        return 0;

      /* Canonicalize the comparison into a form the SSE compare
         patterns support (possibly swapping the operands).  */
      code = ix86_prepare_sse_fp_compare_args (operands[0], code,
                                               &ix86_compare_op0,
                                               &ix86_compare_op1);
      if (code == UNKNOWN)
        return 0;

      /* Prefer a single min/max instruction when the select matches
         min/max semantics exactly.  */
      if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
                                     ix86_compare_op1, operands[2],
                                     operands[3]))
        return 1;

      /* Otherwise: compare to a mask, then blend with logical ops.  */
      tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
                                 ix86_compare_op1, operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* If the condition cannot be expressed as an fcmov condition, first
     materialize it with setcc into a QImode scratch and then fcmov on
     "scratch != 0", which is supported.  */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      gcc_assert (!second_test && !bypass_test);
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  /* The bypass/second cmove below reads operands[3]/operands[2] after
     operands[0] has already been written; copy them out of the way if
     they overlap the destination.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode, compare_op,
                                                operands[2], operands[3])));
  /* Some FP comparisons need a second cmove to fix up the result
     (unordered handling); emit them conditionally.  */
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (mode, bypass_test,
                                                  operands[3], operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (mode, second_test,
                                                  operands[2], operands[0])));

  return 1;
}

/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.
 */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  /* Canonicalize into a directly-supported SSE comparison, possibly
     swapping operands[4]/operands[5] in place.  */
  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
                                           &operands[4], &operands[5]);
  if (code == UNKNOWN)
    return false;

  /* Prefer a single min/max instruction when possible.  */
  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
                                 operands[5], operands[1], operands[2]))
    return true;

  /* Fallback: compare into a mask, then blend with logical ops.  */
  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
                             operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}

/* Expand a signed integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Canonicalize the comparison to EQ, GT, GTU.  NEGATE records that the
     condition was inverted, which is compensated below by swapping the
     select arms rather than negating the mask.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GTU:
      break;

    case NE:
    case LE:
    case LEU:
      code = reverse_condition (code);
      negate = true;
      break;

    case GE:
    case GEU:
      code = reverse_condition (code);
      negate = true;
      /* FALLTHRU */

    case LT:
    case LTU:
      code = swap_condition (code);
      x = cop0, cop0 = cop1, cop1 = x;
      break;

    default:
      gcc_unreachable ();
    }

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
  if (code == GTU)
    {
      cop0 = force_reg (mode, cop0);

      switch (mode)
        {
        case V4SImode:
          {
            rtx t1, t2, mask;

            /* Perform a parallel modulo subtraction.  */
            t1 = gen_reg_rtx (mode);
            emit_insn (gen_subv4si3 (t1, cop0, cop1));

            /* Extract the original sign bit of op0.
               NOTE(review): -0x80000000 depends on host int promotion
               rules for its exact HOST_WIDE_INT value; gen_int_mode
               would make the intent explicit — confirm.  */
            mask = GEN_INT (-0x80000000);
            mask = gen_rtx_CONST_VECTOR (mode,
                                         gen_rtvec (4, mask, mask, mask, mask));
            mask = force_reg (mode, mask);
            t2 = gen_reg_rtx (mode);
            emit_insn (gen_andv4si3 (t2, cop0, mask));

            /* XOR it back into the result of the subtraction.  This results
               in the sign bit set iff we saw unsigned underflow.  */
            x = gen_reg_rtx (mode);
            emit_insn (gen_xorv4si3 (x, t1, t2));

            code = GT;
          }
          break;

        case V16QImode:
        case V8HImode:
          /* Perform a parallel unsigned saturating subtraction: the
             result is zero exactly when cop0 <= cop1 (unsigned), so an
             EQ-against-zero test with inverted arms gives GTU.  */
          x = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, x,
                                  gen_rtx_US_MINUS (mode, cop0, cop1)));

          code = EQ;
          negate = !negate;
          break;

        default:
          gcc_unreachable ();
        }

      cop0 = x;
      cop1 = CONST0_RTX (mode);
    }

  /* NEGATE selects the arms in swapped order instead of inverting the
     comparison mask.  */
  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
                           operands[1+negate], operands[2-negate]);

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
                         operands[2-negate]);
  return true;
}

/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.
 */
int
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Only increment/decrement by one can be folded into adc/sbb.  */
  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  /* The condition must be expressible as a carry-flag test.  */
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
                                       ix86_compare_op1, &compare_op))
    return 0;
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  /* Canonicalize on LTU (carry set).  For the opposite condition,
     reverse the comparison in place and pre-bias by -1 so the carry
     still produces the correct +/-1 adjustment.  */
  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
        PUT_CODE (compare_op,
                  reverse_condition_maybe_unordered
                    (GET_CODE (compare_op)));
      else
        PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  Whether we subtract or add the
     carry depends on both the canonical condition and the sign of the
     requested adjustment.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
        {
        case QImode:
          emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case HImode:
          emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case SImode:
          emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case DImode:
          emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
          break;
        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (GET_MODE (operands[0]))
        {
        case QImode:
          emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case HImode:
          emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case SImode:
          emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case DImode:
          emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
          break;
        default:
          gcc_unreachable ();
        }
    }
  return 1; /* DONE */
}


/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating pointer parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  /* Number of word-sized parts: on 32-bit, XFmode needs 3 SImode parts;
     on 64-bit, parts are DImode-sized (rounded up).  */
  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 3);

  /* Optimize constant pool reference to immediates.
This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
        operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      /* All parts share the same (pre-dec) address; the caller emits
         them in the right order.  */
      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
         the operand may actually have a different mode now.  That's
         ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
        split_di (&operand, 1, &parts[0], &parts[1]);
      else
        {
          if (REG_P (operand))
            {
              /* Consecutive hard registers — only valid after reload.  */
              gcc_assert (reload_completed);
              parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
              if (size == 3)
                parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
            }
          else if (offsettable_memref_p (operand))
            {
              /* Slice the memory into SImode words at offsets 0/4/8.  */
              operand = adjust_address (operand, SImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, SImode, 4);
              if (size == 3)
                parts[2] = adjust_address (operand, SImode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              /* Decompose an FP constant into its target word images.  */
              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              switch (mode)
                {
                case XFmode:
                  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case DFmode:
                  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
                  break;
                default:
                  gcc_unreachable ();
                }
              parts[1] = gen_int_mode (l[1], SImode);
              parts[0] = gen_int_mode (l[0], SImode);
            }
          else
            gcc_unreachable ();
        }
    }
  else
    {
      if (mode == TImode)
        split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
        {
          /* The upper part of a 64-bit XFmode value is only 32 bits.  */
          enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
          if (REG_P (operand))
            {
              gcc_assert (reload_completed);
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, DImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, upper_mode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              real_to_target (l, &r, mode);

              /* Do not use shift by 32 to avoid warning on 32bit systems.  */
              if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[0]
                  = gen_int_mode
                      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
                       DImode);
              else
                parts[0] = immed_double_const (l[0], l[1], DImode);

              if (upper_mode == SImode)
                parts[1] = gen_int_mode (l[2], SImode);
              else if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[1]
                  = gen_int_mode
                      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
                       DImode);
              else
                parts[1] = immed_double_const (l[2], l[3], DImode);
            }
          else
            gcc_unreachable ();
        }
    }

  return size;
}

/* Emit insns to perform a move or push of DI, DF, and XF values.
   Operands 2-4 contain the input values in the correct order; operands
   5-7 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
         fp moves, that force all constants to memory to allow combining.
 */

      if (GET_CODE (operands[1]) == MEM
          && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
        operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
        {
          operands[0] = copy_rtx (operands[0]);
          PUT_MODE (operands[0], Pmode);
        }
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (GET_CODE (operands[0]) != MEM
                || offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.
     Each pushed part moves the stack pointer, so rebase the later
     source parts on the earlier parts' addresses.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
        part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
                                     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
                                   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
        collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        collisions++;
      if (nparts == 3
          && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
        collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
          && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        {
          rtx tmp;
          tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
          tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
        }

      /* If there are more collisions, we can't handle it by reordering.
         Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
        {
          rtx base;

          collisions = 1;

          base = part[0][nparts - 1];

          /* Handle the case when the last part isn't valid for lea.
             Happens in 64-bit mode storing the 12-byte XFmode.  */
          if (GET_MODE (base) != Pmode)
            base = gen_rtx_REG (Pmode, REGNO (base));

          /* Load the source address into BASE (the last-written dest
             part, which is free until the final move) and rewrite every
             source part relative to it.  */
          emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
          part[1][0] = replace_equiv_address (part[1][0], base);
          part[1][1] = replace_equiv_address (part[1][1],
                                              plus_constant (base, UNITS_PER_WORD));
          if (nparts == 3)
            part[1][2] = replace_equiv_address (part[1][2],
                                                plus_constant (base, 8));
        }
    }

  if (push)
    {
      if (!TARGET_64BIT)
        {
          if (nparts == 3)
            {
              /* Keep the 16-byte-aligned 12-byte XFmode push balanced by
                 dropping the stack pointer an extra 4 bytes first.  */
              if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
              emit_move_insn (part[0][2], part[1][2]);
            }
        }
      else
        {
          /* In 64bit mode we don't have 32bit push available.  In case this is
             register, it is OK - we will just use larger counterpart.  We also
             retype memory - these comes from attempt to avoid REX prefix on
             moving of second half of TFmode value.
 */
  if (optimize_size)
    {
      /* Reuse a register already holding the same nonzero constant
         instead of re-materializing the immediate in later parts.  */
      if (GET_CODE (operands[5]) == CONST_INT
          && operands[5] != const0_rtx
          && REG_P (operands[2]))
        {
          if (GET_CODE (operands[6]) == CONST_INT
              && INTVAL (operands[6]) == INTVAL (operands[5]))
            operands[6] = operands[2];

          if (nparts == 3
              && GET_CODE (operands[7]) == CONST_INT
              && INTVAL (operands[7]) == INTVAL (operands[5]))
            operands[7] = operands[2];
        }

      if (nparts == 3
          && GET_CODE (operands[6]) == CONST_INT
          && operands[6] != const0_rtx
          && REG_P (operands[3])
          && GET_CODE (operands[7]) == CONST_INT
          && INTVAL (operands[7]) == INTVAL (operands[6]))
        operands[7] = operands[3];
    }

  /* Emit the part moves in the order chosen above.  */
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}

/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  /* MODE is the full, pre-split mode; the parts being shifted are
     half-width, hence a DImode split uses the SImode add/shift
     generators (and the other branch the DImode ones — presumably for
     TImode on 64-bit; confirm against callers such as ix86_split_ashl,
     which split via split_di/split_ti).  */
  if (count == 1)
    {
      /* A shift by one is just an add of the operand to itself.  */
      emit_insn ((mode == DImode
                  ? gen_addsi3
                  : gen_adddi3) (operand, operand, operand));
    }
  else if (!optimize_size
           && count * ix86_cost->add <= ix86_cost->shift_const)
    {
      /* Repeated adds are cheaper than one constant shift on this
         tuning; only worth it when not optimizing for size.  */
      int i;
      for (i=0; i<count; i++)
        {
          emit_insn ((mode == DImode
                      ? gen_addsi3
                      : gen_adddi3) (operand, operand, operand));
        }
    }
  else
    emit_insn ((mode == DImode
                ? gen_ashlsi3
                : gen_ashldi3) (operand, operand, GEN_INT (count)));
}

void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ?
32 : 64; 12600 12601 if (GET_CODE (operands[2]) == CONST_INT) 12602 { 12603 (mode == DImode ? split_di : split_ti) (operands, 2, low, high); 12604 count = INTVAL (operands[2]) & (single_width * 2 - 1); 12605 12606 if (count >= single_width) 12607 { 12608 emit_move_insn (high[0], low[1]); 12609 emit_move_insn (low[0], const0_rtx); 12610 12611 if (count > single_width) 12612 ix86_expand_ashl_const (high[0], count - single_width, mode); 12613 } 12614 else 12615 { 12616 if (!rtx_equal_p (operands[0], operands[1])) 12617 emit_move_insn (operands[0], operands[1]); 12618 emit_insn ((mode == DImode 12619 ? gen_x86_shld_1 12620 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count))); 12621 ix86_expand_ashl_const (low[0], count, mode); 12622 } 12623 return; 12624 } 12625 12626 (mode == DImode ? split_di : split_ti) (operands, 1, low, high); 12627 12628 if (operands[1] == const1_rtx) 12629 { 12630 /* Assuming we've chosen a QImode capable registers, then 1 << N 12631 can be done with two 32/64-bit shifts, no branches, no cmoves. */ 12632 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0])) 12633 { 12634 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG); 12635 12636 ix86_expand_clear (low[0]); 12637 ix86_expand_clear (high[0]); 12638 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width))); 12639 12640 d = gen_lowpart (QImode, low[0]); 12641 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); 12642 s = gen_rtx_EQ (QImode, flags, const0_rtx); 12643 emit_insn (gen_rtx_SET (VOIDmode, d, s)); 12644 12645 d = gen_lowpart (QImode, high[0]); 12646 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); 12647 s = gen_rtx_NE (QImode, flags, const0_rtx); 12648 emit_insn (gen_rtx_SET (VOIDmode, d, s)); 12649 } 12650 12651 /* Otherwise, we can get the same results by manually performing 12652 a bit extract operation on bit 5/6, and then performing the two 12653 shifts. The two methods of getting 0/1 into low/high are exactly 12654 the same size. 
Avoiding the shift in the bit extract case helps 12655 pentium4 a bit; no one else seems to care much either way. */ 12656 else 12657 { 12658 rtx x; 12659 12660 if (TARGET_PARTIAL_REG_STALL && !optimize_size) 12661 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]); 12662 else 12663 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]); 12664 emit_insn (gen_rtx_SET (VOIDmode, high[0], x)); 12665 12666 emit_insn ((mode == DImode 12667 ? gen_lshrsi3 12668 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6))); 12669 emit_insn ((mode == DImode 12670 ? gen_andsi3 12671 : gen_anddi3) (high[0], high[0], GEN_INT (1))); 12672 emit_move_insn (low[0], high[0]); 12673 emit_insn ((mode == DImode 12674 ? gen_xorsi3 12675 : gen_xordi3) (low[0], low[0], GEN_INT (1))); 12676 } 12677 12678 emit_insn ((mode == DImode 12679 ? gen_ashlsi3 12680 : gen_ashldi3) (low[0], low[0], operands[2])); 12681 emit_insn ((mode == DImode 12682 ? gen_ashlsi3 12683 : gen_ashldi3) (high[0], high[0], operands[2])); 12684 return; 12685 } 12686 12687 if (operands[1] == constm1_rtx) 12688 { 12689 /* For -1 << N, we can avoid the shld instruction, because we 12690 know that we're shifting 0...31/63 ones into a -1. */ 12691 emit_move_insn (low[0], constm1_rtx); 12692 if (optimize_size) 12693 emit_move_insn (high[0], low[0]); 12694 else 12695 emit_move_insn (high[0], constm1_rtx); 12696 } 12697 else 12698 { 12699 if (!rtx_equal_p (operands[0], operands[1])) 12700 emit_move_insn (operands[0], operands[1]); 12701 12702 (mode == DImode ? split_di : split_ti) (operands, 1, low, high); 12703 emit_insn ((mode == DImode 12704 ? gen_x86_shld_1 12705 : gen_x86_64_shld) (high[0], low[0], operands[2])); 12706 } 12707 12708 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2])); 12709 12710 if (TARGET_CMOVE && scratch) 12711 { 12712 ix86_expand_clear (scratch); 12713 emit_insn ((mode == DImode 12714 ? 
gen_x86_shift_adj_1 12715 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch)); 12716 } 12717 else 12718 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2])); 12719} 12720 12721void 12722ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode) 12723{ 12724 rtx low[2], high[2]; 12725 int count; 12726 const int single_width = mode == DImode ? 32 : 64; 12727 12728 if (GET_CODE (operands[2]) == CONST_INT) 12729 { 12730 (mode == DImode ? split_di : split_ti) (operands, 2, low, high); 12731 count = INTVAL (operands[2]) & (single_width * 2 - 1); 12732 12733 if (count == single_width * 2 - 1) 12734 { 12735 emit_move_insn (high[0], high[1]); 12736 emit_insn ((mode == DImode 12737 ? gen_ashrsi3 12738 : gen_ashrdi3) (high[0], high[0], 12739 GEN_INT (single_width - 1))); 12740 emit_move_insn (low[0], high[0]); 12741 12742 } 12743 else if (count >= single_width) 12744 { 12745 emit_move_insn (low[0], high[1]); 12746 emit_move_insn (high[0], low[0]); 12747 emit_insn ((mode == DImode 12748 ? gen_ashrsi3 12749 : gen_ashrdi3) (high[0], high[0], 12750 GEN_INT (single_width - 1))); 12751 if (count > single_width) 12752 emit_insn ((mode == DImode 12753 ? gen_ashrsi3 12754 : gen_ashrdi3) (low[0], low[0], 12755 GEN_INT (count - single_width))); 12756 } 12757 else 12758 { 12759 if (!rtx_equal_p (operands[0], operands[1])) 12760 emit_move_insn (operands[0], operands[1]); 12761 emit_insn ((mode == DImode 12762 ? gen_x86_shrd_1 12763 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count))); 12764 emit_insn ((mode == DImode 12765 ? gen_ashrsi3 12766 : gen_ashrdi3) (high[0], high[0], GEN_INT (count))); 12767 } 12768 } 12769 else 12770 { 12771 if (!rtx_equal_p (operands[0], operands[1])) 12772 emit_move_insn (operands[0], operands[1]); 12773 12774 (mode == DImode ? split_di : split_ti) (operands, 1, low, high); 12775 12776 emit_insn ((mode == DImode 12777 ? 
gen_x86_shrd_1 12778 : gen_x86_64_shrd) (low[0], high[0], operands[2])); 12779 emit_insn ((mode == DImode 12780 ? gen_ashrsi3 12781 : gen_ashrdi3) (high[0], high[0], operands[2])); 12782 12783 if (TARGET_CMOVE && scratch) 12784 { 12785 emit_move_insn (scratch, high[0]); 12786 emit_insn ((mode == DImode 12787 ? gen_ashrsi3 12788 : gen_ashrdi3) (scratch, scratch, 12789 GEN_INT (single_width - 1))); 12790 emit_insn ((mode == DImode 12791 ? gen_x86_shift_adj_1 12792 : gen_x86_64_shift_adj) (low[0], high[0], operands[2], 12793 scratch)); 12794 } 12795 else 12796 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2])); 12797 } 12798} 12799 12800void 12801ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode) 12802{ 12803 rtx low[2], high[2]; 12804 int count; 12805 const int single_width = mode == DImode ? 32 : 64; 12806 12807 if (GET_CODE (operands[2]) == CONST_INT) 12808 { 12809 (mode == DImode ? split_di : split_ti) (operands, 2, low, high); 12810 count = INTVAL (operands[2]) & (single_width * 2 - 1); 12811 12812 if (count >= single_width) 12813 { 12814 emit_move_insn (low[0], high[1]); 12815 ix86_expand_clear (high[0]); 12816 12817 if (count > single_width) 12818 emit_insn ((mode == DImode 12819 ? gen_lshrsi3 12820 : gen_lshrdi3) (low[0], low[0], 12821 GEN_INT (count - single_width))); 12822 } 12823 else 12824 { 12825 if (!rtx_equal_p (operands[0], operands[1])) 12826 emit_move_insn (operands[0], operands[1]); 12827 emit_insn ((mode == DImode 12828 ? gen_x86_shrd_1 12829 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count))); 12830 emit_insn ((mode == DImode 12831 ? gen_lshrsi3 12832 : gen_lshrdi3) (high[0], high[0], GEN_INT (count))); 12833 } 12834 } 12835 else 12836 { 12837 if (!rtx_equal_p (operands[0], operands[1])) 12838 emit_move_insn (operands[0], operands[1]); 12839 12840 (mode == DImode ? split_di : split_ti) (operands, 1, low, high); 12841 12842 emit_insn ((mode == DImode 12843 ? 
gen_x86_shrd_1 12844 : gen_x86_64_shrd) (low[0], high[0], operands[2])); 12845 emit_insn ((mode == DImode 12846 ? gen_lshrsi3 12847 : gen_lshrdi3) (high[0], high[0], operands[2])); 12848 12849 /* Heh. By reversing the arguments, we can reuse this pattern. */ 12850 if (TARGET_CMOVE && scratch) 12851 { 12852 ix86_expand_clear (scratch); 12853 emit_insn ((mode == DImode 12854 ? gen_x86_shift_adj_1 12855 : gen_x86_64_shift_adj) (low[0], high[0], operands[2], 12856 scratch)); 12857 } 12858 else 12859 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2])); 12860 } 12861} 12862 12863/* Helper function for the string operations below. Dest VARIABLE whether 12864 it is aligned to VALUE bytes. If true, jump to the label. */ 12865static rtx 12866ix86_expand_aligntest (rtx variable, int value) 12867{ 12868 rtx label = gen_label_rtx (); 12869 rtx tmpcount = gen_reg_rtx (GET_MODE (variable)); 12870 if (GET_MODE (variable) == DImode) 12871 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value))); 12872 else 12873 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value))); 12874 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable), 12875 1, label); 12876 return label; 12877} 12878 12879/* Adjust COUNTER by the VALUE. */ 12880static void 12881ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value) 12882{ 12883 if (GET_MODE (countreg) == DImode) 12884 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value))); 12885 else 12886 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value))); 12887} 12888 12889/* Zero extend possibly SImode EXP to Pmode register. */ 12890rtx 12891ix86_zero_extend_to_Pmode (rtx exp) 12892{ 12893 rtx r; 12894 if (GET_MODE (exp) == VOIDmode) 12895 return force_reg (Pmode, exp); 12896 if (GET_MODE (exp) == Pmode) 12897 return copy_to_mode_reg (Pmode, exp); 12898 r = gen_reg_rtx (Pmode); 12899 emit_insn (gen_zero_extendsidi2 (r, exp)); 12900 return r; 12901} 12902 12903/* Expand string move (memcpy) operation. 
Use i386 string operations when 12904 profitable. expand_clrmem contains similar code. */ 12905int 12906ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) 12907{ 12908 rtx srcreg, destreg, countreg, srcexp, destexp; 12909 enum machine_mode counter_mode; 12910 HOST_WIDE_INT align = 0; 12911 unsigned HOST_WIDE_INT count = 0; 12912 12913 if (GET_CODE (align_exp) == CONST_INT) 12914 align = INTVAL (align_exp); 12915 12916 /* Can't use any of this if the user has appropriated esi or edi. */ 12917 if (global_regs[4] || global_regs[5]) 12918 return 0; 12919 12920 /* This simple hack avoids all inlining code and simplifies code below. */ 12921 if (!TARGET_ALIGN_STRINGOPS) 12922 align = 64; 12923 12924 if (GET_CODE (count_exp) == CONST_INT) 12925 { 12926 count = INTVAL (count_exp); 12927 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64) 12928 return 0; 12929 } 12930 12931 /* Figure out proper mode for counter. For 32bits it is always SImode, 12932 for 64bits use SImode when possible, otherwise DImode. 12933 Set count to number of bytes copied when known at compile time. */ 12934 if (!TARGET_64BIT 12935 || GET_MODE (count_exp) == SImode 12936 || x86_64_zext_immediate_operand (count_exp, VOIDmode)) 12937 counter_mode = SImode; 12938 else 12939 counter_mode = DImode; 12940 12941 gcc_assert (counter_mode == SImode || counter_mode == DImode); 12942 12943 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); 12944 if (destreg != XEXP (dst, 0)) 12945 dst = replace_equiv_address_nv (dst, destreg); 12946 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0)); 12947 if (srcreg != XEXP (src, 0)) 12948 src = replace_equiv_address_nv (src, srcreg); 12949 12950 /* When optimizing for size emit simple rep ; movsb instruction for 12951 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)? 12952 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb. 12953 Sice of (movsl;)*(movsw;)?(movsb;)? 
sequence is 12954 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes, 12955 but we don't know whether upper 24 (resp. 56) bits of %ecx will be 12956 known to be zero or not. The rep; movsb sequence causes higher 12957 register pressure though, so take that into account. */ 12958 12959 if ((!optimize || optimize_size) 12960 && (count == 0 12961 || ((count & 0x03) 12962 && (!optimize_size 12963 || count > 5 * 4 12964 || (count & 3) + count / 4 > 6)))) 12965 { 12966 emit_insn (gen_cld ()); 12967 countreg = ix86_zero_extend_to_Pmode (count_exp); 12968 destexp = gen_rtx_PLUS (Pmode, destreg, countreg); 12969 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg); 12970 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg, 12971 destexp, srcexp)); 12972 } 12973 12974 /* For constant aligned (or small unaligned) copies use rep movsl 12975 followed by code copying the rest. For PentiumPro ensure 8 byte 12976 alignment to allow rep movsl acceleration. */ 12977 12978 else if (count != 0 12979 && (align >= 8 12980 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4) 12981 || optimize_size || count < (unsigned int) 64)) 12982 { 12983 unsigned HOST_WIDE_INT offset = 0; 12984 int size = TARGET_64BIT && !optimize_size ? 8 : 4; 12985 rtx srcmem, dstmem; 12986 12987 emit_insn (gen_cld ()); 12988 if (count & ~(size - 1)) 12989 { 12990 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4) 12991 { 12992 enum machine_mode movs_mode = size == 4 ? SImode : DImode; 12993 12994 while (offset < (count & ~(size - 1))) 12995 { 12996 srcmem = adjust_automodify_address_nv (src, movs_mode, 12997 srcreg, offset); 12998 dstmem = adjust_automodify_address_nv (dst, movs_mode, 12999 destreg, offset); 13000 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 13001 offset += size; 13002 } 13003 } 13004 else 13005 { 13006 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3)) 13007 & (TARGET_64BIT ? 
-1 : 0x3fffffff)); 13008 countreg = copy_to_mode_reg (counter_mode, countreg); 13009 countreg = ix86_zero_extend_to_Pmode (countreg); 13010 13011 destexp = gen_rtx_ASHIFT (Pmode, countreg, 13012 GEN_INT (size == 4 ? 2 : 3)); 13013 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg); 13014 destexp = gen_rtx_PLUS (Pmode, destexp, destreg); 13015 13016 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, 13017 countreg, destexp, srcexp)); 13018 offset = count & ~(size - 1); 13019 } 13020 } 13021 if (size == 8 && (count & 0x04)) 13022 { 13023 srcmem = adjust_automodify_address_nv (src, SImode, srcreg, 13024 offset); 13025 dstmem = adjust_automodify_address_nv (dst, SImode, destreg, 13026 offset); 13027 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 13028 offset += 4; 13029 } 13030 if (count & 0x02) 13031 { 13032 srcmem = adjust_automodify_address_nv (src, HImode, srcreg, 13033 offset); 13034 dstmem = adjust_automodify_address_nv (dst, HImode, destreg, 13035 offset); 13036 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 13037 offset += 2; 13038 } 13039 if (count & 0x01) 13040 { 13041 srcmem = adjust_automodify_address_nv (src, QImode, srcreg, 13042 offset); 13043 dstmem = adjust_automodify_address_nv (dst, QImode, destreg, 13044 offset); 13045 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 13046 } 13047 } 13048 /* The generic code based on the glibc implementation: 13049 - align destination to 4 bytes (8 byte alignment is used for PentiumPro 13050 allowing accelerated copying there) 13051 - copy the data using rep movsl 13052 - copy the rest. */ 13053 else 13054 { 13055 rtx countreg2; 13056 rtx label = NULL; 13057 rtx srcmem, dstmem; 13058 int desired_alignment = (TARGET_PENTIUMPRO 13059 && (count == 0 || count >= (unsigned int) 260) 13060 ? 8 : UNITS_PER_WORD); 13061 /* Get rid of MEM_OFFSETs, they won't be accurate. 
*/ 13062 dst = change_address (dst, BLKmode, destreg); 13063 src = change_address (src, BLKmode, srcreg); 13064 13065 /* In case we don't know anything about the alignment, default to 13066 library version, since it is usually equally fast and result in 13067 shorter code. 13068 13069 Also emit call when we know that the count is large and call overhead 13070 will not be important. */ 13071 if (!TARGET_INLINE_ALL_STRINGOPS 13072 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL)) 13073 return 0; 13074 13075 if (TARGET_SINGLE_STRINGOP) 13076 emit_insn (gen_cld ()); 13077 13078 countreg2 = gen_reg_rtx (Pmode); 13079 countreg = copy_to_mode_reg (counter_mode, count_exp); 13080 13081 /* We don't use loops to align destination and to copy parts smaller 13082 than 4 bytes, because gcc is able to optimize such code better (in 13083 the case the destination or the count really is aligned, gcc is often 13084 able to predict the branches) and also it is friendlier to the 13085 hardware branch prediction. 13086 13087 Using loops is beneficial for generic case, because we can 13088 handle small counts using the loops. Many CPUs (such as Athlon) 13089 have large REP prefix setup costs. 13090 13091 This is quite costly. Maybe we can revisit this decision later or 13092 add some customizability to this code. 
*/ 13093 13094 if (count == 0 && align < desired_alignment) 13095 { 13096 label = gen_label_rtx (); 13097 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1), 13098 LEU, 0, counter_mode, 1, label); 13099 } 13100 if (align <= 1) 13101 { 13102 rtx label = ix86_expand_aligntest (destreg, 1); 13103 srcmem = change_address (src, QImode, srcreg); 13104 dstmem = change_address (dst, QImode, destreg); 13105 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 13106 ix86_adjust_counter (countreg, 1); 13107 emit_label (label); 13108 LABEL_NUSES (label) = 1; 13109 } 13110 if (align <= 2) 13111 { 13112 rtx label = ix86_expand_aligntest (destreg, 2); 13113 srcmem = change_address (src, HImode, srcreg); 13114 dstmem = change_address (dst, HImode, destreg); 13115 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 13116 ix86_adjust_counter (countreg, 2); 13117 emit_label (label); 13118 LABEL_NUSES (label) = 1; 13119 } 13120 if (align <= 4 && desired_alignment > 4) 13121 { 13122 rtx label = ix86_expand_aligntest (destreg, 4); 13123 srcmem = change_address (src, SImode, srcreg); 13124 dstmem = change_address (dst, SImode, destreg); 13125 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 13126 ix86_adjust_counter (countreg, 4); 13127 emit_label (label); 13128 LABEL_NUSES (label) = 1; 13129 } 13130 13131 if (label && desired_alignment > 4 && !TARGET_64BIT) 13132 { 13133 emit_label (label); 13134 LABEL_NUSES (label) = 1; 13135 label = NULL_RTX; 13136 } 13137 if (!TARGET_SINGLE_STRINGOP) 13138 emit_insn (gen_cld ()); 13139 if (TARGET_64BIT) 13140 { 13141 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg), 13142 GEN_INT (3))); 13143 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3)); 13144 } 13145 else 13146 { 13147 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx)); 13148 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx); 13149 } 13150 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg); 13151 destexp = gen_rtx_PLUS 
(Pmode, destexp, destreg); 13152 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, 13153 countreg2, destexp, srcexp)); 13154 13155 if (label) 13156 { 13157 emit_label (label); 13158 LABEL_NUSES (label) = 1; 13159 } 13160 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4)) 13161 { 13162 srcmem = change_address (src, SImode, srcreg); 13163 dstmem = change_address (dst, SImode, destreg); 13164 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 13165 } 13166 if ((align <= 4 || count == 0) && TARGET_64BIT) 13167 { 13168 rtx label = ix86_expand_aligntest (countreg, 4); 13169 srcmem = change_address (src, SImode, srcreg); 13170 dstmem = change_address (dst, SImode, destreg); 13171 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 13172 emit_label (label); 13173 LABEL_NUSES (label) = 1; 13174 } 13175 if (align > 2 && count != 0 && (count & 2)) 13176 { 13177 srcmem = change_address (src, HImode, srcreg); 13178 dstmem = change_address (dst, HImode, destreg); 13179 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 13180 } 13181 if (align <= 2 || count == 0) 13182 { 13183 rtx label = ix86_expand_aligntest (countreg, 2); 13184 srcmem = change_address (src, HImode, srcreg); 13185 dstmem = change_address (dst, HImode, destreg); 13186 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 13187 emit_label (label); 13188 LABEL_NUSES (label) = 1; 13189 } 13190 if (align > 1 && count != 0 && (count & 1)) 13191 { 13192 srcmem = change_address (src, QImode, srcreg); 13193 dstmem = change_address (dst, QImode, destreg); 13194 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 13195 } 13196 if (align <= 1 || count == 0) 13197 { 13198 rtx label = ix86_expand_aligntest (countreg, 1); 13199 srcmem = change_address (src, QImode, srcreg); 13200 dstmem = change_address (dst, QImode, destreg); 13201 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 13202 emit_label (label); 13203 LABEL_NUSES (label) = 1; 13204 } 13205 } 13206 13207 return 1; 
13208} 13209 13210/* Expand string clear operation (bzero). Use i386 string operations when 13211 profitable. expand_movmem contains similar code. */ 13212int 13213ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp) 13214{ 13215 rtx destreg, zeroreg, countreg, destexp; 13216 enum machine_mode counter_mode; 13217 HOST_WIDE_INT align = 0; 13218 unsigned HOST_WIDE_INT count = 0; 13219 13220 if (GET_CODE (align_exp) == CONST_INT) 13221 align = INTVAL (align_exp); 13222 13223 /* Can't use any of this if the user has appropriated esi. */ 13224 if (global_regs[4]) 13225 return 0; 13226 13227 /* This simple hack avoids all inlining code and simplifies code below. */ 13228 if (!TARGET_ALIGN_STRINGOPS) 13229 align = 32; 13230 13231 if (GET_CODE (count_exp) == CONST_INT) 13232 { 13233 count = INTVAL (count_exp); 13234 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64) 13235 return 0; 13236 } 13237 /* Figure out proper mode for counter. For 32bits it is always SImode, 13238 for 64bits use SImode when possible, otherwise DImode. 13239 Set count to number of bytes copied when known at compile time. */ 13240 if (!TARGET_64BIT 13241 || GET_MODE (count_exp) == SImode 13242 || x86_64_zext_immediate_operand (count_exp, VOIDmode)) 13243 counter_mode = SImode; 13244 else 13245 counter_mode = DImode; 13246 13247 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); 13248 if (destreg != XEXP (dst, 0)) 13249 dst = replace_equiv_address_nv (dst, destreg); 13250 13251 13252 /* When optimizing for size emit simple rep ; movsb instruction for 13253 counts not divisible by 4. The movl $N, %ecx; rep; stosb 13254 sequence is 7 bytes long, so if optimizing for size and count is 13255 small enough that some stosl, stosw and stosb instructions without 13256 rep are shorter, fall back into the next if. 
*/ 13257 13258 if ((!optimize || optimize_size) 13259 && (count == 0 13260 || ((count & 0x03) 13261 && (!optimize_size || (count & 0x03) + (count >> 2) > 7)))) 13262 { 13263 emit_insn (gen_cld ()); 13264 13265 countreg = ix86_zero_extend_to_Pmode (count_exp); 13266 zeroreg = copy_to_mode_reg (QImode, const0_rtx); 13267 destexp = gen_rtx_PLUS (Pmode, destreg, countreg); 13268 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp)); 13269 } 13270 else if (count != 0 13271 && (align >= 8 13272 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4) 13273 || optimize_size || count < (unsigned int) 64)) 13274 { 13275 int size = TARGET_64BIT && !optimize_size ? 8 : 4; 13276 unsigned HOST_WIDE_INT offset = 0; 13277 13278 emit_insn (gen_cld ()); 13279 13280 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx); 13281 if (count & ~(size - 1)) 13282 { 13283 unsigned HOST_WIDE_INT repcount; 13284 unsigned int max_nonrep; 13285 13286 repcount = count >> (size == 4 ? 2 : 3); 13287 if (!TARGET_64BIT) 13288 repcount &= 0x3fffffff; 13289 13290 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes. 13291 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN 13292 bytes. In both cases the latter seems to be faster for small 13293 values of N. */ 13294 max_nonrep = size == 4 ? 
7 : 4; 13295 if (!optimize_size) 13296 switch (ix86_tune) 13297 { 13298 case PROCESSOR_PENTIUM4: 13299 case PROCESSOR_NOCONA: 13300 max_nonrep = 3; 13301 break; 13302 default: 13303 break; 13304 } 13305 13306 if (repcount <= max_nonrep) 13307 while (repcount-- > 0) 13308 { 13309 rtx mem = adjust_automodify_address_nv (dst, 13310 GET_MODE (zeroreg), 13311 destreg, offset); 13312 emit_insn (gen_strset (destreg, mem, zeroreg)); 13313 offset += size; 13314 } 13315 else 13316 { 13317 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount)); 13318 countreg = ix86_zero_extend_to_Pmode (countreg); 13319 destexp = gen_rtx_ASHIFT (Pmode, countreg, 13320 GEN_INT (size == 4 ? 2 : 3)); 13321 destexp = gen_rtx_PLUS (Pmode, destexp, destreg); 13322 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, 13323 destexp)); 13324 offset = count & ~(size - 1); 13325 } 13326 } 13327 if (size == 8 && (count & 0x04)) 13328 { 13329 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg, 13330 offset); 13331 emit_insn (gen_strset (destreg, mem, 13332 gen_rtx_SUBREG (SImode, zeroreg, 0))); 13333 offset += 4; 13334 } 13335 if (count & 0x02) 13336 { 13337 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg, 13338 offset); 13339 emit_insn (gen_strset (destreg, mem, 13340 gen_rtx_SUBREG (HImode, zeroreg, 0))); 13341 offset += 2; 13342 } 13343 if (count & 0x01) 13344 { 13345 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg, 13346 offset); 13347 emit_insn (gen_strset (destreg, mem, 13348 gen_rtx_SUBREG (QImode, zeroreg, 0))); 13349 } 13350 } 13351 else 13352 { 13353 rtx countreg2; 13354 rtx label = NULL; 13355 /* Compute desired alignment of the string operation. */ 13356 int desired_alignment = (TARGET_PENTIUMPRO 13357 && (count == 0 || count >= (unsigned int) 260) 13358 ? 8 : UNITS_PER_WORD); 13359 13360 /* In case we don't know anything about the alignment, default to 13361 library version, since it is usually equally fast and result in 13362 shorter code. 
13363 13364 Also emit call when we know that the count is large and call overhead 13365 will not be important. */ 13366 if (!TARGET_INLINE_ALL_STRINGOPS 13367 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL)) 13368 return 0; 13369 13370 if (TARGET_SINGLE_STRINGOP) 13371 emit_insn (gen_cld ()); 13372 13373 countreg2 = gen_reg_rtx (Pmode); 13374 countreg = copy_to_mode_reg (counter_mode, count_exp); 13375 zeroreg = copy_to_mode_reg (Pmode, const0_rtx); 13376 /* Get rid of MEM_OFFSET, it won't be accurate. */ 13377 dst = change_address (dst, BLKmode, destreg); 13378 13379 if (count == 0 && align < desired_alignment) 13380 { 13381 label = gen_label_rtx (); 13382 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1), 13383 LEU, 0, counter_mode, 1, label); 13384 } 13385 if (align <= 1) 13386 { 13387 rtx label = ix86_expand_aligntest (destreg, 1); 13388 emit_insn (gen_strset (destreg, dst, 13389 gen_rtx_SUBREG (QImode, zeroreg, 0))); 13390 ix86_adjust_counter (countreg, 1); 13391 emit_label (label); 13392 LABEL_NUSES (label) = 1; 13393 } 13394 if (align <= 2) 13395 { 13396 rtx label = ix86_expand_aligntest (destreg, 2); 13397 emit_insn (gen_strset (destreg, dst, 13398 gen_rtx_SUBREG (HImode, zeroreg, 0))); 13399 ix86_adjust_counter (countreg, 2); 13400 emit_label (label); 13401 LABEL_NUSES (label) = 1; 13402 } 13403 if (align <= 4 && desired_alignment > 4) 13404 { 13405 rtx label = ix86_expand_aligntest (destreg, 4); 13406 emit_insn (gen_strset (destreg, dst, 13407 (TARGET_64BIT 13408 ? 
gen_rtx_SUBREG (SImode, zeroreg, 0) 13409 : zeroreg))); 13410 ix86_adjust_counter (countreg, 4); 13411 emit_label (label); 13412 LABEL_NUSES (label) = 1; 13413 } 13414 13415 if (label && desired_alignment > 4 && !TARGET_64BIT) 13416 { 13417 emit_label (label); 13418 LABEL_NUSES (label) = 1; 13419 label = NULL_RTX; 13420 } 13421 13422 if (!TARGET_SINGLE_STRINGOP) 13423 emit_insn (gen_cld ()); 13424 if (TARGET_64BIT) 13425 { 13426 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg), 13427 GEN_INT (3))); 13428 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3)); 13429 } 13430 else 13431 { 13432 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx)); 13433 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx); 13434 } 13435 destexp = gen_rtx_PLUS (Pmode, destexp, destreg); 13436 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp)); 13437 13438 if (label) 13439 { 13440 emit_label (label); 13441 LABEL_NUSES (label) = 1; 13442 } 13443 13444 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4)) 13445 emit_insn (gen_strset (destreg, dst, 13446 gen_rtx_SUBREG (SImode, zeroreg, 0))); 13447 if (TARGET_64BIT && (align <= 4 || count == 0)) 13448 { 13449 rtx label = ix86_expand_aligntest (countreg, 4); 13450 emit_insn (gen_strset (destreg, dst, 13451 gen_rtx_SUBREG (SImode, zeroreg, 0))); 13452 emit_label (label); 13453 LABEL_NUSES (label) = 1; 13454 } 13455 if (align > 2 && count != 0 && (count & 2)) 13456 emit_insn (gen_strset (destreg, dst, 13457 gen_rtx_SUBREG (HImode, zeroreg, 0))); 13458 if (align <= 2 || count == 0) 13459 { 13460 rtx label = ix86_expand_aligntest (countreg, 2); 13461 emit_insn (gen_strset (destreg, dst, 13462 gen_rtx_SUBREG (HImode, zeroreg, 0))); 13463 emit_label (label); 13464 LABEL_NUSES (label) = 1; 13465 } 13466 if (align > 1 && count != 0 && (count & 1)) 13467 emit_insn (gen_strset (destreg, dst, 13468 gen_rtx_SUBREG (QImode, zeroreg, 0))); 13469 if (align <= 1 || count == 0) 13470 { 13471 rtx label 
= ix86_expand_aligntest (countreg, 1); 13472 emit_insn (gen_strset (destreg, dst, 13473 gen_rtx_SUBREG (QImode, zeroreg, 0))); 13474 emit_label (label); 13475 LABEL_NUSES (label) = 1; 13476 } 13477 } 13478 return 1; 13479} 13480 13481/* Expand strlen. */ 13482int 13483ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align) 13484{ 13485 rtx addr, scratch1, scratch2, scratch3, scratch4; 13486 13487 /* The generic case of strlen expander is long. Avoid it's 13488 expanding unless TARGET_INLINE_ALL_STRINGOPS. */ 13489 13490 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 13491 && !TARGET_INLINE_ALL_STRINGOPS 13492 && !optimize_size 13493 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4)) 13494 return 0; 13495 13496 addr = force_reg (Pmode, XEXP (src, 0)); 13497 scratch1 = gen_reg_rtx (Pmode); 13498 13499 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 13500 && !optimize_size) 13501 { 13502 /* Well it seems that some optimizer does not combine a call like 13503 foo(strlen(bar), strlen(bar)); 13504 when the move and the subtraction is done here. It does calculate 13505 the length just once when these instructions are done inside of 13506 output_strlen_unroll(). But I think since &bar[strlen(bar)] is 13507 often used and I use one fewer register for the lifetime of 13508 output_strlen_unroll() this is better. */ 13509 13510 emit_move_insn (out, addr); 13511 13512 ix86_expand_strlensi_unroll_1 (out, src, align); 13513 13514 /* strlensi_unroll_1 returns the address of the zero at the end of 13515 the string, like memchr(), so compute the length by subtracting 13516 the start address. 
*/ 13517 if (TARGET_64BIT) 13518 emit_insn (gen_subdi3 (out, out, addr)); 13519 else 13520 emit_insn (gen_subsi3 (out, out, addr)); 13521 } 13522 else 13523 { 13524 rtx unspec; 13525 scratch2 = gen_reg_rtx (Pmode); 13526 scratch3 = gen_reg_rtx (Pmode); 13527 scratch4 = force_reg (Pmode, constm1_rtx); 13528 13529 emit_move_insn (scratch3, addr); 13530 eoschar = force_reg (QImode, eoschar); 13531 13532 emit_insn (gen_cld ()); 13533 src = replace_equiv_address_nv (src, scratch3); 13534 13535 /* If .md starts supporting :P, this can be done in .md. */ 13536 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align, 13537 scratch4), UNSPEC_SCAS); 13538 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec)); 13539 if (TARGET_64BIT) 13540 { 13541 emit_insn (gen_one_cmpldi2 (scratch2, scratch1)); 13542 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx)); 13543 } 13544 else 13545 { 13546 emit_insn (gen_one_cmplsi2 (scratch2, scratch1)); 13547 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx)); 13548 } 13549 } 13550 return 1; 13551} 13552 13553/* Expand the appropriate insns for doing strlen if not just doing 13554 repnz; scasb 13555 13556 out = result, initialized with the start address 13557 align_rtx = alignment of the address. 13558 scratch = scratch register, initialized with the startaddress when 13559 not aligned, otherwise undefined 13560 13561 This is just the body. It needs the initializations mentioned above and 13562 some address computing at the end. These things are done in i386.md. 
*/ 13563 13564static void 13565ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx) 13566{ 13567 int align; 13568 rtx tmp; 13569 rtx align_2_label = NULL_RTX; 13570 rtx align_3_label = NULL_RTX; 13571 rtx align_4_label = gen_label_rtx (); 13572 rtx end_0_label = gen_label_rtx (); 13573 rtx mem; 13574 rtx tmpreg = gen_reg_rtx (SImode); 13575 rtx scratch = gen_reg_rtx (SImode); 13576 rtx cmp; 13577 13578 align = 0; 13579 if (GET_CODE (align_rtx) == CONST_INT) 13580 align = INTVAL (align_rtx); 13581 13582 /* Loop to check 1..3 bytes for null to get an aligned pointer. */ 13583 13584 /* Is there a known alignment and is it less than 4? */ 13585 if (align < 4) 13586 { 13587 rtx scratch1 = gen_reg_rtx (Pmode); 13588 emit_move_insn (scratch1, out); 13589 /* Is there a known alignment and is it not 2? */ 13590 if (align != 2) 13591 { 13592 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */ 13593 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */ 13594 13595 /* Leave just the 3 lower bits. */ 13596 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3), 13597 NULL_RTX, 0, OPTAB_WIDEN); 13598 13599 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, 13600 Pmode, 1, align_4_label); 13601 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL, 13602 Pmode, 1, align_2_label); 13603 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL, 13604 Pmode, 1, align_3_label); 13605 } 13606 else 13607 { 13608 /* Since the alignment is 2, we have to check 2 or 0 bytes; 13609 check if is aligned to 4 - byte. */ 13610 13611 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx, 13612 NULL_RTX, 0, OPTAB_WIDEN); 13613 13614 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, 13615 Pmode, 1, align_4_label); 13616 } 13617 13618 mem = change_address (src, QImode, out); 13619 13620 /* Now compare the bytes. */ 13621 13622 /* Compare the first n unaligned byte on a byte per byte basis. 
*/ 13623 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, 13624 QImode, 1, end_0_label); 13625 13626 /* Increment the address. */ 13627 if (TARGET_64BIT) 13628 emit_insn (gen_adddi3 (out, out, const1_rtx)); 13629 else 13630 emit_insn (gen_addsi3 (out, out, const1_rtx)); 13631 13632 /* Not needed with an alignment of 2 */ 13633 if (align != 2) 13634 { 13635 emit_label (align_2_label); 13636 13637 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, 13638 end_0_label); 13639 13640 if (TARGET_64BIT) 13641 emit_insn (gen_adddi3 (out, out, const1_rtx)); 13642 else 13643 emit_insn (gen_addsi3 (out, out, const1_rtx)); 13644 13645 emit_label (align_3_label); 13646 } 13647 13648 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, 13649 end_0_label); 13650 13651 if (TARGET_64BIT) 13652 emit_insn (gen_adddi3 (out, out, const1_rtx)); 13653 else 13654 emit_insn (gen_addsi3 (out, out, const1_rtx)); 13655 } 13656 13657 /* Generate loop to check 4 bytes at a time. It is not a good idea to 13658 align this loop. It gives only huge programs, but does not help to 13659 speed up. */ 13660 emit_label (align_4_label); 13661 13662 mem = change_address (src, SImode, out); 13663 emit_move_insn (scratch, mem); 13664 if (TARGET_64BIT) 13665 emit_insn (gen_adddi3 (out, out, GEN_INT (4))); 13666 else 13667 emit_insn (gen_addsi3 (out, out, GEN_INT (4))); 13668 13669 /* This formula yields a nonzero result iff one of the bytes is zero. 13670 This saves three branches inside loop and many cycles. 
*/ 13671 13672 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101))); 13673 emit_insn (gen_one_cmplsi2 (scratch, scratch)); 13674 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch)); 13675 emit_insn (gen_andsi3 (tmpreg, tmpreg, 13676 gen_int_mode (0x80808080, SImode))); 13677 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 13678 align_4_label); 13679 13680 if (TARGET_CMOVE) 13681 { 13682 rtx reg = gen_reg_rtx (SImode); 13683 rtx reg2 = gen_reg_rtx (Pmode); 13684 emit_move_insn (reg, tmpreg); 13685 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16))); 13686 13687 /* If zero is not in the first two bytes, move two bytes forward. */ 13688 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); 13689 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 13690 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); 13691 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, 13692 gen_rtx_IF_THEN_ELSE (SImode, tmp, 13693 reg, 13694 tmpreg))); 13695 /* Emit lea manually to avoid clobbering of flags. */ 13696 emit_insn (gen_rtx_SET (SImode, reg2, 13697 gen_rtx_PLUS (Pmode, out, const2_rtx))); 13698 13699 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 13700 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); 13701 emit_insn (gen_rtx_SET (VOIDmode, out, 13702 gen_rtx_IF_THEN_ELSE (Pmode, tmp, 13703 reg2, 13704 out))); 13705 13706 } 13707 else 13708 { 13709 rtx end_2_label = gen_label_rtx (); 13710 /* Is zero in the first two bytes? */ 13711 13712 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); 13713 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 13714 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx); 13715 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 13716 gen_rtx_LABEL_REF (VOIDmode, end_2_label), 13717 pc_rtx); 13718 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); 13719 JUMP_LABEL (tmp) = end_2_label; 13720 13721 /* Not in the first two. Move two bytes forward. 
*/ 13722 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16))); 13723 if (TARGET_64BIT) 13724 emit_insn (gen_adddi3 (out, out, const2_rtx)); 13725 else 13726 emit_insn (gen_addsi3 (out, out, const2_rtx)); 13727 13728 emit_label (end_2_label); 13729 13730 } 13731 13732 /* Avoid branch in fixing the byte. */ 13733 tmpreg = gen_lowpart (QImode, tmpreg); 13734 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg)); 13735 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx); 13736 if (TARGET_64BIT) 13737 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp)); 13738 else 13739 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp)); 13740 13741 emit_label (end_0_label); 13742} 13743 13744void 13745ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, 13746 rtx callarg2 ATTRIBUTE_UNUSED, 13747 rtx pop, int sibcall) 13748{ 13749 rtx use = NULL, call; 13750 13751 if (pop == const0_rtx) 13752 pop = NULL; 13753 gcc_assert (!TARGET_64BIT || !pop); 13754 13755 if (TARGET_MACHO && !TARGET_64BIT) 13756 { 13757#if TARGET_MACHO 13758 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) 13759 fnaddr = machopic_indirect_call_target (fnaddr); 13760#endif 13761 } 13762 else 13763 { 13764 /* Static functions and indirect calls don't need the pic register. */ 13765 if (! TARGET_64BIT && flag_pic 13766 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF 13767 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))) 13768 use_reg (&use, pic_offset_table_rtx); 13769 } 13770 13771 if (TARGET_64BIT && INTVAL (callarg2) >= 0) 13772 { 13773 rtx al = gen_rtx_REG (QImode, 0); 13774 emit_move_insn (al, callarg2); 13775 use_reg (&use, al); 13776 } 13777 13778 if (! 
call_insn_operand (XEXP (fnaddr, 0), Pmode)) 13779 { 13780 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0)); 13781 fnaddr = gen_rtx_MEM (QImode, fnaddr); 13782 } 13783 if (sibcall && TARGET_64BIT 13784 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode)) 13785 { 13786 rtx addr; 13787 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0)); 13788 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */); 13789 emit_move_insn (fnaddr, addr); 13790 fnaddr = gen_rtx_MEM (QImode, fnaddr); 13791 } 13792 13793 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); 13794 if (retval) 13795 call = gen_rtx_SET (VOIDmode, retval, call); 13796 if (pop) 13797 { 13798 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop); 13799 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop); 13800 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop)); 13801 } 13802 13803 call = emit_call_insn (call); 13804 if (use) 13805 CALL_INSN_FUNCTION_USAGE (call) = use; 13806} 13807 13808 13809/* Clear stack slot assignments remembered from previous functions. 13810 This is called from INIT_EXPANDERS once before RTL is emitted for each 13811 function. */ 13812 13813static struct machine_function * 13814ix86_init_machine_status (void) 13815{ 13816 struct machine_function *f; 13817 13818 f = ggc_alloc_cleared (sizeof (struct machine_function)); 13819 f->use_fast_prologue_epilogue_nregs = -1; 13820 f->tls_descriptor_call_expanded_p = 0; 13821 13822 return f; 13823} 13824 13825/* Return a MEM corresponding to a stack slot with mode MODE. 13826 Allocate a new slot if necessary. 13827 13828 The RTL for a function can have several slots available: N is 13829 which slot to use. */ 13830 13831rtx 13832assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n) 13833{ 13834 struct stack_local_entry *s; 13835 13836 gcc_assert (n < MAX_386_STACK_LOCALS); 13837 13838 /* Virtual slot is valid only before vregs are instantiated. 
*/ 13839 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated); 13840 13841 for (s = ix86_stack_locals; s; s = s->next) 13842 if (s->mode == mode && s->n == n) 13843 return s->rtl; 13844 13845 s = (struct stack_local_entry *) 13846 ggc_alloc (sizeof (struct stack_local_entry)); 13847 s->n = n; 13848 s->mode = mode; 13849 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); 13850 13851 s->next = ix86_stack_locals; 13852 ix86_stack_locals = s; 13853 return s->rtl; 13854} 13855 13856/* Construct the SYMBOL_REF for the tls_get_addr function. */ 13857 13858static GTY(()) rtx ix86_tls_symbol; 13859rtx 13860ix86_tls_get_addr (void) 13861{ 13862 13863 if (!ix86_tls_symbol) 13864 { 13865 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, 13866 (TARGET_ANY_GNU_TLS 13867 && !TARGET_64BIT) 13868 ? "___tls_get_addr" 13869 : "__tls_get_addr"); 13870 } 13871 13872 return ix86_tls_symbol; 13873} 13874 13875/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */ 13876 13877static GTY(()) rtx ix86_tls_module_base_symbol; 13878rtx 13879ix86_tls_module_base (void) 13880{ 13881 13882 if (!ix86_tls_module_base_symbol) 13883 { 13884 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode, 13885 "_TLS_MODULE_BASE_"); 13886 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol) 13887 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; 13888 } 13889 13890 return ix86_tls_module_base_symbol; 13891} 13892 13893/* Calculate the length of the memory address in the instruction 13894 encoding. Does not include the one-byte modrm, opcode, or prefix. 
*/ 13895 13896int 13897memory_address_length (rtx addr) 13898{ 13899 struct ix86_address parts; 13900 rtx base, index, disp; 13901 int len; 13902 int ok; 13903 13904 if (GET_CODE (addr) == PRE_DEC 13905 || GET_CODE (addr) == POST_INC 13906 || GET_CODE (addr) == PRE_MODIFY 13907 || GET_CODE (addr) == POST_MODIFY) 13908 return 0; 13909 13910 ok = ix86_decompose_address (addr, &parts); 13911 gcc_assert (ok); 13912 13913 if (parts.base && GET_CODE (parts.base) == SUBREG) 13914 parts.base = SUBREG_REG (parts.base); 13915 if (parts.index && GET_CODE (parts.index) == SUBREG) 13916 parts.index = SUBREG_REG (parts.index); 13917 13918 base = parts.base; 13919 index = parts.index; 13920 disp = parts.disp; 13921 len = 0; 13922 13923 /* Rule of thumb: 13924 - esp as the base always wants an index, 13925 - ebp as the base always wants a displacement. */ 13926 13927 /* Register Indirect. */ 13928 if (base && !index && !disp) 13929 { 13930 /* esp (for its index) and ebp (for its displacement) need 13931 the two-byte modrm form. */ 13932 if (addr == stack_pointer_rtx 13933 || addr == arg_pointer_rtx 13934 || addr == frame_pointer_rtx 13935 || addr == hard_frame_pointer_rtx) 13936 len = 1; 13937 } 13938 13939 /* Direct Addressing. */ 13940 else if (disp && !base && !index) 13941 len = 4; 13942 13943 else 13944 { 13945 /* Find the length of the displacement constant. */ 13946 if (disp) 13947 { 13948 if (base && satisfies_constraint_K (disp)) 13949 len = 1; 13950 else 13951 len = 4; 13952 } 13953 /* ebp always wants a displacement. */ 13954 else if (base == hard_frame_pointer_rtx) 13955 len = 1; 13956 13957 /* An index requires the two-byte modrm form.... */ 13958 if (index 13959 /* ...like esp, which always wants an index. */ 13960 || base == stack_pointer_rtx 13961 || base == arg_pointer_rtx 13962 || base == frame_pointer_rtx) 13963 len += 1; 13964 } 13965 13966 return len; 13967} 13968 13969/* Compute default value for "length_immediate" attribute. 
When SHORTFORM 13970 is set, expect that insn have 8bit immediate alternative. */ 13971int 13972ix86_attr_length_immediate_default (rtx insn, int shortform) 13973{ 13974 int len = 0; 13975 int i; 13976 extract_insn_cached (insn); 13977 for (i = recog_data.n_operands - 1; i >= 0; --i) 13978 if (CONSTANT_P (recog_data.operand[i])) 13979 { 13980 gcc_assert (!len); 13981 if (shortform && satisfies_constraint_K (recog_data.operand[i])) 13982 len = 1; 13983 else 13984 { 13985 switch (get_attr_mode (insn)) 13986 { 13987 case MODE_QI: 13988 len+=1; 13989 break; 13990 case MODE_HI: 13991 len+=2; 13992 break; 13993 case MODE_SI: 13994 len+=4; 13995 break; 13996 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */ 13997 case MODE_DI: 13998 len+=4; 13999 break; 14000 default: 14001 fatal_insn ("unknown insn mode", insn); 14002 } 14003 } 14004 } 14005 return len; 14006} 14007/* Compute default value for "length_address" attribute. */ 14008int 14009ix86_attr_length_address_default (rtx insn) 14010{ 14011 int i; 14012 14013 if (get_attr_type (insn) == TYPE_LEA) 14014 { 14015 rtx set = PATTERN (insn); 14016 14017 if (GET_CODE (set) == PARALLEL) 14018 set = XVECEXP (set, 0, 0); 14019 14020 gcc_assert (GET_CODE (set) == SET); 14021 14022 return memory_address_length (SET_SRC (set)); 14023 } 14024 14025 extract_insn_cached (insn); 14026 for (i = recog_data.n_operands - 1; i >= 0; --i) 14027 if (GET_CODE (recog_data.operand[i]) == MEM) 14028 { 14029 return memory_address_length (XEXP (recog_data.operand[i], 0)); 14030 break; 14031 } 14032 return 0; 14033} 14034 14035/* Return the maximum number of instructions a cpu can issue. 
*/ 14036 14037static int 14038ix86_issue_rate (void) 14039{ 14040 switch (ix86_tune) 14041 { 14042 case PROCESSOR_PENTIUM: 14043 case PROCESSOR_K6: 14044 return 2; 14045 14046 case PROCESSOR_PENTIUMPRO: 14047 case PROCESSOR_PENTIUM4: 14048 case PROCESSOR_ATHLON: 14049 case PROCESSOR_K8: 14050 case PROCESSOR_AMDFAM10: 14051 case PROCESSOR_NOCONA: 14052 case PROCESSOR_GENERIC32: 14053 case PROCESSOR_GENERIC64: 14054 return 3; 14055 14056 case PROCESSOR_CORE2: 14057 return 4; 14058 14059 default: 14060 return 1; 14061 } 14062} 14063 14064/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set 14065 by DEP_INSN and nothing set by DEP_INSN. */ 14066 14067static int 14068ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) 14069{ 14070 rtx set, set2; 14071 14072 /* Simplify the test for uninteresting insns. */ 14073 if (insn_type != TYPE_SETCC 14074 && insn_type != TYPE_ICMOV 14075 && insn_type != TYPE_FCMOV 14076 && insn_type != TYPE_IBR) 14077 return 0; 14078 14079 if ((set = single_set (dep_insn)) != 0) 14080 { 14081 set = SET_DEST (set); 14082 set2 = NULL_RTX; 14083 } 14084 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL 14085 && XVECLEN (PATTERN (dep_insn), 0) == 2 14086 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET 14087 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET) 14088 { 14089 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 14090 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 14091 } 14092 else 14093 return 0; 14094 14095 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG) 14096 return 0; 14097 14098 /* This test is true if the dependent insn reads the flags but 14099 not any other potentially set register. 
*/ 14100 if (!reg_overlap_mentioned_p (set, PATTERN (insn))) 14101 return 0; 14102 14103 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn))) 14104 return 0; 14105 14106 return 1; 14107} 14108 14109/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory 14110 address with operands set by DEP_INSN. */ 14111 14112static int 14113ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) 14114{ 14115 rtx addr; 14116 14117 if (insn_type == TYPE_LEA 14118 && TARGET_PENTIUM) 14119 { 14120 addr = PATTERN (insn); 14121 14122 if (GET_CODE (addr) == PARALLEL) 14123 addr = XVECEXP (addr, 0, 0); 14124 14125 gcc_assert (GET_CODE (addr) == SET); 14126 14127 addr = SET_SRC (addr); 14128 } 14129 else 14130 { 14131 int i; 14132 extract_insn_cached (insn); 14133 for (i = recog_data.n_operands - 1; i >= 0; --i) 14134 if (GET_CODE (recog_data.operand[i]) == MEM) 14135 { 14136 addr = XEXP (recog_data.operand[i], 0); 14137 goto found; 14138 } 14139 return 0; 14140 found:; 14141 } 14142 14143 return modified_in_p (addr, dep_insn); 14144} 14145 14146static int 14147ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) 14148{ 14149 enum attr_type insn_type, dep_insn_type; 14150 enum attr_memory memory; 14151 rtx set, set2; 14152 int dep_insn_code_number; 14153 14154 /* Anti and output dependencies have zero cost on all CPUs. */ 14155 if (REG_NOTE_KIND (link) != 0) 14156 return 0; 14157 14158 dep_insn_code_number = recog_memoized (dep_insn); 14159 14160 /* If we can't recognize the insns, we can't really do anything. */ 14161 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0) 14162 return cost; 14163 14164 insn_type = get_attr_type (insn); 14165 dep_insn_type = get_attr_type (dep_insn); 14166 14167 switch (ix86_tune) 14168 { 14169 case PROCESSOR_PENTIUM: 14170 /* Address Generation Interlock adds a cycle of latency. */ 14171 if (ix86_agi_dependent (insn, dep_insn, insn_type)) 14172 cost += 1; 14173 14174 /* ??? 
Compares pair with jump/setcc. */ 14175 if (ix86_flags_dependent (insn, dep_insn, insn_type)) 14176 cost = 0; 14177 14178 /* Floating point stores require value to be ready one cycle earlier. */ 14179 if (insn_type == TYPE_FMOV 14180 && get_attr_memory (insn) == MEMORY_STORE 14181 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 14182 cost += 1; 14183 break; 14184 14185 case PROCESSOR_PENTIUMPRO: 14186 memory = get_attr_memory (insn); 14187 14188 /* INT->FP conversion is expensive. */ 14189 if (get_attr_fp_int_src (dep_insn)) 14190 cost += 5; 14191 14192 /* There is one cycle extra latency between an FP op and a store. */ 14193 if (insn_type == TYPE_FMOV 14194 && (set = single_set (dep_insn)) != NULL_RTX 14195 && (set2 = single_set (insn)) != NULL_RTX 14196 && rtx_equal_p (SET_DEST (set), SET_SRC (set2)) 14197 && GET_CODE (SET_DEST (set2)) == MEM) 14198 cost += 1; 14199 14200 /* Show ability of reorder buffer to hide latency of load by executing 14201 in parallel with previous instruction in case 14202 previous instruction is not needed to compute the address. */ 14203 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 14204 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 14205 { 14206 /* Claim moves to take one cycle, as core can issue one load 14207 at time and the next load can start cycle later. */ 14208 if (dep_insn_type == TYPE_IMOV 14209 || dep_insn_type == TYPE_FMOV) 14210 cost = 1; 14211 else if (cost > 1) 14212 cost--; 14213 } 14214 break; 14215 14216 case PROCESSOR_K6: 14217 memory = get_attr_memory (insn); 14218 14219 /* The esp dependency is resolved before the instruction is really 14220 finished. */ 14221 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) 14222 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) 14223 return 1; 14224 14225 /* INT->FP conversion is expensive. 
*/ 14226 if (get_attr_fp_int_src (dep_insn)) 14227 cost += 5; 14228 14229 /* Show ability of reorder buffer to hide latency of load by executing 14230 in parallel with previous instruction in case 14231 previous instruction is not needed to compute the address. */ 14232 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 14233 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 14234 { 14235 /* Claim moves to take one cycle, as core can issue one load 14236 at time and the next load can start cycle later. */ 14237 if (dep_insn_type == TYPE_IMOV 14238 || dep_insn_type == TYPE_FMOV) 14239 cost = 1; 14240 else if (cost > 2) 14241 cost -= 2; 14242 else 14243 cost = 1; 14244 } 14245 break; 14246 14247 case PROCESSOR_ATHLON: 14248 case PROCESSOR_K8: 14249 case PROCESSOR_AMDFAM10: 14250 case PROCESSOR_GENERIC32: 14251 case PROCESSOR_GENERIC64: 14252 memory = get_attr_memory (insn); 14253 14254 /* Show ability of reorder buffer to hide latency of load by executing 14255 in parallel with previous instruction in case 14256 previous instruction is not needed to compute the address. */ 14257 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 14258 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 14259 { 14260 enum attr_unit unit = get_attr_unit (insn); 14261 int loadcost = 3; 14262 14263 /* Because of the difference between the length of integer and 14264 floating unit pipeline preparation stages, the memory operands 14265 for floating point are cheaper. 14266 14267 ??? For Athlon it the difference is most probably 2. */ 14268 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN) 14269 loadcost = 3; 14270 else 14271 loadcost = TARGET_ATHLON ? 2 : 0; 14272 14273 if (cost >= loadcost) 14274 cost -= loadcost; 14275 else 14276 cost = 0; 14277 } 14278 14279 default: 14280 break; 14281 } 14282 14283 return cost; 14284} 14285 14286/* How many alternative schedules to try. This should be as wide as the 14287 scheduling freedom in the DFA, but no wider. 
Making this value too 14288 large results extra work for the scheduler. */ 14289 14290static int 14291ia32_multipass_dfa_lookahead (void) 14292{ 14293 if (ix86_tune == PROCESSOR_PENTIUM) 14294 return 2; 14295 14296 if (ix86_tune == PROCESSOR_PENTIUMPRO 14297 || ix86_tune == PROCESSOR_K6) 14298 return 1; 14299 14300 else 14301 return 0; 14302} 14303 14304 14305/* Compute the alignment given to a constant that is being placed in memory. 14306 EXP is the constant and ALIGN is the alignment that the object would 14307 ordinarily have. 14308 The value of this function is used instead of that alignment to align 14309 the object. */ 14310 14311int 14312ix86_constant_alignment (tree exp, int align) 14313{ 14314 if (TREE_CODE (exp) == REAL_CST) 14315 { 14316 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64) 14317 return 64; 14318 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128) 14319 return 128; 14320 } 14321 else if (!optimize_size && TREE_CODE (exp) == STRING_CST 14322 && !TARGET_NO_ALIGN_LONG_STRINGS 14323 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD) 14324 return BITS_PER_WORD; 14325 14326 return align; 14327} 14328 14329/* Compute the alignment for a static variable. 14330 TYPE is the data type, and ALIGN is the alignment that 14331 the object would ordinarily have. The value of this function is used 14332 instead of that alignment to align the object. */ 14333 14334int 14335ix86_data_alignment (tree type, int align) 14336{ 14337 int max_align = optimize_size ? BITS_PER_WORD : 256; 14338 14339 if (AGGREGATE_TYPE_P (type) 14340 && TYPE_SIZE (type) 14341 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 14342 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align 14343 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) 14344 && align < max_align) 14345 align = max_align; 14346 14347 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned 14348 to 16byte boundary. 
*/ 14349 if (TARGET_64BIT) 14350 { 14351 if (AGGREGATE_TYPE_P (type) 14352 && TYPE_SIZE (type) 14353 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 14354 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128 14355 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) 14356 return 128; 14357 } 14358 14359 if (TREE_CODE (type) == ARRAY_TYPE) 14360 { 14361 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) 14362 return 64; 14363 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) 14364 return 128; 14365 } 14366 else if (TREE_CODE (type) == COMPLEX_TYPE) 14367 { 14368 14369 if (TYPE_MODE (type) == DCmode && align < 64) 14370 return 64; 14371 if (TYPE_MODE (type) == XCmode && align < 128) 14372 return 128; 14373 } 14374 else if ((TREE_CODE (type) == RECORD_TYPE 14375 || TREE_CODE (type) == UNION_TYPE 14376 || TREE_CODE (type) == QUAL_UNION_TYPE) 14377 && TYPE_FIELDS (type)) 14378 { 14379 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) 14380 return 64; 14381 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) 14382 return 128; 14383 } 14384 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE 14385 || TREE_CODE (type) == INTEGER_TYPE) 14386 { 14387 if (TYPE_MODE (type) == DFmode && align < 64) 14388 return 64; 14389 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) 14390 return 128; 14391 } 14392 14393 return align; 14394} 14395 14396/* Compute the alignment for a local variable. 14397 TYPE is the data type, and ALIGN is the alignment that 14398 the object would ordinarily have. The value of this macro is used 14399 instead of that alignment to align the object. */ 14400 14401int 14402ix86_local_alignment (tree type, int align) 14403{ 14404 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned 14405 to 16byte boundary. 
*/ 14406 if (TARGET_64BIT) 14407 { 14408 if (AGGREGATE_TYPE_P (type) 14409 && TYPE_SIZE (type) 14410 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 14411 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128 14412 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) 14413 return 128; 14414 } 14415 if (TREE_CODE (type) == ARRAY_TYPE) 14416 { 14417 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) 14418 return 64; 14419 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) 14420 return 128; 14421 } 14422 else if (TREE_CODE (type) == COMPLEX_TYPE) 14423 { 14424 if (TYPE_MODE (type) == DCmode && align < 64) 14425 return 64; 14426 if (TYPE_MODE (type) == XCmode && align < 128) 14427 return 128; 14428 } 14429 else if ((TREE_CODE (type) == RECORD_TYPE 14430 || TREE_CODE (type) == UNION_TYPE 14431 || TREE_CODE (type) == QUAL_UNION_TYPE) 14432 && TYPE_FIELDS (type)) 14433 { 14434 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) 14435 return 64; 14436 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) 14437 return 128; 14438 } 14439 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE 14440 || TREE_CODE (type) == INTEGER_TYPE) 14441 { 14442 14443 if (TYPE_MODE (type) == DFmode && align < 64) 14444 return 64; 14445 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) 14446 return 128; 14447 } 14448 return align; 14449} 14450 14451/* Emit RTL insns to initialize the variable parts of a trampoline. 14452 FNADDR is an RTX for the address of the function's pure code. 14453 CXT is an RTX for the static chain value for the function. */ 14454void 14455x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt) 14456{ 14457 if (!TARGET_64BIT) 14458 { 14459 /* Compute offset from the end of the jmp to the target function. 
*/ 14460 rtx disp = expand_binop (SImode, sub_optab, fnaddr, 14461 plus_constant (tramp, 10), 14462 NULL_RTX, 1, OPTAB_DIRECT); 14463 emit_move_insn (gen_rtx_MEM (QImode, tramp), 14464 gen_int_mode (0xb9, QImode)); 14465 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt); 14466 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)), 14467 gen_int_mode (0xe9, QImode)); 14468 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp); 14469 } 14470 else 14471 { 14472 int offset = 0; 14473 /* Try to load address using shorter movl instead of movabs. 14474 We may want to support movq for kernel mode, but kernel does not use 14475 trampolines at the moment. */ 14476 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode)) 14477 { 14478 fnaddr = copy_to_mode_reg (DImode, fnaddr); 14479 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 14480 gen_int_mode (0xbb41, HImode)); 14481 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)), 14482 gen_lowpart (SImode, fnaddr)); 14483 offset += 6; 14484 } 14485 else 14486 { 14487 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 14488 gen_int_mode (0xbb49, HImode)); 14489 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), 14490 fnaddr); 14491 offset += 10; 14492 } 14493 /* Load static chain using movabs to r10. 
*/ 14494 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 14495 gen_int_mode (0xba49, HImode)); 14496 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), 14497 cxt); 14498 offset += 10; 14499 /* Jump to the r11 */ 14500 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 14501 gen_int_mode (0xff49, HImode)); 14502 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)), 14503 gen_int_mode (0xe3, QImode)); 14504 offset += 3; 14505 gcc_assert (offset <= TRAMPOLINE_SIZE); 14506 } 14507 14508#ifdef ENABLE_EXECUTE_STACK 14509 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), 14510 LCT_NORMAL, VOIDmode, 1, tramp, Pmode); 14511#endif 14512} 14513 14514/* Codes for all the SSE/MMX builtins. */ 14515enum ix86_builtins 14516{ 14517 IX86_BUILTIN_ADDPS, 14518 IX86_BUILTIN_ADDSS, 14519 IX86_BUILTIN_DIVPS, 14520 IX86_BUILTIN_DIVSS, 14521 IX86_BUILTIN_MULPS, 14522 IX86_BUILTIN_MULSS, 14523 IX86_BUILTIN_SUBPS, 14524 IX86_BUILTIN_SUBSS, 14525 14526 IX86_BUILTIN_CMPEQPS, 14527 IX86_BUILTIN_CMPLTPS, 14528 IX86_BUILTIN_CMPLEPS, 14529 IX86_BUILTIN_CMPGTPS, 14530 IX86_BUILTIN_CMPGEPS, 14531 IX86_BUILTIN_CMPNEQPS, 14532 IX86_BUILTIN_CMPNLTPS, 14533 IX86_BUILTIN_CMPNLEPS, 14534 IX86_BUILTIN_CMPNGTPS, 14535 IX86_BUILTIN_CMPNGEPS, 14536 IX86_BUILTIN_CMPORDPS, 14537 IX86_BUILTIN_CMPUNORDPS, 14538 IX86_BUILTIN_CMPEQSS, 14539 IX86_BUILTIN_CMPLTSS, 14540 IX86_BUILTIN_CMPLESS, 14541 IX86_BUILTIN_CMPNEQSS, 14542 IX86_BUILTIN_CMPNLTSS, 14543 IX86_BUILTIN_CMPNLESS, 14544 IX86_BUILTIN_CMPNGTSS, 14545 IX86_BUILTIN_CMPNGESS, 14546 IX86_BUILTIN_CMPORDSS, 14547 IX86_BUILTIN_CMPUNORDSS, 14548 14549 IX86_BUILTIN_COMIEQSS, 14550 IX86_BUILTIN_COMILTSS, 14551 IX86_BUILTIN_COMILESS, 14552 IX86_BUILTIN_COMIGTSS, 14553 IX86_BUILTIN_COMIGESS, 14554 IX86_BUILTIN_COMINEQSS, 14555 IX86_BUILTIN_UCOMIEQSS, 14556 IX86_BUILTIN_UCOMILTSS, 14557 IX86_BUILTIN_UCOMILESS, 14558 IX86_BUILTIN_UCOMIGTSS, 14559 IX86_BUILTIN_UCOMIGESS, 
14560 IX86_BUILTIN_UCOMINEQSS, 14561 14562 IX86_BUILTIN_CVTPI2PS, 14563 IX86_BUILTIN_CVTPS2PI, 14564 IX86_BUILTIN_CVTSI2SS, 14565 IX86_BUILTIN_CVTSI642SS, 14566 IX86_BUILTIN_CVTSS2SI, 14567 IX86_BUILTIN_CVTSS2SI64, 14568 IX86_BUILTIN_CVTTPS2PI, 14569 IX86_BUILTIN_CVTTSS2SI, 14570 IX86_BUILTIN_CVTTSS2SI64, 14571 14572 IX86_BUILTIN_MAXPS, 14573 IX86_BUILTIN_MAXSS, 14574 IX86_BUILTIN_MINPS, 14575 IX86_BUILTIN_MINSS, 14576 14577 IX86_BUILTIN_LOADUPS, 14578 IX86_BUILTIN_STOREUPS, 14579 IX86_BUILTIN_MOVSS, 14580 14581 IX86_BUILTIN_MOVHLPS, 14582 IX86_BUILTIN_MOVLHPS, 14583 IX86_BUILTIN_LOADHPS, 14584 IX86_BUILTIN_LOADLPS, 14585 IX86_BUILTIN_STOREHPS, 14586 IX86_BUILTIN_STORELPS, 14587 14588 IX86_BUILTIN_MASKMOVQ, 14589 IX86_BUILTIN_MOVMSKPS, 14590 IX86_BUILTIN_PMOVMSKB, 14591 14592 IX86_BUILTIN_MOVNTPS, 14593 IX86_BUILTIN_MOVNTQ, 14594 14595 IX86_BUILTIN_LOADDQU, 14596 IX86_BUILTIN_STOREDQU, 14597 14598 IX86_BUILTIN_PACKSSWB, 14599 IX86_BUILTIN_PACKSSDW, 14600 IX86_BUILTIN_PACKUSWB, 14601 14602 IX86_BUILTIN_PADDB, 14603 IX86_BUILTIN_PADDW, 14604 IX86_BUILTIN_PADDD, 14605 IX86_BUILTIN_PADDQ, 14606 IX86_BUILTIN_PADDSB, 14607 IX86_BUILTIN_PADDSW, 14608 IX86_BUILTIN_PADDUSB, 14609 IX86_BUILTIN_PADDUSW, 14610 IX86_BUILTIN_PSUBB, 14611 IX86_BUILTIN_PSUBW, 14612 IX86_BUILTIN_PSUBD, 14613 IX86_BUILTIN_PSUBQ, 14614 IX86_BUILTIN_PSUBSB, 14615 IX86_BUILTIN_PSUBSW, 14616 IX86_BUILTIN_PSUBUSB, 14617 IX86_BUILTIN_PSUBUSW, 14618 14619 IX86_BUILTIN_PAND, 14620 IX86_BUILTIN_PANDN, 14621 IX86_BUILTIN_POR, 14622 IX86_BUILTIN_PXOR, 14623 14624 IX86_BUILTIN_PAVGB, 14625 IX86_BUILTIN_PAVGW, 14626 14627 IX86_BUILTIN_PCMPEQB, 14628 IX86_BUILTIN_PCMPEQW, 14629 IX86_BUILTIN_PCMPEQD, 14630 IX86_BUILTIN_PCMPGTB, 14631 IX86_BUILTIN_PCMPGTW, 14632 IX86_BUILTIN_PCMPGTD, 14633 14634 IX86_BUILTIN_PMADDWD, 14635 14636 IX86_BUILTIN_PMAXSW, 14637 IX86_BUILTIN_PMAXUB, 14638 IX86_BUILTIN_PMINSW, 14639 IX86_BUILTIN_PMINUB, 14640 14641 IX86_BUILTIN_PMULHUW, 14642 IX86_BUILTIN_PMULHW, 14643 IX86_BUILTIN_PMULLW, 
14644 14645 IX86_BUILTIN_PSADBW, 14646 IX86_BUILTIN_PSHUFW, 14647 14648 IX86_BUILTIN_PSLLW, 14649 IX86_BUILTIN_PSLLD, 14650 IX86_BUILTIN_PSLLQ, 14651 IX86_BUILTIN_PSRAW, 14652 IX86_BUILTIN_PSRAD, 14653 IX86_BUILTIN_PSRLW, 14654 IX86_BUILTIN_PSRLD, 14655 IX86_BUILTIN_PSRLQ, 14656 IX86_BUILTIN_PSLLWI, 14657 IX86_BUILTIN_PSLLDI, 14658 IX86_BUILTIN_PSLLQI, 14659 IX86_BUILTIN_PSRAWI, 14660 IX86_BUILTIN_PSRADI, 14661 IX86_BUILTIN_PSRLWI, 14662 IX86_BUILTIN_PSRLDI, 14663 IX86_BUILTIN_PSRLQI, 14664 14665 IX86_BUILTIN_PUNPCKHBW, 14666 IX86_BUILTIN_PUNPCKHWD, 14667 IX86_BUILTIN_PUNPCKHDQ, 14668 IX86_BUILTIN_PUNPCKLBW, 14669 IX86_BUILTIN_PUNPCKLWD, 14670 IX86_BUILTIN_PUNPCKLDQ, 14671 14672 IX86_BUILTIN_SHUFPS, 14673 14674 IX86_BUILTIN_RCPPS, 14675 IX86_BUILTIN_RCPSS, 14676 IX86_BUILTIN_RSQRTPS, 14677 IX86_BUILTIN_RSQRTSS, 14678 IX86_BUILTIN_SQRTPS, 14679 IX86_BUILTIN_SQRTSS, 14680 14681 IX86_BUILTIN_UNPCKHPS, 14682 IX86_BUILTIN_UNPCKLPS, 14683 14684 IX86_BUILTIN_ANDPS, 14685 IX86_BUILTIN_ANDNPS, 14686 IX86_BUILTIN_ORPS, 14687 IX86_BUILTIN_XORPS, 14688 14689 IX86_BUILTIN_EMMS, 14690 IX86_BUILTIN_LDMXCSR, 14691 IX86_BUILTIN_STMXCSR, 14692 IX86_BUILTIN_SFENCE, 14693 14694 /* 3DNow! Original */ 14695 IX86_BUILTIN_FEMMS, 14696 IX86_BUILTIN_PAVGUSB, 14697 IX86_BUILTIN_PF2ID, 14698 IX86_BUILTIN_PFACC, 14699 IX86_BUILTIN_PFADD, 14700 IX86_BUILTIN_PFCMPEQ, 14701 IX86_BUILTIN_PFCMPGE, 14702 IX86_BUILTIN_PFCMPGT, 14703 IX86_BUILTIN_PFMAX, 14704 IX86_BUILTIN_PFMIN, 14705 IX86_BUILTIN_PFMUL, 14706 IX86_BUILTIN_PFRCP, 14707 IX86_BUILTIN_PFRCPIT1, 14708 IX86_BUILTIN_PFRCPIT2, 14709 IX86_BUILTIN_PFRSQIT1, 14710 IX86_BUILTIN_PFRSQRT, 14711 IX86_BUILTIN_PFSUB, 14712 IX86_BUILTIN_PFSUBR, 14713 IX86_BUILTIN_PI2FD, 14714 IX86_BUILTIN_PMULHRW, 14715 14716 /* 3DNow! 
Athlon Extensions */ 14717 IX86_BUILTIN_PF2IW, 14718 IX86_BUILTIN_PFNACC, 14719 IX86_BUILTIN_PFPNACC, 14720 IX86_BUILTIN_PI2FW, 14721 IX86_BUILTIN_PSWAPDSI, 14722 IX86_BUILTIN_PSWAPDSF, 14723 14724 /* SSE2 */ 14725 IX86_BUILTIN_ADDPD, 14726 IX86_BUILTIN_ADDSD, 14727 IX86_BUILTIN_DIVPD, 14728 IX86_BUILTIN_DIVSD, 14729 IX86_BUILTIN_MULPD, 14730 IX86_BUILTIN_MULSD, 14731 IX86_BUILTIN_SUBPD, 14732 IX86_BUILTIN_SUBSD, 14733 14734 IX86_BUILTIN_CMPEQPD, 14735 IX86_BUILTIN_CMPLTPD, 14736 IX86_BUILTIN_CMPLEPD, 14737 IX86_BUILTIN_CMPGTPD, 14738 IX86_BUILTIN_CMPGEPD, 14739 IX86_BUILTIN_CMPNEQPD, 14740 IX86_BUILTIN_CMPNLTPD, 14741 IX86_BUILTIN_CMPNLEPD, 14742 IX86_BUILTIN_CMPNGTPD, 14743 IX86_BUILTIN_CMPNGEPD, 14744 IX86_BUILTIN_CMPORDPD, 14745 IX86_BUILTIN_CMPUNORDPD, 14746 IX86_BUILTIN_CMPNEPD, 14747 IX86_BUILTIN_CMPEQSD, 14748 IX86_BUILTIN_CMPLTSD, 14749 IX86_BUILTIN_CMPLESD, 14750 IX86_BUILTIN_CMPNEQSD, 14751 IX86_BUILTIN_CMPNLTSD, 14752 IX86_BUILTIN_CMPNLESD, 14753 IX86_BUILTIN_CMPORDSD, 14754 IX86_BUILTIN_CMPUNORDSD, 14755 IX86_BUILTIN_CMPNESD, 14756 14757 IX86_BUILTIN_COMIEQSD, 14758 IX86_BUILTIN_COMILTSD, 14759 IX86_BUILTIN_COMILESD, 14760 IX86_BUILTIN_COMIGTSD, 14761 IX86_BUILTIN_COMIGESD, 14762 IX86_BUILTIN_COMINEQSD, 14763 IX86_BUILTIN_UCOMIEQSD, 14764 IX86_BUILTIN_UCOMILTSD, 14765 IX86_BUILTIN_UCOMILESD, 14766 IX86_BUILTIN_UCOMIGTSD, 14767 IX86_BUILTIN_UCOMIGESD, 14768 IX86_BUILTIN_UCOMINEQSD, 14769 14770 IX86_BUILTIN_MAXPD, 14771 IX86_BUILTIN_MAXSD, 14772 IX86_BUILTIN_MINPD, 14773 IX86_BUILTIN_MINSD, 14774 14775 IX86_BUILTIN_ANDPD, 14776 IX86_BUILTIN_ANDNPD, 14777 IX86_BUILTIN_ORPD, 14778 IX86_BUILTIN_XORPD, 14779 14780 IX86_BUILTIN_SQRTPD, 14781 IX86_BUILTIN_SQRTSD, 14782 14783 IX86_BUILTIN_UNPCKHPD, 14784 IX86_BUILTIN_UNPCKLPD, 14785 14786 IX86_BUILTIN_SHUFPD, 14787 14788 IX86_BUILTIN_LOADUPD, 14789 IX86_BUILTIN_STOREUPD, 14790 IX86_BUILTIN_MOVSD, 14791 14792 IX86_BUILTIN_LOADHPD, 14793 IX86_BUILTIN_LOADLPD, 14794 14795 IX86_BUILTIN_CVTDQ2PD, 14796 
IX86_BUILTIN_CVTDQ2PS, 14797 14798 IX86_BUILTIN_CVTPD2DQ, 14799 IX86_BUILTIN_CVTPD2PI, 14800 IX86_BUILTIN_CVTPD2PS, 14801 IX86_BUILTIN_CVTTPD2DQ, 14802 IX86_BUILTIN_CVTTPD2PI, 14803 14804 IX86_BUILTIN_CVTPI2PD, 14805 IX86_BUILTIN_CVTSI2SD, 14806 IX86_BUILTIN_CVTSI642SD, 14807 14808 IX86_BUILTIN_CVTSD2SI, 14809 IX86_BUILTIN_CVTSD2SI64, 14810 IX86_BUILTIN_CVTSD2SS, 14811 IX86_BUILTIN_CVTSS2SD, 14812 IX86_BUILTIN_CVTTSD2SI, 14813 IX86_BUILTIN_CVTTSD2SI64, 14814 14815 IX86_BUILTIN_CVTPS2DQ, 14816 IX86_BUILTIN_CVTPS2PD, 14817 IX86_BUILTIN_CVTTPS2DQ, 14818 14819 IX86_BUILTIN_MOVNTI, 14820 IX86_BUILTIN_MOVNTPD, 14821 IX86_BUILTIN_MOVNTDQ, 14822 14823 /* SSE2 MMX */ 14824 IX86_BUILTIN_MASKMOVDQU, 14825 IX86_BUILTIN_MOVMSKPD, 14826 IX86_BUILTIN_PMOVMSKB128, 14827 14828 IX86_BUILTIN_PACKSSWB128, 14829 IX86_BUILTIN_PACKSSDW128, 14830 IX86_BUILTIN_PACKUSWB128, 14831 14832 IX86_BUILTIN_PADDB128, 14833 IX86_BUILTIN_PADDW128, 14834 IX86_BUILTIN_PADDD128, 14835 IX86_BUILTIN_PADDQ128, 14836 IX86_BUILTIN_PADDSB128, 14837 IX86_BUILTIN_PADDSW128, 14838 IX86_BUILTIN_PADDUSB128, 14839 IX86_BUILTIN_PADDUSW128, 14840 IX86_BUILTIN_PSUBB128, 14841 IX86_BUILTIN_PSUBW128, 14842 IX86_BUILTIN_PSUBD128, 14843 IX86_BUILTIN_PSUBQ128, 14844 IX86_BUILTIN_PSUBSB128, 14845 IX86_BUILTIN_PSUBSW128, 14846 IX86_BUILTIN_PSUBUSB128, 14847 IX86_BUILTIN_PSUBUSW128, 14848 14849 IX86_BUILTIN_PAND128, 14850 IX86_BUILTIN_PANDN128, 14851 IX86_BUILTIN_POR128, 14852 IX86_BUILTIN_PXOR128, 14853 14854 IX86_BUILTIN_PAVGB128, 14855 IX86_BUILTIN_PAVGW128, 14856 14857 IX86_BUILTIN_PCMPEQB128, 14858 IX86_BUILTIN_PCMPEQW128, 14859 IX86_BUILTIN_PCMPEQD128, 14860 IX86_BUILTIN_PCMPGTB128, 14861 IX86_BUILTIN_PCMPGTW128, 14862 IX86_BUILTIN_PCMPGTD128, 14863 14864 IX86_BUILTIN_PMADDWD128, 14865 14866 IX86_BUILTIN_PMAXSW128, 14867 IX86_BUILTIN_PMAXUB128, 14868 IX86_BUILTIN_PMINSW128, 14869 IX86_BUILTIN_PMINUB128, 14870 14871 IX86_BUILTIN_PMULUDQ, 14872 IX86_BUILTIN_PMULUDQ128, 14873 IX86_BUILTIN_PMULHUW128, 14874 
IX86_BUILTIN_PMULHW128, 14875 IX86_BUILTIN_PMULLW128, 14876 14877 IX86_BUILTIN_PSADBW128, 14878 IX86_BUILTIN_PSHUFHW, 14879 IX86_BUILTIN_PSHUFLW, 14880 IX86_BUILTIN_PSHUFD, 14881 14882 IX86_BUILTIN_PSLLW128, 14883 IX86_BUILTIN_PSLLD128, 14884 IX86_BUILTIN_PSLLQ128, 14885 IX86_BUILTIN_PSRAW128, 14886 IX86_BUILTIN_PSRAD128, 14887 IX86_BUILTIN_PSRLW128, 14888 IX86_BUILTIN_PSRLD128, 14889 IX86_BUILTIN_PSRLQ128, 14890 IX86_BUILTIN_PSLLDQI128, 14891 IX86_BUILTIN_PSLLWI128, 14892 IX86_BUILTIN_PSLLDI128, 14893 IX86_BUILTIN_PSLLQI128, 14894 IX86_BUILTIN_PSRAWI128, 14895 IX86_BUILTIN_PSRADI128, 14896 IX86_BUILTIN_PSRLDQI128, 14897 IX86_BUILTIN_PSRLWI128, 14898 IX86_BUILTIN_PSRLDI128, 14899 IX86_BUILTIN_PSRLQI128, 14900 14901 IX86_BUILTIN_PUNPCKHBW128, 14902 IX86_BUILTIN_PUNPCKHWD128, 14903 IX86_BUILTIN_PUNPCKHDQ128, 14904 IX86_BUILTIN_PUNPCKHQDQ128, 14905 IX86_BUILTIN_PUNPCKLBW128, 14906 IX86_BUILTIN_PUNPCKLWD128, 14907 IX86_BUILTIN_PUNPCKLDQ128, 14908 IX86_BUILTIN_PUNPCKLQDQ128, 14909 14910 IX86_BUILTIN_CLFLUSH, 14911 IX86_BUILTIN_MFENCE, 14912 IX86_BUILTIN_LFENCE, 14913 14914 /* Prescott New Instructions. */ 14915 IX86_BUILTIN_ADDSUBPS, 14916 IX86_BUILTIN_HADDPS, 14917 IX86_BUILTIN_HSUBPS, 14918 IX86_BUILTIN_MOVSHDUP, 14919 IX86_BUILTIN_MOVSLDUP, 14920 IX86_BUILTIN_ADDSUBPD, 14921 IX86_BUILTIN_HADDPD, 14922 IX86_BUILTIN_HSUBPD, 14923 IX86_BUILTIN_LDDQU, 14924 14925 IX86_BUILTIN_MONITOR, 14926 IX86_BUILTIN_MWAIT, 14927 14928 /* SSSE3. 
*/
  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,

  /* Must stay last: used as the size of the builtin table.  */
  IX86_BUILTIN_MAX
};

/* Register NAME as a machine-specific builtin of type TYPE implemented
   by IX86_BUILTIN_* code CODE, but only when every target flag bit in
   MASK is currently enabled, and never register 64-bit-only builtins
   (MASK contains MASK_64BIT) on a 32-bit target.
   Note: `&' binds tighter than `&&', so the condition parses as
   ((MASK) & target_flags) && (...), which is the intent.  */
#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if ((MASK) & target_flags						\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
				 NULL, NULL_TREE);			\
} while (0)

/* Bits for builtin_description.flag.
*/

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

/* One table entry per ia32 builtin: the target_flags mask that must be
   enabled for the builtin to exist, the insn pattern implementing it,
   the user-visible builtin name (0 when the entry has no name of its
   own), the IX86_BUILTIN_* code, the RTX comparison code used by
   compare builtins, and the BUILTIN_DESC_* flag bits above.  */
struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};

/* Scalar comi/ucomi comparison builtins (SSE single and SSE2 double
   variants).  */
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2,
CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 }, 15030 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 }, 15031 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 }, 15032 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 }, 15033 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 }, 15034 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 }, 15035 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 }, 15036 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 }, 15037 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 }, 15038}; 15039 15040static const struct builtin_description bdesc_2arg[] = 15041{ 15042 /* SSE */ 15043 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }, 15044 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, 15045 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, 15046 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, 15047 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, 15048 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, 15049 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, 15050 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, 15051 15052 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, 15053 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, 15054 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, 
"__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, 15055 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 15056 BUILTIN_DESC_SWAP_OPERANDS }, 15057 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 15058 BUILTIN_DESC_SWAP_OPERANDS }, 15059 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, 15060 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 }, 15061 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 }, 15062 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 }, 15063 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, 15064 BUILTIN_DESC_SWAP_OPERANDS }, 15065 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, 15066 BUILTIN_DESC_SWAP_OPERANDS }, 15067 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 }, 15068 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, 15069 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, 15070 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, 15071 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, 15072 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 }, 15073 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 }, 15074 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 }, 15075 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, 15076 
BUILTIN_DESC_SWAP_OPERANDS }, 15077 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, 15078 BUILTIN_DESC_SWAP_OPERANDS }, 15079 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 }, 15080 15081 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, 15082 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, 15083 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, 15084 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, 15085 15086 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, 15087 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, 15088 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, 15089 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, 15090 15091 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, 15092 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, 15093 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, 15094 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, 15095 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, 15096 15097 /* MMX */ 15098 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, 15099 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 }, 15100 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 }, 15101 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 }, 15102 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 }, 
15103 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 }, 15104 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 }, 15105 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 }, 15106 15107 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 }, 15108 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 }, 15109 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 }, 15110 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 }, 15111 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 }, 15112 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 }, 15113 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 }, 15114 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 }, 15115 15116 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, 15117 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, 15118 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, 15119 15120 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 }, 15121 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, 15122 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, 15123 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 }, 15124 15125 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, 15126 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, 15127 
15128 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, 15129 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, 15130 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 }, 15131 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 }, 15132 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, 15133 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, 15134 15135 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, 15136 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, 15137 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, 15138 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, 15139 15140 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 }, 15141 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 }, 15142 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 }, 15143 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 }, 15144 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 }, 15145 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 }, 15146 15147 /* Special. 
*/ 15148 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 }, 15149 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 }, 15150 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 }, 15151 15152 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, 15153 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, 15154 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, 15155 15156 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, 15157 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, 15158 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 }, 15159 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 }, 15160 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 }, 15161 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 }, 15162 15163 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 }, 15164 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 }, 15165 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 }, 15166 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 }, 15167 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 }, 15168 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 }, 15169 15170 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 }, 15171 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 }, 15172 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, 15173 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, 15174 15175 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, 15176 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }, 15177 15178 /* SSE2 */ 15179 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 }, 15180 { MASK_SSE2, CODE_FOR_subv2df3, 
"__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 }, 15181 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 }, 15182 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 }, 15183 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 }, 15184 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 }, 15185 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 }, 15186 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 }, 15187 15188 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 }, 15189 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 }, 15190 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 }, 15191 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 15192 BUILTIN_DESC_SWAP_OPERANDS }, 15193 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 15194 BUILTIN_DESC_SWAP_OPERANDS }, 15195 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 }, 15196 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 }, 15197 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 }, 15198 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 }, 15199 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, 15200 BUILTIN_DESC_SWAP_OPERANDS }, 15201 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, 15202 BUILTIN_DESC_SWAP_OPERANDS }, 15203 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", 
IX86_BUILTIN_CMPORDPD, ORDERED, 0 }, 15204 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 }, 15205 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 }, 15206 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 }, 15207 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 }, 15208 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 }, 15209 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 }, 15210 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 }, 15211 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 }, 15212 15213 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 }, 15214 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 }, 15215 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 }, 15216 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 }, 15217 15218 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 }, 15219 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 }, 15220 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 }, 15221 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 }, 15222 15223 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 }, 15224 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 }, 15225 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 }, 15226 15227 /* SSE2 MMX */ 
15228 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 }, 15229 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 }, 15230 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 }, 15231 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 }, 15232 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 }, 15233 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 }, 15234 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 }, 15235 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 }, 15236 15237 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 }, 15238 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 }, 15239 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 }, 15240 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 }, 15241 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 }, 15242 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 }, 15243 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 }, 15244 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 }, 15245 15246 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 }, 15247 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 }, 15248 15249 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 }, 15250 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, 
"__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 }, 15251 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 }, 15252 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 }, 15253 15254 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 }, 15255 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 }, 15256 15257 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 }, 15258 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 }, 15259 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 }, 15260 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 }, 15261 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 }, 15262 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 }, 15263 15264 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 }, 15265 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 }, 15266 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 }, 15267 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 }, 15268 15269 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 }, 15270 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 }, 15271 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 }, 15272 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 }, 15273 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, 
"__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 }, 15274 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 }, 15275 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 }, 15276 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 }, 15277 15278 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 }, 15279 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 }, 15280 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 }, 15281 15282 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 }, 15283 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 }, 15284 15285 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 }, 15286 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 }, 15287 15288 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 }, 15289 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 }, 15290 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 }, 15291 15292 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 }, 15293 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 }, 15294 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 }, 15295 15296 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 }, 15297 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 }, 15298 15299 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 }, 15300 15301 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, 15302 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, 15303 { MASK_SSE2, 
CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 }, 15304 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }, 15305 15306 /* SSE3 MMX */ 15307 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 }, 15308 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 }, 15309 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 }, 15310 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 }, 15311 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 }, 15312 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }, 15313 15314 /* SSSE3 */ 15315 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 }, 15316 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 }, 15317 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 }, 15318 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 }, 15319 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 }, 15320 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 }, 15321 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 }, 15322 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 }, 15323 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 }, 15324 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 }, 15325 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 }, 15326 { MASK_SSSE3, 
CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
};

/* Builtins taking exactly one vector argument.  The mode of operand 1 of
   each entry's insn pattern selects the function type used when the
   builtin is registered (see the bdesc_1arg loop in
   ix86_init_mmx_sse_builtins).  Entries with a zero name are skipped by
   that loop and registered by hand with a more specific type.  */
static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  /* SSE3 */
  { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },

  /* SSSE3 */
  { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
};

/* Set up all i386 builtin functions.  At present every builtin is an
   MMX/SSE-family one, so nothing is registered unless MMX is enabled.  */
static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  /* Vector type nodes used to build the builtin function signatures
     below.  */
  tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
  tree V2DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);

  /* Pointer types used by the load/store builtin signatures.  */
  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node = build_pointer_type (
build_type_variant (char_type_node, 1, 0)); 15426 tree pfloat_type_node = build_pointer_type (float_type_node); 15427 tree pcfloat_type_node = build_pointer_type ( 15428 build_type_variant (float_type_node, 1, 0)); 15429 tree pv2si_type_node = build_pointer_type (V2SI_type_node); 15430 tree pv2di_type_node = build_pointer_type (V2DI_type_node); 15431 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node); 15432 15433 /* Comparisons. */ 15434 tree int_ftype_v4sf_v4sf 15435 = build_function_type_list (integer_type_node, 15436 V4SF_type_node, V4SF_type_node, NULL_TREE); 15437 tree v4si_ftype_v4sf_v4sf 15438 = build_function_type_list (V4SI_type_node, 15439 V4SF_type_node, V4SF_type_node, NULL_TREE); 15440 /* MMX/SSE/integer conversions. */ 15441 tree int_ftype_v4sf 15442 = build_function_type_list (integer_type_node, 15443 V4SF_type_node, NULL_TREE); 15444 tree int64_ftype_v4sf 15445 = build_function_type_list (long_long_integer_type_node, 15446 V4SF_type_node, NULL_TREE); 15447 tree int_ftype_v8qi 15448 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE); 15449 tree v4sf_ftype_v4sf_int 15450 = build_function_type_list (V4SF_type_node, 15451 V4SF_type_node, integer_type_node, NULL_TREE); 15452 tree v4sf_ftype_v4sf_int64 15453 = build_function_type_list (V4SF_type_node, 15454 V4SF_type_node, long_long_integer_type_node, 15455 NULL_TREE); 15456 tree v4sf_ftype_v4sf_v2si 15457 = build_function_type_list (V4SF_type_node, 15458 V4SF_type_node, V2SI_type_node, NULL_TREE); 15459 15460 /* Miscellaneous. 
*/ 15461 tree v8qi_ftype_v4hi_v4hi 15462 = build_function_type_list (V8QI_type_node, 15463 V4HI_type_node, V4HI_type_node, NULL_TREE); 15464 tree v4hi_ftype_v2si_v2si 15465 = build_function_type_list (V4HI_type_node, 15466 V2SI_type_node, V2SI_type_node, NULL_TREE); 15467 tree v4sf_ftype_v4sf_v4sf_int 15468 = build_function_type_list (V4SF_type_node, 15469 V4SF_type_node, V4SF_type_node, 15470 integer_type_node, NULL_TREE); 15471 tree v2si_ftype_v4hi_v4hi 15472 = build_function_type_list (V2SI_type_node, 15473 V4HI_type_node, V4HI_type_node, NULL_TREE); 15474 tree v4hi_ftype_v4hi_int 15475 = build_function_type_list (V4HI_type_node, 15476 V4HI_type_node, integer_type_node, NULL_TREE); 15477 tree v4hi_ftype_v4hi_di 15478 = build_function_type_list (V4HI_type_node, 15479 V4HI_type_node, long_long_unsigned_type_node, 15480 NULL_TREE); 15481 tree v2si_ftype_v2si_di 15482 = build_function_type_list (V2SI_type_node, 15483 V2SI_type_node, long_long_unsigned_type_node, 15484 NULL_TREE); 15485 tree void_ftype_void 15486 = build_function_type (void_type_node, void_list_node); 15487 tree void_ftype_unsigned 15488 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); 15489 tree void_ftype_unsigned_unsigned 15490 = build_function_type_list (void_type_node, unsigned_type_node, 15491 unsigned_type_node, NULL_TREE); 15492 tree void_ftype_pcvoid_unsigned_unsigned 15493 = build_function_type_list (void_type_node, const_ptr_type_node, 15494 unsigned_type_node, unsigned_type_node, 15495 NULL_TREE); 15496 tree unsigned_ftype_void 15497 = build_function_type (unsigned_type_node, void_list_node); 15498 tree v2si_ftype_v4sf 15499 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE); 15500 /* Loads/stores. 
*/ 15501 tree void_ftype_v8qi_v8qi_pchar 15502 = build_function_type_list (void_type_node, 15503 V8QI_type_node, V8QI_type_node, 15504 pchar_type_node, NULL_TREE); 15505 tree v4sf_ftype_pcfloat 15506 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); 15507 /* @@@ the type is bogus */ 15508 tree v4sf_ftype_v4sf_pv2si 15509 = build_function_type_list (V4SF_type_node, 15510 V4SF_type_node, pv2si_type_node, NULL_TREE); 15511 tree void_ftype_pv2si_v4sf 15512 = build_function_type_list (void_type_node, 15513 pv2si_type_node, V4SF_type_node, NULL_TREE); 15514 tree void_ftype_pfloat_v4sf 15515 = build_function_type_list (void_type_node, 15516 pfloat_type_node, V4SF_type_node, NULL_TREE); 15517 tree void_ftype_pdi_di 15518 = build_function_type_list (void_type_node, 15519 pdi_type_node, long_long_unsigned_type_node, 15520 NULL_TREE); 15521 tree void_ftype_pv2di_v2di 15522 = build_function_type_list (void_type_node, 15523 pv2di_type_node, V2DI_type_node, NULL_TREE); 15524 /* Normal vector unops. */ 15525 tree v4sf_ftype_v4sf 15526 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); 15527 tree v16qi_ftype_v16qi 15528 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE); 15529 tree v8hi_ftype_v8hi 15530 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE); 15531 tree v4si_ftype_v4si 15532 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); 15533 tree v8qi_ftype_v8qi 15534 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE); 15535 tree v4hi_ftype_v4hi 15536 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE); 15537 15538 /* Normal vector binops. 
*/ 15539 tree v4sf_ftype_v4sf_v4sf 15540 = build_function_type_list (V4SF_type_node, 15541 V4SF_type_node, V4SF_type_node, NULL_TREE); 15542 tree v8qi_ftype_v8qi_v8qi 15543 = build_function_type_list (V8QI_type_node, 15544 V8QI_type_node, V8QI_type_node, NULL_TREE); 15545 tree v4hi_ftype_v4hi_v4hi 15546 = build_function_type_list (V4HI_type_node, 15547 V4HI_type_node, V4HI_type_node, NULL_TREE); 15548 tree v2si_ftype_v2si_v2si 15549 = build_function_type_list (V2SI_type_node, 15550 V2SI_type_node, V2SI_type_node, NULL_TREE); 15551 tree di_ftype_di_di 15552 = build_function_type_list (long_long_unsigned_type_node, 15553 long_long_unsigned_type_node, 15554 long_long_unsigned_type_node, NULL_TREE); 15555 15556 tree di_ftype_di_di_int 15557 = build_function_type_list (long_long_unsigned_type_node, 15558 long_long_unsigned_type_node, 15559 long_long_unsigned_type_node, 15560 integer_type_node, NULL_TREE); 15561 15562 tree v2si_ftype_v2sf 15563 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE); 15564 tree v2sf_ftype_v2si 15565 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE); 15566 tree v2si_ftype_v2si 15567 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE); 15568 tree v2sf_ftype_v2sf 15569 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE); 15570 tree v2sf_ftype_v2sf_v2sf 15571 = build_function_type_list (V2SF_type_node, 15572 V2SF_type_node, V2SF_type_node, NULL_TREE); 15573 tree v2si_ftype_v2sf_v2sf 15574 = build_function_type_list (V2SI_type_node, 15575 V2SF_type_node, V2SF_type_node, NULL_TREE); 15576 tree pint_type_node = build_pointer_type (integer_type_node); 15577 tree pdouble_type_node = build_pointer_type (double_type_node); 15578 tree pcdouble_type_node = build_pointer_type ( 15579 build_type_variant (double_type_node, 1, 0)); 15580 tree int_ftype_v2df_v2df 15581 = build_function_type_list (integer_type_node, 15582 V2DF_type_node, V2DF_type_node, NULL_TREE); 15583 15584 tree 
void_ftype_pcvoid 15585 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE); 15586 tree v4sf_ftype_v4si 15587 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE); 15588 tree v4si_ftype_v4sf 15589 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE); 15590 tree v2df_ftype_v4si 15591 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE); 15592 tree v4si_ftype_v2df 15593 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE); 15594 tree v2si_ftype_v2df 15595 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE); 15596 tree v4sf_ftype_v2df 15597 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE); 15598 tree v2df_ftype_v2si 15599 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE); 15600 tree v2df_ftype_v4sf 15601 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE); 15602 tree int_ftype_v2df 15603 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE); 15604 tree int64_ftype_v2df 15605 = build_function_type_list (long_long_integer_type_node, 15606 V2DF_type_node, NULL_TREE); 15607 tree v2df_ftype_v2df_int 15608 = build_function_type_list (V2DF_type_node, 15609 V2DF_type_node, integer_type_node, NULL_TREE); 15610 tree v2df_ftype_v2df_int64 15611 = build_function_type_list (V2DF_type_node, 15612 V2DF_type_node, long_long_integer_type_node, 15613 NULL_TREE); 15614 tree v4sf_ftype_v4sf_v2df 15615 = build_function_type_list (V4SF_type_node, 15616 V4SF_type_node, V2DF_type_node, NULL_TREE); 15617 tree v2df_ftype_v2df_v4sf 15618 = build_function_type_list (V2DF_type_node, 15619 V2DF_type_node, V4SF_type_node, NULL_TREE); 15620 tree v2df_ftype_v2df_v2df_int 15621 = build_function_type_list (V2DF_type_node, 15622 V2DF_type_node, V2DF_type_node, 15623 integer_type_node, 15624 NULL_TREE); 15625 tree v2df_ftype_v2df_pcdouble 15626 = build_function_type_list (V2DF_type_node, 15627 V2DF_type_node, 
pcdouble_type_node, NULL_TREE); 15628 tree void_ftype_pdouble_v2df 15629 = build_function_type_list (void_type_node, 15630 pdouble_type_node, V2DF_type_node, NULL_TREE); 15631 tree void_ftype_pint_int 15632 = build_function_type_list (void_type_node, 15633 pint_type_node, integer_type_node, NULL_TREE); 15634 tree void_ftype_v16qi_v16qi_pchar 15635 = build_function_type_list (void_type_node, 15636 V16QI_type_node, V16QI_type_node, 15637 pchar_type_node, NULL_TREE); 15638 tree v2df_ftype_pcdouble 15639 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE); 15640 tree v2df_ftype_v2df_v2df 15641 = build_function_type_list (V2DF_type_node, 15642 V2DF_type_node, V2DF_type_node, NULL_TREE); 15643 tree v16qi_ftype_v16qi_v16qi 15644 = build_function_type_list (V16QI_type_node, 15645 V16QI_type_node, V16QI_type_node, NULL_TREE); 15646 tree v8hi_ftype_v8hi_v8hi 15647 = build_function_type_list (V8HI_type_node, 15648 V8HI_type_node, V8HI_type_node, NULL_TREE); 15649 tree v4si_ftype_v4si_v4si 15650 = build_function_type_list (V4SI_type_node, 15651 V4SI_type_node, V4SI_type_node, NULL_TREE); 15652 tree v2di_ftype_v2di_v2di 15653 = build_function_type_list (V2DI_type_node, 15654 V2DI_type_node, V2DI_type_node, NULL_TREE); 15655 tree v2di_ftype_v2df_v2df 15656 = build_function_type_list (V2DI_type_node, 15657 V2DF_type_node, V2DF_type_node, NULL_TREE); 15658 tree v2df_ftype_v2df 15659 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); 15660 tree v2di_ftype_v2di_int 15661 = build_function_type_list (V2DI_type_node, 15662 V2DI_type_node, integer_type_node, NULL_TREE); 15663 tree v2di_ftype_v2di_v2di_int 15664 = build_function_type_list (V2DI_type_node, V2DI_type_node, 15665 V2DI_type_node, integer_type_node, NULL_TREE); 15666 tree v4si_ftype_v4si_int 15667 = build_function_type_list (V4SI_type_node, 15668 V4SI_type_node, integer_type_node, NULL_TREE); 15669 tree v8hi_ftype_v8hi_int 15670 = build_function_type_list (V8HI_type_node, 15671 
V8HI_type_node, integer_type_node, NULL_TREE); 15672 tree v4si_ftype_v8hi_v8hi 15673 = build_function_type_list (V4SI_type_node, 15674 V8HI_type_node, V8HI_type_node, NULL_TREE); 15675 tree di_ftype_v8qi_v8qi 15676 = build_function_type_list (long_long_unsigned_type_node, 15677 V8QI_type_node, V8QI_type_node, NULL_TREE); 15678 tree di_ftype_v2si_v2si 15679 = build_function_type_list (long_long_unsigned_type_node, 15680 V2SI_type_node, V2SI_type_node, NULL_TREE); 15681 tree v2di_ftype_v16qi_v16qi 15682 = build_function_type_list (V2DI_type_node, 15683 V16QI_type_node, V16QI_type_node, NULL_TREE); 15684 tree v2di_ftype_v4si_v4si 15685 = build_function_type_list (V2DI_type_node, 15686 V4SI_type_node, V4SI_type_node, NULL_TREE); 15687 tree int_ftype_v16qi 15688 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE); 15689 tree v16qi_ftype_pcchar 15690 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE); 15691 tree void_ftype_pchar_v16qi 15692 = build_function_type_list (void_type_node, 15693 pchar_type_node, V16QI_type_node, NULL_TREE); 15694 15695 tree v2di_ftype_v2di_unsigned_unsigned 15696 = build_function_type_list (V2DI_type_node, V2DI_type_node, 15697 unsigned_type_node, unsigned_type_node, 15698 NULL_TREE); 15699 tree v2di_ftype_v2di_v2di_unsigned_unsigned 15700 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node, 15701 unsigned_type_node, unsigned_type_node, 15702 NULL_TREE); 15703 tree v2di_ftype_v2di_v16qi 15704 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node, 15705 NULL_TREE); 15706 15707 tree float80_type; 15708 tree float128_type; 15709 tree ftype; 15710 15711 /* The __float80 type. */ 15712 if (TYPE_MODE (long_double_type_node) == XFmode) 15713 (*lang_hooks.types.register_builtin_type) (long_double_type_node, 15714 "__float80"); 15715 else 15716 { 15717 /* The __float80 type. 
*/ 15718 float80_type = make_node (REAL_TYPE); 15719 TYPE_PRECISION (float80_type) = 80; 15720 layout_type (float80_type); 15721 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80"); 15722 } 15723 15724 if (TARGET_64BIT) 15725 { 15726 float128_type = make_node (REAL_TYPE); 15727 TYPE_PRECISION (float128_type) = 128; 15728 layout_type (float128_type); 15729 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128"); 15730 } 15731 15732 /* Add all builtins that are more or less simple operations on two 15733 operands. */ 15734 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) 15735 { 15736 /* Use one of the operands; the target can have a different mode for 15737 mask-generating compares. */ 15738 enum machine_mode mode; 15739 tree type; 15740 15741 if (d->name == 0) 15742 continue; 15743 mode = insn_data[d->icode].operand[1].mode; 15744 15745 switch (mode) 15746 { 15747 case V16QImode: 15748 type = v16qi_ftype_v16qi_v16qi; 15749 break; 15750 case V8HImode: 15751 type = v8hi_ftype_v8hi_v8hi; 15752 break; 15753 case V4SImode: 15754 type = v4si_ftype_v4si_v4si; 15755 break; 15756 case V2DImode: 15757 type = v2di_ftype_v2di_v2di; 15758 break; 15759 case V2DFmode: 15760 type = v2df_ftype_v2df_v2df; 15761 break; 15762 case V4SFmode: 15763 type = v4sf_ftype_v4sf_v4sf; 15764 break; 15765 case V8QImode: 15766 type = v8qi_ftype_v8qi_v8qi; 15767 break; 15768 case V4HImode: 15769 type = v4hi_ftype_v4hi_v4hi; 15770 break; 15771 case V2SImode: 15772 type = v2si_ftype_v2si_v2si; 15773 break; 15774 case DImode: 15775 type = di_ftype_di_di; 15776 break; 15777 15778 default: 15779 gcc_unreachable (); 15780 } 15781 15782 /* Override for comparisons. 
*/ 15783 if (d->icode == CODE_FOR_sse_maskcmpv4sf3 15784 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3) 15785 type = v4si_ftype_v4sf_v4sf; 15786 15787 if (d->icode == CODE_FOR_sse2_maskcmpv2df3 15788 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3) 15789 type = v2di_ftype_v2df_v2df; 15790 15791 def_builtin (d->mask, d->name, type, d->code); 15792 } 15793 15794 /* Add all builtins that are more or less simple operations on 1 operand. */ 15795 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) 15796 { 15797 enum machine_mode mode; 15798 tree type; 15799 15800 if (d->name == 0) 15801 continue; 15802 mode = insn_data[d->icode].operand[1].mode; 15803 15804 switch (mode) 15805 { 15806 case V16QImode: 15807 type = v16qi_ftype_v16qi; 15808 break; 15809 case V8HImode: 15810 type = v8hi_ftype_v8hi; 15811 break; 15812 case V4SImode: 15813 type = v4si_ftype_v4si; 15814 break; 15815 case V2DFmode: 15816 type = v2df_ftype_v2df; 15817 break; 15818 case V4SFmode: 15819 type = v4sf_ftype_v4sf; 15820 break; 15821 case V8QImode: 15822 type = v8qi_ftype_v8qi; 15823 break; 15824 case V4HImode: 15825 type = v4hi_ftype_v4hi; 15826 break; 15827 case V2SImode: 15828 type = v2si_ftype_v2si; 15829 break; 15830 15831 default: 15832 abort (); 15833 } 15834 15835 def_builtin (d->mask, d->name, type, d->code); 15836 } 15837 15838 /* Add the remaining MMX insns with somewhat more complicated types. 
*/ 15839 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); 15840 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW); 15841 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD); 15842 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ); 15843 15844 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW); 15845 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD); 15846 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ); 15847 15848 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW); 15849 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD); 15850 15851 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW); 15852 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD); 15853 15854 /* comi/ucomi insns. 
*/ 15855 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) 15856 if (d->mask == MASK_SSE2) 15857 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code); 15858 else 15859 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); 15860 15861 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB); 15862 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW); 15863 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB); 15864 15865 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); 15866 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); 15867 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); 15868 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); 15869 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); 15870 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS); 15871 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); 15872 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64); 15873 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); 15874 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); 15875 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64); 15876 15877 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); 15878 15879 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS); 15880 def_builtin (MASK_SSE, 
"__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); 15881 15882 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); 15883 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); 15884 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); 15885 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); 15886 15887 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); 15888 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); 15889 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); 15890 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); 15891 15892 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); 15893 15894 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); 15895 15896 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); 15897 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); 15898 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); 15899 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); 15900 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); 15901 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); 15902 15903 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); 15904 15905 /* Original 3DNow! 
*/ 15906 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS); 15907 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB); 15908 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID); 15909 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC); 15910 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD); 15911 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ); 15912 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE); 15913 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT); 15914 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX); 15915 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN); 15916 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL); 15917 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP); 15918 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1); 15919 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2); 15920 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT); 15921 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1); 15922 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB); 15923 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR); 15924 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD); 15925 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW); 15926 15927 /* 
3DNow! extension as used in the Athlon CPU. */ 15928 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW); 15929 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC); 15930 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC); 15931 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW); 15932 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF); 15933 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI); 15934 15935 /* SSE2 */ 15936 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU); 15937 15938 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD); 15939 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD); 15940 15941 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD); 15942 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD); 15943 15944 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD); 15945 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128); 15946 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI); 15947 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD); 15948 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ); 15949 15950 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD); 15951 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW); 15952 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", 
v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW); 15953 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128); 15954 15955 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD); 15956 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD); 15957 15958 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD); 15959 15960 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD); 15961 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS); 15962 15963 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ); 15964 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI); 15965 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS); 15966 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ); 15967 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI); 15968 15969 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD); 15970 15971 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI); 15972 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI); 15973 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64); 15974 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64); 15975 15976 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ); 15977 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD); 15978 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ); 
15979 15980 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD); 15981 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD); 15982 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS); 15983 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD); 15984 15985 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH); 15986 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE); 15987 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); 15988 15989 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU); 15990 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU); 15991 15992 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ); 15993 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128); 15994 15995 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128); 15996 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128); 15997 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128); 15998 15999 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128); 16000 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128); 16001 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128); 16002 16003 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128); 16004 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128); 16005 
16006 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128); 16007 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128); 16008 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128); 16009 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128); 16010 16011 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128); 16012 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128); 16013 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128); 16014 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128); 16015 16016 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128); 16017 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128); 16018 16019 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128); 16020 16021 /* Prescott New Instructions. */ 16022 def_builtin (MASK_SSE3, "__builtin_ia32_monitor", 16023 void_ftype_pcvoid_unsigned_unsigned, 16024 IX86_BUILTIN_MONITOR); 16025 def_builtin (MASK_SSE3, "__builtin_ia32_mwait", 16026 void_ftype_unsigned_unsigned, 16027 IX86_BUILTIN_MWAIT); 16028 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup", 16029 v4sf_ftype_v4sf, 16030 IX86_BUILTIN_MOVSHDUP); 16031 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup", 16032 v4sf_ftype_v4sf, 16033 IX86_BUILTIN_MOVSLDUP); 16034 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu", 16035 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU); 16036 16037 /* SSSE3. 
*/ 16038 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128", 16039 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128); 16040 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, 16041 IX86_BUILTIN_PALIGNR); 16042 16043 /* AMDFAM10 SSE4A New built-ins */ 16044 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd", 16045 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD); 16046 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss", 16047 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS); 16048 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi", 16049 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI); 16050 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq", 16051 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ); 16052 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi", 16053 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI); 16054 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq", 16055 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ); 16056 16057 /* Access to the vec_init patterns. */ 16058 ftype = build_function_type_list (V2SI_type_node, integer_type_node, 16059 integer_type_node, NULL_TREE); 16060 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si", 16061 ftype, IX86_BUILTIN_VEC_INIT_V2SI); 16062 16063 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node, 16064 short_integer_type_node, 16065 short_integer_type_node, 16066 short_integer_type_node, NULL_TREE); 16067 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi", 16068 ftype, IX86_BUILTIN_VEC_INIT_V4HI); 16069 16070 ftype = build_function_type_list (V8QI_type_node, char_type_node, 16071 char_type_node, char_type_node, 16072 char_type_node, char_type_node, 16073 char_type_node, char_type_node, 16074 char_type_node, NULL_TREE); 16075 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi", 16076 ftype, IX86_BUILTIN_VEC_INIT_V8QI); 16077 16078 /* Access to the vec_extract patterns. 
*/ 16079 ftype = build_function_type_list (double_type_node, V2DF_type_node, 16080 integer_type_node, NULL_TREE); 16081 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df", 16082 ftype, IX86_BUILTIN_VEC_EXT_V2DF); 16083 16084 ftype = build_function_type_list (long_long_integer_type_node, 16085 V2DI_type_node, integer_type_node, 16086 NULL_TREE); 16087 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di", 16088 ftype, IX86_BUILTIN_VEC_EXT_V2DI); 16089 16090 ftype = build_function_type_list (float_type_node, V4SF_type_node, 16091 integer_type_node, NULL_TREE); 16092 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf", 16093 ftype, IX86_BUILTIN_VEC_EXT_V4SF); 16094 16095 ftype = build_function_type_list (intSI_type_node, V4SI_type_node, 16096 integer_type_node, NULL_TREE); 16097 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si", 16098 ftype, IX86_BUILTIN_VEC_EXT_V4SI); 16099 16100 ftype = build_function_type_list (intHI_type_node, V8HI_type_node, 16101 integer_type_node, NULL_TREE); 16102 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi", 16103 ftype, IX86_BUILTIN_VEC_EXT_V8HI); 16104 16105 ftype = build_function_type_list (intHI_type_node, V4HI_type_node, 16106 integer_type_node, NULL_TREE); 16107 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", 16108 ftype, IX86_BUILTIN_VEC_EXT_V4HI); 16109 16110 ftype = build_function_type_list (intSI_type_node, V2SI_type_node, 16111 integer_type_node, NULL_TREE); 16112 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si", 16113 ftype, IX86_BUILTIN_VEC_EXT_V2SI); 16114 16115 ftype = build_function_type_list (intQI_type_node, V16QI_type_node, 16116 integer_type_node, NULL_TREE); 16117 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI); 16118 16119 /* Access to the vec_set patterns. 
 */
  /* __builtin_ia32_vec_set_v8hi: insert an HImode element into a V8HI
     vector at a constant index.  */
  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
	       ftype, IX86_BUILTIN_VEC_SET_V8HI);

  /* __builtin_ia32_vec_set_v4hi: the MMX counterpart; gated on SSE or
     the Athlon 3DNow! extensions (both provide PINSRW).  */
  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  /* Replace the scalar zero with the all-zero vector constant of MODE so
     downstream predicates see an operand of the expected vector mode.  */
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.

   ICODE is the two-input insn pattern to emit.  ARGLIST holds the two
   source trees; TARGET is a suggested destination (may be NULL or of the
   wrong mode, in which case a fresh pseudo is used).  Returns the rtx
   holding the result, or 0 if the pattern could not be generated.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  /* Result and input modes come straight from the insn pattern.  */
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  /* Guard against error-recovery const0_rtx operands (see
     safe_vector_operand).  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* An SImode second operand feeding a TImode insn operand (shift counts):
     load the scalar into a V4SI register and take its TImode lowpart.  */
  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result.  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      /* Uniform modes: let the generic binary-operand fixup canonicalize
         the three operands (it may substitute a different target).  */
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      /* Mixed modes: force everything into registers by hand.  */
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of stores.
*/ 16213 16214static rtx 16215ix86_expand_store_builtin (enum insn_code icode, tree arglist) 16216{ 16217 rtx pat; 16218 tree arg0 = TREE_VALUE (arglist); 16219 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16220 rtx op0 = expand_normal (arg0); 16221 rtx op1 = expand_normal (arg1); 16222 enum machine_mode mode0 = insn_data[icode].operand[0].mode; 16223 enum machine_mode mode1 = insn_data[icode].operand[1].mode; 16224 16225 if (VECTOR_MODE_P (mode1)) 16226 op1 = safe_vector_operand (op1, mode1); 16227 16228 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); 16229 op1 = copy_to_mode_reg (mode1, op1); 16230 16231 pat = GEN_FCN (icode) (op0, op1); 16232 if (pat) 16233 emit_insn (pat); 16234 return 0; 16235} 16236 16237/* Subroutine of ix86_expand_builtin to take care of unop insns. */ 16238 16239static rtx 16240ix86_expand_unop_builtin (enum insn_code icode, tree arglist, 16241 rtx target, int do_load) 16242{ 16243 rtx pat; 16244 tree arg0 = TREE_VALUE (arglist); 16245 rtx op0 = expand_normal (arg0); 16246 enum machine_mode tmode = insn_data[icode].operand[0].mode; 16247 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 16248 16249 if (optimize || !target 16250 || GET_MODE (target) != tmode 16251 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 16252 target = gen_reg_rtx (tmode); 16253 if (do_load) 16254 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); 16255 else 16256 { 16257 if (VECTOR_MODE_P (mode0)) 16258 op0 = safe_vector_operand (op0, mode0); 16259 16260 if ((optimize && !register_operand (op0, mode0)) 16261 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 16262 op0 = copy_to_mode_reg (mode0, op0); 16263 } 16264 16265 pat = GEN_FCN (icode) (target, op0); 16266 if (! pat) 16267 return 0; 16268 emit_insn (pat); 16269 return target; 16270} 16271 16272/* Subroutine of ix86_expand_builtin to take care of three special unop insns: 16273 sqrtss, rsqrtss, rcpss. 
*/ 16274 16275static rtx 16276ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target) 16277{ 16278 rtx pat; 16279 tree arg0 = TREE_VALUE (arglist); 16280 rtx op1, op0 = expand_normal (arg0); 16281 enum machine_mode tmode = insn_data[icode].operand[0].mode; 16282 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 16283 16284 if (optimize || !target 16285 || GET_MODE (target) != tmode 16286 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 16287 target = gen_reg_rtx (tmode); 16288 16289 if (VECTOR_MODE_P (mode0)) 16290 op0 = safe_vector_operand (op0, mode0); 16291 16292 if ((optimize && !register_operand (op0, mode0)) 16293 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 16294 op0 = copy_to_mode_reg (mode0, op0); 16295 16296 op1 = op0; 16297 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0)) 16298 op1 = copy_to_mode_reg (mode0, op1); 16299 16300 pat = GEN_FCN (icode) (target, op0, op1); 16301 if (! pat) 16302 return 0; 16303 emit_insn (pat); 16304 return target; 16305} 16306 16307/* Subroutine of ix86_expand_builtin to take care of comparison insns. */ 16308 16309static rtx 16310ix86_expand_sse_compare (const struct builtin_description *d, tree arglist, 16311 rtx target) 16312{ 16313 rtx pat; 16314 tree arg0 = TREE_VALUE (arglist); 16315 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16316 rtx op0 = expand_normal (arg0); 16317 rtx op1 = expand_normal (arg1); 16318 rtx op2; 16319 enum machine_mode tmode = insn_data[d->icode].operand[0].mode; 16320 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode; 16321 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode; 16322 enum rtx_code comparison = d->comparison; 16323 16324 if (VECTOR_MODE_P (mode0)) 16325 op0 = safe_vector_operand (op0, mode0); 16326 if (VECTOR_MODE_P (mode1)) 16327 op1 = safe_vector_operand (op1, mode1); 16328 16329 /* Swap operands if we have a comparison that isn't available in 16330 hardware. 
*/ 16331 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) 16332 { 16333 rtx tmp = gen_reg_rtx (mode1); 16334 emit_move_insn (tmp, op1); 16335 op1 = op0; 16336 op0 = tmp; 16337 } 16338 16339 if (optimize || !target 16340 || GET_MODE (target) != tmode 16341 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode)) 16342 target = gen_reg_rtx (tmode); 16343 16344 if ((optimize && !register_operand (op0, mode0)) 16345 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0)) 16346 op0 = copy_to_mode_reg (mode0, op0); 16347 if ((optimize && !register_operand (op1, mode1)) 16348 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1)) 16349 op1 = copy_to_mode_reg (mode1, op1); 16350 16351 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); 16352 pat = GEN_FCN (d->icode) (target, op0, op1, op2); 16353 if (! pat) 16354 return 0; 16355 emit_insn (pat); 16356 return target; 16357} 16358 16359/* Subroutine of ix86_expand_builtin to take care of comi insns. */ 16360 16361static rtx 16362ix86_expand_sse_comi (const struct builtin_description *d, tree arglist, 16363 rtx target) 16364{ 16365 rtx pat; 16366 tree arg0 = TREE_VALUE (arglist); 16367 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16368 rtx op0 = expand_normal (arg0); 16369 rtx op1 = expand_normal (arg1); 16370 rtx op2; 16371 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; 16372 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; 16373 enum rtx_code comparison = d->comparison; 16374 16375 if (VECTOR_MODE_P (mode0)) 16376 op0 = safe_vector_operand (op0, mode0); 16377 if (VECTOR_MODE_P (mode1)) 16378 op1 = safe_vector_operand (op1, mode1); 16379 16380 /* Swap operands if we have a comparison that isn't available in 16381 hardware. 
*/ 16382 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) 16383 { 16384 rtx tmp = op1; 16385 op1 = op0; 16386 op0 = tmp; 16387 } 16388 16389 target = gen_reg_rtx (SImode); 16390 emit_move_insn (target, const0_rtx); 16391 target = gen_rtx_SUBREG (QImode, target, 0); 16392 16393 if ((optimize && !register_operand (op0, mode0)) 16394 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0)) 16395 op0 = copy_to_mode_reg (mode0, op0); 16396 if ((optimize && !register_operand (op1, mode1)) 16397 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1)) 16398 op1 = copy_to_mode_reg (mode1, op1); 16399 16400 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); 16401 pat = GEN_FCN (d->icode) (op0, op1); 16402 if (! pat) 16403 return 0; 16404 emit_insn (pat); 16405 emit_insn (gen_rtx_SET (VOIDmode, 16406 gen_rtx_STRICT_LOW_PART (VOIDmode, target), 16407 gen_rtx_fmt_ee (comparison, QImode, 16408 SET_DEST (pat), 16409 const0_rtx))); 16410 16411 return SUBREG_REG (target); 16412} 16413 16414/* Return the integer constant in ARG. Constrain it to be in the range 16415 of the subparts of VEC_TYPE; issue an error if not. */ 16416 16417static int 16418get_element_number (tree vec_type, tree arg) 16419{ 16420 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; 16421 16422 if (!host_integerp (arg, 1) 16423 || (elt = tree_low_cst (arg, 1), elt > max)) 16424 { 16425 error ("selector must be an integer constant in the range 0..%wi", max); 16426 return 0; 16427 } 16428 16429 return elt; 16430} 16431 16432/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around 16433 ix86_expand_vector_init. We DO have language-level syntax for this, in 16434 the form of (type){ init-list }. Except that since we can't place emms 16435 instructions from inside the compiler, we can't allow the use of MMX 16436 registers unless the user explicitly asks for it. So we do *not* define 16437 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. 
Instead 16438 we have builtins invoked by mmintrin.h that gives us license to emit 16439 these sorts of instructions. */ 16440 16441static rtx 16442ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target) 16443{ 16444 enum machine_mode tmode = TYPE_MODE (type); 16445 enum machine_mode inner_mode = GET_MODE_INNER (tmode); 16446 int i, n_elt = GET_MODE_NUNITS (tmode); 16447 rtvec v = rtvec_alloc (n_elt); 16448 16449 gcc_assert (VECTOR_MODE_P (tmode)); 16450 16451 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist)) 16452 { 16453 rtx x = expand_normal (TREE_VALUE (arglist)); 16454 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); 16455 } 16456 16457 gcc_assert (arglist == NULL); 16458 16459 if (!target || !register_operand (target, tmode)) 16460 target = gen_reg_rtx (tmode); 16461 16462 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v)); 16463 return target; 16464} 16465 16466/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around 16467 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we 16468 had a language-level syntax for referencing vector elements. */ 16469 16470static rtx 16471ix86_expand_vec_ext_builtin (tree arglist, rtx target) 16472{ 16473 enum machine_mode tmode, mode0; 16474 tree arg0, arg1; 16475 int elt; 16476 rtx op0; 16477 16478 arg0 = TREE_VALUE (arglist); 16479 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16480 16481 op0 = expand_normal (arg0); 16482 elt = get_element_number (TREE_TYPE (arg0), arg1); 16483 16484 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); 16485 mode0 = TYPE_MODE (TREE_TYPE (arg0)); 16486 gcc_assert (VECTOR_MODE_P (mode0)); 16487 16488 op0 = force_reg (mode0, op0); 16489 16490 if (optimize || !target || !register_operand (target, tmode)) 16491 target = gen_reg_rtx (tmode); 16492 16493 ix86_expand_vector_extract (true, target, op0, elt); 16494 16495 return target; 16496} 16497 16498/* A subroutine of ix86_expand_builtin. 
These builtins are a wrapper around 16499 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had 16500 a language-level syntax for referencing vector elements. */ 16501 16502static rtx 16503ix86_expand_vec_set_builtin (tree arglist) 16504{ 16505 enum machine_mode tmode, mode1; 16506 tree arg0, arg1, arg2; 16507 int elt; 16508 rtx op0, op1, target; 16509 16510 arg0 = TREE_VALUE (arglist); 16511 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16512 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 16513 16514 tmode = TYPE_MODE (TREE_TYPE (arg0)); 16515 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); 16516 gcc_assert (VECTOR_MODE_P (tmode)); 16517 16518 op0 = expand_expr (arg0, NULL_RTX, tmode, 0); 16519 op1 = expand_expr (arg1, NULL_RTX, mode1, 0); 16520 elt = get_element_number (TREE_TYPE (arg0), arg2); 16521 16522 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) 16523 op1 = convert_modes (mode1, GET_MODE (op1), op1, true); 16524 16525 op0 = force_reg (tmode, op0); 16526 op1 = force_reg (mode1, op1); 16527 16528 /* OP0 is the source of these builtin functions and shouldn't be 16529 modified. Create a copy, use it and return it as target. */ 16530 target = gen_reg_rtx (tmode); 16531 emit_move_insn (target, op0); 16532 ix86_expand_vector_set (true, target, op1, elt); 16533 16534 return target; 16535} 16536 16537/* Expand an expression EXP that calls a built-in function, 16538 with result going to TARGET if that's convenient 16539 (and in mode MODE if that's convenient). 16540 SUBTARGET may be used as the target for computing one of EXP's operands. 16541 IGNORE is nonzero if the value is to be ignored. 
*/ 16542 16543static rtx 16544ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, 16545 enum machine_mode mode ATTRIBUTE_UNUSED, 16546 int ignore ATTRIBUTE_UNUSED) 16547{ 16548 const struct builtin_description *d; 16549 size_t i; 16550 enum insn_code icode; 16551 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); 16552 tree arglist = TREE_OPERAND (exp, 1); 16553 tree arg0, arg1, arg2, arg3; 16554 rtx op0, op1, op2, op3, pat; 16555 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4; 16556 unsigned int fcode = DECL_FUNCTION_CODE (fndecl); 16557 16558 switch (fcode) 16559 { 16560 case IX86_BUILTIN_EMMS: 16561 emit_insn (gen_mmx_emms ()); 16562 return 0; 16563 16564 case IX86_BUILTIN_SFENCE: 16565 emit_insn (gen_sse_sfence ()); 16566 return 0; 16567 16568 case IX86_BUILTIN_MASKMOVQ: 16569 case IX86_BUILTIN_MASKMOVDQU: 16570 icode = (fcode == IX86_BUILTIN_MASKMOVQ 16571 ? CODE_FOR_mmx_maskmovq 16572 : CODE_FOR_sse2_maskmovdqu); 16573 /* Note the arg order is different from the operand order. */ 16574 arg1 = TREE_VALUE (arglist); 16575 arg2 = TREE_VALUE (TREE_CHAIN (arglist)); 16576 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 16577 op0 = expand_normal (arg0); 16578 op1 = expand_normal (arg1); 16579 op2 = expand_normal (arg2); 16580 mode0 = insn_data[icode].operand[0].mode; 16581 mode1 = insn_data[icode].operand[1].mode; 16582 mode2 = insn_data[icode].operand[2].mode; 16583 16584 op0 = force_reg (Pmode, op0); 16585 op0 = gen_rtx_MEM (mode1, op0); 16586 16587 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) 16588 op0 = copy_to_mode_reg (mode0, op0); 16589 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) 16590 op1 = copy_to_mode_reg (mode1, op1); 16591 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2)) 16592 op2 = copy_to_mode_reg (mode2, op2); 16593 pat = GEN_FCN (icode) (op0, op1, op2); 16594 if (! 
pat) 16595 return 0; 16596 emit_insn (pat); 16597 return 0; 16598 16599 case IX86_BUILTIN_SQRTSS: 16600 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target); 16601 case IX86_BUILTIN_RSQRTSS: 16602 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target); 16603 case IX86_BUILTIN_RCPSS: 16604 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target); 16605 16606 case IX86_BUILTIN_LOADUPS: 16607 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1); 16608 16609 case IX86_BUILTIN_STOREUPS: 16610 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist); 16611 16612 case IX86_BUILTIN_LOADHPS: 16613 case IX86_BUILTIN_LOADLPS: 16614 case IX86_BUILTIN_LOADHPD: 16615 case IX86_BUILTIN_LOADLPD: 16616 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps 16617 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps 16618 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd 16619 : CODE_FOR_sse2_loadlpd); 16620 arg0 = TREE_VALUE (arglist); 16621 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16622 op0 = expand_normal (arg0); 16623 op1 = expand_normal (arg1); 16624 tmode = insn_data[icode].operand[0].mode; 16625 mode0 = insn_data[icode].operand[1].mode; 16626 mode1 = insn_data[icode].operand[2].mode; 16627 16628 op0 = force_reg (mode0, op0); 16629 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1)); 16630 if (optimize || target == 0 16631 || GET_MODE (target) != tmode 16632 || !register_operand (target, tmode)) 16633 target = gen_reg_rtx (tmode); 16634 pat = GEN_FCN (icode) (target, op0, op1); 16635 if (! pat) 16636 return 0; 16637 emit_insn (pat); 16638 return target; 16639 16640 case IX86_BUILTIN_STOREHPS: 16641 case IX86_BUILTIN_STORELPS: 16642 icode = (fcode == IX86_BUILTIN_STOREHPS ? 
CODE_FOR_sse_storehps 16643 : CODE_FOR_sse_storelps); 16644 arg0 = TREE_VALUE (arglist); 16645 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16646 op0 = expand_normal (arg0); 16647 op1 = expand_normal (arg1); 16648 mode0 = insn_data[icode].operand[0].mode; 16649 mode1 = insn_data[icode].operand[1].mode; 16650 16651 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); 16652 op1 = force_reg (mode1, op1); 16653 16654 pat = GEN_FCN (icode) (op0, op1); 16655 if (! pat) 16656 return 0; 16657 emit_insn (pat); 16658 return const0_rtx; 16659 16660 case IX86_BUILTIN_MOVNTPS: 16661 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist); 16662 case IX86_BUILTIN_MOVNTQ: 16663 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist); 16664 16665 case IX86_BUILTIN_LDMXCSR: 16666 op0 = expand_normal (TREE_VALUE (arglist)); 16667 target = assign_386_stack_local (SImode, SLOT_VIRTUAL); 16668 emit_move_insn (target, op0); 16669 emit_insn (gen_sse_ldmxcsr (target)); 16670 return 0; 16671 16672 case IX86_BUILTIN_STMXCSR: 16673 target = assign_386_stack_local (SImode, SLOT_VIRTUAL); 16674 emit_insn (gen_sse_stmxcsr (target)); 16675 return copy_to_mode_reg (SImode, target); 16676 16677 case IX86_BUILTIN_SHUFPS: 16678 case IX86_BUILTIN_SHUFPD: 16679 icode = (fcode == IX86_BUILTIN_SHUFPS 16680 ? CODE_FOR_sse_shufps 16681 : CODE_FOR_sse2_shufpd); 16682 arg0 = TREE_VALUE (arglist); 16683 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16684 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 16685 op0 = expand_normal (arg0); 16686 op1 = expand_normal (arg1); 16687 op2 = expand_normal (arg2); 16688 tmode = insn_data[icode].operand[0].mode; 16689 mode0 = insn_data[icode].operand[1].mode; 16690 mode1 = insn_data[icode].operand[2].mode; 16691 mode2 = insn_data[icode].operand[3].mode; 16692 16693 if (! 
(*insn_data[icode].operand[1].predicate) (op0, mode0)) 16694 op0 = copy_to_mode_reg (mode0, op0); 16695 if ((optimize && !register_operand (op1, mode1)) 16696 || !(*insn_data[icode].operand[2].predicate) (op1, mode1)) 16697 op1 = copy_to_mode_reg (mode1, op1); 16698 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) 16699 { 16700 /* @@@ better error message */ 16701 error ("mask must be an immediate"); 16702 return gen_reg_rtx (tmode); 16703 } 16704 if (optimize || target == 0 16705 || GET_MODE (target) != tmode 16706 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 16707 target = gen_reg_rtx (tmode); 16708 pat = GEN_FCN (icode) (target, op0, op1, op2); 16709 if (! pat) 16710 return 0; 16711 emit_insn (pat); 16712 return target; 16713 16714 case IX86_BUILTIN_PSHUFW: 16715 case IX86_BUILTIN_PSHUFD: 16716 case IX86_BUILTIN_PSHUFHW: 16717 case IX86_BUILTIN_PSHUFLW: 16718 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw 16719 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw 16720 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd 16721 : CODE_FOR_mmx_pshufw); 16722 arg0 = TREE_VALUE (arglist); 16723 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16724 op0 = expand_normal (arg0); 16725 op1 = expand_normal (arg1); 16726 tmode = insn_data[icode].operand[0].mode; 16727 mode1 = insn_data[icode].operand[1].mode; 16728 mode2 = insn_data[icode].operand[2].mode; 16729 16730 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) 16731 op0 = copy_to_mode_reg (mode1, op0); 16732 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) 16733 { 16734 /* @@@ better error message */ 16735 error ("mask must be an immediate"); 16736 return const0_rtx; 16737 } 16738 if (target == 0 16739 || GET_MODE (target) != tmode 16740 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 16741 target = gen_reg_rtx (tmode); 16742 pat = GEN_FCN (icode) (target, op0, op1); 16743 if (! 
pat) 16744 return 0; 16745 emit_insn (pat); 16746 return target; 16747 16748 case IX86_BUILTIN_PSLLWI128: 16749 icode = CODE_FOR_ashlv8hi3; 16750 goto do_pshifti; 16751 case IX86_BUILTIN_PSLLDI128: 16752 icode = CODE_FOR_ashlv4si3; 16753 goto do_pshifti; 16754 case IX86_BUILTIN_PSLLQI128: 16755 icode = CODE_FOR_ashlv2di3; 16756 goto do_pshifti; 16757 case IX86_BUILTIN_PSRAWI128: 16758 icode = CODE_FOR_ashrv8hi3; 16759 goto do_pshifti; 16760 case IX86_BUILTIN_PSRADI128: 16761 icode = CODE_FOR_ashrv4si3; 16762 goto do_pshifti; 16763 case IX86_BUILTIN_PSRLWI128: 16764 icode = CODE_FOR_lshrv8hi3; 16765 goto do_pshifti; 16766 case IX86_BUILTIN_PSRLDI128: 16767 icode = CODE_FOR_lshrv4si3; 16768 goto do_pshifti; 16769 case IX86_BUILTIN_PSRLQI128: 16770 icode = CODE_FOR_lshrv2di3; 16771 goto do_pshifti; 16772 do_pshifti: 16773 arg0 = TREE_VALUE (arglist); 16774 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16775 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 16776 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 16777 16778 if (GET_CODE (op1) != CONST_INT) 16779 { 16780 error ("shift must be an immediate"); 16781 return const0_rtx; 16782 } 16783 if (INTVAL (op1) < 0 || INTVAL (op1) > 255) 16784 op1 = GEN_INT (255); 16785 16786 tmode = insn_data[icode].operand[0].mode; 16787 mode1 = insn_data[icode].operand[1].mode; 16788 if (! 
(*insn_data[icode].operand[1].predicate) (op0, mode1)) 16789 op0 = copy_to_reg (op0); 16790 16791 target = gen_reg_rtx (tmode); 16792 pat = GEN_FCN (icode) (target, op0, op1); 16793 if (!pat) 16794 return 0; 16795 emit_insn (pat); 16796 return target; 16797 16798 case IX86_BUILTIN_PSLLW128: 16799 icode = CODE_FOR_ashlv8hi3; 16800 goto do_pshift; 16801 case IX86_BUILTIN_PSLLD128: 16802 icode = CODE_FOR_ashlv4si3; 16803 goto do_pshift; 16804 case IX86_BUILTIN_PSLLQ128: 16805 icode = CODE_FOR_ashlv2di3; 16806 goto do_pshift; 16807 case IX86_BUILTIN_PSRAW128: 16808 icode = CODE_FOR_ashrv8hi3; 16809 goto do_pshift; 16810 case IX86_BUILTIN_PSRAD128: 16811 icode = CODE_FOR_ashrv4si3; 16812 goto do_pshift; 16813 case IX86_BUILTIN_PSRLW128: 16814 icode = CODE_FOR_lshrv8hi3; 16815 goto do_pshift; 16816 case IX86_BUILTIN_PSRLD128: 16817 icode = CODE_FOR_lshrv4si3; 16818 goto do_pshift; 16819 case IX86_BUILTIN_PSRLQ128: 16820 icode = CODE_FOR_lshrv2di3; 16821 goto do_pshift; 16822 do_pshift: 16823 arg0 = TREE_VALUE (arglist); 16824 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16825 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 16826 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 16827 16828 tmode = insn_data[icode].operand[0].mode; 16829 mode1 = insn_data[icode].operand[1].mode; 16830 16831 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) 16832 op0 = copy_to_reg (op0); 16833 16834 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0); 16835 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode)) 16836 op1 = copy_to_reg (op1); 16837 16838 target = gen_reg_rtx (tmode); 16839 pat = GEN_FCN (icode) (target, op0, op1); 16840 if (!pat) 16841 return 0; 16842 emit_insn (pat); 16843 return target; 16844 16845 case IX86_BUILTIN_PSLLDQI128: 16846 case IX86_BUILTIN_PSRLDQI128: 16847 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? 
CODE_FOR_sse2_ashlti3 16848 : CODE_FOR_sse2_lshrti3); 16849 arg0 = TREE_VALUE (arglist); 16850 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16851 op0 = expand_normal (arg0); 16852 op1 = expand_normal (arg1); 16853 tmode = insn_data[icode].operand[0].mode; 16854 mode1 = insn_data[icode].operand[1].mode; 16855 mode2 = insn_data[icode].operand[2].mode; 16856 16857 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) 16858 { 16859 op0 = copy_to_reg (op0); 16860 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0); 16861 } 16862 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) 16863 { 16864 error ("shift must be an immediate"); 16865 return const0_rtx; 16866 } 16867 target = gen_reg_rtx (V2DImode); 16868 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), 16869 op0, op1); 16870 if (! pat) 16871 return 0; 16872 emit_insn (pat); 16873 return target; 16874 16875 case IX86_BUILTIN_FEMMS: 16876 emit_insn (gen_mmx_femms ()); 16877 return NULL_RTX; 16878 16879 case IX86_BUILTIN_PAVGUSB: 16880 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target); 16881 16882 case IX86_BUILTIN_PF2ID: 16883 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0); 16884 16885 case IX86_BUILTIN_PFACC: 16886 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target); 16887 16888 case IX86_BUILTIN_PFADD: 16889 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target); 16890 16891 case IX86_BUILTIN_PFCMPEQ: 16892 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target); 16893 16894 case IX86_BUILTIN_PFCMPGE: 16895 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target); 16896 16897 case IX86_BUILTIN_PFCMPGT: 16898 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target); 16899 16900 case IX86_BUILTIN_PFMAX: 16901 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target); 16902 16903 case IX86_BUILTIN_PFMIN: 16904 return 
ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target); 16905 16906 case IX86_BUILTIN_PFMUL: 16907 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target); 16908 16909 case IX86_BUILTIN_PFRCP: 16910 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0); 16911 16912 case IX86_BUILTIN_PFRCPIT1: 16913 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target); 16914 16915 case IX86_BUILTIN_PFRCPIT2: 16916 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target); 16917 16918 case IX86_BUILTIN_PFRSQIT1: 16919 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target); 16920 16921 case IX86_BUILTIN_PFRSQRT: 16922 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0); 16923 16924 case IX86_BUILTIN_PFSUB: 16925 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target); 16926 16927 case IX86_BUILTIN_PFSUBR: 16928 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target); 16929 16930 case IX86_BUILTIN_PI2FD: 16931 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0); 16932 16933 case IX86_BUILTIN_PMULHRW: 16934 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target); 16935 16936 case IX86_BUILTIN_PF2IW: 16937 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0); 16938 16939 case IX86_BUILTIN_PFNACC: 16940 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target); 16941 16942 case IX86_BUILTIN_PFPNACC: 16943 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target); 16944 16945 case IX86_BUILTIN_PI2FW: 16946 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0); 16947 16948 case IX86_BUILTIN_PSWAPDSI: 16949 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0); 16950 16951 case IX86_BUILTIN_PSWAPDSF: 16952 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, 
arglist, target, 0); 16953 16954 case IX86_BUILTIN_SQRTSD: 16955 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target); 16956 case IX86_BUILTIN_LOADUPD: 16957 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1); 16958 case IX86_BUILTIN_STOREUPD: 16959 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist); 16960 16961 case IX86_BUILTIN_MFENCE: 16962 emit_insn (gen_sse2_mfence ()); 16963 return 0; 16964 case IX86_BUILTIN_LFENCE: 16965 emit_insn (gen_sse2_lfence ()); 16966 return 0; 16967 16968 case IX86_BUILTIN_CLFLUSH: 16969 arg0 = TREE_VALUE (arglist); 16970 op0 = expand_normal (arg0); 16971 icode = CODE_FOR_sse2_clflush; 16972 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode)) 16973 op0 = copy_to_mode_reg (Pmode, op0); 16974 16975 emit_insn (gen_sse2_clflush (op0)); 16976 return 0; 16977 16978 case IX86_BUILTIN_MOVNTPD: 16979 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist); 16980 case IX86_BUILTIN_MOVNTDQ: 16981 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist); 16982 case IX86_BUILTIN_MOVNTI: 16983 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist); 16984 16985 case IX86_BUILTIN_LOADDQU: 16986 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1); 16987 case IX86_BUILTIN_STOREDQU: 16988 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist); 16989 16990 case IX86_BUILTIN_MONITOR: 16991 arg0 = TREE_VALUE (arglist); 16992 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16993 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 16994 op0 = expand_normal (arg0); 16995 op1 = expand_normal (arg1); 16996 op2 = expand_normal (arg2); 16997 if (!REG_P (op0)) 16998 op0 = copy_to_mode_reg (Pmode, op0); 16999 if (!REG_P (op1)) 17000 op1 = copy_to_mode_reg (SImode, op1); 17001 if (!REG_P (op2)) 17002 op2 = copy_to_mode_reg (SImode, op2); 17003 if (!TARGET_64BIT) 17004 emit_insn (gen_sse3_monitor (op0, op1, op2)); 17005 else 17006 
emit_insn (gen_sse3_monitor64 (op0, op1, op2)); 17007 return 0; 17008 17009 case IX86_BUILTIN_MWAIT: 17010 arg0 = TREE_VALUE (arglist); 17011 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 17012 op0 = expand_normal (arg0); 17013 op1 = expand_normal (arg1); 17014 if (!REG_P (op0)) 17015 op0 = copy_to_mode_reg (SImode, op0); 17016 if (!REG_P (op1)) 17017 op1 = copy_to_mode_reg (SImode, op1); 17018 emit_insn (gen_sse3_mwait (op0, op1)); 17019 return 0; 17020 17021 case IX86_BUILTIN_LDDQU: 17022 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist, 17023 target, 1); 17024 17025 case IX86_BUILTIN_PALIGNR: 17026 case IX86_BUILTIN_PALIGNR128: 17027 if (fcode == IX86_BUILTIN_PALIGNR) 17028 { 17029 icode = CODE_FOR_ssse3_palignrdi; 17030 mode = DImode; 17031 } 17032 else 17033 { 17034 icode = CODE_FOR_ssse3_palignrti; 17035 mode = V2DImode; 17036 } 17037 arg0 = TREE_VALUE (arglist); 17038 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 17039 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 17040 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 17041 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 17042 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); 17043 tmode = insn_data[icode].operand[0].mode; 17044 mode1 = insn_data[icode].operand[1].mode; 17045 mode2 = insn_data[icode].operand[2].mode; 17046 mode3 = insn_data[icode].operand[3].mode; 17047 17048 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) 17049 { 17050 op0 = copy_to_reg (op0); 17051 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0); 17052 } 17053 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) 17054 { 17055 op1 = copy_to_reg (op1); 17056 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0); 17057 } 17058 if (! 
(*insn_data[icode].operand[3].predicate) (op2, mode3)) 17059 { 17060 error ("shift must be an immediate"); 17061 return const0_rtx; 17062 } 17063 target = gen_reg_rtx (mode); 17064 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0), 17065 op0, op1, op2); 17066 if (! pat) 17067 return 0; 17068 emit_insn (pat); 17069 return target; 17070 17071 case IX86_BUILTIN_MOVNTSD: 17072 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, arglist); 17073 17074 case IX86_BUILTIN_MOVNTSS: 17075 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, arglist); 17076 17077 case IX86_BUILTIN_INSERTQ: 17078 case IX86_BUILTIN_EXTRQ: 17079 icode = (fcode == IX86_BUILTIN_EXTRQ 17080 ? CODE_FOR_sse4a_extrq 17081 : CODE_FOR_sse4a_insertq); 17082 arg0 = TREE_VALUE (arglist); 17083 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 17084 op0 = expand_normal (arg0); 17085 op1 = expand_normal (arg1); 17086 tmode = insn_data[icode].operand[0].mode; 17087 mode1 = insn_data[icode].operand[1].mode; 17088 mode2 = insn_data[icode].operand[2].mode; 17089 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) 17090 op0 = copy_to_mode_reg (mode1, op0); 17091 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) 17092 op1 = copy_to_mode_reg (mode2, op1); 17093 if (optimize || target == 0 17094 || GET_MODE (target) != tmode 17095 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 17096 target = gen_reg_rtx (tmode); 17097 pat = GEN_FCN (icode) (target, op0, op1); 17098 if (! 
pat) 17099 return NULL_RTX; 17100 emit_insn (pat); 17101 return target; 17102 17103 case IX86_BUILTIN_EXTRQI: 17104 icode = CODE_FOR_sse4a_extrqi; 17105 arg0 = TREE_VALUE (arglist); 17106 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 17107 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 17108 op0 = expand_normal (arg0); 17109 op1 = expand_normal (arg1); 17110 op2 = expand_normal (arg2); 17111 tmode = insn_data[icode].operand[0].mode; 17112 mode1 = insn_data[icode].operand[1].mode; 17113 mode2 = insn_data[icode].operand[2].mode; 17114 mode3 = insn_data[icode].operand[3].mode; 17115 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) 17116 op0 = copy_to_mode_reg (mode1, op0); 17117 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) 17118 { 17119 error ("index mask must be an immediate"); 17120 return gen_reg_rtx (tmode); 17121 } 17122 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3)) 17123 { 17124 error ("length mask must be an immediate"); 17125 return gen_reg_rtx (tmode); 17126 } 17127 if (optimize || target == 0 17128 || GET_MODE (target) != tmode 17129 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 17130 target = gen_reg_rtx (tmode); 17131 pat = GEN_FCN (icode) (target, op0, op1, op2); 17132 if (! 
pat) 17133 return NULL_RTX; 17134 emit_insn (pat); 17135 return target; 17136 17137 case IX86_BUILTIN_INSERTQI: 17138 icode = CODE_FOR_sse4a_insertqi; 17139 arg0 = TREE_VALUE (arglist); 17140 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 17141 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 17142 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist)))); 17143 op0 = expand_normal (arg0); 17144 op1 = expand_normal (arg1); 17145 op2 = expand_normal (arg2); 17146 op3 = expand_normal (arg3); 17147 tmode = insn_data[icode].operand[0].mode; 17148 mode1 = insn_data[icode].operand[1].mode; 17149 mode2 = insn_data[icode].operand[2].mode; 17150 mode3 = insn_data[icode].operand[3].mode; 17151 mode4 = insn_data[icode].operand[4].mode; 17152 17153 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) 17154 op0 = copy_to_mode_reg (mode1, op0); 17155 17156 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) 17157 op1 = copy_to_mode_reg (mode2, op1); 17158 17159 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3)) 17160 { 17161 error ("index mask must be an immediate"); 17162 return gen_reg_rtx (tmode); 17163 } 17164 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4)) 17165 { 17166 error ("length mask must be an immediate"); 17167 return gen_reg_rtx (tmode); 17168 } 17169 if (optimize || target == 0 17170 || GET_MODE (target) != tmode 17171 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 17172 target = gen_reg_rtx (tmode); 17173 pat = GEN_FCN (icode) (target, op0, op1, op2, op3); 17174 if (! 
pat) 17175 return NULL_RTX; 17176 emit_insn (pat); 17177 return target; 17178 17179 case IX86_BUILTIN_VEC_INIT_V2SI: 17180 case IX86_BUILTIN_VEC_INIT_V4HI: 17181 case IX86_BUILTIN_VEC_INIT_V8QI: 17182 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target); 17183 17184 case IX86_BUILTIN_VEC_EXT_V2DF: 17185 case IX86_BUILTIN_VEC_EXT_V2DI: 17186 case IX86_BUILTIN_VEC_EXT_V4SF: 17187 case IX86_BUILTIN_VEC_EXT_V4SI: 17188 case IX86_BUILTIN_VEC_EXT_V8HI: 17189 case IX86_BUILTIN_VEC_EXT_V16QI: 17190 case IX86_BUILTIN_VEC_EXT_V2SI: 17191 case IX86_BUILTIN_VEC_EXT_V4HI: 17192 return ix86_expand_vec_ext_builtin (arglist, target); 17193 17194 case IX86_BUILTIN_VEC_SET_V8HI: 17195 case IX86_BUILTIN_VEC_SET_V4HI: 17196 return ix86_expand_vec_set_builtin (arglist); 17197 17198 default: 17199 break; 17200 } 17201 17202 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) 17203 if (d->code == fcode) 17204 { 17205 /* Compares are treated specially. */ 17206 if (d->icode == CODE_FOR_sse_maskcmpv4sf3 17207 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3 17208 || d->icode == CODE_FOR_sse2_maskcmpv2df3 17209 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3) 17210 return ix86_expand_sse_compare (d, arglist, target); 17211 17212 return ix86_expand_binop_builtin (d->icode, arglist, target); 17213 } 17214 17215 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) 17216 if (d->code == fcode) 17217 return ix86_expand_unop_builtin (d->icode, arglist, target, 0); 17218 17219 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) 17220 if (d->code == fcode) 17221 return ix86_expand_sse_comi (d, arglist, target); 17222 17223 gcc_unreachable (); 17224} 17225 17226/* Store OPERAND to the memory after reload is completed. This means 17227 that we can't easily use assign_stack_local. 
 */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  /* Only valid after reload: we address the stack directly rather than
     going through assign_stack_local.  */
  gcc_assert (reload_completed);
  if (TARGET_RED_ZONE)
    {
      /* With a red zone we may store below the stack pointer without
	 adjusting it; the slot lives at sp - RED_ZONE_SIZE.  */
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      /* 64-bit, no red zone: push the value as a DImode word.  HImode and
	 SImode are widened to DImode first so a single push suffices.  */
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		     gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (DImode,
					       gen_rtx_PRE_DEC (DImode,
								stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      /* 32-bit path: push the operand in SImode pieces.  */
      switch (mode)
	{
	case DImode:
	  {
	    /* Split the DImode value and push high word first so the low
	       word ends up at the lower address (little endian layout).  */
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
		       gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
								  stack_pointer_rtx)),
				    operands[1]));
	    emit_insn (
		       gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
								  stack_pointer_rtx)),
				    operands[0]));
	  }
	  break;
	case HImode:
	  /* Store HImodes as SImodes.  */
	  operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
		     gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (GET_MODE (operand),
					       gen_rtx_PRE_DEC (SImode,
								stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

/* Free operand from the memory.
   Releases the stack slot pushed by ix86_force_to_memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  /* Nothing to do for the red-zone path: ix86_force_to_memory stored below
     the stack pointer without adjusting it.  */
  if (!TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.

   Returns a subclass of CLASS suitable for loading X, or NO_REGS to
   force X into memory.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (class == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return class;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (class) ? class : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return class;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (TARGET_80387
	  && standard_80387_constant_p (x))
	{
	  /* Limit class to non-sse.  */
	  if (class == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (class == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (class == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
	    return class;
	}

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (class, Q_REGS))
	return class;
      if (reg_class_subset_p (Q_REGS, class))
	return Q_REGS;
      return NO_REGS;
    }

  return class;
}

/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
enum reg_class
ix86_preferred_output_reload_class (rtx x, enum reg_class class)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  /* NOTE(review): MODE was already initialized above; this re-assignment is
     redundant but harmless.  */
  mode = GET_MODE (x);
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;

  if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
    {
      /* Strip the SSE half out of the mixed x87/SSE classes.  */
      if (class == FP_TOP_SSE_REGS)
	return FP_TOP_REG;
      else if (class == FP_SECOND_SSE_REGS)
	return FP_SECOND_REG;
      else
	return FLOAT_CLASS_P (class) ? class : NO_REGS;
    }

  return class;
}

/* If we are copying between general and FP registers, we need a memory
   location. The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  /* Mixed-unit classes: in strict mode this is a machine-description bug;
     in non-strict (cost-estimation) mode just report "memory needed".  */
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
	return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;

      /* ??? For the cost of one register reformat penalty, we could use
	 the same instructions to move SFmode and DFmode data, but the
	 relevant move patterns don't support those alternatives.  */
      if (mode == SFmode || mode == DFmode)
	return true;
    }

  return false;
}

/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			       enum reg_class class)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (class))
    return true;

  if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
	 disallow a change to these modes, reload will assume it's ok to
	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
	 the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
	return true;

      /* Vector registers do not support subreg with nonzero offsets, which
	 are otherwise valid for integer registers.  Since we can't see
	 whether we have a nonzero offset from here, prohibit all
	 nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
	return true;
    }

  return false;
}

/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

int
ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
			 enum reg_class class2)
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
		   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
		   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from general_purpose_register we may emit multiple
         stores followed by single load causing memory size mismatch stall.
         Count this as arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */

bool
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  */
      return (VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (regno < 4 || TARGET_64BIT)
	return 1;
      if (!TARGET_PARTIAL_REG_STALL)
	return 1;
      /* Late in compilation reload may still place QImode values in
	 non-QI registers; allow that.  */
      return reload_in_progress || reload_completed;
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return 1;
  else if (VALID_FP_MODE_P (mode))
    return 1;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return 1;

  return 0;
}

/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      /* QImode ties only when it can live in any GP register (64-bit) or
	 partial register stalls are not a concern.  */
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}

/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) >= 8
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);

  /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);

  return false;
}

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.

   IN is nonzero for a load (memory -> register), zero for a store.  */
int
ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  /* Integer classes, indexed by size.  */
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
	return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
		: ix86_cost->movzbl_load);
      else
	return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
		: ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
	      * (((int) GET_MODE_SIZE (mode)
		  + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.
*/ 17789 17790static bool 17791ix86_rtx_costs (rtx x, int code, int outer_code, int *total) 17792{ 17793 enum machine_mode mode = GET_MODE (x); 17794 17795 switch (code) 17796 { 17797 case CONST_INT: 17798 case CONST: 17799 case LABEL_REF: 17800 case SYMBOL_REF: 17801 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode)) 17802 *total = 3; 17803 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode)) 17804 *total = 2; 17805 else if (flag_pic && SYMBOLIC_CONST (x) 17806 && (!TARGET_64BIT 17807 || (!GET_CODE (x) != LABEL_REF 17808 && (GET_CODE (x) != SYMBOL_REF 17809 || !SYMBOL_REF_LOCAL_P (x))))) 17810 *total = 1; 17811 else 17812 *total = 0; 17813 return true; 17814 17815 case CONST_DOUBLE: 17816 if (mode == VOIDmode) 17817 *total = 0; 17818 else 17819 switch (standard_80387_constant_p (x)) 17820 { 17821 case 1: /* 0.0 */ 17822 *total = 1; 17823 break; 17824 default: /* Other constants */ 17825 *total = 2; 17826 break; 17827 case 0: 17828 case -1: 17829 /* Start with (MEM (SYMBOL_REF)), since that's where 17830 it'll probably end up. Add a penalty for size. */ 17831 *total = (COSTS_N_INSNS (1) 17832 + (flag_pic != 0 && !TARGET_64BIT) 17833 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2)); 17834 break; 17835 } 17836 return true; 17837 17838 case ZERO_EXTEND: 17839 /* The zero extensions is often completely free on x86_64, so make 17840 it as cheap as possible. 
*/ 17841 if (TARGET_64BIT && mode == DImode 17842 && GET_MODE (XEXP (x, 0)) == SImode) 17843 *total = 1; 17844 else if (TARGET_ZERO_EXTEND_WITH_AND) 17845 *total = ix86_cost->add; 17846 else 17847 *total = ix86_cost->movzx; 17848 return false; 17849 17850 case SIGN_EXTEND: 17851 *total = ix86_cost->movsx; 17852 return false; 17853 17854 case ASHIFT: 17855 if (GET_CODE (XEXP (x, 1)) == CONST_INT 17856 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT)) 17857 { 17858 HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 17859 if (value == 1) 17860 { 17861 *total = ix86_cost->add; 17862 return false; 17863 } 17864 if ((value == 2 || value == 3) 17865 && ix86_cost->lea <= ix86_cost->shift_const) 17866 { 17867 *total = ix86_cost->lea; 17868 return false; 17869 } 17870 } 17871 /* FALLTHRU */ 17872 17873 case ROTATE: 17874 case ASHIFTRT: 17875 case LSHIFTRT: 17876 case ROTATERT: 17877 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode) 17878 { 17879 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 17880 { 17881 if (INTVAL (XEXP (x, 1)) > 32) 17882 *total = ix86_cost->shift_const + COSTS_N_INSNS (2); 17883 else 17884 *total = ix86_cost->shift_const * 2; 17885 } 17886 else 17887 { 17888 if (GET_CODE (XEXP (x, 1)) == AND) 17889 *total = ix86_cost->shift_var * 2; 17890 else 17891 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2); 17892 } 17893 } 17894 else 17895 { 17896 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 17897 *total = ix86_cost->shift_const; 17898 else 17899 *total = ix86_cost->shift_var; 17900 } 17901 return false; 17902 17903 case MULT: 17904 if (FLOAT_MODE_P (mode)) 17905 { 17906 *total = ix86_cost->fmul; 17907 return false; 17908 } 17909 else 17910 { 17911 rtx op0 = XEXP (x, 0); 17912 rtx op1 = XEXP (x, 1); 17913 int nbits; 17914 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 17915 { 17916 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 17917 for (nbits = 0; value != 0; value &= value - 1) 17918 nbits++; 17919 } 17920 else 17921 /* This is arbitrary. 
*/ 17922 nbits = 7; 17923 17924 /* Compute costs correctly for widening multiplication. */ 17925 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND) 17926 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2 17927 == GET_MODE_SIZE (mode)) 17928 { 17929 int is_mulwiden = 0; 17930 enum machine_mode inner_mode = GET_MODE (op0); 17931 17932 if (GET_CODE (op0) == GET_CODE (op1)) 17933 is_mulwiden = 1, op1 = XEXP (op1, 0); 17934 else if (GET_CODE (op1) == CONST_INT) 17935 { 17936 if (GET_CODE (op0) == SIGN_EXTEND) 17937 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode) 17938 == INTVAL (op1); 17939 else 17940 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode)); 17941 } 17942 17943 if (is_mulwiden) 17944 op0 = XEXP (op0, 0), mode = GET_MODE (op0); 17945 } 17946 17947 *total = (ix86_cost->mult_init[MODE_INDEX (mode)] 17948 + nbits * ix86_cost->mult_bit 17949 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code)); 17950 17951 return true; 17952 } 17953 17954 case DIV: 17955 case UDIV: 17956 case MOD: 17957 case UMOD: 17958 if (FLOAT_MODE_P (mode)) 17959 *total = ix86_cost->fdiv; 17960 else 17961 *total = ix86_cost->divide[MODE_INDEX (mode)]; 17962 return false; 17963 17964 case PLUS: 17965 if (FLOAT_MODE_P (mode)) 17966 *total = ix86_cost->fadd; 17967 else if (GET_MODE_CLASS (mode) == MODE_INT 17968 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode)) 17969 { 17970 if (GET_CODE (XEXP (x, 0)) == PLUS 17971 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 17972 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT 17973 && CONSTANT_P (XEXP (x, 1))) 17974 { 17975 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); 17976 if (val == 2 || val == 4 || val == 8) 17977 { 17978 *total = ix86_cost->lea; 17979 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code); 17980 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), 17981 outer_code); 17982 *total += rtx_cost (XEXP (x, 1), outer_code); 17983 return true; 17984 } 17985 } 17986 else if 
(GET_CODE (XEXP (x, 0)) == MULT 17987 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT) 17988 { 17989 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); 17990 if (val == 2 || val == 4 || val == 8) 17991 { 17992 *total = ix86_cost->lea; 17993 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code); 17994 *total += rtx_cost (XEXP (x, 1), outer_code); 17995 return true; 17996 } 17997 } 17998 else if (GET_CODE (XEXP (x, 0)) == PLUS) 17999 { 18000 *total = ix86_cost->lea; 18001 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code); 18002 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code); 18003 *total += rtx_cost (XEXP (x, 1), outer_code); 18004 return true; 18005 } 18006 } 18007 /* FALLTHRU */ 18008 18009 case MINUS: 18010 if (FLOAT_MODE_P (mode)) 18011 { 18012 *total = ix86_cost->fadd; 18013 return false; 18014 } 18015 /* FALLTHRU */ 18016 18017 case AND: 18018 case IOR: 18019 case XOR: 18020 if (!TARGET_64BIT && mode == DImode) 18021 { 18022 *total = (ix86_cost->add * 2 18023 + (rtx_cost (XEXP (x, 0), outer_code) 18024 << (GET_MODE (XEXP (x, 0)) != DImode)) 18025 + (rtx_cost (XEXP (x, 1), outer_code) 18026 << (GET_MODE (XEXP (x, 1)) != DImode))); 18027 return true; 18028 } 18029 /* FALLTHRU */ 18030 18031 case NEG: 18032 if (FLOAT_MODE_P (mode)) 18033 { 18034 *total = ix86_cost->fchs; 18035 return false; 18036 } 18037 /* FALLTHRU */ 18038 18039 case NOT: 18040 if (!TARGET_64BIT && mode == DImode) 18041 *total = ix86_cost->add * 2; 18042 else 18043 *total = ix86_cost->add; 18044 return false; 18045 18046 case COMPARE: 18047 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT 18048 && XEXP (XEXP (x, 0), 1) == const1_rtx 18049 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT 18050 && XEXP (x, 1) == const0_rtx) 18051 { 18052 /* This kind of construct is implemented using test[bwl]. 18053 Treat it as if we had an AND. 
*/ 18054 *total = (ix86_cost->add 18055 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code) 18056 + rtx_cost (const1_rtx, outer_code)); 18057 return true; 18058 } 18059 return false; 18060 18061 case FLOAT_EXTEND: 18062 if (!TARGET_SSE_MATH 18063 || mode == XFmode 18064 || (mode == DFmode && !TARGET_SSE2)) 18065 /* For standard 80387 constants, raise the cost to prevent 18066 compress_float_constant() to generate load from memory. */ 18067 switch (standard_80387_constant_p (XEXP (x, 0))) 18068 { 18069 case -1: 18070 case 0: 18071 *total = 0; 18072 break; 18073 case 1: /* 0.0 */ 18074 *total = 1; 18075 break; 18076 default: 18077 *total = (x86_ext_80387_constants & TUNEMASK 18078 || optimize_size 18079 ? 1 : 0); 18080 } 18081 return false; 18082 18083 case ABS: 18084 if (FLOAT_MODE_P (mode)) 18085 *total = ix86_cost->fabs; 18086 return false; 18087 18088 case SQRT: 18089 if (FLOAT_MODE_P (mode)) 18090 *total = ix86_cost->fsqrt; 18091 return false; 18092 18093 case UNSPEC: 18094 if (XINT (x, 1) == UNSPEC_TP) 18095 *total = 0; 18096 return false; 18097 18098 default: 18099 return false; 18100 } 18101} 18102 18103#if TARGET_MACHO 18104 18105static int current_machopic_label_num; 18106 18107/* Given a symbol name and its associated stub, write out the 18108 definition of the stub. */ 18109 18110void 18111machopic_output_stub (FILE *file, const char *symb, const char *stub) 18112{ 18113 unsigned int length; 18114 char *binder_name, *symbol_name, lazy_ptr_name[32]; 18115 int label = ++current_machopic_label_num; 18116 18117 /* For 64-bit we shouldn't get here. */ 18118 gcc_assert (!TARGET_64BIT); 18119 18120 /* Lose our funky encoding stuff so it doesn't contaminate the stub. 
*/ 18121 symb = (*targetm.strip_name_encoding) (symb); 18122 18123 length = strlen (stub); 18124 binder_name = alloca (length + 32); 18125 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length); 18126 18127 length = strlen (symb); 18128 symbol_name = alloca (length + 32); 18129 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); 18130 18131 sprintf (lazy_ptr_name, "L%d$lz", label); 18132 18133 if (MACHOPIC_PURE) 18134 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]); 18135 else 18136 switch_to_section (darwin_sections[machopic_symbol_stub_section]); 18137 18138 fprintf (file, "%s:\n", stub); 18139 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 18140 18141 if (MACHOPIC_PURE) 18142 { 18143 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label); 18144 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label); 18145 fprintf (file, "\tjmp\t*%%edx\n"); 18146 } 18147 else 18148 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name); 18149 18150 fprintf (file, "%s:\n", binder_name); 18151 18152 if (MACHOPIC_PURE) 18153 { 18154 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label); 18155 fprintf (file, "\tpushl\t%%eax\n"); 18156 } 18157 else 18158 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name); 18159 18160 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n"); 18161 18162 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]); 18163 fprintf (file, "%s:\n", lazy_ptr_name); 18164 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 18165 fprintf (file, "\t.long %s\n", binder_name); 18166} 18167 18168void 18169darwin_x86_file_end (void) 18170{ 18171 darwin_file_end (); 18172 ix86_file_end (); 18173} 18174#endif /* TARGET_MACHO */ 18175 18176/* Order the registers for register allocator. */ 18177 18178void 18179x86_order_regs_for_local_alloc (void) 18180{ 18181 int pos = 0; 18182 int i; 18183 18184 /* First allocate the local general purpose registers. 
*/ 18185 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 18186 if (GENERAL_REGNO_P (i) && call_used_regs[i]) 18187 reg_alloc_order [pos++] = i; 18188 18189 /* Global general purpose registers. */ 18190 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 18191 if (GENERAL_REGNO_P (i) && !call_used_regs[i]) 18192 reg_alloc_order [pos++] = i; 18193 18194 /* x87 registers come first in case we are doing FP math 18195 using them. */ 18196 if (!TARGET_SSE_MATH) 18197 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) 18198 reg_alloc_order [pos++] = i; 18199 18200 /* SSE registers. */ 18201 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) 18202 reg_alloc_order [pos++] = i; 18203 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) 18204 reg_alloc_order [pos++] = i; 18205 18206 /* x87 registers. */ 18207 if (TARGET_SSE_MATH) 18208 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) 18209 reg_alloc_order [pos++] = i; 18210 18211 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) 18212 reg_alloc_order [pos++] = i; 18213 18214 /* Initialize the rest of array as we do not allocate some registers 18215 at all. */ 18216 while (pos < FIRST_PSEUDO_REGISTER) 18217 reg_alloc_order [pos++] = 0; 18218} 18219 18220/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in 18221 struct attribute_spec.handler. 
*/ 18222static tree 18223ix86_handle_struct_attribute (tree *node, tree name, 18224 tree args ATTRIBUTE_UNUSED, 18225 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 18226{ 18227 tree *type = NULL; 18228 if (DECL_P (*node)) 18229 { 18230 if (TREE_CODE (*node) == TYPE_DECL) 18231 type = &TREE_TYPE (*node); 18232 } 18233 else 18234 type = node; 18235 18236 if (!(type && (TREE_CODE (*type) == RECORD_TYPE 18237 || TREE_CODE (*type) == UNION_TYPE))) 18238 { 18239 warning (OPT_Wattributes, "%qs attribute ignored", 18240 IDENTIFIER_POINTER (name)); 18241 *no_add_attrs = true; 18242 } 18243 18244 else if ((is_attribute_p ("ms_struct", name) 18245 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) 18246 || ((is_attribute_p ("gcc_struct", name) 18247 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) 18248 { 18249 warning (OPT_Wattributes, "%qs incompatible attribute ignored", 18250 IDENTIFIER_POINTER (name)); 18251 *no_add_attrs = true; 18252 } 18253 18254 return NULL_TREE; 18255} 18256 18257static bool 18258ix86_ms_bitfield_layout_p (tree record_type) 18259{ 18260 return (TARGET_MS_BITFIELD_LAYOUT && 18261 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) 18262 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)); 18263} 18264 18265/* Returns an expression indicating where the this parameter is 18266 located on entry to the FUNCTION. */ 18267 18268static rtx 18269x86_this_parameter (tree function) 18270{ 18271 tree type = TREE_TYPE (function); 18272 18273 if (TARGET_64BIT) 18274 { 18275 int n = aggregate_value_p (TREE_TYPE (type), type) != 0; 18276 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]); 18277 } 18278 18279 if (ix86_function_regparm (type, function) > 0) 18280 { 18281 tree parm; 18282 18283 parm = TYPE_ARG_TYPES (type); 18284 /* Figure out whether or not the function has a variable number of 18285 arguments. 
*/ 18286 for (; parm; parm = TREE_CHAIN (parm)) 18287 if (TREE_VALUE (parm) == void_type_node) 18288 break; 18289 /* If not, the this parameter is in the first argument. */ 18290 if (parm) 18291 { 18292 int regno = 0; 18293 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) 18294 regno = 2; 18295 return gen_rtx_REG (SImode, regno); 18296 } 18297 } 18298 18299 if (aggregate_value_p (TREE_TYPE (type), type)) 18300 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8)); 18301 else 18302 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4)); 18303} 18304 18305/* Determine whether x86_output_mi_thunk can succeed. */ 18306 18307static bool 18308x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED, 18309 HOST_WIDE_INT delta ATTRIBUTE_UNUSED, 18310 HOST_WIDE_INT vcall_offset, tree function) 18311{ 18312 /* 64-bit can handle anything. */ 18313 if (TARGET_64BIT) 18314 return true; 18315 18316 /* For 32-bit, everything's fine if we have one free register. */ 18317 if (ix86_function_regparm (TREE_TYPE (function), function) < 3) 18318 return true; 18319 18320 /* Need a free register for vcall_offset. */ 18321 if (vcall_offset) 18322 return false; 18323 18324 /* Need a free register for GOT references. */ 18325 if (flag_pic && !(*targetm.binds_local_p) (function)) 18326 return false; 18327 18328 /* Otherwise ok. */ 18329 return true; 18330} 18331 18332/* Output the assembler code for a thunk function. THUNK_DECL is the 18333 declaration for the thunk function itself, FUNCTION is the decl for 18334 the target function. DELTA is an immediate constant offset to be 18335 added to THIS. If VCALL_OFFSET is nonzero, the word at 18336 *(*this + vcall_offset) should be added to THIS. 
*/ 18337 18338static void 18339x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, 18340 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, 18341 HOST_WIDE_INT vcall_offset, tree function) 18342{ 18343 rtx xops[3]; 18344 rtx this = x86_this_parameter (function); 18345 rtx this_reg, tmp; 18346 18347 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well 18348 pull it in now and let DELTA benefit. */ 18349 if (REG_P (this)) 18350 this_reg = this; 18351 else if (vcall_offset) 18352 { 18353 /* Put the this parameter into %eax. */ 18354 xops[0] = this; 18355 xops[1] = this_reg = gen_rtx_REG (Pmode, 0); 18356 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); 18357 } 18358 else 18359 this_reg = NULL_RTX; 18360 18361 /* Adjust the this parameter by a fixed constant. */ 18362 if (delta) 18363 { 18364 xops[0] = GEN_INT (delta); 18365 xops[1] = this_reg ? this_reg : this; 18366 if (TARGET_64BIT) 18367 { 18368 if (!x86_64_general_operand (xops[0], DImode)) 18369 { 18370 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */); 18371 xops[1] = tmp; 18372 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops); 18373 xops[0] = tmp; 18374 xops[1] = this; 18375 } 18376 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); 18377 } 18378 else 18379 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); 18380 } 18381 18382 /* Adjust the this parameter by a value stored in the vtable. 
*/ 18383 if (vcall_offset) 18384 { 18385 if (TARGET_64BIT) 18386 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */); 18387 else 18388 { 18389 int tmp_regno = 2 /* ECX */; 18390 if (lookup_attribute ("fastcall", 18391 TYPE_ATTRIBUTES (TREE_TYPE (function)))) 18392 tmp_regno = 0 /* EAX */; 18393 tmp = gen_rtx_REG (SImode, tmp_regno); 18394 } 18395 18396 xops[0] = gen_rtx_MEM (Pmode, this_reg); 18397 xops[1] = tmp; 18398 if (TARGET_64BIT) 18399 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); 18400 else 18401 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); 18402 18403 /* Adjust the this parameter. */ 18404 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset)); 18405 if (TARGET_64BIT && !memory_operand (xops[0], Pmode)) 18406 { 18407 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */); 18408 xops[0] = GEN_INT (vcall_offset); 18409 xops[1] = tmp2; 18410 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); 18411 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2)); 18412 } 18413 xops[1] = this_reg; 18414 if (TARGET_64BIT) 18415 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); 18416 else 18417 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); 18418 } 18419 18420 /* If necessary, drop THIS back to its stack slot. 
*/ 18421 if (this_reg && this_reg != this) 18422 { 18423 xops[0] = this_reg; 18424 xops[1] = this; 18425 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); 18426 } 18427 18428 xops[0] = XEXP (DECL_RTL (function), 0); 18429 if (TARGET_64BIT) 18430 { 18431 if (!flag_pic || (*targetm.binds_local_p) (function)) 18432 output_asm_insn ("jmp\t%P0", xops); 18433 else 18434 { 18435 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL); 18436 tmp = gen_rtx_CONST (Pmode, tmp); 18437 tmp = gen_rtx_MEM (QImode, tmp); 18438 xops[0] = tmp; 18439 output_asm_insn ("jmp\t%A0", xops); 18440 } 18441 } 18442 else 18443 { 18444 if (!flag_pic || (*targetm.binds_local_p) (function)) 18445 output_asm_insn ("jmp\t%P0", xops); 18446 else 18447#if TARGET_MACHO 18448 if (TARGET_MACHO) 18449 { 18450 rtx sym_ref = XEXP (DECL_RTL (function), 0); 18451 tmp = (gen_rtx_SYMBOL_REF 18452 (Pmode, 18453 machopic_indirection_name (sym_ref, /*stub_p=*/true))); 18454 tmp = gen_rtx_MEM (QImode, tmp); 18455 xops[0] = tmp; 18456 output_asm_insn ("jmp\t%0", xops); 18457 } 18458 else 18459#endif /* TARGET_MACHO */ 18460 { 18461 tmp = gen_rtx_REG (SImode, 2 /* ECX */); 18462 output_set_got (tmp, NULL_RTX); 18463 18464 xops[1] = tmp; 18465 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops); 18466 output_asm_insn ("jmp\t{*}%1", xops); 18467 } 18468 } 18469} 18470 18471static void 18472x86_file_start (void) 18473{ 18474 default_file_start (); 18475#if TARGET_MACHO 18476 darwin_file_start (); 18477#endif 18478 if (X86_FILE_START_VERSION_DIRECTIVE) 18479 fputs ("\t.version\t\"01.01\"\n", asm_out_file); 18480 if (X86_FILE_START_FLTUSED) 18481 fputs ("\t.global\t__fltused\n", asm_out_file); 18482 if (ix86_asm_dialect == ASM_INTEL) 18483 fputs ("\t.intel_syntax\n", asm_out_file); 18484} 18485 18486int 18487x86_field_alignment (tree field, int computed) 18488{ 18489 enum machine_mode mode; 18490 tree type = TREE_TYPE (field); 18491 18492 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) 18493 return 
computed; 18494 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE 18495 ? get_inner_array_type (type) : type); 18496 if (mode == DFmode || mode == DCmode 18497 || GET_MODE_CLASS (mode) == MODE_INT 18498 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) 18499 return MIN (32, computed); 18500 return computed; 18501} 18502 18503/* Output assembler code to FILE to increment profiler label # LABELNO 18504 for profiling a function entry. */ 18505void 18506x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) 18507{ 18508 if (TARGET_64BIT) 18509 if (flag_pic) 18510 { 18511#ifndef NO_PROFILE_COUNTERS 18512 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno); 18513#endif 18514 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME); 18515 } 18516 else 18517 { 18518#ifndef NO_PROFILE_COUNTERS 18519 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno); 18520#endif 18521 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); 18522 } 18523 else if (flag_pic) 18524 { 18525#ifndef NO_PROFILE_COUNTERS 18526 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n", 18527 LPREFIX, labelno, PROFILE_COUNT_REGISTER); 18528#endif 18529 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME); 18530 } 18531 else 18532 { 18533#ifndef NO_PROFILE_COUNTERS 18534 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno, 18535 PROFILE_COUNT_REGISTER); 18536#endif 18537 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); 18538 } 18539} 18540 18541/* We don't have exact information about the insn sizes, but we may assume 18542 quite safely that we are informed about all 1 byte insns and memory 18543 address sizes. This is enough to eliminate unnecessary padding in 18544 99% of cases. */ 18545 18546static int 18547min_insn_size (rtx insn) 18548{ 18549 int l = 0; 18550 18551 if (!INSN_P (insn) || !active_insn_p (insn)) 18552 return 0; 18553 18554 /* Discard alignments we've emit and jump instructions. 
*/ 18555 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE 18556 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) 18557 return 0; 18558 if (GET_CODE (insn) == JUMP_INSN 18559 && (GET_CODE (PATTERN (insn)) == ADDR_VEC 18560 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)) 18561 return 0; 18562 18563 /* Important case - calls are always 5 bytes. 18564 It is common to have many calls in the row. */ 18565 if (GET_CODE (insn) == CALL_INSN 18566 && symbolic_reference_mentioned_p (PATTERN (insn)) 18567 && !SIBLING_CALL_P (insn)) 18568 return 5; 18569 if (get_attr_length (insn) <= 1) 18570 return 1; 18571 18572 /* For normal instructions we may rely on the sizes of addresses 18573 and the presence of symbol to require 4 bytes of encoding. 18574 This is not the case for jumps where references are PC relative. */ 18575 if (GET_CODE (insn) != JUMP_INSN) 18576 { 18577 l = get_attr_length_address (insn); 18578 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn))) 18579 l = 4; 18580 } 18581 if (l) 18582 return 1+l; 18583 else 18584 return 2; 18585} 18586 18587/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte 18588 window. */ 18589 18590static void 18591ix86_avoid_jump_misspredicts (void) 18592{ 18593 rtx insn, start = get_insns (); 18594 int nbytes = 0, njumps = 0; 18595 int isjump = 0; 18596 18597 /* Look for all minimal intervals of instructions containing 4 jumps. 18598 The intervals are bounded by START and INSN. NBYTES is the total 18599 size of instructions in the interval including INSN and not including 18600 START. When the NBYTES is smaller than 16 bytes, it is possible 18601 that the end of START and INSN ends up in the same 16byte page. 18602 18603 The smallest offset in the page INSN can start is the case where START 18604 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN). 18605 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN). 
18606 */ 18607 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 18608 { 18609 18610 nbytes += min_insn_size (insn); 18611 if (dump_file) 18612 fprintf(dump_file, "Insn %i estimated to %i bytes\n", 18613 INSN_UID (insn), min_insn_size (insn)); 18614 if ((GET_CODE (insn) == JUMP_INSN 18615 && GET_CODE (PATTERN (insn)) != ADDR_VEC 18616 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) 18617 || GET_CODE (insn) == CALL_INSN) 18618 njumps++; 18619 else 18620 continue; 18621 18622 while (njumps > 3) 18623 { 18624 start = NEXT_INSN (start); 18625 if ((GET_CODE (start) == JUMP_INSN 18626 && GET_CODE (PATTERN (start)) != ADDR_VEC 18627 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC) 18628 || GET_CODE (start) == CALL_INSN) 18629 njumps--, isjump = 1; 18630 else 18631 isjump = 0; 18632 nbytes -= min_insn_size (start); 18633 } 18634 gcc_assert (njumps >= 0); 18635 if (dump_file) 18636 fprintf (dump_file, "Interval %i to %i has %i bytes\n", 18637 INSN_UID (start), INSN_UID (insn), nbytes); 18638 18639 if (njumps == 3 && isjump && nbytes < 16) 18640 { 18641 int padsize = 15 - nbytes + min_insn_size (insn); 18642 18643 if (dump_file) 18644 fprintf (dump_file, "Padding insn %i by %i bytes!\n", 18645 INSN_UID (insn), padsize); 18646 emit_insn_before (gen_align (GEN_INT (padsize)), insn); 18647 } 18648 } 18649} 18650 18651/* AMD Athlon works faster 18652 when RET is not destination of conditional jump or directly preceded 18653 by other jump instruction. We avoid the penalty by inserting NOP just 18654 before the RET instructions in such cases. 
*/ 18655static void 18656ix86_pad_returns (void) 18657{ 18658 edge e; 18659 edge_iterator ei; 18660 18661 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) 18662 { 18663 basic_block bb = e->src; 18664 rtx ret = BB_END (bb); 18665 rtx prev; 18666 bool replace = false; 18667 18668 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN 18669 || !maybe_hot_bb_p (bb)) 18670 continue; 18671 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev)) 18672 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL) 18673 break; 18674 if (prev && GET_CODE (prev) == CODE_LABEL) 18675 { 18676 edge e; 18677 edge_iterator ei; 18678 18679 FOR_EACH_EDGE (e, ei, bb->preds) 18680 if (EDGE_FREQUENCY (e) && e->src->index >= 0 18681 && !(e->flags & EDGE_FALLTHRU)) 18682 replace = true; 18683 } 18684 if (!replace) 18685 { 18686 prev = prev_active_insn (ret); 18687 if (prev 18688 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev)) 18689 || GET_CODE (prev) == CALL_INSN)) 18690 replace = true; 18691 /* Empty functions get branch mispredict even when the jump destination 18692 is not visible to us. */ 18693 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED) 18694 replace = true; 18695 } 18696 if (replace) 18697 { 18698 emit_insn_before (gen_return_internal_long (), ret); 18699 delete_insn (ret); 18700 } 18701 } 18702} 18703 18704/* Implement machine specific optimizations. We implement padding of returns 18705 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ 18706static void 18707ix86_reorg (void) 18708{ 18709 if (TARGET_PAD_RETURNS && optimize && !optimize_size) 18710 ix86_pad_returns (); 18711 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size) 18712 ix86_avoid_jump_misspredicts (); 18713} 18714 18715/* Return nonzero when QImode register that must be represented via REX prefix 18716 is used. 
*/ 18717bool 18718x86_extended_QIreg_mentioned_p (rtx insn) 18719{ 18720 int i; 18721 extract_insn_cached (insn); 18722 for (i = 0; i < recog_data.n_operands; i++) 18723 if (REG_P (recog_data.operand[i]) 18724 && REGNO (recog_data.operand[i]) >= 4) 18725 return true; 18726 return false; 18727} 18728 18729/* Return nonzero when P points to register encoded via REX prefix. 18730 Called via for_each_rtx. */ 18731static int 18732extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED) 18733{ 18734 unsigned int regno; 18735 if (!REG_P (*p)) 18736 return 0; 18737 regno = REGNO (*p); 18738 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno); 18739} 18740 18741/* Return true when INSN mentions register that must be encoded using REX 18742 prefix. */ 18743bool 18744x86_extended_reg_mentioned_p (rtx insn) 18745{ 18746 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL); 18747} 18748 18749/* Generate an unsigned DImode/SImode to FP conversion. This is the same code 18750 optabs would emit if we didn't have TFmode patterns. 
*/ 18751 18752void 18753x86_emit_floatuns (rtx operands[2]) 18754{ 18755 rtx neglab, donelab, i0, i1, f0, in, out; 18756 enum machine_mode mode, inmode; 18757 18758 inmode = GET_MODE (operands[1]); 18759 gcc_assert (inmode == SImode || inmode == DImode); 18760 18761 out = operands[0]; 18762 in = force_reg (inmode, operands[1]); 18763 mode = GET_MODE (out); 18764 neglab = gen_label_rtx (); 18765 donelab = gen_label_rtx (); 18766 i1 = gen_reg_rtx (Pmode); 18767 f0 = gen_reg_rtx (mode); 18768 18769 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab); 18770 18771 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in))); 18772 emit_jump_insn (gen_jump (donelab)); 18773 emit_barrier (); 18774 18775 emit_label (neglab); 18776 18777 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT); 18778 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT); 18779 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT); 18780 expand_float (f0, i0, 0); 18781 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0))); 18782 18783 emit_label (donelab); 18784} 18785 18786/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector 18787 with all elements equal to VAR. Return true if successful. 
*/

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
                                   rtx target, rtx val)
{
  enum machine_mode smode, wsmode, wvmode;
  rtx x;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
        return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These modes have a native vec_duplicate pattern.  */
      val = force_reg (GET_MODE_INNER (mode), val);
      x = gen_rtx_VEC_DUPLICATE (mode, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4HImode:
      if (!mmx_ok)
        return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
        {
          val = gen_lowpart (SImode, val);
          x = gen_rtx_TRUNCATE (HImode, val);
          x = gen_rtx_VEC_DUPLICATE (mode, x);
          emit_insn (gen_rtx_SET (VOIDmode, target, x));
          return true;
        }
      else
        {
          smode = HImode;
          wsmode = SImode;
          wvmode = V2SImode;
          goto widen;
        }

    case V8QImode:
      if (!mmx_ok)
        return false;
      smode = QImode;
      wsmode = HImode;
      wvmode = V4HImode;
      goto widen;
    case V8HImode:
      if (TARGET_SSE2)
        {
          rtx tmp1, tmp2;
          /* Extend HImode to SImode using a paradoxical SUBREG.  */
          tmp1 = gen_reg_rtx (SImode);
          emit_move_insn (tmp1, gen_lowpart (SImode, val));
          /* Insert the SImode value as low element of V4SImode vector.  */
          tmp2 = gen_reg_rtx (V4SImode);
          tmp1 = gen_rtx_VEC_MERGE (V4SImode,
                                    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
                                    CONST0_RTX (V4SImode),
                                    const1_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
          /* Cast the V4SImode vector back to a V8HImode vector.  */
          tmp1 = gen_reg_rtx (V8HImode);
          emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
          /* Duplicate the low short through the whole low SImode word.  */
          emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
          /* Cast the V8HImode vector back to a V4SImode vector.  */
          tmp2 = gen_reg_rtx (V4SImode);
          emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
          /* Replicate the low element of the V4SImode vector.  */
          emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
          /* Cast the V4SImode vector back to V8HImode, and store in
             target.  */
          emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
          return true;
        }
      smode = HImode;
      wsmode = SImode;
      wvmode = V4SImode;
      goto widen;
    case V16QImode:
      if (TARGET_SSE2)
        {
          rtx tmp1, tmp2;
          /* Extend QImode to SImode using a paradoxical SUBREG.  */
          tmp1 = gen_reg_rtx (SImode);
          emit_move_insn (tmp1, gen_lowpart (SImode, val));
          /* Insert the SImode value as low element of V4SImode vector.  */
          tmp2 = gen_reg_rtx (V4SImode);
          tmp1 = gen_rtx_VEC_MERGE (V4SImode,
                                    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
                                    CONST0_RTX (V4SImode),
                                    const1_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
          /* Cast the V4SImode vector back to a V16QImode vector.  */
          tmp1 = gen_reg_rtx (V16QImode);
          emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
          /* Duplicate the low byte through the whole low SImode word.  */
          emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
          emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
          /* Cast the V16QImode vector back to a V4SImode vector.  */
          tmp2 = gen_reg_rtx (V4SImode);
          emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
          /* Replicate the low element of the V4SImode vector.  */
          emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
          /* Cast the V4SImode vector back to V16QImode, and store in
             target.  */
          emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
          return true;
        }
      smode = QImode;
      wsmode = HImode;
      wvmode = V8HImode;
      goto widen;
    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
                               GEN_INT (GET_MODE_BITSIZE (smode)),
                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
        gcc_unreachable ();
      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
                                     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
        return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
        return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      /* If TARGET is a hard register (or not a register at all), build
         the value in a fresh pseudo and copy at the end.  */
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
        new_target = gen_reg_rtx (mode);
      else
        new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
        {
          /* We need to shuffle the value to the correct position, so
             create a new pseudo to store the intermediate result.  */

          /* With SSE2, we can use the integer shuffle insns.  */
          if (mode != V4SFmode && TARGET_SSE2)
            {
              emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
                                            GEN_INT (1),
                                            GEN_INT (one_var == 1 ? 0 : 1),
                                            GEN_INT (one_var == 2 ? 0 : 1),
                                            GEN_INT (one_var == 3 ? 0 : 1)));
              if (target != new_target)
                emit_move_insn (target, new_target);
              return true;
            }

          /* Otherwise convert the intermediate result to V4SFmode and
             use the SSE1 shuffle instructions.  */
          if (mode != V4SFmode)
            {
              tmp = gen_reg_rtx (V4SFmode);
              emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
            }
          else
            tmp = new_target;

          emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
                                       GEN_INT (1),
                                       GEN_INT (one_var == 1 ? 0 : 1),
                                       GEN_INT (one_var == 2 ? 0+4 : 1+4),
                                       GEN_INT (one_var == 3 ? 0+4 : 1+4)));

          if (mode != V4SFmode)
            emit_move_insn (target, gen_lowpart (V4SImode, tmp));
          else if (tmp != target)
            emit_move_insn (target, tmp);
        }
      else if (target != new_target)
        emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
        return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
        return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
                                                var, one_var))
        gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  /* Build the all-constant vector with the variable slot zeroed.  */
  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
         the general case.  */
      return false;

    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
         the variable value with its adjacent constant value, and
         promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
        {
          /* Variable byte is the high half of the HImode pair.  */
          var = convert_modes (HImode, QImode, var, true);
          var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
                                     NULL_RTX, 1, OPTAB_LIB_WIDEN);
          x = GEN_INT (INTVAL (x) & 0xff);
        }
      else
        {
          /* Variable byte is the low half; constant goes high.  */
          var = convert_modes (HImode, QImode, var, true);
          x = gen_int_mode (INTVAL (x) << 8, HImode);
        }
      if (x != const0_rtx)
        var = expand_simple_binop (HImode, IOR, var, x, var,
                                   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  /* Load the constant part from the pool, then overwrite the one
     variable element.  */
  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}

/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals)
{
  enum machine_mode half_mode = GET_MODE_INNER (mode);
  rtx op0 = NULL, op1 = NULL;
  bool use_vec_concat = false;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      /* For the two element vectors, we always implement VEC_CONCAT.  */
      op0 = XVECEXP (vals, 0, 0);
      op1 = XVECEXP (vals, 0, 1);
      use_vec_concat = true;
      break;

    case V4SFmode:
      half_mode = V2SFmode;
      goto half;
    case V4SImode:
      half_mode = V2SImode;
      goto half;
    half:
      {
        rtvec v;

        /* For V4SF and V4SI, we implement a concat of two V2 vectors.
           Recurse to load the two halves.  */

        op0 = gen_reg_rtx (half_mode);
        v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
        ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

        op1 = gen_reg_rtx (half_mode);
        v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
        ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));

        use_vec_concat = true;
      }
      break;

    case V8HImode:
    case V16QImode:
    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

  if (use_vec_concat)
    {
      if (!register_operand (op0, half_mode))
        op0 = force_reg (half_mode, op0);
      if (!register_operand (op1, half_mode))
        op1 = force_reg (half_mode, op1);

      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_CONCAT (mode, op0, op1)));
    }
  else
    {
      /* Pack the elements into word_mode integers (high element of each
         word first, built by shift-and-or), then assemble the words.  */
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
        {
          rtx word = NULL_RTX;

          for (j = 0; j < n_elt_per_word; ++j)
            {
              rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
              elt = convert_modes (word_mode, inner_mode, elt, true);

              if (j == 0)
                word = elt;
              else
                {
                  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
                                              word, 1, OPTAB_LIB_WIDEN);
                  word = expand_simple_binop (word_mode, IOR, word, elt,
                                              word, 1, OPTAB_LIB_WIDEN);
                }
            }

          words[i] = word;
        }

      if (n_words == 1)
        emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
        {
          rtx tmp = gen_reg_rtx (mode);
          /* The CLOBBER tells the RTL passes the pseudo is fully set by
             the two partial word moves that follow.  */
          emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
          emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
          emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
          emit_move_insn (target, tmp);
        }
      else if (n_words == 4)
        {
          rtx tmp = gen_reg_rtx (V4SImode);
          vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
          ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
          emit_move_insn (target, gen_lowpart (mode, tmp));
        }
      else
        gcc_unreachable ();
    }
}

/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  /* Classify the elements: count non-constants (remembering the last
     one), and track all-zero and all-identical.  */
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
        all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
                                            XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
          && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
                                                  XVECEXP (vals, 0, one_var),
                                                  one_var))
        return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
        return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}

/* Store scalar VAL into element ELT of vector TARGET.  Suppress the use
   of MMX instructions unless MMX_OK is true.  Falls back to a stack
   temporary when no suitable insn sequence exists.  */

void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;
  rtx tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
        {
          /* Extract the other element and re-concatenate.  */
          tmp = gen_reg_rtx (GET_MODE_INNER (mode));
          ix86_expand_vector_extract (true, tmp, target, 1 - elt);
          if (elt == 0)
            tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
          else
            tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
          emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
          return;
        }
      break;

    case V2DFmode:
    case V2DImode:
      {
        rtx op0, op1;

        /* For the two element vectors, we implement a VEC_CONCAT with
           the extraction of the other element.  */

        tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
        tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

        if (elt == 0)
          op0 = val, op1 = tmp;
        else
          op0 = tmp, op1 = val;

        tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
        emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      switch (elt)
        {
        case 0:
          use_vec_merge = true;
          break;

        case 1:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* target = A A B B */
          emit_insn (gen_sse_unpcklps (target, target, target));
          /* target = X A B B */
          ix86_expand_vector_set (false, target, val, 0);
          /* target = A X C D */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (1), GEN_INT (0),
                                       GEN_INT (2+4), GEN_INT (3+4)));
          return;

        case 2:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B X D */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (0), GEN_INT (1),
                                       GEN_INT (0+4), GEN_INT (3+4)));
          return;

        case 3:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B C X */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (0), GEN_INT (1),
                                       GEN_INT (2+4), GEN_INT (0+4)));
          return;

        default:
          gcc_unreachable ();
        }
      break;

    case V4SImode:
      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
        {
          use_vec_merge = true;
          break;
        }

      if (TARGET_SSE2)
        {
          /* With SSE2, use integer shuffles to swap element 0 and ELT,
             store into element 0, then shuffle them back.  */

          rtx order[4];

          order[0] = GEN_INT (elt);
          order[1] = const1_rtx;
          order[2] = const2_rtx;
          order[3] = GEN_INT (3);
          order[elt] = const0_rtx;

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));

          ix86_expand_vector_set (false, target, val, 0);

          /* The swap permutation is its own inverse.  */
          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
                                  gen_lowpart (SFmode, val), elt);
        }
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      /* Fallback: spill the vector to the stack, store the element
         through memory, and reload.  */
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}

/* Extract element ELT of vector VEC into scalar TARGET.  Suppress the
   use of MMX instructions unless MMX_OK is true.  Falls back to a stack
   temporary when no suitable insn sequence exists.  */

void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      /* Shuffle the wanted element into position 0, then extract it.  */
      switch (elt)
        {
        case 0:
          tmp = vec;
          break;

        case 1:
        case 3:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
                                       GEN_INT (elt), GEN_INT (elt),
                                       GEN_INT (elt+4), GEN_INT (elt+4)));
          break;

        case 2:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_unpckhps (tmp, vec, vec));
          break;

        default:
          gcc_unreachable ();
        }
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      if (TARGET_SSE2)
        {
          /* As for V4SFmode: move the element to position 0 first.  */
          switch (elt)
            {
            case 0:
              tmp = vec;
              break;

            case 1:
            case 3:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_pshufd_1 (tmp, vec,
                                            GEN_INT (elt), GEN_INT (elt),
                                            GEN_INT (elt), GEN_INT (elt)));
              break;

            case 2:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
              break;

            default:
              gcc_unreachable ();
            }
          vec = tmp;
          use_vec_extr = true;
          elt = 0;
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
                                      gen_lowpart (V4SFmode, vec), elt);
          return;
        }
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == HImode)
        {
          tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
          target = gen_lowpart (SImode, target);
        }

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      /* Fallback: spill the vector to the stack and load the element
         back through memory.  */
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}

/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
   pattern to reduce; DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx tmp1, tmp2, tmp3;

  tmp1 = gen_reg_rtx (V4SFmode);
  tmp2 = gen_reg_rtx (V4SFmode);
  tmp3 = gen_reg_rtx (V4SFmode);

  /* Combine the high and low halves, then the two remaining lanes.  */
  emit_insn (gen_sse_movhlps (tmp1, in, in));
  emit_insn (fn (tmp2, tmp1, in));

  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
                               GEN_INT (1), GEN_INT (1),
                               GEN_INT (1+4), GEN_INT (1+4)));
  emit_insn (fn (dest, tmp2, tmp3));
}

/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}

/* Worker function for TARGET_MD_ASM_CLOBBERS.
   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
                      tree inputs ATTRIBUTE_UNUSED,
                      tree clobbers)
{
  /* Every asm implicitly clobbers the flags, the FP status word, and
     (historically) the direction flag.  */
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
                        clobbers);
  return clobbers;
}

/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  /* Only the medium code models have a notion of large data.  */
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
          || strcmp (section, ".lbss") == 0)
        return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
         in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
        return true;
    }

  return false;
}

/* Worker for TARGET_ENCODE_SECTION_INFO: mark large-data symbols so
   they are addressed appropriately under the medium code models.  */
static void
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}

/* Worker function for REVERSE_CONDITION.
*/

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  /* FP comparisons must respect unordered operands when reversing.  */
  return (mode != CCFPmode && mode != CCFPUmode
          ? reverse_condition (code)
          : reverse_condition_maybe_unordered (code));
}

/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  /* If the source register dies here, pop it off the x87 stack.  */
  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    {
      /* Popping into %st(0) is just discarding the top of stack.  */
      if (REGNO (operands[0]) == FIRST_STACK_REG)
        return output_387_ffreep (operands, 0);
      return "fstp\t%y0";
    }
  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";
  return "fst\t%y0";
}

/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  /* Fetch the FP status word.  */
  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
      /* Transfer the status into EFLAGS and test for unordered.  */
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      /* Test the C2 bit (0x04 in the high byte) directly.  */
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
                               gen_rtx_LABEL_REF (VOIDmode, label),
                               pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}

/* Output code to perform a log1p XFmode calculation.
*/

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  /* fyl2xp1 is only accurate for |x| < 1 - sqrt(2)/2; compare |op1|
     against that threshold and fall back to log2(1 + x) otherwise.  */
  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  /* Large |x|: compute ln2 * log2 (1 + x) directly.  */
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}

/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_fundamental_type (tree type)
{
  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}

/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
         ? default_external_stack_protect_fail ()
         : default_hidden_stack_protect_fail ();
}

/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  /* PIC code uses pc-relative (and, for global symbols, indirect)
     encodings so the tables need no load-time relocation.  */
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
          || ix86_cmodel == CM_SMALL_PIC
          || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
        type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}

#include "gt-i386.h"