/* i386.c revision 267654 */
1/* Subroutines used for code generation on IA-32. 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify 8it under the terms of the GNU General Public License as published by 9the Free Software Foundation; either version 2, or (at your option) 10any later version. 11 12GCC is distributed in the hope that it will be useful, 13but WITHOUT ANY WARRANTY; without even the implied warranty of 14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15GNU General Public License for more details. 16 17You should have received a copy of the GNU General Public License 18along with GCC; see the file COPYING. If not, write to 19the Free Software Foundation, 51 Franklin Street, Fifth Floor, 20Boston, MA 02110-1301, USA. */ 21 22/* $FreeBSD: releng/9.3/contrib/gcc/config/i386/i386.c 260075 2013-12-30 03:40:16Z pfg $ */ 23 24#include "config.h" 25#include "system.h" 26#include "coretypes.h" 27#include "tm.h" 28#include "rtl.h" 29#include "tree.h" 30#include "tm_p.h" 31#include "regs.h" 32#include "hard-reg-set.h" 33#include "real.h" 34#include "insn-config.h" 35#include "conditions.h" 36#include "output.h" 37#include "insn-codes.h" 38#include "insn-attr.h" 39#include "flags.h" 40#include "except.h" 41#include "function.h" 42#include "recog.h" 43#include "expr.h" 44#include "optabs.h" 45#include "toplev.h" 46#include "basic-block.h" 47#include "ggc.h" 48#include "target.h" 49#include "target-def.h" 50#include "langhooks.h" 51#include "cgraph.h" 52#include "tree-gimple.h" 53#include "dwarf2.h" 54#include "tm-constrs.h" 55 56#ifndef CHECK_STACK_LIMIT 57#define CHECK_STACK_LIMIT (-1) 58#endif 59 60/* Return index of given mode in mult and division cost tables. */ 61#define MODE_INDEX(mode) \ 62 ((mode) == QImode ? 0 \ 63 : (mode) == HImode ? 1 \ 64 : (mode) == SImode ? 2 \ 65 : (mode) == DImode ? 
3 \ 66 : 4) 67 68/* Processor costs (relative to an add) */ 69/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */ 70#define COSTS_N_BYTES(N) ((N) * 2) 71 72static const 73struct processor_costs size_cost = { /* costs for tuning for size */ 74 COSTS_N_BYTES (2), /* cost of an add instruction */ 75 COSTS_N_BYTES (3), /* cost of a lea instruction */ 76 COSTS_N_BYTES (2), /* variable shift costs */ 77 COSTS_N_BYTES (3), /* constant shift costs */ 78 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */ 79 COSTS_N_BYTES (3), /* HI */ 80 COSTS_N_BYTES (3), /* SI */ 81 COSTS_N_BYTES (3), /* DI */ 82 COSTS_N_BYTES (5)}, /* other */ 83 0, /* cost of multiply per each bit set */ 84 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */ 85 COSTS_N_BYTES (3), /* HI */ 86 COSTS_N_BYTES (3), /* SI */ 87 COSTS_N_BYTES (3), /* DI */ 88 COSTS_N_BYTES (5)}, /* other */ 89 COSTS_N_BYTES (3), /* cost of movsx */ 90 COSTS_N_BYTES (3), /* cost of movzx */ 91 0, /* "large" insn */ 92 2, /* MOVE_RATIO */ 93 2, /* cost for loading QImode using movzbl */ 94 {2, 2, 2}, /* cost of loading integer registers 95 in QImode, HImode and SImode. 96 Relative to reg-reg move (2). 
*/ 97 {2, 2, 2}, /* cost of storing integer registers */ 98 2, /* cost of reg,reg fld/fst */ 99 {2, 2, 2}, /* cost of loading fp registers 100 in SFmode, DFmode and XFmode */ 101 {2, 2, 2}, /* cost of storing fp registers 102 in SFmode, DFmode and XFmode */ 103 3, /* cost of moving MMX register */ 104 {3, 3}, /* cost of loading MMX registers 105 in SImode and DImode */ 106 {3, 3}, /* cost of storing MMX registers 107 in SImode and DImode */ 108 3, /* cost of moving SSE register */ 109 {3, 3, 3}, /* cost of loading SSE registers 110 in SImode, DImode and TImode */ 111 {3, 3, 3}, /* cost of storing SSE registers 112 in SImode, DImode and TImode */ 113 3, /* MMX or SSE register to integer */ 114 0, /* size of prefetch block */ 115 0, /* number of parallel prefetches */ 116 2, /* Branch cost */ 117 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */ 118 COSTS_N_BYTES (2), /* cost of FMUL instruction. */ 119 COSTS_N_BYTES (2), /* cost of FDIV instruction. */ 120 COSTS_N_BYTES (2), /* cost of FABS instruction. */ 121 COSTS_N_BYTES (2), /* cost of FCHS instruction. */ 122 COSTS_N_BYTES (2), /* cost of FSQRT instruction. 
*/ 123}; 124 125/* Processor costs (relative to an add) */ 126static const 127struct processor_costs i386_cost = { /* 386 specific costs */ 128 COSTS_N_INSNS (1), /* cost of an add instruction */ 129 COSTS_N_INSNS (1), /* cost of a lea instruction */ 130 COSTS_N_INSNS (3), /* variable shift costs */ 131 COSTS_N_INSNS (2), /* constant shift costs */ 132 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */ 133 COSTS_N_INSNS (6), /* HI */ 134 COSTS_N_INSNS (6), /* SI */ 135 COSTS_N_INSNS (6), /* DI */ 136 COSTS_N_INSNS (6)}, /* other */ 137 COSTS_N_INSNS (1), /* cost of multiply per each bit set */ 138 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */ 139 COSTS_N_INSNS (23), /* HI */ 140 COSTS_N_INSNS (23), /* SI */ 141 COSTS_N_INSNS (23), /* DI */ 142 COSTS_N_INSNS (23)}, /* other */ 143 COSTS_N_INSNS (3), /* cost of movsx */ 144 COSTS_N_INSNS (2), /* cost of movzx */ 145 15, /* "large" insn */ 146 3, /* MOVE_RATIO */ 147 4, /* cost for loading QImode using movzbl */ 148 {2, 4, 2}, /* cost of loading integer registers 149 in QImode, HImode and SImode. 150 Relative to reg-reg move (2). */ 151 {2, 4, 2}, /* cost of storing integer registers */ 152 2, /* cost of reg,reg fld/fst */ 153 {8, 8, 8}, /* cost of loading fp registers 154 in SFmode, DFmode and XFmode */ 155 {8, 8, 8}, /* cost of storing fp registers 156 in SFmode, DFmode and XFmode */ 157 2, /* cost of moving MMX register */ 158 {4, 8}, /* cost of loading MMX registers 159 in SImode and DImode */ 160 {4, 8}, /* cost of storing MMX registers 161 in SImode and DImode */ 162 2, /* cost of moving SSE register */ 163 {4, 8, 16}, /* cost of loading SSE registers 164 in SImode, DImode and TImode */ 165 {4, 8, 16}, /* cost of storing SSE registers 166 in SImode, DImode and TImode */ 167 3, /* MMX or SSE register to integer */ 168 0, /* size of prefetch block */ 169 0, /* number of parallel prefetches */ 170 1, /* Branch cost */ 171 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. 
*/ 172 COSTS_N_INSNS (27), /* cost of FMUL instruction. */ 173 COSTS_N_INSNS (88), /* cost of FDIV instruction. */ 174 COSTS_N_INSNS (22), /* cost of FABS instruction. */ 175 COSTS_N_INSNS (24), /* cost of FCHS instruction. */ 176 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */ 177}; 178 179static const 180struct processor_costs i486_cost = { /* 486 specific costs */ 181 COSTS_N_INSNS (1), /* cost of an add instruction */ 182 COSTS_N_INSNS (1), /* cost of a lea instruction */ 183 COSTS_N_INSNS (3), /* variable shift costs */ 184 COSTS_N_INSNS (2), /* constant shift costs */ 185 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */ 186 COSTS_N_INSNS (12), /* HI */ 187 COSTS_N_INSNS (12), /* SI */ 188 COSTS_N_INSNS (12), /* DI */ 189 COSTS_N_INSNS (12)}, /* other */ 190 1, /* cost of multiply per each bit set */ 191 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */ 192 COSTS_N_INSNS (40), /* HI */ 193 COSTS_N_INSNS (40), /* SI */ 194 COSTS_N_INSNS (40), /* DI */ 195 COSTS_N_INSNS (40)}, /* other */ 196 COSTS_N_INSNS (3), /* cost of movsx */ 197 COSTS_N_INSNS (2), /* cost of movzx */ 198 15, /* "large" insn */ 199 3, /* MOVE_RATIO */ 200 4, /* cost for loading QImode using movzbl */ 201 {2, 4, 2}, /* cost of loading integer registers 202 in QImode, HImode and SImode. 203 Relative to reg-reg move (2). 
*/ 204 {2, 4, 2}, /* cost of storing integer registers */ 205 2, /* cost of reg,reg fld/fst */ 206 {8, 8, 8}, /* cost of loading fp registers 207 in SFmode, DFmode and XFmode */ 208 {8, 8, 8}, /* cost of storing fp registers 209 in SFmode, DFmode and XFmode */ 210 2, /* cost of moving MMX register */ 211 {4, 8}, /* cost of loading MMX registers 212 in SImode and DImode */ 213 {4, 8}, /* cost of storing MMX registers 214 in SImode and DImode */ 215 2, /* cost of moving SSE register */ 216 {4, 8, 16}, /* cost of loading SSE registers 217 in SImode, DImode and TImode */ 218 {4, 8, 16}, /* cost of storing SSE registers 219 in SImode, DImode and TImode */ 220 3, /* MMX or SSE register to integer */ 221 0, /* size of prefetch block */ 222 0, /* number of parallel prefetches */ 223 1, /* Branch cost */ 224 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ 225 COSTS_N_INSNS (16), /* cost of FMUL instruction. */ 226 COSTS_N_INSNS (73), /* cost of FDIV instruction. */ 227 COSTS_N_INSNS (3), /* cost of FABS instruction. */ 228 COSTS_N_INSNS (3), /* cost of FCHS instruction. */ 229 COSTS_N_INSNS (83), /* cost of FSQRT instruction. 
*/ 230}; 231 232static const 233struct processor_costs pentium_cost = { 234 COSTS_N_INSNS (1), /* cost of an add instruction */ 235 COSTS_N_INSNS (1), /* cost of a lea instruction */ 236 COSTS_N_INSNS (4), /* variable shift costs */ 237 COSTS_N_INSNS (1), /* constant shift costs */ 238 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */ 239 COSTS_N_INSNS (11), /* HI */ 240 COSTS_N_INSNS (11), /* SI */ 241 COSTS_N_INSNS (11), /* DI */ 242 COSTS_N_INSNS (11)}, /* other */ 243 0, /* cost of multiply per each bit set */ 244 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */ 245 COSTS_N_INSNS (25), /* HI */ 246 COSTS_N_INSNS (25), /* SI */ 247 COSTS_N_INSNS (25), /* DI */ 248 COSTS_N_INSNS (25)}, /* other */ 249 COSTS_N_INSNS (3), /* cost of movsx */ 250 COSTS_N_INSNS (2), /* cost of movzx */ 251 8, /* "large" insn */ 252 6, /* MOVE_RATIO */ 253 6, /* cost for loading QImode using movzbl */ 254 {2, 4, 2}, /* cost of loading integer registers 255 in QImode, HImode and SImode. 256 Relative to reg-reg move (2). */ 257 {2, 4, 2}, /* cost of storing integer registers */ 258 2, /* cost of reg,reg fld/fst */ 259 {2, 2, 6}, /* cost of loading fp registers 260 in SFmode, DFmode and XFmode */ 261 {4, 4, 6}, /* cost of storing fp registers 262 in SFmode, DFmode and XFmode */ 263 8, /* cost of moving MMX register */ 264 {8, 8}, /* cost of loading MMX registers 265 in SImode and DImode */ 266 {8, 8}, /* cost of storing MMX registers 267 in SImode and DImode */ 268 2, /* cost of moving SSE register */ 269 {4, 8, 16}, /* cost of loading SSE registers 270 in SImode, DImode and TImode */ 271 {4, 8, 16}, /* cost of storing SSE registers 272 in SImode, DImode and TImode */ 273 3, /* MMX or SSE register to integer */ 274 0, /* size of prefetch block */ 275 0, /* number of parallel prefetches */ 276 2, /* Branch cost */ 277 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ 278 COSTS_N_INSNS (3), /* cost of FMUL instruction. 
*/ 279 COSTS_N_INSNS (39), /* cost of FDIV instruction. */ 280 COSTS_N_INSNS (1), /* cost of FABS instruction. */ 281 COSTS_N_INSNS (1), /* cost of FCHS instruction. */ 282 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ 283}; 284 285static const 286struct processor_costs pentiumpro_cost = { 287 COSTS_N_INSNS (1), /* cost of an add instruction */ 288 COSTS_N_INSNS (1), /* cost of a lea instruction */ 289 COSTS_N_INSNS (1), /* variable shift costs */ 290 COSTS_N_INSNS (1), /* constant shift costs */ 291 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ 292 COSTS_N_INSNS (4), /* HI */ 293 COSTS_N_INSNS (4), /* SI */ 294 COSTS_N_INSNS (4), /* DI */ 295 COSTS_N_INSNS (4)}, /* other */ 296 0, /* cost of multiply per each bit set */ 297 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */ 298 COSTS_N_INSNS (17), /* HI */ 299 COSTS_N_INSNS (17), /* SI */ 300 COSTS_N_INSNS (17), /* DI */ 301 COSTS_N_INSNS (17)}, /* other */ 302 COSTS_N_INSNS (1), /* cost of movsx */ 303 COSTS_N_INSNS (1), /* cost of movzx */ 304 8, /* "large" insn */ 305 6, /* MOVE_RATIO */ 306 2, /* cost for loading QImode using movzbl */ 307 {4, 4, 4}, /* cost of loading integer registers 308 in QImode, HImode and SImode. 309 Relative to reg-reg move (2). 
*/ 310 {2, 2, 2}, /* cost of storing integer registers */ 311 2, /* cost of reg,reg fld/fst */ 312 {2, 2, 6}, /* cost of loading fp registers 313 in SFmode, DFmode and XFmode */ 314 {4, 4, 6}, /* cost of storing fp registers 315 in SFmode, DFmode and XFmode */ 316 2, /* cost of moving MMX register */ 317 {2, 2}, /* cost of loading MMX registers 318 in SImode and DImode */ 319 {2, 2}, /* cost of storing MMX registers 320 in SImode and DImode */ 321 2, /* cost of moving SSE register */ 322 {2, 2, 8}, /* cost of loading SSE registers 323 in SImode, DImode and TImode */ 324 {2, 2, 8}, /* cost of storing SSE registers 325 in SImode, DImode and TImode */ 326 3, /* MMX or SSE register to integer */ 327 32, /* size of prefetch block */ 328 6, /* number of parallel prefetches */ 329 2, /* Branch cost */ 330 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ 331 COSTS_N_INSNS (5), /* cost of FMUL instruction. */ 332 COSTS_N_INSNS (56), /* cost of FDIV instruction. */ 333 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 334 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 335 COSTS_N_INSNS (56), /* cost of FSQRT instruction. 
*/ 336}; 337 338static const 339struct processor_costs geode_cost = { 340 COSTS_N_INSNS (1), /* cost of an add instruction */ 341 COSTS_N_INSNS (1), /* cost of a lea instruction */ 342 COSTS_N_INSNS (2), /* variable shift costs */ 343 COSTS_N_INSNS (1), /* constant shift costs */ 344 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 345 COSTS_N_INSNS (4), /* HI */ 346 COSTS_N_INSNS (7), /* SI */ 347 COSTS_N_INSNS (7), /* DI */ 348 COSTS_N_INSNS (7)}, /* other */ 349 0, /* cost of multiply per each bit set */ 350 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */ 351 COSTS_N_INSNS (23), /* HI */ 352 COSTS_N_INSNS (39), /* SI */ 353 COSTS_N_INSNS (39), /* DI */ 354 COSTS_N_INSNS (39)}, /* other */ 355 COSTS_N_INSNS (1), /* cost of movsx */ 356 COSTS_N_INSNS (1), /* cost of movzx */ 357 8, /* "large" insn */ 358 4, /* MOVE_RATIO */ 359 1, /* cost for loading QImode using movzbl */ 360 {1, 1, 1}, /* cost of loading integer registers 361 in QImode, HImode and SImode. 362 Relative to reg-reg move (2). */ 363 {1, 1, 1}, /* cost of storing integer registers */ 364 1, /* cost of reg,reg fld/fst */ 365 {1, 1, 1}, /* cost of loading fp registers 366 in SFmode, DFmode and XFmode */ 367 {4, 6, 6}, /* cost of storing fp registers 368 in SFmode, DFmode and XFmode */ 369 370 1, /* cost of moving MMX register */ 371 {1, 1}, /* cost of loading MMX registers 372 in SImode and DImode */ 373 {1, 1}, /* cost of storing MMX registers 374 in SImode and DImode */ 375 1, /* cost of moving SSE register */ 376 {1, 1, 1}, /* cost of loading SSE registers 377 in SImode, DImode and TImode */ 378 {1, 1, 1}, /* cost of storing SSE registers 379 in SImode, DImode and TImode */ 380 1, /* MMX or SSE register to integer */ 381 32, /* size of prefetch block */ 382 1, /* number of parallel prefetches */ 383 1, /* Branch cost */ 384 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ 385 COSTS_N_INSNS (11), /* cost of FMUL instruction. 
*/ 386 COSTS_N_INSNS (47), /* cost of FDIV instruction. */ 387 COSTS_N_INSNS (1), /* cost of FABS instruction. */ 388 COSTS_N_INSNS (1), /* cost of FCHS instruction. */ 389 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */ 390}; 391 392static const 393struct processor_costs k6_cost = { 394 COSTS_N_INSNS (1), /* cost of an add instruction */ 395 COSTS_N_INSNS (2), /* cost of a lea instruction */ 396 COSTS_N_INSNS (1), /* variable shift costs */ 397 COSTS_N_INSNS (1), /* constant shift costs */ 398 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 399 COSTS_N_INSNS (3), /* HI */ 400 COSTS_N_INSNS (3), /* SI */ 401 COSTS_N_INSNS (3), /* DI */ 402 COSTS_N_INSNS (3)}, /* other */ 403 0, /* cost of multiply per each bit set */ 404 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 405 COSTS_N_INSNS (18), /* HI */ 406 COSTS_N_INSNS (18), /* SI */ 407 COSTS_N_INSNS (18), /* DI */ 408 COSTS_N_INSNS (18)}, /* other */ 409 COSTS_N_INSNS (2), /* cost of movsx */ 410 COSTS_N_INSNS (2), /* cost of movzx */ 411 8, /* "large" insn */ 412 4, /* MOVE_RATIO */ 413 3, /* cost for loading QImode using movzbl */ 414 {4, 5, 4}, /* cost of loading integer registers 415 in QImode, HImode and SImode. 416 Relative to reg-reg move (2). 
*/ 417 {2, 3, 2}, /* cost of storing integer registers */ 418 4, /* cost of reg,reg fld/fst */ 419 {6, 6, 6}, /* cost of loading fp registers 420 in SFmode, DFmode and XFmode */ 421 {4, 4, 4}, /* cost of storing fp registers 422 in SFmode, DFmode and XFmode */ 423 2, /* cost of moving MMX register */ 424 {2, 2}, /* cost of loading MMX registers 425 in SImode and DImode */ 426 {2, 2}, /* cost of storing MMX registers 427 in SImode and DImode */ 428 2, /* cost of moving SSE register */ 429 {2, 2, 8}, /* cost of loading SSE registers 430 in SImode, DImode and TImode */ 431 {2, 2, 8}, /* cost of storing SSE registers 432 in SImode, DImode and TImode */ 433 6, /* MMX or SSE register to integer */ 434 32, /* size of prefetch block */ 435 1, /* number of parallel prefetches */ 436 1, /* Branch cost */ 437 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */ 438 COSTS_N_INSNS (2), /* cost of FMUL instruction. */ 439 COSTS_N_INSNS (56), /* cost of FDIV instruction. */ 440 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 441 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 442 COSTS_N_INSNS (56), /* cost of FSQRT instruction. 
*/ 443}; 444 445static const 446struct processor_costs athlon_cost = { 447 COSTS_N_INSNS (1), /* cost of an add instruction */ 448 COSTS_N_INSNS (2), /* cost of a lea instruction */ 449 COSTS_N_INSNS (1), /* variable shift costs */ 450 COSTS_N_INSNS (1), /* constant shift costs */ 451 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */ 452 COSTS_N_INSNS (5), /* HI */ 453 COSTS_N_INSNS (5), /* SI */ 454 COSTS_N_INSNS (5), /* DI */ 455 COSTS_N_INSNS (5)}, /* other */ 456 0, /* cost of multiply per each bit set */ 457 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 458 COSTS_N_INSNS (26), /* HI */ 459 COSTS_N_INSNS (42), /* SI */ 460 COSTS_N_INSNS (74), /* DI */ 461 COSTS_N_INSNS (74)}, /* other */ 462 COSTS_N_INSNS (1), /* cost of movsx */ 463 COSTS_N_INSNS (1), /* cost of movzx */ 464 8, /* "large" insn */ 465 9, /* MOVE_RATIO */ 466 4, /* cost for loading QImode using movzbl */ 467 {3, 4, 3}, /* cost of loading integer registers 468 in QImode, HImode and SImode. 469 Relative to reg-reg move (2). */ 470 {3, 4, 3}, /* cost of storing integer registers */ 471 4, /* cost of reg,reg fld/fst */ 472 {4, 4, 12}, /* cost of loading fp registers 473 in SFmode, DFmode and XFmode */ 474 {6, 6, 8}, /* cost of storing fp registers 475 in SFmode, DFmode and XFmode */ 476 2, /* cost of moving MMX register */ 477 {4, 4}, /* cost of loading MMX registers 478 in SImode and DImode */ 479 {4, 4}, /* cost of storing MMX registers 480 in SImode and DImode */ 481 2, /* cost of moving SSE register */ 482 {4, 4, 6}, /* cost of loading SSE registers 483 in SImode, DImode and TImode */ 484 {4, 4, 5}, /* cost of storing SSE registers 485 in SImode, DImode and TImode */ 486 5, /* MMX or SSE register to integer */ 487 64, /* size of prefetch block */ 488 6, /* number of parallel prefetches */ 489 5, /* Branch cost */ 490 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ 491 COSTS_N_INSNS (4), /* cost of FMUL instruction. */ 492 COSTS_N_INSNS (24), /* cost of FDIV instruction. 
*/ 493 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 494 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 495 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ 496}; 497 498static const 499struct processor_costs k8_cost = { 500 COSTS_N_INSNS (1), /* cost of an add instruction */ 501 COSTS_N_INSNS (2), /* cost of a lea instruction */ 502 COSTS_N_INSNS (1), /* variable shift costs */ 503 COSTS_N_INSNS (1), /* constant shift costs */ 504 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 505 COSTS_N_INSNS (4), /* HI */ 506 COSTS_N_INSNS (3), /* SI */ 507 COSTS_N_INSNS (4), /* DI */ 508 COSTS_N_INSNS (5)}, /* other */ 509 0, /* cost of multiply per each bit set */ 510 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 511 COSTS_N_INSNS (26), /* HI */ 512 COSTS_N_INSNS (42), /* SI */ 513 COSTS_N_INSNS (74), /* DI */ 514 COSTS_N_INSNS (74)}, /* other */ 515 COSTS_N_INSNS (1), /* cost of movsx */ 516 COSTS_N_INSNS (1), /* cost of movzx */ 517 8, /* "large" insn */ 518 9, /* MOVE_RATIO */ 519 4, /* cost for loading QImode using movzbl */ 520 {3, 4, 3}, /* cost of loading integer registers 521 in QImode, HImode and SImode. 522 Relative to reg-reg move (2). 
*/ 523 {3, 4, 3}, /* cost of storing integer registers */ 524 4, /* cost of reg,reg fld/fst */ 525 {4, 4, 12}, /* cost of loading fp registers 526 in SFmode, DFmode and XFmode */ 527 {6, 6, 8}, /* cost of storing fp registers 528 in SFmode, DFmode and XFmode */ 529 2, /* cost of moving MMX register */ 530 {3, 3}, /* cost of loading MMX registers 531 in SImode and DImode */ 532 {4, 4}, /* cost of storing MMX registers 533 in SImode and DImode */ 534 2, /* cost of moving SSE register */ 535 {4, 3, 6}, /* cost of loading SSE registers 536 in SImode, DImode and TImode */ 537 {4, 4, 5}, /* cost of storing SSE registers 538 in SImode, DImode and TImode */ 539 5, /* MMX or SSE register to integer */ 540 64, /* size of prefetch block */ 541 6, /* number of parallel prefetches */ 542 5, /* Branch cost */ 543 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ 544 COSTS_N_INSNS (4), /* cost of FMUL instruction. */ 545 COSTS_N_INSNS (19), /* cost of FDIV instruction. */ 546 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 547 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 548 COSTS_N_INSNS (35), /* cost of FSQRT instruction. 
*/ 549}; 550 551struct processor_costs amdfam10_cost = { 552 COSTS_N_INSNS (1), /* cost of an add instruction */ 553 COSTS_N_INSNS (2), /* cost of a lea instruction */ 554 COSTS_N_INSNS (1), /* variable shift costs */ 555 COSTS_N_INSNS (1), /* constant shift costs */ 556 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 557 COSTS_N_INSNS (4), /* HI */ 558 COSTS_N_INSNS (3), /* SI */ 559 COSTS_N_INSNS (4), /* DI */ 560 COSTS_N_INSNS (5)}, /* other */ 561 0, /* cost of multiply per each bit set */ 562 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ 563 COSTS_N_INSNS (35), /* HI */ 564 COSTS_N_INSNS (51), /* SI */ 565 COSTS_N_INSNS (83), /* DI */ 566 COSTS_N_INSNS (83)}, /* other */ 567 COSTS_N_INSNS (1), /* cost of movsx */ 568 COSTS_N_INSNS (1), /* cost of movzx */ 569 8, /* "large" insn */ 570 9, /* MOVE_RATIO */ 571 4, /* cost for loading QImode using movzbl */ 572 {3, 4, 3}, /* cost of loading integer registers 573 in QImode, HImode and SImode. 574 Relative to reg-reg move (2). 
*/ 575 {3, 4, 3}, /* cost of storing integer registers */ 576 4, /* cost of reg,reg fld/fst */ 577 {4, 4, 12}, /* cost of loading fp registers 578 in SFmode, DFmode and XFmode */ 579 {6, 6, 8}, /* cost of storing fp registers 580 in SFmode, DFmode and XFmode */ 581 2, /* cost of moving MMX register */ 582 {3, 3}, /* cost of loading MMX registers 583 in SImode and DImode */ 584 {4, 4}, /* cost of storing MMX registers 585 in SImode and DImode */ 586 2, /* cost of moving SSE register */ 587 {4, 4, 3}, /* cost of loading SSE registers 588 in SImode, DImode and TImode */ 589 {4, 4, 5}, /* cost of storing SSE registers 590 in SImode, DImode and TImode */ 591 3, /* MMX or SSE register to integer */ 592 /* On K8 593 MOVD reg64, xmmreg Double FSTORE 4 594 MOVD reg32, xmmreg Double FSTORE 4 595 On AMDFAM10 596 MOVD reg64, xmmreg Double FADD 3 597 1/1 1/1 598 MOVD reg32, xmmreg Double FADD 3 599 1/1 1/1 */ 600 64, /* size of prefetch block */ 601 /* New AMD processors never drop prefetches; if they cannot be performed 602 immediately, they are queued. We set number of simultaneous prefetches 603 to a large constant to reflect this (it probably is not a good idea not 604 to limit number of prefetches at all, as their execution also takes some 605 time). */ 606 100, /* number of parallel prefetches */ 607 5, /* Branch cost */ 608 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ 609 COSTS_N_INSNS (4), /* cost of FMUL instruction. */ 610 COSTS_N_INSNS (19), /* cost of FDIV instruction. */ 611 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 612 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 613 COSTS_N_INSNS (35), /* cost of FSQRT instruction. 
*/ 614}; 615 616static const 617struct processor_costs pentium4_cost = { 618 COSTS_N_INSNS (1), /* cost of an add instruction */ 619 COSTS_N_INSNS (3), /* cost of a lea instruction */ 620 COSTS_N_INSNS (4), /* variable shift costs */ 621 COSTS_N_INSNS (4), /* constant shift costs */ 622 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */ 623 COSTS_N_INSNS (15), /* HI */ 624 COSTS_N_INSNS (15), /* SI */ 625 COSTS_N_INSNS (15), /* DI */ 626 COSTS_N_INSNS (15)}, /* other */ 627 0, /* cost of multiply per each bit set */ 628 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */ 629 COSTS_N_INSNS (56), /* HI */ 630 COSTS_N_INSNS (56), /* SI */ 631 COSTS_N_INSNS (56), /* DI */ 632 COSTS_N_INSNS (56)}, /* other */ 633 COSTS_N_INSNS (1), /* cost of movsx */ 634 COSTS_N_INSNS (1), /* cost of movzx */ 635 16, /* "large" insn */ 636 6, /* MOVE_RATIO */ 637 2, /* cost for loading QImode using movzbl */ 638 {4, 5, 4}, /* cost of loading integer registers 639 in QImode, HImode and SImode. 640 Relative to reg-reg move (2). */ 641 {2, 3, 2}, /* cost of storing integer registers */ 642 2, /* cost of reg,reg fld/fst */ 643 {2, 2, 6}, /* cost of loading fp registers 644 in SFmode, DFmode and XFmode */ 645 {4, 4, 6}, /* cost of storing fp registers 646 in SFmode, DFmode and XFmode */ 647 2, /* cost of moving MMX register */ 648 {2, 2}, /* cost of loading MMX registers 649 in SImode and DImode */ 650 {2, 2}, /* cost of storing MMX registers 651 in SImode and DImode */ 652 12, /* cost of moving SSE register */ 653 {12, 12, 12}, /* cost of loading SSE registers 654 in SImode, DImode and TImode */ 655 {2, 2, 8}, /* cost of storing SSE registers 656 in SImode, DImode and TImode */ 657 10, /* MMX or SSE register to integer */ 658 64, /* size of prefetch block */ 659 6, /* number of parallel prefetches */ 660 2, /* Branch cost */ 661 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ 662 COSTS_N_INSNS (7), /* cost of FMUL instruction. 
*/ 663 COSTS_N_INSNS (43), /* cost of FDIV instruction. */ 664 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 665 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 666 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */ 667}; 668 669static const 670struct processor_costs nocona_cost = { 671 COSTS_N_INSNS (1), /* cost of an add instruction */ 672 COSTS_N_INSNS (1), /* cost of a lea instruction */ 673 COSTS_N_INSNS (1), /* variable shift costs */ 674 COSTS_N_INSNS (1), /* constant shift costs */ 675 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */ 676 COSTS_N_INSNS (10), /* HI */ 677 COSTS_N_INSNS (10), /* SI */ 678 COSTS_N_INSNS (10), /* DI */ 679 COSTS_N_INSNS (10)}, /* other */ 680 0, /* cost of multiply per each bit set */ 681 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */ 682 COSTS_N_INSNS (66), /* HI */ 683 COSTS_N_INSNS (66), /* SI */ 684 COSTS_N_INSNS (66), /* DI */ 685 COSTS_N_INSNS (66)}, /* other */ 686 COSTS_N_INSNS (1), /* cost of movsx */ 687 COSTS_N_INSNS (1), /* cost of movzx */ 688 16, /* "large" insn */ 689 17, /* MOVE_RATIO */ 690 4, /* cost for loading QImode using movzbl */ 691 {4, 4, 4}, /* cost of loading integer registers 692 in QImode, HImode and SImode. 693 Relative to reg-reg move (2). 
*/ 694 {4, 4, 4}, /* cost of storing integer registers */ 695 3, /* cost of reg,reg fld/fst */ 696 {12, 12, 12}, /* cost of loading fp registers 697 in SFmode, DFmode and XFmode */ 698 {4, 4, 4}, /* cost of storing fp registers 699 in SFmode, DFmode and XFmode */ 700 6, /* cost of moving MMX register */ 701 {12, 12}, /* cost of loading MMX registers 702 in SImode and DImode */ 703 {12, 12}, /* cost of storing MMX registers 704 in SImode and DImode */ 705 6, /* cost of moving SSE register */ 706 {12, 12, 12}, /* cost of loading SSE registers 707 in SImode, DImode and TImode */ 708 {12, 12, 12}, /* cost of storing SSE registers 709 in SImode, DImode and TImode */ 710 8, /* MMX or SSE register to integer */ 711 128, /* size of prefetch block */ 712 8, /* number of parallel prefetches */ 713 1, /* Branch cost */ 714 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ 715 COSTS_N_INSNS (8), /* cost of FMUL instruction. */ 716 COSTS_N_INSNS (40), /* cost of FDIV instruction. */ 717 COSTS_N_INSNS (3), /* cost of FABS instruction. */ 718 COSTS_N_INSNS (3), /* cost of FCHS instruction. */ 719 COSTS_N_INSNS (44), /* cost of FSQRT instruction. 
*/ 720}; 721 722static const 723struct processor_costs core2_cost = { 724 COSTS_N_INSNS (1), /* cost of an add instruction */ 725 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ 726 COSTS_N_INSNS (1), /* variable shift costs */ 727 COSTS_N_INSNS (1), /* constant shift costs */ 728 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 729 COSTS_N_INSNS (3), /* HI */ 730 COSTS_N_INSNS (3), /* SI */ 731 COSTS_N_INSNS (3), /* DI */ 732 COSTS_N_INSNS (3)}, /* other */ 733 0, /* cost of multiply per each bit set */ 734 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */ 735 COSTS_N_INSNS (22), /* HI */ 736 COSTS_N_INSNS (22), /* SI */ 737 COSTS_N_INSNS (22), /* DI */ 738 COSTS_N_INSNS (22)}, /* other */ 739 COSTS_N_INSNS (1), /* cost of movsx */ 740 COSTS_N_INSNS (1), /* cost of movzx */ 741 8, /* "large" insn */ 742 16, /* MOVE_RATIO */ 743 2, /* cost for loading QImode using movzbl */ 744 {6, 6, 6}, /* cost of loading integer registers 745 in QImode, HImode and SImode. 746 Relative to reg-reg move (2). */ 747 {4, 4, 4}, /* cost of storing integer registers */ 748 2, /* cost of reg,reg fld/fst */ 749 {6, 6, 6}, /* cost of loading fp registers 750 in SFmode, DFmode and XFmode */ 751 {4, 4, 4}, /* cost of loading integer registers */ 752 2, /* cost of moving MMX register */ 753 {6, 6}, /* cost of loading MMX registers 754 in SImode and DImode */ 755 {4, 4}, /* cost of storing MMX registers 756 in SImode and DImode */ 757 2, /* cost of moving SSE register */ 758 {6, 6, 6}, /* cost of loading SSE registers 759 in SImode, DImode and TImode */ 760 {4, 4, 4}, /* cost of storing SSE registers 761 in SImode, DImode and TImode */ 762 2, /* MMX or SSE register to integer */ 763 128, /* size of prefetch block */ 764 8, /* number of parallel prefetches */ 765 3, /* Branch cost */ 766 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ 767 COSTS_N_INSNS (5), /* cost of FMUL instruction. */ 768 COSTS_N_INSNS (32), /* cost of FDIV instruction. 
*/ 769 COSTS_N_INSNS (1), /* cost of FABS instruction. */ 770 COSTS_N_INSNS (1), /* cost of FCHS instruction. */ 771 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */ 772}; 773 774/* Generic64 should produce code tuned for Nocona and K8. */ 775static const 776struct processor_costs generic64_cost = { 777 COSTS_N_INSNS (1), /* cost of an add instruction */ 778 /* On all chips taken into consideration lea is 2 cycles and more. With 779 this cost however our current implementation of synth_mult results in 780 use of unnecessary temporary registers causing regression on several 781 SPECfp benchmarks. */ 782 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ 783 COSTS_N_INSNS (1), /* variable shift costs */ 784 COSTS_N_INSNS (1), /* constant shift costs */ 785 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 786 COSTS_N_INSNS (4), /* HI */ 787 COSTS_N_INSNS (3), /* SI */ 788 COSTS_N_INSNS (4), /* DI */ 789 COSTS_N_INSNS (2)}, /* other */ 790 0, /* cost of multiply per each bit set */ 791 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 792 COSTS_N_INSNS (26), /* HI */ 793 COSTS_N_INSNS (42), /* SI */ 794 COSTS_N_INSNS (74), /* DI */ 795 COSTS_N_INSNS (74)}, /* other */ 796 COSTS_N_INSNS (1), /* cost of movsx */ 797 COSTS_N_INSNS (1), /* cost of movzx */ 798 8, /* "large" insn */ 799 17, /* MOVE_RATIO */ 800 4, /* cost for loading QImode using movzbl */ 801 {4, 4, 4}, /* cost of loading integer registers 802 in QImode, HImode and SImode. 803 Relative to reg-reg move (2). 
*/ 804 {4, 4, 4}, /* cost of storing integer registers */ 805 4, /* cost of reg,reg fld/fst */ 806 {12, 12, 12}, /* cost of loading fp registers 807 in SFmode, DFmode and XFmode */ 808 {6, 6, 8}, /* cost of storing fp registers 809 in SFmode, DFmode and XFmode */ 810 2, /* cost of moving MMX register */ 811 {8, 8}, /* cost of loading MMX registers 812 in SImode and DImode */ 813 {8, 8}, /* cost of storing MMX registers 814 in SImode and DImode */ 815 2, /* cost of moving SSE register */ 816 {8, 8, 8}, /* cost of loading SSE registers 817 in SImode, DImode and TImode */ 818 {8, 8, 8}, /* cost of storing SSE registers 819 in SImode, DImode and TImode */ 820 5, /* MMX or SSE register to integer */ 821 64, /* size of prefetch block */ 822 6, /* number of parallel prefetches */ 823 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value 824 is increased to perhaps more appropriate value of 5. */ 825 3, /* Branch cost */ 826 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ 827 COSTS_N_INSNS (8), /* cost of FMUL instruction. */ 828 COSTS_N_INSNS (20), /* cost of FDIV instruction. */ 829 COSTS_N_INSNS (8), /* cost of FABS instruction. */ 830 COSTS_N_INSNS (8), /* cost of FCHS instruction. */ 831 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ 832}; 833 834/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. 
*/ 835static const 836struct processor_costs generic32_cost = { 837 COSTS_N_INSNS (1), /* cost of an add instruction */ 838 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ 839 COSTS_N_INSNS (1), /* variable shift costs */ 840 COSTS_N_INSNS (1), /* constant shift costs */ 841 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 842 COSTS_N_INSNS (4), /* HI */ 843 COSTS_N_INSNS (3), /* SI */ 844 COSTS_N_INSNS (4), /* DI */ 845 COSTS_N_INSNS (2)}, /* other */ 846 0, /* cost of multiply per each bit set */ 847 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 848 COSTS_N_INSNS (26), /* HI */ 849 COSTS_N_INSNS (42), /* SI */ 850 COSTS_N_INSNS (74), /* DI */ 851 COSTS_N_INSNS (74)}, /* other */ 852 COSTS_N_INSNS (1), /* cost of movsx */ 853 COSTS_N_INSNS (1), /* cost of movzx */ 854 8, /* "large" insn */ 855 17, /* MOVE_RATIO */ 856 4, /* cost for loading QImode using movzbl */ 857 {4, 4, 4}, /* cost of loading integer registers 858 in QImode, HImode and SImode. 859 Relative to reg-reg move (2). */ 860 {4, 4, 4}, /* cost of storing integer registers */ 861 4, /* cost of reg,reg fld/fst */ 862 {12, 12, 12}, /* cost of loading fp registers 863 in SFmode, DFmode and XFmode */ 864 {6, 6, 8}, /* cost of storing fp registers 865 in SFmode, DFmode and XFmode */ 866 2, /* cost of moving MMX register */ 867 {8, 8}, /* cost of loading MMX registers 868 in SImode and DImode */ 869 {8, 8}, /* cost of storing MMX registers 870 in SImode and DImode */ 871 2, /* cost of moving SSE register */ 872 {8, 8, 8}, /* cost of loading SSE registers 873 in SImode, DImode and TImode */ 874 {8, 8, 8}, /* cost of storing SSE registers 875 in SImode, DImode and TImode */ 876 5, /* MMX or SSE register to integer */ 877 64, /* size of prefetch block */ 878 6, /* number of parallel prefetches */ 879 3, /* Branch cost */ 880 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ 881 COSTS_N_INSNS (8), /* cost of FMUL instruction. */ 882 COSTS_N_INSNS (20), /* cost of FDIV instruction. 
*/ 883 COSTS_N_INSNS (8), /* cost of FABS instruction. */ 884 COSTS_N_INSNS (8), /* cost of FCHS instruction. */ 885 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ 886}; 887 888const struct processor_costs *ix86_cost = &pentium_cost; 889 890/* Processor feature/optimization bitmasks. */ 891#define m_386 (1<<PROCESSOR_I386) 892#define m_486 (1<<PROCESSOR_I486) 893#define m_PENT (1<<PROCESSOR_PENTIUM) 894#define m_PPRO (1<<PROCESSOR_PENTIUMPRO) 895#define m_GEODE (1<<PROCESSOR_GEODE) 896#define m_K6_GEODE (m_K6 | m_GEODE) 897#define m_K6 (1<<PROCESSOR_K6) 898#define m_ATHLON (1<<PROCESSOR_ATHLON) 899#define m_PENT4 (1<<PROCESSOR_PENTIUM4) 900#define m_K8 (1<<PROCESSOR_K8) 901#define m_ATHLON_K8 (m_K8 | m_ATHLON) 902#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10) 903#define m_NOCONA (1<<PROCESSOR_NOCONA) 904#define m_CORE2 (1<<PROCESSOR_CORE2) 905#define m_GENERIC32 (1<<PROCESSOR_GENERIC32) 906#define m_GENERIC64 (1<<PROCESSOR_GENERIC64) 907#define m_GENERIC (m_GENERIC32 | m_GENERIC64) 908#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10) 909 910/* Generic instruction choice should be common subset of supported CPUs 911 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */ 912 913/* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for 914 Generic64 seems like good code size tradeoff. We can't enable it for 32bit 915 generic because it is not working well with PPro base chips. 
*/ 916const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 917 | m_GENERIC64; 918const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 919 | m_NOCONA | m_CORE2 | m_GENERIC; 920const int x86_zero_extend_with_and = m_486 | m_PENT; 921/* Enable to zero extend integer registers to avoid partial dependencies */ 922const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA 923 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */; 924const int x86_double_with_add = ~m_386; 925const int x86_use_bit_test = m_386; 926const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 927 | m_K6 | m_CORE2 | m_GENERIC; 928const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 929 | m_NOCONA; 930const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10; 931const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 932 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; 933/* Branch hints were put in P4 based on simulation result. But 934 after P4 was made, no performance benefit was observed with 935 branch hints. It also increases the code size. As the result, 936 icc never generates branch hints. */ 937const int x86_branch_hints = 0; 938const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; 939 /*m_GENERIC | m_ATHLON_K8 ? */ 940/* We probably ought to watch for partial register stalls on Generic32 941 compilation setting as well. However in current implementation the 942 partial register stalls are not eliminated very well - they can 943 be introduced via subregs synthesized by combine and can happen 944 in caller/callee saving sequences. 945 Because this option pays back little on PPro based chips and is in conflict 946 with partial reg. dependencies used by Athlon/P4 based chips, it is better 947 to leave it off for generic32 for now. 
*/ 948const int x86_partial_reg_stall = m_PPRO; 949const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC; 950const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE; 951const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT 952 | m_CORE2 | m_GENERIC); 953const int x86_use_mov0 = m_K6; 954const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC); 955const int x86_read_modify_write = ~m_PENT; 956const int x86_read_modify = ~(m_PENT | m_PPRO); 957const int x86_split_long_moves = m_PPRO; 958const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 959 | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC; 960 /* m_PENT4 ? */ 961const int x86_fast_prefix = ~(m_PENT | m_486 | m_386); 962const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA; 963const int x86_qimode_math = ~(0); 964const int x86_promote_qi_regs = 0; 965/* On PPro this flag is meant to avoid partial register stalls. Just like 966 the x86_partial_reg_stall this option might be considered for Generic32 967 if our scheme for avoiding partial stalls was more effective. 
*/ 968const int x86_himode_math = ~(m_PPRO); 969const int x86_promote_hi_regs = m_PPRO; 970/* Enable if add/sub rsp is preferred over 1 or 2 push/pop */ 971const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA 972 | m_CORE2 | m_GENERIC; 973const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486 974 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; 975const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA 976 | m_CORE2 | m_GENERIC; 977const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386 978 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; 979/* Enable if integer moves are preferred for DFmode copies */ 980const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA 981 | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE); 982const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA 983 | m_CORE2 | m_GENERIC; 984const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA 985 | m_CORE2 | m_GENERIC; 986/* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required 987 for outgoing arguments will be computed and placed into the variable 988 `current_function_outgoing_args_size'. No space will be pushed onto the stack 989 for each call; instead, the function prologue should increase the stack frame 990 size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is 991 not proper. 
*/ 992const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4 993 | m_NOCONA | m_PPRO | m_CORE2 994 | m_GENERIC; 995const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC; 996const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC; 997const int x86_shift1 = ~m_486; 998const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO 999 | m_ATHLON_K8_AMDFAM10 | m_PENT4 1000 | m_NOCONA | m_CORE2 | m_GENERIC; 1001/* In Generic model we have an conflict here in between PPro/Pentium4 based chips 1002 that thread 128bit SSE registers as single units versus K8 based chips that 1003 divide SSE registers to two 64bit halves. 1004 x86_sse_partial_reg_dependency promote all store destinations to be 128bit 1005 to allow register renaming on 128bit SSE units, but usually results in one 1006 extra microop on 64bit SSE units. Experimental results shows that disabling 1007 this option on P4 brings over 20% SPECfp regression, while enabling it on 1008 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling 1009 of moves. */ 1010const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 1011 | m_GENERIC | m_AMDFAM10; 1012/* Set for machines where the type and dependencies are resolved on SSE 1013 register parts instead of whole registers, so we may maintain just 1014 lower part of scalar values in proper format leaving the upper part 1015 undefined. 
*/ 1016const int x86_sse_split_regs = m_ATHLON_K8; 1017/* Code generation for scalar reg-reg moves of single and double precision data: 1018 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true) 1019 movaps reg, reg 1020 else 1021 movss reg, reg 1022 if (x86_sse_partial_reg_dependency == true) 1023 movapd reg, reg 1024 else 1025 movsd reg, reg 1026 1027 Code generation for scalar loads of double precision data: 1028 if (x86_sse_split_regs == true) 1029 movlpd mem, reg (gas syntax) 1030 else 1031 movsd mem, reg 1032 1033 Code generation for unaligned packed loads of single precision data 1034 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency): 1035 if (x86_sse_unaligned_move_optimal) 1036 movups mem, reg 1037 1038 if (x86_sse_partial_reg_dependency == true) 1039 { 1040 xorps reg, reg 1041 movlps mem, reg 1042 movhps mem+8, reg 1043 } 1044 else 1045 { 1046 movlps mem, reg 1047 movhps mem+8, reg 1048 } 1049 1050 Code generation for unaligned packed loads of double precision data 1051 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs): 1052 if (x86_sse_unaligned_move_optimal) 1053 movupd mem, reg 1054 1055 if (x86_sse_split_regs == true) 1056 { 1057 movlpd mem, reg 1058 movhpd mem+8, reg 1059 } 1060 else 1061 { 1062 movsd mem, reg 1063 movhpd mem+8, reg 1064 } 1065 */ 1066const int x86_sse_unaligned_move_optimal = m_AMDFAM10; 1067const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10; 1068const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA; 1069const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10; 1070const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2; 1071const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC); 1072 1073/* ??? Allowing interunit moves makes it all too easy for the compiler to put 1074 integer data in xmm registers. Which results in pretty abysmal code. 
*/ 1075const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */; 1076 1077const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4 1078 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC; 1079/* Some CPU cores are not able to predict more than 4 branch instructions in 1080 the 16 byte window. */ 1081const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 1082 | m_NOCONA | m_CORE2 | m_GENERIC; 1083const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT 1084 | m_CORE2 | m_GENERIC; 1085const int x86_use_bt = m_ATHLON_K8_AMDFAM10; 1086/* Compare and exchange was added for 80486. */ 1087const int x86_cmpxchg = ~m_386; 1088/* Compare and exchange 8 bytes was added for pentium. */ 1089const int x86_cmpxchg8b = ~(m_386 | m_486); 1090/* Exchange and add was added for 80486. */ 1091const int x86_xadd = ~m_386; 1092const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC; 1093 1094/* In case the average insn count for single function invocation is 1095 lower than this constant, emit fast (but longer) prologue and 1096 epilogue code. */ 1097#define FAST_PROLOGUE_INSN_COUNT 20 1098 1099/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */ 1100static const char *const qi_reg_name[] = QI_REGISTER_NAMES; 1101static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; 1102static const char *const hi_reg_name[] = HI_REGISTER_NAMES; 1103 1104/* Array of the smallest class containing reg number REGNO, indexed by 1105 REGNO. Used by REGNO_REG_CLASS in i386.h. 
*/ 1106 1107enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = 1108{ 1109 /* ax, dx, cx, bx */ 1110 AREG, DREG, CREG, BREG, 1111 /* si, di, bp, sp */ 1112 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS, 1113 /* FP registers */ 1114 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS, 1115 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, 1116 /* arg pointer */ 1117 NON_Q_REGS, 1118 /* flags, fpsr, dirflag, frame */ 1119 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS, 1120 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 1121 SSE_REGS, SSE_REGS, 1122 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, 1123 MMX_REGS, MMX_REGS, 1124 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 1125 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 1126 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 1127 SSE_REGS, SSE_REGS, 1128}; 1129 1130/* The "default" register map used in 32bit mode. */ 1131 1132int const dbx_register_map[FIRST_PSEUDO_REGISTER] = 1133{ 1134 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */ 1135 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */ 1136 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 1137 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */ 1138 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */ 1139 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 1140 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 1141}; 1142 1143static int const x86_64_int_parameter_registers[6] = 1144{ 1145 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/, 1146 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */ 1147}; 1148 1149static int const x86_64_int_return_registers[4] = 1150{ 1151 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/ 1152}; 1153 1154/* The "default" register map used in 64bit mode. 
*/ 1155int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = 1156{ 1157 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */ 1158 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */ 1159 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 1160 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */ 1161 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */ 1162 8,9,10,11,12,13,14,15, /* extended integer registers */ 1163 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */ 1164}; 1165 1166/* Define the register numbers to be used in Dwarf debugging information. 1167 The SVR4 reference port C compiler uses the following register numbers 1168 in its Dwarf output code: 1169 0 for %eax (gcc regno = 0) 1170 1 for %ecx (gcc regno = 2) 1171 2 for %edx (gcc regno = 1) 1172 3 for %ebx (gcc regno = 3) 1173 4 for %esp (gcc regno = 7) 1174 5 for %ebp (gcc regno = 6) 1175 6 for %esi (gcc regno = 4) 1176 7 for %edi (gcc regno = 5) 1177 The following three DWARF register numbers are never generated by 1178 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 1179 believes these numbers have these meanings. 1180 8 for %eip (no gcc equivalent) 1181 9 for %eflags (gcc regno = 17) 1182 10 for %trapno (no gcc equivalent) 1183 It is not at all clear how we should number the FP stack registers 1184 for the x86 architecture. If the version of SDB on x86/svr4 were 1185 a bit less brain dead with respect to floating-point then we would 1186 have a precedent to follow with respect to DWARF register numbers 1187 for x86 FP registers, but the SDB on x86/svr4 is so completely 1188 broken with respect to FP registers that it is hardly worth thinking 1189 of it as something to strive for compatibility with. 1190 The version of x86/svr4 SDB I have at the moment does (partially) 1191 seem to believe that DWARF register number 11 is associated with 1192 the x86 register %st(0), but that's about all. 
Higher DWARF 1193 register numbers don't seem to be associated with anything in 1194 particular, and even for DWARF regno 11, SDB only seems to under- 1195 stand that it should say that a variable lives in %st(0) (when 1196 asked via an `=' command) if we said it was in DWARF regno 11, 1197 but SDB still prints garbage when asked for the value of the 1198 variable in question (via a `/' command). 1199 (Also note that the labels SDB prints for various FP stack regs 1200 when doing an `x' command are all wrong.) 1201 Note that these problems generally don't affect the native SVR4 1202 C compiler because it doesn't allow the use of -O with -g and 1203 because when it is *not* optimizing, it allocates a memory 1204 location for each floating-point variable, and the memory 1205 location is what gets described in the DWARF AT_location 1206 attribute for the variable in question. 1207 Regardless of the severe mental illness of the x86/svr4 SDB, we 1208 do something sensible here and we use the following DWARF 1209 register numbers. Note that these are all stack-top-relative 1210 numbers. 1211 11 for %st(0) (gcc regno = 8) 1212 12 for %st(1) (gcc regno = 9) 1213 13 for %st(2) (gcc regno = 10) 1214 14 for %st(3) (gcc regno = 11) 1215 15 for %st(4) (gcc regno = 12) 1216 16 for %st(5) (gcc regno = 13) 1217 17 for %st(6) (gcc regno = 14) 1218 18 for %st(7) (gcc regno = 15) 1219*/ 1220int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = 1221{ 1222 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */ 1223 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */ 1224 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 1225 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */ 1226 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */ 1227 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 1228 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 1229}; 1230 1231/* Test and compare insns in i386.md store the information needed to 1232 generate branch and scc insns here. 
*/ 1233 1234rtx ix86_compare_op0 = NULL_RTX; 1235rtx ix86_compare_op1 = NULL_RTX; 1236rtx ix86_compare_emitted = NULL_RTX; 1237 1238/* Size of the register save area. */ 1239#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16) 1240 1241/* Define the structure for the machine field in struct function. */ 1242 1243struct stack_local_entry GTY(()) 1244{ 1245 unsigned short mode; 1246 unsigned short n; 1247 rtx rtl; 1248 struct stack_local_entry *next; 1249}; 1250 1251/* Structure describing stack frame layout. 1252 Stack grows downward: 1253 1254 [arguments] 1255 <- ARG_POINTER 1256 saved pc 1257 1258 saved frame pointer if frame_pointer_needed 1259 <- HARD_FRAME_POINTER 1260 [saved regs] 1261 1262 [padding1] \ 1263 ) 1264 [va_arg registers] ( 1265 > to_allocate <- FRAME_POINTER 1266 [frame] ( 1267 ) 1268 [padding2] / 1269 */ 1270struct ix86_frame 1271{ 1272 int nregs; 1273 int padding1; 1274 int va_arg_size; 1275 HOST_WIDE_INT frame; 1276 int padding2; 1277 int outgoing_arguments_size; 1278 int red_zone_size; 1279 1280 HOST_WIDE_INT to_allocate; 1281 /* The offsets relative to ARG_POINTER. */ 1282 HOST_WIDE_INT frame_pointer_offset; 1283 HOST_WIDE_INT hard_frame_pointer_offset; 1284 HOST_WIDE_INT stack_pointer_offset; 1285 1286 /* When save_regs_using_mov is set, emit prologue using 1287 move instead of push instructions. */ 1288 bool save_regs_using_mov; 1289}; 1290 1291/* Code model option. */ 1292enum cmodel ix86_cmodel; 1293/* Asm dialect. */ 1294enum asm_dialect ix86_asm_dialect = ASM_ATT; 1295/* TLS dialects. */ 1296enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU; 1297 1298/* Which unit we are generating floating point math for. */ 1299enum fpmath_unit ix86_fpmath; 1300 1301/* Which cpu are we scheduling for. */ 1302enum processor_type ix86_tune; 1303/* Which instruction set architecture to use. */ 1304enum processor_type ix86_arch; 1305 1306/* true if sse prefetch instruction is not NOOP. 
*/ 1307int x86_prefetch_sse; 1308 1309/* true if cmpxchg16b is supported. */ 1310int x86_cmpxchg16b; 1311 1312/* ix86_regparm_string as a number */ 1313static int ix86_regparm; 1314 1315/* -mstackrealign option */ 1316extern int ix86_force_align_arg_pointer; 1317static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer"; 1318 1319/* Preferred alignment for stack boundary in bits. */ 1320unsigned int ix86_preferred_stack_boundary; 1321 1322/* Values 1-5: see jump.c */ 1323int ix86_branch_cost; 1324 1325/* Variables which are this size or smaller are put in the data/bss 1326 or ldata/lbss sections. */ 1327 1328int ix86_section_threshold = 65536; 1329 1330/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */ 1331char internal_label_prefix[16]; 1332int internal_label_prefix_len; 1333 1334static bool ix86_handle_option (size_t, const char *, int); 1335static void output_pic_addr_const (FILE *, rtx, int); 1336static void put_condition_code (enum rtx_code, enum machine_mode, 1337 int, int, FILE *); 1338static const char *get_some_local_dynamic_name (void); 1339static int get_some_local_dynamic_name_1 (rtx *, void *); 1340static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx); 1341static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *, 1342 rtx *); 1343static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *); 1344static enum machine_mode ix86_cc_modes_compatible (enum machine_mode, 1345 enum machine_mode); 1346static rtx get_thread_pointer (int); 1347static rtx legitimize_tls_address (rtx, enum tls_model, int); 1348static void get_pc_thunk_name (char [32], unsigned int); 1349static rtx gen_push (rtx); 1350static int ix86_flags_dependent (rtx, rtx, enum attr_type); 1351static int ix86_agi_dependent (rtx, rtx, enum attr_type); 1352static struct machine_function * ix86_init_machine_status (void); 1353static int ix86_split_to_parts (rtx, rtx *, enum machine_mode); 1354static int ix86_nsaved_regs (void); 1355static 
void ix86_emit_save_regs (void); 1356static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT); 1357static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int); 1358static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT); 1359static HOST_WIDE_INT ix86_GOT_alias_set (void); 1360static void ix86_adjust_counter (rtx, HOST_WIDE_INT); 1361static rtx ix86_expand_aligntest (rtx, int); 1362static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx); 1363static int ix86_issue_rate (void); 1364static int ix86_adjust_cost (rtx, rtx, rtx, int); 1365static int ia32_multipass_dfa_lookahead (void); 1366static void ix86_init_mmx_sse_builtins (void); 1367static rtx x86_this_parameter (tree); 1368static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, 1369 HOST_WIDE_INT, tree); 1370static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); 1371static void x86_file_start (void); 1372static void ix86_reorg (void); 1373static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*); 1374static tree ix86_build_builtin_va_list (void); 1375static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, 1376 tree, int *, int); 1377static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *); 1378static bool ix86_scalar_mode_supported_p (enum machine_mode); 1379static bool ix86_vector_mode_supported_p (enum machine_mode); 1380 1381static int ix86_address_cost (rtx); 1382static bool ix86_cannot_force_const_mem (rtx); 1383static rtx ix86_delegitimize_address (rtx); 1384 1385static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; 1386 1387struct builtin_description; 1388static rtx ix86_expand_sse_comi (const struct builtin_description *, 1389 tree, rtx); 1390static rtx ix86_expand_sse_compare (const struct builtin_description *, 1391 tree, rtx); 1392static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx); 1393static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int); 1394static 
rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx); 1395static rtx ix86_expand_store_builtin (enum insn_code, tree); 1396static rtx safe_vector_operand (rtx, enum machine_mode); 1397static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *); 1398static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code); 1399static int ix86_fp_comparison_fcomi_cost (enum rtx_code code); 1400static int ix86_fp_comparison_sahf_cost (enum rtx_code code); 1401static int ix86_fp_comparison_cost (enum rtx_code code); 1402static unsigned int ix86_select_alt_pic_regnum (void); 1403static int ix86_save_reg (unsigned int, int); 1404static void ix86_compute_frame_layout (struct ix86_frame *); 1405static int ix86_comp_type_attributes (tree, tree); 1406static int ix86_function_regparm (tree, tree); 1407const struct attribute_spec ix86_attribute_table[]; 1408static bool ix86_function_ok_for_sibcall (tree, tree); 1409static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *); 1410static int ix86_value_regno (enum machine_mode, tree, tree); 1411static bool contains_128bit_aligned_vector_p (tree); 1412static rtx ix86_struct_value_rtx (tree, int); 1413static bool ix86_ms_bitfield_layout_p (tree); 1414static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *); 1415static int extended_reg_mentioned_1 (rtx *, void *); 1416static bool ix86_rtx_costs (rtx, int, int, int *); 1417static int min_insn_size (rtx); 1418static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers); 1419static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type); 1420static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, 1421 tree, bool); 1422static void ix86_init_builtins (void); 1423static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int); 1424static const char *ix86_mangle_fundamental_type (tree); 1425static tree ix86_stack_protect_fail (void); 1426static rtx ix86_internal_arg_pointer (void); 
1427static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int); 1428 1429/* This function is only used on Solaris. */ 1430static void i386_solaris_elf_named_section (const char *, unsigned int, tree) 1431 ATTRIBUTE_UNUSED; 1432 1433/* Register class used for passing given 64bit part of the argument. 1434 These represent classes as documented by the PS ABI, with the exception 1435 of SSESF, SSEDF classes, that are basically SSE class, just gcc will 1436 use SF or DFmode move instead of DImode to avoid reformatting penalties. 1437 1438 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves 1439 whenever possible (upper half does contain padding). 1440 */ 1441enum x86_64_reg_class 1442 { 1443 X86_64_NO_CLASS, 1444 X86_64_INTEGER_CLASS, 1445 X86_64_INTEGERSI_CLASS, 1446 X86_64_SSE_CLASS, 1447 X86_64_SSESF_CLASS, 1448 X86_64_SSEDF_CLASS, 1449 X86_64_SSEUP_CLASS, 1450 X86_64_X87_CLASS, 1451 X86_64_X87UP_CLASS, 1452 X86_64_COMPLEX_X87_CLASS, 1453 X86_64_MEMORY_CLASS 1454 }; 1455static const char * const x86_64_reg_class_name[] = { 1456 "no", "integer", "integerSI", "sse", "sseSF", "sseDF", 1457 "sseup", "x87", "x87up", "cplx87", "no" 1458}; 1459 1460#define MAX_CLASSES 4 1461 1462/* Table of constants used by fldpi, fldln2, etc.... */ 1463static REAL_VALUE_TYPE ext_80387_constants_table [5]; 1464static bool ext_80387_constants_init = 0; 1465static void init_ext_80387_constants (void); 1466static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED; 1467static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED; 1468static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED; 1469static section *x86_64_elf_select_section (tree decl, int reloc, 1470 unsigned HOST_WIDE_INT align) 1471 ATTRIBUTE_UNUSED; 1472 1473/* Initialize the GCC target structure. 
*/ 1474#undef TARGET_ATTRIBUTE_TABLE 1475#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table 1476#if TARGET_DLLIMPORT_DECL_ATTRIBUTES 1477# undef TARGET_MERGE_DECL_ATTRIBUTES 1478# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes 1479#endif 1480 1481#undef TARGET_COMP_TYPE_ATTRIBUTES 1482#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes 1483 1484#undef TARGET_INIT_BUILTINS 1485#define TARGET_INIT_BUILTINS ix86_init_builtins 1486#undef TARGET_EXPAND_BUILTIN 1487#define TARGET_EXPAND_BUILTIN ix86_expand_builtin 1488 1489#undef TARGET_ASM_FUNCTION_EPILOGUE 1490#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue 1491 1492#undef TARGET_ENCODE_SECTION_INFO 1493#ifndef SUBTARGET_ENCODE_SECTION_INFO 1494#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info 1495#else 1496#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO 1497#endif 1498 1499#undef TARGET_ASM_OPEN_PAREN 1500#define TARGET_ASM_OPEN_PAREN "" 1501#undef TARGET_ASM_CLOSE_PAREN 1502#define TARGET_ASM_CLOSE_PAREN "" 1503 1504#undef TARGET_ASM_ALIGNED_HI_OP 1505#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT 1506#undef TARGET_ASM_ALIGNED_SI_OP 1507#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG 1508#ifdef ASM_QUAD 1509#undef TARGET_ASM_ALIGNED_DI_OP 1510#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD 1511#endif 1512 1513#undef TARGET_ASM_UNALIGNED_HI_OP 1514#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP 1515#undef TARGET_ASM_UNALIGNED_SI_OP 1516#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP 1517#undef TARGET_ASM_UNALIGNED_DI_OP 1518#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP 1519 1520#undef TARGET_SCHED_ADJUST_COST 1521#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost 1522#undef TARGET_SCHED_ISSUE_RATE 1523#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate 1524#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD 1525#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ 1526 ia32_multipass_dfa_lookahead 1527 1528#undef 
TARGET_FUNCTION_OK_FOR_SIBCALL 1529#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall 1530 1531#ifdef HAVE_AS_TLS 1532#undef TARGET_HAVE_TLS 1533#define TARGET_HAVE_TLS true 1534#endif 1535#undef TARGET_CANNOT_FORCE_CONST_MEM 1536#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem 1537#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P 1538#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true 1539 1540#undef TARGET_DELEGITIMIZE_ADDRESS 1541#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address 1542 1543#undef TARGET_MS_BITFIELD_LAYOUT_P 1544#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p 1545 1546#if TARGET_MACHO 1547#undef TARGET_BINDS_LOCAL_P 1548#define TARGET_BINDS_LOCAL_P darwin_binds_local_p 1549#endif 1550 1551#undef TARGET_ASM_OUTPUT_MI_THUNK 1552#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk 1553#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 1554#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk 1555 1556#undef TARGET_ASM_FILE_START 1557#define TARGET_ASM_FILE_START x86_file_start 1558 1559#undef TARGET_DEFAULT_TARGET_FLAGS 1560#define TARGET_DEFAULT_TARGET_FLAGS \ 1561 (TARGET_DEFAULT \ 1562 | TARGET_64BIT_DEFAULT \ 1563 | TARGET_SUBTARGET_DEFAULT \ 1564 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT) 1565 1566#undef TARGET_HANDLE_OPTION 1567#define TARGET_HANDLE_OPTION ix86_handle_option 1568 1569#undef TARGET_RTX_COSTS 1570#define TARGET_RTX_COSTS ix86_rtx_costs 1571#undef TARGET_ADDRESS_COST 1572#define TARGET_ADDRESS_COST ix86_address_cost 1573 1574#undef TARGET_FIXED_CONDITION_CODE_REGS 1575#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs 1576#undef TARGET_CC_MODES_COMPATIBLE 1577#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible 1578 1579#undef TARGET_MACHINE_DEPENDENT_REORG 1580#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg 1581 1582#undef TARGET_BUILD_BUILTIN_VA_LIST 1583#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list 1584 1585#undef 
TARGET_MD_ASM_CLOBBERS 1586#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers 1587 1588#undef TARGET_PROMOTE_PROTOTYPES 1589#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true 1590#undef TARGET_STRUCT_VALUE_RTX 1591#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx 1592#undef TARGET_SETUP_INCOMING_VARARGS 1593#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs 1594#undef TARGET_MUST_PASS_IN_STACK 1595#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack 1596#undef TARGET_PASS_BY_REFERENCE 1597#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference 1598#undef TARGET_INTERNAL_ARG_POINTER 1599#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer 1600#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC 1601#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec 1602 1603#undef TARGET_GIMPLIFY_VA_ARG_EXPR 1604#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg 1605 1606#undef TARGET_SCALAR_MODE_SUPPORTED_P 1607#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p 1608 1609#undef TARGET_VECTOR_MODE_SUPPORTED_P 1610#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p 1611 1612#ifdef HAVE_AS_TLS 1613#undef TARGET_ASM_OUTPUT_DWARF_DTPREL 1614#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel 1615#endif 1616 1617#ifdef SUBTARGET_INSERT_ATTRIBUTES 1618#undef TARGET_INSERT_ATTRIBUTES 1619#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES 1620#endif 1621 1622#undef TARGET_MANGLE_FUNDAMENTAL_TYPE 1623#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type 1624 1625#undef TARGET_STACK_PROTECT_FAIL 1626#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail 1627 1628#undef TARGET_FUNCTION_VALUE 1629#define TARGET_FUNCTION_VALUE ix86_function_value 1630 1631struct gcc_target targetm = TARGET_INITIALIZER; 1632 1633 1634/* The svr4 ABI for the i386 says that records and unions are returned 1635 in memory. 
*/
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Implement TARGET_HANDLE_OPTION.

   CODE is the OPT_* enumerator for the switch being processed, ARG its
   string argument (unused here) and VALUE is nonzero when the option is
   being turned on.  Returns true to report the option as handled.

   The only work done here is for the "negative" forms: turning one SIMD
   extension off also turns off every extension that implies it, and the
   disabled bits are recorded in target_flags_explicit so that the
   defaulting logic in override_options will not silently re-enable
   them.  */

static bool
ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
{
  switch (code)
    {
    case OPT_m3dnow:
      if (!value)
	{
	  /* -mno-3dnow also disables the Athlon 3DNow! extension bits.  */
	  target_flags &= ~MASK_3DNOW_A;
	  target_flags_explicit |= MASK_3DNOW_A;
	}
      return true;

    case OPT_mmmx:
      if (!value)
	{
	  /* Both 3DNow! variants build on MMX.  */
	  target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
	  target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
	}
      return true;

    case OPT_msse:
      if (!value)
	{
	  /* Every later SSE level depends on SSE.  */
	  target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
	  target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
	}
      return true;

    case OPT_msse2:
      if (!value)
	{
	  target_flags &= ~(MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
	  target_flags_explicit |= MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
	}
      return true;

    case OPT_msse3:
      if (!value)
	{
	  target_flags &= ~(MASK_SSSE3 | MASK_SSE4A);
	  target_flags_explicit |= MASK_SSSE3 | MASK_SSE4A;
	}
      return true;

    default:
      /* All other -m options need no special cascading; report handled.  */
      return true;
    }
}

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options (void)
{
  int i;
  int ix86_tune_defaulted = 0;

  /* Comes from final.c -- no real reason to change it.
*/ 1708#define MAX_CODE_ALIGN 16 1709 1710 static struct ptt 1711 { 1712 const struct processor_costs *cost; /* Processor costs */ 1713 const int target_enable; /* Target flags to enable. */ 1714 const int target_disable; /* Target flags to disable. */ 1715 const int align_loop; /* Default alignments. */ 1716 const int align_loop_max_skip; 1717 const int align_jump; 1718 const int align_jump_max_skip; 1719 const int align_func; 1720 } 1721 const processor_target_table[PROCESSOR_max] = 1722 { 1723 {&i386_cost, 0, 0, 4, 3, 4, 3, 4}, 1724 {&i486_cost, 0, 0, 16, 15, 16, 15, 16}, 1725 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16}, 1726 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16}, 1727 {&geode_cost, 0, 0, 0, 0, 0, 0, 0}, 1728 {&k6_cost, 0, 0, 32, 7, 32, 7, 32}, 1729 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16}, 1730 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0}, 1731 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}, 1732 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}, 1733 {&core2_cost, 0, 0, 16, 7, 16, 7, 16}, 1734 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16}, 1735 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}, 1736 {&amdfam10_cost, 0, 0, 32, 24, 32, 7, 32} 1737 }; 1738 1739 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES; 1740 static struct pta 1741 { 1742 const char *const name; /* processor name or nickname. 
*/ 1743 const enum processor_type processor; 1744 const enum pta_flags 1745 { 1746 PTA_SSE = 1, 1747 PTA_SSE2 = 2, 1748 PTA_SSE3 = 4, 1749 PTA_MMX = 8, 1750 PTA_PREFETCH_SSE = 16, 1751 PTA_3DNOW = 32, 1752 PTA_3DNOW_A = 64, 1753 PTA_64BIT = 128, 1754 PTA_SSSE3 = 256, 1755 PTA_CX16 = 512, 1756 PTA_POPCNT = 1024, 1757 PTA_ABM = 2048, 1758 PTA_SSE4A = 4096 1759 } flags; 1760 } 1761 const processor_alias_table[] = 1762 { 1763 {"i386", PROCESSOR_I386, 0}, 1764 {"i486", PROCESSOR_I486, 0}, 1765 {"i586", PROCESSOR_PENTIUM, 0}, 1766 {"pentium", PROCESSOR_PENTIUM, 0}, 1767 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX}, 1768 {"winchip-c6", PROCESSOR_I486, PTA_MMX}, 1769 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, 1770 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, 1771 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE}, 1772 {"i686", PROCESSOR_PENTIUMPRO, 0}, 1773 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0}, 1774 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX}, 1775 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, 1776 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, 1777 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2}, 1778 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 1779 | PTA_MMX | PTA_PREFETCH_SSE}, 1780 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 1781 | PTA_MMX | PTA_PREFETCH_SSE}, 1782 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 1783 | PTA_MMX | PTA_PREFETCH_SSE}, 1784 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT 1785 | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16}, 1786 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 1787 | PTA_64BIT | PTA_MMX 1788 | PTA_PREFETCH_SSE | PTA_CX16}, 1789 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1790 | PTA_3DNOW_A}, 1791 {"k6", PROCESSOR_K6, PTA_MMX}, 1792 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 1793 {"k6-3", PROCESSOR_K6, PTA_MMX | 
PTA_3DNOW}, 1794 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1795 | PTA_3DNOW_A}, 1796 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE 1797 | PTA_3DNOW | PTA_3DNOW_A}, 1798 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1799 | PTA_3DNOW_A | PTA_SSE}, 1800 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1801 | PTA_3DNOW_A | PTA_SSE}, 1802 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1803 | PTA_3DNOW_A | PTA_SSE}, 1804 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT 1805 | PTA_SSE | PTA_SSE2 }, 1806 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1807 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1808 {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1809 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 1810 | PTA_SSE3 }, 1811 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1812 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1813 {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1814 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 1815 | PTA_SSE3 }, 1816 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1817 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1818 {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1819 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 1820 | PTA_SSE3 }, 1821 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1822 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1823 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1824 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE 1825 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT 1826 | PTA_ABM | PTA_SSE4A | PTA_CX16}, 1827 {"barcelona", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1828 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE 1829 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT 1830 | PTA_ABM | PTA_SSE4A | PTA_CX16}, 1831 {"generic32", PROCESSOR_GENERIC32, 0 /* flags 
are only used for -march switch. */ }, 1832 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ }, 1833 }; 1834 1835 int const pta_size = ARRAY_SIZE (processor_alias_table); 1836 1837#ifdef SUBTARGET_OVERRIDE_OPTIONS 1838 SUBTARGET_OVERRIDE_OPTIONS; 1839#endif 1840 1841#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS 1842 SUBSUBTARGET_OVERRIDE_OPTIONS; 1843#endif 1844 1845 /* -fPIC is the default for x86_64. */ 1846 if (TARGET_MACHO && TARGET_64BIT) 1847 flag_pic = 2; 1848 1849 /* Set the default values for switches whose default depends on TARGET_64BIT 1850 in case they weren't overwritten by command line options. */ 1851 if (TARGET_64BIT) 1852 { 1853 /* Mach-O doesn't support omitting the frame pointer for now. */ 1854 if (flag_omit_frame_pointer == 2) 1855 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1); 1856 if (flag_asynchronous_unwind_tables == 2) 1857 flag_asynchronous_unwind_tables = 1; 1858 if (flag_pcc_struct_return == 2) 1859 flag_pcc_struct_return = 0; 1860 } 1861 else 1862 { 1863 if (flag_omit_frame_pointer == 2) 1864 flag_omit_frame_pointer = 0; 1865 if (flag_asynchronous_unwind_tables == 2) 1866 flag_asynchronous_unwind_tables = 0; 1867 if (flag_pcc_struct_return == 2) 1868 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; 1869 } 1870 1871 /* Need to check -mtune=generic first. */ 1872 if (ix86_tune_string) 1873 { 1874 if (!strcmp (ix86_tune_string, "generic") 1875 || !strcmp (ix86_tune_string, "i686") 1876 /* As special support for cross compilers we read -mtune=native 1877 as -mtune=generic. With native compilers we won't see the 1878 -mtune=native, as it was changed by the driver. 
*/ 1879 || !strcmp (ix86_tune_string, "native")) 1880 { 1881 if (TARGET_64BIT) 1882 ix86_tune_string = "generic64"; 1883 else 1884 ix86_tune_string = "generic32"; 1885 } 1886 else if (!strncmp (ix86_tune_string, "generic", 7)) 1887 error ("bad value (%s) for -mtune= switch", ix86_tune_string); 1888 } 1889 else 1890 { 1891 if (ix86_arch_string) 1892 ix86_tune_string = ix86_arch_string; 1893 if (!ix86_tune_string) 1894 { 1895 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT]; 1896 ix86_tune_defaulted = 1; 1897 } 1898 1899 /* ix86_tune_string is set to ix86_arch_string or defaulted. We 1900 need to use a sensible tune option. */ 1901 if (!strcmp (ix86_tune_string, "generic") 1902 || !strcmp (ix86_tune_string, "x86-64") 1903 || !strcmp (ix86_tune_string, "i686")) 1904 { 1905 if (TARGET_64BIT) 1906 ix86_tune_string = "generic64"; 1907 else 1908 ix86_tune_string = "generic32"; 1909 } 1910 } 1911 if (!strcmp (ix86_tune_string, "x86-64")) 1912 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or " 1913 "-mtune=generic instead as appropriate."); 1914 1915 if (!ix86_arch_string) 1916 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486"; 1917 if (!strcmp (ix86_arch_string, "generic")) 1918 error ("generic CPU can be used only for -mtune= switch"); 1919 if (!strncmp (ix86_arch_string, "generic", 7)) 1920 error ("bad value (%s) for -march= switch", ix86_arch_string); 1921 1922 if (ix86_cmodel_string != 0) 1923 { 1924 if (!strcmp (ix86_cmodel_string, "small")) 1925 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 1926 else if (!strcmp (ix86_cmodel_string, "medium")) 1927 ix86_cmodel = flag_pic ? 
CM_MEDIUM_PIC : CM_MEDIUM; 1928 else if (flag_pic) 1929 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string); 1930 else if (!strcmp (ix86_cmodel_string, "32")) 1931 ix86_cmodel = CM_32; 1932 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic) 1933 ix86_cmodel = CM_KERNEL; 1934 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic) 1935 ix86_cmodel = CM_LARGE; 1936 else 1937 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string); 1938 } 1939 else 1940 { 1941 ix86_cmodel = CM_32; 1942 if (TARGET_64BIT) 1943 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 1944 } 1945 if (ix86_asm_string != 0) 1946 { 1947 if (! TARGET_MACHO 1948 && !strcmp (ix86_asm_string, "intel")) 1949 ix86_asm_dialect = ASM_INTEL; 1950 else if (!strcmp (ix86_asm_string, "att")) 1951 ix86_asm_dialect = ASM_ATT; 1952 else 1953 error ("bad value (%s) for -masm= switch", ix86_asm_string); 1954 } 1955 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32)) 1956 error ("code model %qs not supported in the %s bit mode", 1957 ix86_cmodel_string, TARGET_64BIT ? "64" : "32"); 1958 if (ix86_cmodel == CM_LARGE) 1959 sorry ("code model %<large%> not supported yet"); 1960 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0)) 1961 sorry ("%i-bit mode not compiled in", 1962 (target_flags & MASK_64BIT) ? 64 : 32); 1963 1964 for (i = 0; i < pta_size; i++) 1965 if (! strcmp (ix86_arch_string, processor_alias_table[i].name)) 1966 { 1967 ix86_arch = processor_alias_table[i].processor; 1968 /* Default cpu tuning to the architecture. 
*/ 1969 ix86_tune = ix86_arch; 1970 if (processor_alias_table[i].flags & PTA_MMX 1971 && !(target_flags_explicit & MASK_MMX)) 1972 target_flags |= MASK_MMX; 1973 if (processor_alias_table[i].flags & PTA_3DNOW 1974 && !(target_flags_explicit & MASK_3DNOW)) 1975 target_flags |= MASK_3DNOW; 1976 if (processor_alias_table[i].flags & PTA_3DNOW_A 1977 && !(target_flags_explicit & MASK_3DNOW_A)) 1978 target_flags |= MASK_3DNOW_A; 1979 if (processor_alias_table[i].flags & PTA_SSE 1980 && !(target_flags_explicit & MASK_SSE)) 1981 target_flags |= MASK_SSE; 1982 if (processor_alias_table[i].flags & PTA_SSE2 1983 && !(target_flags_explicit & MASK_SSE2)) 1984 target_flags |= MASK_SSE2; 1985 if (processor_alias_table[i].flags & PTA_SSE3 1986 && !(target_flags_explicit & MASK_SSE3)) 1987 target_flags |= MASK_SSE3; 1988 if (processor_alias_table[i].flags & PTA_SSSE3 1989 && !(target_flags_explicit & MASK_SSSE3)) 1990 target_flags |= MASK_SSSE3; 1991 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) 1992 x86_prefetch_sse = true; 1993 if (processor_alias_table[i].flags & PTA_CX16) 1994 x86_cmpxchg16b = true; 1995 if (processor_alias_table[i].flags & PTA_POPCNT 1996 && !(target_flags_explicit & MASK_POPCNT)) 1997 target_flags |= MASK_POPCNT; 1998 if (processor_alias_table[i].flags & PTA_ABM 1999 && !(target_flags_explicit & MASK_ABM)) 2000 target_flags |= MASK_ABM; 2001 if (processor_alias_table[i].flags & PTA_SSE4A 2002 && !(target_flags_explicit & MASK_SSE4A)) 2003 target_flags |= MASK_SSE4A; 2004 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) 2005 error ("CPU you selected does not support x86-64 " 2006 "instruction set"); 2007 break; 2008 } 2009 2010 if (i == pta_size) 2011 error ("bad value (%s) for -march= switch", ix86_arch_string); 2012 2013 for (i = 0; i < pta_size; i++) 2014 if (! 
strcmp (ix86_tune_string, processor_alias_table[i].name)) 2015 { 2016 ix86_tune = processor_alias_table[i].processor; 2017 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) 2018 { 2019 if (ix86_tune_defaulted) 2020 { 2021 ix86_tune_string = "x86-64"; 2022 for (i = 0; i < pta_size; i++) 2023 if (! strcmp (ix86_tune_string, 2024 processor_alias_table[i].name)) 2025 break; 2026 ix86_tune = processor_alias_table[i].processor; 2027 } 2028 else 2029 error ("CPU you selected does not support x86-64 " 2030 "instruction set"); 2031 } 2032 /* Intel CPUs have always interpreted SSE prefetch instructions as 2033 NOPs; so, we can enable SSE prefetch instructions even when 2034 -mtune (rather than -march) points us to a processor that has them. 2035 However, the VIA C3 gives a SIGILL, so we only do that for i686 and 2036 higher processors. */ 2037 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE)) 2038 x86_prefetch_sse = true; 2039 break; 2040 } 2041 if (i == pta_size) 2042 error ("bad value (%s) for -mtune= switch", ix86_tune_string); 2043 2044 if (optimize_size) 2045 ix86_cost = &size_cost; 2046 else 2047 ix86_cost = processor_target_table[ix86_tune].cost; 2048 target_flags |= processor_target_table[ix86_tune].target_enable; 2049 target_flags &= ~processor_target_table[ix86_tune].target_disable; 2050 2051 /* Arrange to set up i386_stack_locals for all functions. */ 2052 init_machine_status = ix86_init_machine_status; 2053 2054 /* Validate -mregparm= value. */ 2055 if (ix86_regparm_string) 2056 { 2057 i = atoi (ix86_regparm_string); 2058 if (i < 0 || i > REGPARM_MAX) 2059 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX); 2060 else 2061 ix86_regparm = i; 2062 } 2063 else 2064 if (TARGET_64BIT) 2065 ix86_regparm = REGPARM_MAX; 2066 2067 /* If the user has provided any of the -malign-* options, 2068 warn and use that value only if -falign-* is not set. 2069 Remove this code in GCC 3.2 or later. 
*/ 2070 if (ix86_align_loops_string) 2071 { 2072 warning (0, "-malign-loops is obsolete, use -falign-loops"); 2073 if (align_loops == 0) 2074 { 2075 i = atoi (ix86_align_loops_string); 2076 if (i < 0 || i > MAX_CODE_ALIGN) 2077 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 2078 else 2079 align_loops = 1 << i; 2080 } 2081 } 2082 2083 if (ix86_align_jumps_string) 2084 { 2085 warning (0, "-malign-jumps is obsolete, use -falign-jumps"); 2086 if (align_jumps == 0) 2087 { 2088 i = atoi (ix86_align_jumps_string); 2089 if (i < 0 || i > MAX_CODE_ALIGN) 2090 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 2091 else 2092 align_jumps = 1 << i; 2093 } 2094 } 2095 2096 if (ix86_align_funcs_string) 2097 { 2098 warning (0, "-malign-functions is obsolete, use -falign-functions"); 2099 if (align_functions == 0) 2100 { 2101 i = atoi (ix86_align_funcs_string); 2102 if (i < 0 || i > MAX_CODE_ALIGN) 2103 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 2104 else 2105 align_functions = 1 << i; 2106 } 2107 } 2108 2109 /* Default align_* from the processor table. */ 2110 if (align_loops == 0) 2111 { 2112 align_loops = processor_target_table[ix86_tune].align_loop; 2113 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip; 2114 } 2115 if (align_jumps == 0) 2116 { 2117 align_jumps = processor_target_table[ix86_tune].align_jump; 2118 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip; 2119 } 2120 if (align_functions == 0) 2121 { 2122 align_functions = processor_target_table[ix86_tune].align_func; 2123 } 2124 2125 /* Validate -mbranch-cost= value, or provide default. 
*/ 2126 ix86_branch_cost = ix86_cost->branch_cost; 2127 if (ix86_branch_cost_string) 2128 { 2129 i = atoi (ix86_branch_cost_string); 2130 if (i < 0 || i > 5) 2131 error ("-mbranch-cost=%d is not between 0 and 5", i); 2132 else 2133 ix86_branch_cost = i; 2134 } 2135 if (ix86_section_threshold_string) 2136 { 2137 i = atoi (ix86_section_threshold_string); 2138 if (i < 0) 2139 error ("-mlarge-data-threshold=%d is negative", i); 2140 else 2141 ix86_section_threshold = i; 2142 } 2143 2144 if (ix86_tls_dialect_string) 2145 { 2146 if (strcmp (ix86_tls_dialect_string, "gnu") == 0) 2147 ix86_tls_dialect = TLS_DIALECT_GNU; 2148 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0) 2149 ix86_tls_dialect = TLS_DIALECT_GNU2; 2150 else if (strcmp (ix86_tls_dialect_string, "sun") == 0) 2151 ix86_tls_dialect = TLS_DIALECT_SUN; 2152 else 2153 error ("bad value (%s) for -mtls-dialect= switch", 2154 ix86_tls_dialect_string); 2155 } 2156 2157 /* Keep nonleaf frame pointers. */ 2158 if (flag_omit_frame_pointer) 2159 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; 2160 else if (TARGET_OMIT_LEAF_FRAME_POINTER) 2161 flag_omit_frame_pointer = 1; 2162 2163 /* If we're doing fast math, we don't care about comparison order 2164 wrt NaNs. This lets us use a shorter comparison sequence. */ 2165 if (flag_finite_math_only) 2166 target_flags &= ~MASK_IEEE_FP; 2167 2168 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, 2169 since the insns won't need emulation. */ 2170 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch)) 2171 target_flags &= ~MASK_NO_FANCY_MATH_387; 2172 2173 /* Likewise, if the target doesn't have a 387, or we've specified 2174 software floating point, don't use 387 inline intrinsics. */ 2175 if (!TARGET_80387) 2176 target_flags |= MASK_NO_FANCY_MATH_387; 2177 2178 /* Turn on SSE3 builtins for -mssse3. */ 2179 if (TARGET_SSSE3) 2180 target_flags |= MASK_SSE3; 2181 2182 /* Turn on SSE3 builtins for -msse4a. 
*/ 2183 if (TARGET_SSE4A) 2184 target_flags |= MASK_SSE3; 2185 2186 /* Turn on SSE2 builtins for -msse3. */ 2187 if (TARGET_SSE3) 2188 target_flags |= MASK_SSE2; 2189 2190 /* Turn on SSE builtins for -msse2. */ 2191 if (TARGET_SSE2) 2192 target_flags |= MASK_SSE; 2193 2194 /* Turn on MMX builtins for -msse. */ 2195 if (TARGET_SSE) 2196 { 2197 target_flags |= MASK_MMX & ~target_flags_explicit; 2198 x86_prefetch_sse = true; 2199 } 2200 2201 /* Turn on MMX builtins for 3Dnow. */ 2202 if (TARGET_3DNOW) 2203 target_flags |= MASK_MMX; 2204 2205 /* Turn on POPCNT builtins for -mabm. */ 2206 if (TARGET_ABM) 2207 target_flags |= MASK_POPCNT; 2208 2209 if (TARGET_64BIT) 2210 { 2211 if (TARGET_ALIGN_DOUBLE) 2212 error ("-malign-double makes no sense in the 64bit mode"); 2213 if (TARGET_RTD) 2214 error ("-mrtd calling convention not supported in the 64bit mode"); 2215 2216 /* Enable by default the SSE and MMX builtins. Do allow the user to 2217 explicitly disable any of these. In particular, disabling SSE and 2218 MMX for kernel code is extremely useful. */ 2219 target_flags 2220 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE) 2221 & ~target_flags_explicit); 2222 } 2223 else 2224 { 2225 /* i386 ABI does not specify red zone. It still makes sense to use it 2226 when programmer takes care to stack from being destroyed. */ 2227 if (!(target_flags_explicit & MASK_NO_RED_ZONE)) 2228 target_flags |= MASK_NO_RED_ZONE; 2229 } 2230 2231 /* Validate -mpreferred-stack-boundary= value, or provide default. 2232 The default of 128 bits is for Pentium III's SSE __m128. We can't 2233 change it because of optimize_size. Otherwise, we can't mix object 2234 files compiled with -Os and -On. */ 2235 ix86_preferred_stack_boundary = 128; 2236 if (ix86_preferred_stack_boundary_string) 2237 { 2238 i = atoi (ix86_preferred_stack_boundary_string); 2239 if (i < (TARGET_64BIT ? 4 : 2) || i > 12) 2240 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i, 2241 TARGET_64BIT ? 
4 : 2); 2242 else 2243 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT; 2244 } 2245 2246 /* Accept -msseregparm only if at least SSE support is enabled. */ 2247 if (TARGET_SSEREGPARM 2248 && ! TARGET_SSE) 2249 error ("-msseregparm used without SSE enabled"); 2250 2251 ix86_fpmath = TARGET_FPMATH_DEFAULT; 2252 2253 if (ix86_fpmath_string != 0) 2254 { 2255 if (! strcmp (ix86_fpmath_string, "387")) 2256 ix86_fpmath = FPMATH_387; 2257 else if (! strcmp (ix86_fpmath_string, "sse")) 2258 { 2259 if (!TARGET_SSE) 2260 { 2261 warning (0, "SSE instruction set disabled, using 387 arithmetics"); 2262 ix86_fpmath = FPMATH_387; 2263 } 2264 else 2265 ix86_fpmath = FPMATH_SSE; 2266 } 2267 else if (! strcmp (ix86_fpmath_string, "387,sse") 2268 || ! strcmp (ix86_fpmath_string, "sse,387")) 2269 { 2270 if (!TARGET_SSE) 2271 { 2272 warning (0, "SSE instruction set disabled, using 387 arithmetics"); 2273 ix86_fpmath = FPMATH_387; 2274 } 2275 else if (!TARGET_80387) 2276 { 2277 warning (0, "387 instruction set disabled, using SSE arithmetics"); 2278 ix86_fpmath = FPMATH_SSE; 2279 } 2280 else 2281 ix86_fpmath = FPMATH_SSE | FPMATH_387; 2282 } 2283 else 2284 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string); 2285 } 2286 2287 /* If the i387 is disabled, then do not return values in it. */ 2288 if (!TARGET_80387) 2289 target_flags &= ~MASK_FLOAT_RETURNS; 2290 2291 if ((x86_accumulate_outgoing_args & TUNEMASK) 2292 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 2293 && !optimize_size) 2294 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 2295 2296 /* ??? Unwind info is not correct around the CFG unless either a frame 2297 pointer is present or M_A_O_A is set. Fixing this requires rewriting 2298 unwind info generation to be aware of the CFG and propagating states 2299 around edges. 
*/ 2300 if ((flag_unwind_tables || flag_asynchronous_unwind_tables 2301 || flag_exceptions || flag_non_call_exceptions) 2302 && flag_omit_frame_pointer 2303 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) 2304 { 2305 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 2306 warning (0, "unwind tables currently require either a frame pointer " 2307 "or -maccumulate-outgoing-args for correctness"); 2308 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 2309 } 2310 2311 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ 2312 { 2313 char *p; 2314 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); 2315 p = strchr (internal_label_prefix, 'X'); 2316 internal_label_prefix_len = p - internal_label_prefix; 2317 *p = '\0'; 2318 } 2319 2320 /* When scheduling description is not available, disable scheduler pass 2321 so it won't slow down the compilation and make x87 code slower. */ 2322 if (!TARGET_SCHEDULE) 2323 flag_schedule_insns_after_reload = flag_schedule_insns = 0; 2324} 2325 2326/* switch to the appropriate section for output of DECL. 2327 DECL is either a `VAR_DECL' node or a constant of some sort. 2328 RELOC indicates whether forming the initial value of DECL requires 2329 link-time relocations. 
*/

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  /* For the x86-64 medium code model, large data lives in separate
     ".l*" sections so that the small-model sections stay within 2GB of
     the code.  Only DECLs classified as large data are redirected;
     everything else falls through to the generic ELF selector.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  /* Read-only large data: drop SECTION_WRITE.  */
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  /* Small-data categories never occur on x86-64.  */
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}

/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.
*/

static void
x86_64_elf_unique_section (tree decl, int reloc)
{
  /* Mirror of x86_64_elf_select_section for -ffunction/-fdata-sections
     style unique names: in the medium code model, large data gets a
     ".l*" (or ".gnu.linkonce.l*") section-name prefix so it is kept out
     of the small-model sections.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  /* Small-data categories never occur on x86-64.  */
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name;
	  size_t nlen, plen;
	  char *string;
	  plen = strlen (prefix);

	  /* Build "<prefix><stripped decl name>" in a stack buffer; the
	     trailing NUL is copied along with NAME.  */
	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);
	  nlen = strlen (name);

	  string = alloca (nlen + plen + 1);
	  memcpy (string, prefix, plen);
	  memcpy (string + plen, name, nlen + 1);

	  /* build_string copies, so the alloca buffer may die here.  */
	  DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}

#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  /* Objects above -mlarge-data-threshold go to .largecomm in the
     medium code model; everything else uses the normal common op.
     NOTE(review): ".largecomm\t" lacks the leading tab that
     COMMON_ASM_OP conventionally carries -- cosmetic asm-output
     inconsistency; confirm against the target's COMMON_ASM_OP before
     changing.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fprintf (file, ".largecomm\t");
  else
    fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  /* Emit ",<size>,<byte alignment>".  */
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	   size, align / BITS_PER_UNIT);
}

/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  /* NOTE(review): DECL is marked ATTRIBUTE_UNUSED but is in fact used
     below (get_named_section and ASM_DECLARE_OBJECT_NAME); the
     attribute is stale but harmless.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    /* Large BSS objects go into .lbss in the medium code model.  */
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  /* Reserve at least one byte so the label has an address of its own.  */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
#endif

void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  if (TARGET_MACHO)
    /* The Darwin libraries never set errno, so we might as well
       avoid calling them when that's the only reason we would.
*/ 2517 flag_errno_math = 0; 2518 2519 /* The default values of these switches depend on the TARGET_64BIT 2520 that is not known at this moment. Mark these values with 2 and 2521 let user the to override these. In case there is no command line option 2522 specifying them, we will set the defaults in override_options. */ 2523 if (optimize >= 1) 2524 flag_omit_frame_pointer = 2; 2525 flag_pcc_struct_return = 2; 2526 flag_asynchronous_unwind_tables = 2; 2527#ifdef SUBTARGET_OPTIMIZATION_OPTIONS 2528 SUBTARGET_OPTIMIZATION_OPTIONS; 2529#endif 2530} 2531 2532/* Table of valid machine attributes. */ 2533const struct attribute_spec ix86_attribute_table[] = 2534{ 2535 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ 2536 /* Stdcall attribute says callee is responsible for popping arguments 2537 if they are not variable. */ 2538 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute }, 2539 /* Fastcall attribute says callee is responsible for popping arguments 2540 if they are not variable. */ 2541 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute }, 2542 /* Cdecl attribute says the callee is a normal C declaration */ 2543 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute }, 2544 /* Regparm attribute specifies how many integer arguments are to be 2545 passed in registers. */ 2546 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute }, 2547 /* Sseregparm attribute says we are using x86_64 calling conventions 2548 for FP arguments. */ 2549 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute }, 2550 /* force_align_arg_pointer says this function realigns the stack at entry. 
*/ 2551 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0, 2552 false, true, true, ix86_handle_cconv_attribute }, 2553#if TARGET_DLLIMPORT_DECL_ATTRIBUTES 2554 { "dllimport", 0, 0, false, false, false, handle_dll_attribute }, 2555 { "dllexport", 0, 0, false, false, false, handle_dll_attribute }, 2556 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute }, 2557#endif 2558 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute }, 2559 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute }, 2560#ifdef SUBTARGET_ATTRIBUTE_TABLE 2561 SUBTARGET_ATTRIBUTE_TABLE, 2562#endif 2563 { NULL, 0, 0, false, false, false, NULL } 2564}; 2565 2566/* Decide whether we can make a sibling call to a function. DECL is the 2567 declaration of the function being targeted by the call and EXP is the 2568 CALL_EXPR representing the call. */ 2569 2570static bool 2571ix86_function_ok_for_sibcall (tree decl, tree exp) 2572{ 2573 tree func; 2574 rtx a, b; 2575 2576 /* If we are generating position-independent code, we cannot sibcall 2577 optimize any indirect call, or a direct call to a global function, 2578 as the PLT requires %ebx be live. */ 2579 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl))) 2580 return false; 2581 2582 if (decl) 2583 func = decl; 2584 else 2585 { 2586 func = TREE_TYPE (TREE_OPERAND (exp, 0)); 2587 if (POINTER_TYPE_P (func)) 2588 func = TREE_TYPE (func); 2589 } 2590 2591 /* Check that the return value locations are the same. Like 2592 if we are returning floats on the 80387 register stack, we cannot 2593 make a sibcall from a function that doesn't return a float to a 2594 function that does or, conversely, from a function that does return 2595 a float to a function that doesn't; the necessary stack adjustment 2596 would not be executed. This is also the place we notice 2597 differences in the return value ABI. 
Note that it is ok for one 2598 of the functions to have void return type as long as the return 2599 value of the other is passed in a register. */ 2600 a = ix86_function_value (TREE_TYPE (exp), func, false); 2601 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), 2602 cfun->decl, false); 2603 if (STACK_REG_P (a) || STACK_REG_P (b)) 2604 { 2605 if (!rtx_equal_p (a, b)) 2606 return false; 2607 } 2608 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) 2609 ; 2610 else if (!rtx_equal_p (a, b)) 2611 return false; 2612 2613 /* If this call is indirect, we'll need to be able to use a call-clobbered 2614 register for the address of the target function. Make sure that all 2615 such registers are not used for passing parameters. */ 2616 if (!decl && !TARGET_64BIT) 2617 { 2618 tree type; 2619 2620 /* We're looking at the CALL_EXPR, we need the type of the function. */ 2621 type = TREE_OPERAND (exp, 0); /* pointer expression */ 2622 type = TREE_TYPE (type); /* pointer type */ 2623 type = TREE_TYPE (type); /* function type */ 2624 2625 if (ix86_function_regparm (type, NULL) >= 3) 2626 { 2627 /* ??? Need to count the actual number of registers to be used, 2628 not the possible number of registers. Fix later. */ 2629 return false; 2630 } 2631 } 2632 2633#if TARGET_DLLIMPORT_DECL_ATTRIBUTES 2634 /* Dllimport'd functions are also called indirectly. */ 2635 if (decl && DECL_DLLIMPORT_P (decl) 2636 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3) 2637 return false; 2638#endif 2639 2640 /* If we forced aligned the stack, then sibcalling would unalign the 2641 stack, which may break the called function. */ 2642 if (cfun->machine->force_align_arg_pointer) 2643 return false; 2644 2645 /* Otherwise okay. That also includes certain types of indirect calls. */ 2646 return true; 2647} 2648 2649/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm" 2650 calling convention attributes; 2651 arguments as in struct attribute_spec.handler. 
*/

static tree
ix86_handle_cconv_attribute (tree *node, tree name,
                             tree args,
                             int flags ATTRIBUTE_UNUSED,
                             bool *no_add_attrs)
{
  /* Calling-convention attributes only make sense on function types.  */
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qs attribute only applies to functions",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("regparm", name))
    {
      tree cst;

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        {
          error ("fastcall and regparm attributes are not compatible");
        }

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
        {
          warning (OPT_Wattributes,
                   "%qs attribute requires an integer constant argument",
                   IDENTIFIER_POINTER (name));
          *no_add_attrs = true;
        }
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
        {
          warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
                   IDENTIFIER_POINTER (name), REGPARM_MAX);
          *no_add_attrs = true;
        }

      /* NOTE(review): if CST was not an INTEGER_CST above, it is still
         handed to compare_tree_int here -- confirm the front end
         guarantees a constant, or this check runs on junk.  */
      if (!TARGET_64BIT
          && lookup_attribute (ix86_force_align_arg_pointer_string,
                               TYPE_ATTRIBUTES (*node))
          && compare_tree_int (cst, REGPARM_MAX-1))
        {
          error ("%s functions limited to %d register parameters",
                 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
        }

      return NULL_TREE;
    }

  /* On x86-64 a single register-passing ABI is used, so the 32-bit
     convention attributes below are ignored with a warning.  */
  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qs attribute ignored",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
  if (is_attribute_p ("fastcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
        {
          error ("fastcall and cdecl attributes are not compatible");
        }
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
        {
          error ("fastcall and stdcall attributes are not compatible");
        }
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
        {
          error ("fastcall and regparm attributes are not compatible");
        }
    }

  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
        {
          error ("stdcall and cdecl attributes are not compatible");
        }
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        {
          error ("stdcall and fastcall attributes are not compatible");
        }
    }

  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
        {
          error ("stdcall and cdecl attributes are not compatible");
        }
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        {
          error ("fastcall and cdecl attributes are not compatible");
        }
    }

  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (tree type1, tree type2)
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched fastcall/regparm types.  */
  if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
       != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
      || (ix86_function_regparm (type1, NULL)
          != ix86_function_regparm (type2, NULL)))
    return 0;

  /* Check for mismatched sseregparm types.  */
  if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
      != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
    return 0;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;

  return 1;
}

/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (tree type, tree decl)
{
  tree attr;
  int regparm = ix86_regparm;
  bool user_convention = false;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
        {
          regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
          user_convention = true;
        }

      /* fastcall implies passing in %ecx/%edx, i.e. regparm(2).  */
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
        {
          regparm = 2;
          user_convention = true;
        }

      /* Use register calling convention for local functions when possible.
         NOTE(review): the !TARGET_64BIT test below is redundant -- we are
         already inside an !TARGET_64BIT block.  */
      if (!TARGET_64BIT && !user_convention && decl
          && flag_unit_at_a_time && !profile_flag)
        {
          struct cgraph_local_info *i = cgraph_local_info (decl);
          if (i && i->local)
            {
              int local_regparm, globals = 0, regno;

              /* Make sure no regparm register is taken by a global register
                 variable.  */
              for (local_regparm = 0; local_regparm < 3; local_regparm++)
                if (global_regs[local_regparm])
                  break;
              /* We can't use regparm(3) for nested functions as these use
                 static chain pointer in third argument.  */
              if (local_regparm == 3
                  && decl_function_context (decl)
                  && !DECL_NO_STATIC_CHAIN (decl))
                local_regparm = 2;
              /* If the function realigns its stackpointer, the
                 prologue will clobber %ecx.  If we've already
                 generated code for the callee, the callee
                 DECL_STRUCT_FUNCTION is gone, so we fall back to
                 scanning the attributes for the self-realigning
                 property.  */
              if ((DECL_STRUCT_FUNCTION (decl)
                   && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
                  || (!DECL_STRUCT_FUNCTION (decl)
                      && lookup_attribute (ix86_force_align_arg_pointer_string,
                                           TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
                local_regparm = 2;
              /* Each global register variable increases register pressure,
                 so the more global reg vars there are, the smaller regparm
                 optimization use, unless requested by the user explicitly.  */
              for (regno = 0; regno < 6; regno++)
                if (global_regs[regno])
                  globals++;
              local_regparm
                = globals < local_regparm ? local_regparm - globals : 0;

              if (local_regparm > regparm)
                regparm = local_regparm;
            }
        }
    }
  return regparm;
}

/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (tree type, tree decl)
{
  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type
          && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
        {
          if (decl)
            error ("Calling %qD with attribute sseregparm without "
                   "SSE/SSE2 enabled", decl);
          else
            error ("Calling %qT with attribute sseregparm without "
                   "SSE/SSE2 enabled", type);
          return 0;
        }

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers,
     even for 32-bit targets.  */
  if (!TARGET_64BIT && decl
      && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
    {
      struct cgraph_local_info *i = cgraph_local_info (decl);
      if (i && i->local)
        return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}

/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
}

/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  /* -mrtd applies only to real functions, not libcall identifiers.  */
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall and fastcall functions will pop the stack if not
       variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
        || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    /* Callee pops only when the prototype is fixed-arity (ends in
       void_type_node) or has no prototype at all.  */
    if (rtd
        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
            || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
                == void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !TARGET_64BIT
      && !KEEP_AGGREGATE_RETURN_POINTER)
    {
      int nregs = ix86_function_regparm (funtype, fundecl);

      /* The hidden return pointer is on the stack only when no
         argument registers are in use.  */
      if (!nregs)
        return GET_MODE_SIZE (Pmode);
    }

  return 0;
}

/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.
*/
bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  if (!TARGET_64BIT)
    {
      /* 32-bit: the integer regparm registers plus, depending on ISA
         flags, the MMX and SSE parameter registers.  */
      if (TARGET_MACHO)
        return (regno < REGPARM_MAX
                || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
        return (regno < REGPARM_MAX
                || (TARGET_MMX && MMX_REGNO_P (regno)
                    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
                || (TARGET_SSE && SSE_REGNO_P (regno)
                    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  /* 64-bit from here on.  */
  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
        return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
          && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
        return true;
    }
  /* RAX is used as hidden argument to va_arg functions.  */
  if (!regno)
    return true;
  for (i = 0; i < REGPARM_MAX; i++)
    if (regno == x86_64_int_parameter_registers[i])
      return true;
  return false;
}

/* Return if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
          && type && TREE_CODE (type) != VECTOR_TYPE);
}

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.
*/ 3040 3041void 3042init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ 3043 tree fntype, /* tree ptr for function decl */ 3044 rtx libname, /* SYMBOL_REF of library name or 0 */ 3045 tree fndecl) 3046{ 3047 static CUMULATIVE_ARGS zero_cum; 3048 tree param, next_param; 3049 3050 if (TARGET_DEBUG_ARG) 3051 { 3052 fprintf (stderr, "\ninit_cumulative_args ("); 3053 if (fntype) 3054 fprintf (stderr, "fntype code = %s, ret code = %s", 3055 tree_code_name[(int) TREE_CODE (fntype)], 3056 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]); 3057 else 3058 fprintf (stderr, "no fntype"); 3059 3060 if (libname) 3061 fprintf (stderr, ", libname = %s", XSTR (libname, 0)); 3062 } 3063 3064 *cum = zero_cum; 3065 3066 /* Set up the number of registers to use for passing arguments. */ 3067 cum->nregs = ix86_regparm; 3068 if (TARGET_SSE) 3069 cum->sse_nregs = SSE_REGPARM_MAX; 3070 if (TARGET_MMX) 3071 cum->mmx_nregs = MMX_REGPARM_MAX; 3072 cum->warn_sse = true; 3073 cum->warn_mmx = true; 3074 cum->maybe_vaarg = false; 3075 3076 /* Use ecx and edx registers if function has fastcall attribute, 3077 else look for regparm information. */ 3078 if (fntype && !TARGET_64BIT) 3079 { 3080 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype))) 3081 { 3082 cum->nregs = 2; 3083 cum->fastcall = 1; 3084 } 3085 else 3086 cum->nregs = ix86_function_regparm (fntype, fndecl); 3087 } 3088 3089 /* Set up the number of SSE registers used for passing SFmode 3090 and DFmode arguments. Warn for mismatching ABI. */ 3091 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl); 3092 3093 /* Determine if this function has variable arguments. This is 3094 indicated by the last argument being 'void_type_mode' if there 3095 are no variable arguments. If there are variable arguments, then 3096 we won't pass anything in registers in 32-bit mode. */ 3097 3098 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs) 3099 { 3100 for (param = (fntype) ? 
TYPE_ARG_TYPES (fntype) : 0; 3101 param != 0; param = next_param) 3102 { 3103 next_param = TREE_CHAIN (param); 3104 if (next_param == 0 && TREE_VALUE (param) != void_type_node) 3105 { 3106 if (!TARGET_64BIT) 3107 { 3108 cum->nregs = 0; 3109 cum->sse_nregs = 0; 3110 cum->mmx_nregs = 0; 3111 cum->warn_sse = 0; 3112 cum->warn_mmx = 0; 3113 cum->fastcall = 0; 3114 cum->float_in_sse = 0; 3115 } 3116 cum->maybe_vaarg = true; 3117 } 3118 } 3119 } 3120 if ((!fntype && !libname) 3121 || (fntype && !TYPE_ARG_TYPES (fntype))) 3122 cum->maybe_vaarg = true; 3123 3124 if (TARGET_DEBUG_ARG) 3125 fprintf (stderr, ", nregs=%d )\n", cum->nregs); 3126 3127 return; 3128} 3129 3130/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE. 3131 But in the case of vector types, it is some vector mode. 3132 3133 When we have only some of our vector isa extensions enabled, then there 3134 are some modes for which vector_mode_supported_p is false. For these 3135 modes, the generic vector support in gcc will choose some non-vector mode 3136 in order to implement the type. By computing the natural mode, we'll 3137 select the proper ABI location for the operand and not depend on whatever 3138 the middle-end decides to do with these vector types. */ 3139 3140static enum machine_mode 3141type_natural_mode (tree type) 3142{ 3143 enum machine_mode mode = TYPE_MODE (type); 3144 3145 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode)) 3146 { 3147 HOST_WIDE_INT size = int_size_in_bytes (type); 3148 if ((size == 8 || size == 16) 3149 /* ??? Generic code allows us to create width 1 vectors. Ignore. */ 3150 && TYPE_VECTOR_SUBPARTS (type) > 1) 3151 { 3152 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); 3153 3154 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) 3155 mode = MIN_MODE_VECTOR_FLOAT; 3156 else 3157 mode = MIN_MODE_VECTOR_INT; 3158 3159 /* Get the mode which has this inner mode and number of units. 
*/ 3160 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) 3161 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type) 3162 && GET_MODE_INNER (mode) == innermode) 3163 return mode; 3164 3165 gcc_unreachable (); 3166 } 3167 } 3168 3169 return mode; 3170} 3171 3172/* We want to pass a value in REGNO whose "natural" mode is MODE. However, 3173 this may not agree with the mode that the type system has chosen for the 3174 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can 3175 go ahead and use it. Otherwise we have to build a PARALLEL instead. */ 3176 3177static rtx 3178gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode, 3179 unsigned int regno) 3180{ 3181 rtx tmp; 3182 3183 if (orig_mode != BLKmode) 3184 tmp = gen_rtx_REG (orig_mode, regno); 3185 else 3186 { 3187 tmp = gen_rtx_REG (mode, regno); 3188 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx); 3189 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp)); 3190 } 3191 3192 return tmp; 3193} 3194 3195/* x86-64 register passing implementation. See x86-64 ABI for details. Goal 3196 of this code is to classify each 8bytes of incoming argument by the register 3197 class and assign registers accordingly. */ 3198 3199/* Return the union class of CLASS1 and CLASS2. 3200 See the x86-64 PS ABI for details. */ 3201 3202static enum x86_64_reg_class 3203merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) 3204{ 3205 /* Rule #1: If both classes are equal, this is the resulting class. */ 3206 if (class1 == class2) 3207 return class1; 3208 3209 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is 3210 the other class. */ 3211 if (class1 == X86_64_NO_CLASS) 3212 return class2; 3213 if (class2 == X86_64_NO_CLASS) 3214 return class1; 3215 3216 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. 
*/ 3217 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) 3218 return X86_64_MEMORY_CLASS; 3219 3220 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ 3221 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) 3222 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) 3223 return X86_64_INTEGERSI_CLASS; 3224 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS 3225 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) 3226 return X86_64_INTEGER_CLASS; 3227 3228 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, 3229 MEMORY is used. */ 3230 if (class1 == X86_64_X87_CLASS 3231 || class1 == X86_64_X87UP_CLASS 3232 || class1 == X86_64_COMPLEX_X87_CLASS 3233 || class2 == X86_64_X87_CLASS 3234 || class2 == X86_64_X87UP_CLASS 3235 || class2 == X86_64_COMPLEX_X87_CLASS) 3236 return X86_64_MEMORY_CLASS; 3237 3238 /* Rule #6: Otherwise class SSE is used. */ 3239 return X86_64_SSE_CLASS; 3240} 3241 3242/* Classify the argument of type TYPE and mode MODE. 3243 CLASSES will be filled by the register class used to pass each word 3244 of the operand. The number of words is returned. In case the parameter 3245 should be passed in memory, 0 is returned. As a special case for zero 3246 sized containers, classes[0] will be NO_CLASS and 1 is returned. 3247 3248 BIT_OFFSET is used internally for handling records and specifies offset 3249 of the offset in bits modulo 256 to avoid overflow cases. 3250 3251 See the x86-64 PS ABI for details. 3252*/ 3253 3254static int 3255classify_argument (enum machine_mode mode, tree type, 3256 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) 3257{ 3258 HOST_WIDE_INT bytes = 3259 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 3260 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 3261 3262 /* Variable sized entities are always passed/returned in memory. 
*/ 3263 if (bytes < 0) 3264 return 0; 3265 3266 if (mode != VOIDmode 3267 && targetm.calls.must_pass_in_stack (mode, type)) 3268 return 0; 3269 3270 if (type && AGGREGATE_TYPE_P (type)) 3271 { 3272 int i; 3273 tree field; 3274 enum x86_64_reg_class subclasses[MAX_CLASSES]; 3275 3276 /* On x86-64 we pass structures larger than 16 bytes on the stack. */ 3277 if (bytes > 16) 3278 return 0; 3279 3280 for (i = 0; i < words; i++) 3281 classes[i] = X86_64_NO_CLASS; 3282 3283 /* Zero sized arrays or structures are NO_CLASS. We return 0 to 3284 signalize memory class, so handle it as special case. */ 3285 if (!words) 3286 { 3287 classes[0] = X86_64_NO_CLASS; 3288 return 1; 3289 } 3290 3291 /* Classify each field of record and merge classes. */ 3292 switch (TREE_CODE (type)) 3293 { 3294 case RECORD_TYPE: 3295 /* For classes first merge in the field of the subclasses. */ 3296 if (TYPE_BINFO (type)) 3297 { 3298 tree binfo, base_binfo; 3299 int basenum; 3300 3301 for (binfo = TYPE_BINFO (type), basenum = 0; 3302 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++) 3303 { 3304 int num; 3305 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8; 3306 tree type = BINFO_TYPE (base_binfo); 3307 3308 num = classify_argument (TYPE_MODE (type), 3309 type, subclasses, 3310 (offset + bit_offset) % 256); 3311 if (!num) 3312 return 0; 3313 for (i = 0; i < num; i++) 3314 { 3315 int pos = (offset + (bit_offset % 64)) / 8 / 8; 3316 classes[i + pos] = 3317 merge_classes (subclasses[i], classes[i + pos]); 3318 } 3319 } 3320 } 3321 /* And now merge the fields of structure. */ 3322 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 3323 { 3324 if (TREE_CODE (field) == FIELD_DECL) 3325 { 3326 int num; 3327 3328 if (TREE_TYPE (field) == error_mark_node) 3329 continue; 3330 3331 /* Bitfields are always classified as integer. Handle them 3332 early, since later code would consider them to be 3333 misaligned integers. 
*/ 3334 if (DECL_BIT_FIELD (field)) 3335 { 3336 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; 3337 i < ((int_bit_position (field) + (bit_offset % 64)) 3338 + tree_low_cst (DECL_SIZE (field), 0) 3339 + 63) / 8 / 8; i++) 3340 classes[i] = 3341 merge_classes (X86_64_INTEGER_CLASS, 3342 classes[i]); 3343 } 3344 else 3345 { 3346 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 3347 TREE_TYPE (field), subclasses, 3348 (int_bit_position (field) 3349 + bit_offset) % 256); 3350 if (!num) 3351 return 0; 3352 for (i = 0; i < num; i++) 3353 { 3354 int pos = 3355 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; 3356 classes[i + pos] = 3357 merge_classes (subclasses[i], classes[i + pos]); 3358 } 3359 } 3360 } 3361 } 3362 break; 3363 3364 case ARRAY_TYPE: 3365 /* Arrays are handled as small records. */ 3366 { 3367 int num; 3368 num = classify_argument (TYPE_MODE (TREE_TYPE (type)), 3369 TREE_TYPE (type), subclasses, bit_offset); 3370 if (!num) 3371 return 0; 3372 3373 /* The partial classes are now full classes. */ 3374 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) 3375 subclasses[0] = X86_64_SSE_CLASS; 3376 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4) 3377 subclasses[0] = X86_64_INTEGER_CLASS; 3378 3379 for (i = 0; i < words; i++) 3380 classes[i] = subclasses[i % num]; 3381 3382 break; 3383 } 3384 case UNION_TYPE: 3385 case QUAL_UNION_TYPE: 3386 /* Unions are similar to RECORD_TYPE but offset is always 0. 3387 */ 3388 3389 /* Unions are not derived. 
*/ 3390 gcc_assert (!TYPE_BINFO (type) 3391 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type))); 3392 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 3393 { 3394 if (TREE_CODE (field) == FIELD_DECL) 3395 { 3396 int num; 3397 3398 if (TREE_TYPE (field) == error_mark_node) 3399 continue; 3400 3401 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 3402 TREE_TYPE (field), subclasses, 3403 bit_offset); 3404 if (!num) 3405 return 0; 3406 for (i = 0; i < num; i++) 3407 classes[i] = merge_classes (subclasses[i], classes[i]); 3408 } 3409 } 3410 break; 3411 3412 default: 3413 gcc_unreachable (); 3414 } 3415 3416 /* Final merger cleanup. */ 3417 for (i = 0; i < words; i++) 3418 { 3419 /* If one class is MEMORY, everything should be passed in 3420 memory. */ 3421 if (classes[i] == X86_64_MEMORY_CLASS) 3422 return 0; 3423 3424 /* The X86_64_SSEUP_CLASS should be always preceded by 3425 X86_64_SSE_CLASS. */ 3426 if (classes[i] == X86_64_SSEUP_CLASS 3427 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS)) 3428 classes[i] = X86_64_SSE_CLASS; 3429 3430 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */ 3431 if (classes[i] == X86_64_X87UP_CLASS 3432 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS)) 3433 classes[i] = X86_64_SSE_CLASS; 3434 } 3435 return words; 3436 } 3437 3438 /* Compute alignment needed. We align all types to natural boundaries with 3439 exception of XFmode that is aligned to 64bits. */ 3440 if (mode != VOIDmode && mode != BLKmode) 3441 { 3442 int mode_alignment = GET_MODE_BITSIZE (mode); 3443 3444 if (mode == XFmode) 3445 mode_alignment = 128; 3446 else if (mode == XCmode) 3447 mode_alignment = 256; 3448 if (COMPLEX_MODE_P (mode)) 3449 mode_alignment /= 2; 3450 /* Misaligned fields are always returned in memory. 
*/ 3451 if (bit_offset % mode_alignment) 3452 return 0; 3453 } 3454 3455 /* for V1xx modes, just use the base mode */ 3456 if (VECTOR_MODE_P (mode) 3457 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes) 3458 mode = GET_MODE_INNER (mode); 3459 3460 /* Classification of atomic types. */ 3461 switch (mode) 3462 { 3463 case SDmode: 3464 case DDmode: 3465 classes[0] = X86_64_SSE_CLASS; 3466 return 1; 3467 case TDmode: 3468 classes[0] = X86_64_SSE_CLASS; 3469 classes[1] = X86_64_SSEUP_CLASS; 3470 return 2; 3471 case DImode: 3472 case SImode: 3473 case HImode: 3474 case QImode: 3475 case CSImode: 3476 case CHImode: 3477 case CQImode: 3478 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) 3479 classes[0] = X86_64_INTEGERSI_CLASS; 3480 else 3481 classes[0] = X86_64_INTEGER_CLASS; 3482 return 1; 3483 case CDImode: 3484 case TImode: 3485 classes[0] = classes[1] = X86_64_INTEGER_CLASS; 3486 return 2; 3487 case CTImode: 3488 return 0; 3489 case SFmode: 3490 if (!(bit_offset % 64)) 3491 classes[0] = X86_64_SSESF_CLASS; 3492 else 3493 classes[0] = X86_64_SSE_CLASS; 3494 return 1; 3495 case DFmode: 3496 classes[0] = X86_64_SSEDF_CLASS; 3497 return 1; 3498 case XFmode: 3499 classes[0] = X86_64_X87_CLASS; 3500 classes[1] = X86_64_X87UP_CLASS; 3501 return 2; 3502 case TFmode: 3503 classes[0] = X86_64_SSE_CLASS; 3504 classes[1] = X86_64_SSEUP_CLASS; 3505 return 2; 3506 case SCmode: 3507 classes[0] = X86_64_SSE_CLASS; 3508 return 1; 3509 case DCmode: 3510 classes[0] = X86_64_SSEDF_CLASS; 3511 classes[1] = X86_64_SSEDF_CLASS; 3512 return 2; 3513 case XCmode: 3514 classes[0] = X86_64_COMPLEX_X87_CLASS; 3515 return 1; 3516 case TCmode: 3517 /* This modes is larger than 16 bytes. 
  */
	return 0;
      case V4SFmode:
      case V4SImode:
      case V16QImode:
      case V8HImode:
      case V2DFmode:
      case V2DImode:
	/* A 16-byte SSE vector fills one XMM register: the low eightbyte
	   gets SSE class, the high eightbyte SSEUP.  */
	classes[0] = X86_64_SSE_CLASS;
	classes[1] = X86_64_SSEUP_CLASS;
	return 2;
      case V2SFmode:
      case V2SImode:
      case V4HImode:
      case V8QImode:
	/* 8-byte (MMX-sized) vectors occupy a single SSE eightbyte.  */
	classes[0] = X86_64_SSE_CLASS;
	return 1;
      case BLKmode:
      case VOIDmode:
	return 0;
      default:
	/* Remaining vector modes: integer-element vectors of at most
	   16 bytes are classified as one or two INTEGER eightbytes;
	   anything larger is passed in memory (return 0).  */
	gcc_assert (VECTOR_MODE_P (mode));

	if (bytes > 16)
	  return 0;

	gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

	if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	  classes[0] = X86_64_INTEGERSI_CLASS;
	else
	  classes[0] = X86_64_INTEGER_CLASS;
	classes[1] = X86_64_INTEGER_CLASS;
	return 1 + (bytes > 8);
      }
}

/* Examine the argument and return set number of register required in each
   class.  Return 0 iff parameter should be passed in memory.

   MODE/TYPE describe the argument; TYPE may be NULL for libcalls.
   IN_RETURN is nonzero when classifying a return value rather than an
   argument (x87 classes are only usable for return values).
   On success *INT_NREGS and *SSE_NREGS receive the number of
   general-purpose and SSE registers consumed.  */
static int
examine_argument (enum machine_mode mode, tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class class[MAX_CLASSES];
  int n = classify_argument (mode, type, class, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  /* classify_argument returning 0 means "pass in memory".  */
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (class[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	/* SSEUP extends the preceding SSE eightbyte; it does not
	   consume an extra register by itself.  */
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	/* x87 registers can only carry return values, never arguments.  */
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	/* MEMORY class is folded away by classify_argument (it returns 0
	   instead), so it must never reach this point.  */
	gcc_unreachable ();
      }
  return 1;
}

/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.
  */

/* Build the rtx describing where an argument or return value lives:
   either a single hard REG, or a PARALLEL of (reg, byte-offset) pairs
   when the value is split across several registers.  Returns NULL when
   the value must be passed in memory (or on an ABI error).

   MODE/ORIG_MODE and TYPE describe the value; IN_RETURN distinguishes
   return values from arguments; NINTREGS/NSSEREGS are the registers
   still available; INTREG points at the next free integer register
   numbers and SSE_REGNO is the index of the next free SSE register.  */
static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
		     tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
	fprintf (stderr, "Memory class\n");
      else
	{
	  fprintf (stderr, "Classes:");
	  for (i = 0; i < n; i++)
	    {
	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
	    }
	  fprintf (stderr, "\n");
	}
    }
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
			 &needed_sseregs))
    return NULL;
  /* Not enough free registers of the required kinds: pass in memory.  */
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      /* Each diagnostic is issued at most once per compilation, via the
	 static issued_* flags above.  */
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_80387 && in_return)
    for (i = 0; i < n; i++)
      if (class[i] == X86_64_X87_CLASS
	  || class[i] == X86_64_X87UP_CLASS
	  || class[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  /* Two-eightbyte values that still fit a single hard register.  */
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    /* Trailing partial eightbyte: pick the smallest integer mode
	       that covers the remaining bytes.  */
	    tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (class[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode, *intreg),
					     GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (SFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (DFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  /* An SSE eightbyte followed by SSEUP forms one TImode register;
	     the extra i++ below skips the SSEUP slot it absorbed.  */
	  if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
	    tmpmode = TImode;
	  else
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  if (tmpmode == TImode)
	    i++;
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
		      tree type, int named)
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type);

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
	     "mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, cum->sse_nregs,
	     GET_MODE_NAME (mode), named);

  if (TARGET_64BIT)
    {
      int int_nregs, sse_nregs;
      /* x86-64: either the argument fits in the remaining registers
	 (consume them), or it goes to the stack (advance words).  */
      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
	cum->words += words;
      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
	{
	  cum->nregs -= int_nregs;
	  cum->sse_nregs -= sse_nregs;
	  cum->regno += int_nregs;
	  cum->sse_regno += sse_nregs;
	}
      else
	cum->words += words;
    }
  else
    {
      switch (mode)
	{
	default:
	  break;

	case BLKmode:
	  if (bytes < 0)
	    break;
	  /* FALLTHRU */

	case DImode:
	case SImode:
	case HImode:
	case QImode:
	  cum->words += words;
	  cum->nregs -= words;
	  cum->regno += words;

	  if (cum->nregs <= 0)
	    {
	      cum->nregs = 0;
	      cum->regno = 0;
	    }
	  break;

	/* DFmode falls through to SFmode, and both fall through to the
	   SSE cases, unless the float_in_sse level is too low for the
	   mode (2 is required for DFmode, 1 for SFmode).  */
	case DFmode:
	  if (cum->float_in_sse < 2)
	    break;
	case SFmode:
	  if (cum->float_in_sse < 1)
	    break;
	  /* FALLTHRU */

	case TImode:
	case V16QImode:
	case V8HImode:
	case V4SImode:
	case V2DImode:
	case V4SFmode:
	case V2DFmode:
	  if (!type || !AGGREGATE_TYPE_P (type))
	    {
	      cum->sse_words += words;
	      cum->sse_nregs -= 1;
	      cum->sse_regno += 1;
	      if (cum->sse_nregs <= 0)
		{
		  cum->sse_nregs = 0;
		  cum->sse_regno = 0;
		}
	    }
	  break;

	case V8QImode:
	case V4HImode:
	case V2SImode:
	case V2SFmode:
	  if (!type || !AGGREGATE_TYPE_P (type))
	    {
	      cum->mmx_words += words;
	      cum->mmx_nregs -= 1;
	      cum->mmx_regno += 1;
	      if (cum->mmx_nregs <= 0)
		{
		  cum->mmx_nregs = 0;
		  cum->mmx_regno = 0;
		}
	    }
	  break;
	}
    }
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
	      tree type, int named)
{
  enum machine_mode mode = orig_mode;
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  static bool warnedsse, warnedmmx;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type);

  /* Handle a hidden AL argument containing number of registers for varargs
     x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
     any AL settings.  */
  if (mode == VOIDmode)
    {
      if (TARGET_64BIT)
	return GEN_INT (cum->maybe_vaarg
			? (cum->sse_nregs < 0
			   ? SSE_REGPARM_MAX
			   : cum->sse_regno)
			: -1);
      else
	return constm1_rtx;
    }
  if (TARGET_64BIT)
    ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
			       cum->sse_nregs,
			       &x86_64_int_parameter_registers [cum->regno],
			       cum->sse_regno);
  else
    switch (mode)
      {
      /* For now, pass fp/complex values on the stack.
  */
      default:
	break;

      case BLKmode:
	if (bytes < 0)
	  break;
	/* FALLTHRU */
      case DImode:
      case SImode:
      case HImode:
      case QImode:
	if (words <= cum->nregs)
	  {
	    int regno = cum->regno;

	    /* Fastcall allocates the first two DWORD (SImode) or
	       smaller arguments to ECX and EDX.  */
	    if (cum->fastcall)
	      {
		if (mode == BLKmode || mode == DImode)
		  break;

		/* ECX not EAX is the first allocated register.  */
		if (regno == 0)
		  regno = 2;
	      }
	    ret = gen_rtx_REG (mode, regno);
	  }
	break;
      /* DFmode/SFmode fall through to the SSE cases only when the
	 float_in_sse level permits (2 for DFmode, 1 for SFmode).  */
      case DFmode:
	if (cum->float_in_sse < 2)
	  break;
      case SFmode:
	if (cum->float_in_sse < 1)
	  break;
	/* FALLTHRU */
      case TImode:
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
      case V4SFmode:
      case V2DFmode:
	if (!type || !AGGREGATE_TYPE_P (type))
	  {
	    /* Warn once per compilation if an SSE-register argument is
	       used while SSE code generation is disabled.  */
	    if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	      {
		warnedsse = true;
		warning (0, "SSE vector argument without SSE enabled "
			 "changes the ABI");
	      }
	    if (cum->sse_nregs)
	      ret = gen_reg_or_parallel (mode, orig_mode,
					 cum->sse_regno + FIRST_SSE_REG);
	  }
	break;
      case V8QImode:
      case V4HImode:
      case V2SImode:
      case V2SFmode:
	if (!type || !AGGREGATE_TYPE_P (type))
	  {
	    if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	      {
		warnedmmx = true;
		warning (0, "MMX vector argument without MMX enabled "
			 "changes the ABI");
	      }
	    if (cum->mmx_nregs)
	      ret = gen_reg_or_parallel (mode, orig_mode,
					 cum->mmx_regno + FIRST_MMX_REG);
	  }
	break;
      }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	print_simple_rtl (stderr, ret);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}

/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.

   On x86-64 only variable-sized types (int_size_in_bytes == -1) are
   passed by reference; everything on 32-bit i386 is passed by value.  */

static bool
ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			tree type, bool named ATTRIBUTE_UNUSED)
{
  if (!TARGET_64BIT)
    return 0;

  if (type && int_size_in_bytes (type) == -1)
    {
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference\n");
      return 1;
    }

  return 0;
}

/* Return true when TYPE should be 128bit aligned for 32bit argument passing
   ABI.  Only called if TARGET_SSE.  Recurses through aggregate members
   (including C++ base classes via TYPE_BINFO) looking for an SSE vector.  */
static bool
contains_128bit_aligned_vector_p (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (SSE_REG_MODE_P (mode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  /* Members of a type aligned below 128 bits cannot themselves require
     128-bit alignment, so stop recursing here.  */
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    if (TYPE_BINFO (type))
	      {
		tree binfo, base_binfo;
		int i;

		for (binfo = TYPE_BINFO (type), i = 0;
		     BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
		  if (contains_128bit_aligned_vector_p
		      (BINFO_TYPE (base_binfo)))
		    return true;
	      }
	    /* And now merge the fields of structure.  */
	    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}

/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  The result is clamped to the range
   [PARM_BOUNDARY, 128].  */

int
ix86_function_arg_boundary (enum machine_mode mode, tree type)
{
  int align;
  if (type)
    align = TYPE_ALIGN (type);
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  if (!TARGET_64BIT)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!TARGET_SSE)
	align = PARM_BOUNDARY;
      else if (!type)
	{
	  if (!SSE_REG_MODE_P (mode))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!contains_128bit_aligned_vector_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > 128)
    align = 128;
  return align;
}

/* Return true if N is a possible register number of function value.
  */
bool
ix86_function_value_regno_p (int regno)
{
  /* Darwin (TARGET_MACHO) is handled separately from the generic case,
     but both accept %eax, %st(0) (with -mfp-ret-in-387), and %xmm0
     (with SSE); the generic 32-bit case additionally accepts %mm0.  */
  if (TARGET_MACHO)
    {
      if (!TARGET_64BIT)
	{
	  return ((regno) == 0
		  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
		  || ((regno) == FIRST_SSE_REG && TARGET_SSE));
	}
      return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
	      || ((regno) == FIRST_SSE_REG && TARGET_SSE)
	      || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
    }
  else
    {
      if (regno == 0
	  || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
	  || (regno == FIRST_SSE_REG && TARGET_SSE))
	return true;

      /* MMX register returns only exist for 32-bit targets.  */
      if (!TARGET_64BIT
	  && (regno == FIRST_MMX_REG && TARGET_MMX))
	return true;

      return false;
    }
}

/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */
rtx
ix86_function_value (tree valtype, tree fntype_or_decl,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode natmode = type_natural_mode (valtype);

  if (TARGET_64BIT)
    {
      rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
				     1, REGPARM_MAX, SSE_REGPARM_MAX,
				     x86_64_int_return_registers, 0);
      /* For zero sized structures, construct_container return NULL, but we
	 need to keep rest of compiler happy by returning meaningful value.  */
      if (!ret)
	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
      return ret;
    }
  else
    {
      /* FNTYPE_OR_DECL may be either a FUNCTION_DECL or a function type;
	 split it into the decl (if any) and the type for ix86_value_regno.  */
      tree fn = NULL_TREE, fntype;
      if (fntype_or_decl
	  && DECL_P (fntype_or_decl))
	fn = fntype_or_decl;
      fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
      return gen_rtx_REG (TYPE_MODE (valtype),
			  ix86_value_regno (natmode, fn, fntype));
    }
}

/* Return true iff type is returned in memory.
  */
int
ix86_return_in_memory (tree type)
{
  int needed_intregs, needed_sseregs, size;
  enum machine_mode mode = type_natural_mode (type);

  /* x86-64: in memory iff the psABI classification says so.  */
  if (TARGET_64BIT)
    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);

  if (mode == BLKmode)
    return 1;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return 0;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return 0;

      /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exits.  */
      if (size == 8)
	return (TARGET_MMX ? 0 : 1);

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return (TARGET_SSE ? 0 : 1);
    }

  if (mode == XFmode)
    return 0;

  if (mode == TDmode)
    return 1;

  /* Anything wider than 12 bytes (e.g. beyond what fits the EAX:EDX /
     x87 return conventions handled above) goes to memory.  */
  if (size > 12)
    return 1;
  return 0;
}

/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  /* Issue each ABI-change warning at most once per compilation.  */
  static bool warnedsse, warnedmmx;

  if (type)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
	{
	  if (mode == TImode
	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
	    {
	      warnedsse = true;
	      warning (0, "SSE vector return without SSE enabled "
		       "changes the ABI");
	    }
	}

      if (!TARGET_MMX && !warnedmmx)
	{
	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector return without MMX enabled "
		       "changes the ABI");
	    }
	}
    }

  /* Always NULL: this hook is used only for its warning side effect here.  */
  return NULL;
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */
rtx
ix86_libcall_value (enum machine_mode mode)
{
  if (TARGET_64BIT)
    {
      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	case TFmode:
	case SDmode:
	case DDmode:
	case TDmode:
	  return gen_rtx_REG (mode, FIRST_SSE_REG);
	case XFmode:
	case XCmode:
	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
	case TCmode:
	  return NULL;
	default:
	  return gen_rtx_REG (mode, 0);
	}
    }
  else
    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
}

/* Given a mode, return the register to use for a return value.  */

static int
ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
{
  gcc_assert (!TARGET_64BIT);

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    return TARGET_MMX ? FIRST_MMX_REG : 0;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    return TARGET_SSE ? FIRST_SSE_REG : 0;

  /* Decimal floating point values can go in %eax, unlike other float modes.  */
  if (DECIMAL_FLOAT_MODE_P (mode))
    return 0;

  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
  if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
    return 0;

  /* Floating point return values in %st(0), except for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((func || fntype)
      && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, func);
      if ((sse_level >= 1 && mode == SFmode)
	  || (sse_level == 2 && mode == DFmode))
	return FIRST_SSE_REG;
    }

  return FIRST_FLOAT_REG;
}

/* Create the va_list data type.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.
  */
  if (!TARGET_64BIT)
    return build_pointer_type (char_type_node);

  /* x86-64 va_list is the four-field __va_list_tag record described by
     the psABI: gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}

/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  Emits the code
   that spills the named-argument registers into the register save area
   (integer registers unconditionally, SSE registers via the
   sse_prologue_save computed-jump sequence).  x86-64 only.  */

static void
ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  tree fntype;
  int stdarg_p;
  int i;

  if (!TARGET_64BIT)
    return;

  /* Nothing to save when the function never reads the counters.  */
  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;

  cfun->stack_alignment_needed = 128;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  if (!no_rtl)
    save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  /* Spill the integer parameter registers that might hold unnamed
     arguments, bounded by how much of the save area the function
     actually reads (va_list_gpr_size).  */
  for (i = next_cum.regno;
       i < ix86_regparm
       && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
       i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
					x86_64_int_parameter_registers[i]));
    }

  if (next_cum.sse_nregs && cfun->va_list_fpr_size)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function.  We use
	 sse_prologue_save insn template that produces computed jump across
	 SSE saves.  We need some preparation work to get this working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to :
         label - 5*eax + nnamed_sse_arguments*5  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      /* AL holds the number of SSE registers actually used by the caller.  */
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      gen_rtx_MULT (Pmode, nsse_reg,
					    GEN_INT (4))));
      if (next_cum.sse_regno)
	emit_move_insn
	  (nsse_reg,
	   gen_rtx_CONST (DImode,
			  gen_rtx_PLUS (DImode,
					label_ref,
					GEN_INT (next_cum.sse_regno * 4))));
      else
	emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use pointer
	 pointing 127 bytes after first byte to store - this is needed to keep
	 instruction size limited by 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      plus_constant (save_area,
					     8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
					GEN_INT (next_cum.sse_regno), label));
    }

}

/* Implement va_start.  */

void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  /* Walk the __va_list_tag fields in declaration order:
     gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
	     (int) words, (int) n_gpr, (int) n_fpr);

  if (cfun->va_list_gpr_size)
    {
      /* gp_offset counts bytes into the save area: 8 per integer reg.  */
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type, gpr,
		  build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (cfun->va_list_fpr_size)
    {
      /* fp_offset starts past the integer area (8*REGPARM_MAX) and
	 advances 16 bytes per SSE register.  */
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.
*/
  /* NOTE(review): tail of a function whose head is outside this view
     (presumably the va_start expander — confirm against the full file).
     It points the overflow area past the named args and, when any
     va_list GPR/FPR slots are in use, records the register save area
     address (saved right above the stack frame by the prologue).  */
  type = TREE_TYPE (ovf);
  t = make_tree (type, virtual_incoming_args_rtx);
  if (words != 0)
    t = build2 (PLUS_EXPR, type, t,
		build_int_cst (type, words * UNITS_PER_WORD));
  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
    {
      /* Find the register save area.
	 Prologue of the function save it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}

/* Implement va_arg by gimplification.

   VALIST is the va_list expression, TYPE the type being fetched;
   generated statements are appended to *PRE_P / *POST_P.  Returns a
   tree dereferencing the computed argument address.

   On 32-bit targets this defers entirely to the generic
   std_gimplify_va_arg_expr.  On 64-bit targets it implements the
   register-save-area scheme: if the argument was passed (fully) in
   registers, fetch it from the save area recorded in the va_list's
   SAV field, guarded by runtime checks on the GPR/FPR offsets;
   otherwise fall through to the overflow (stack) area OVF.  */

tree
ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  /* Pick apart the four fields of the x86-64 va_list structure:
     gpr/fpr offsets, overflow area pointer, register save area.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build_va_arg_indirect_ref (valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Arguments passed by reference are fetched as a pointer and
     dereferenced once more at the end.  */
  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* CONTAINER is NULL when the argument is passed in memory only;
     otherwise it describes the register (or register pieces) used.  */
  nat_mode = type_natural_mode (type);
  container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label ();
      lab_over = create_artificial_label ();

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      /* Over-aligned aggregates cannot be read directly out of the
	 save area; they must be assembled in a temporary.  */
      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
         on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      /* SSE slots are 16 bytes apart in the save area.  */
	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      /* Integer slots are 8 bytes apart in the save area.  */
	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	  DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
	}

      /* First ensure that we fit completely in registers: jump to
	 lab_false (the memory path) if the running gpr/fpr offsets
	 show insufficient registers remain.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_convert (ptr_type_node, gpr);
	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_convert (ptr_type_node, fpr);
	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (need_temp)
	{
	  int i;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  t = build2 (MODIFY_EXPR, void_type_node, addr, t);
	  gimplify_and_add (t, pre_p);

	  /* Copy each register piece from the save area into the
	     temporary at the offset recorded in the container slot.  */
	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      tree addr_type = build_pointer_type (piece_type);
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
				       size_int (src_offset)));
	      src = build_va_arg_indirect_ref (src_addr);

	      dest_addr = fold_convert (addr_type, addr);
	      dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
					size_int (INTVAL (XEXP (slot, 1)))));
	      dest = build_va_arg_indirect_ref (dest_addr);

	      t = build2 (MODIFY_EXPR, void_type_node, dest, src);
	      gimplify_and_add (t, pre_p);
	    }
	}

      /* Bump the gpr/fpr offsets past the registers just consumed.  */
      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
	  gimplify_and_add (t, pre_p);
	}

      t = build1 (GOTO_EXPR, void_type_node, lab_over);
      gimplify_and_add (t, pre_p);

      t = build1 (LABEL_EXPR, void_type_node, lab_false);
      append_to_statement_list (t, pre_p);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  Round OVF up to the
     argument's boundary (ovf = (ovf + align-1) & -align).  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
      || integer_zerop (TYPE_SIZE (type)))
    t = ovf;
  else
    {
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
		  build_int_cst (TREE_TYPE (ovf), align - 1));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }
  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);

  t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
  gimplify_and_add (t2, pre_p);

  /* Advance the overflow pointer by the rounded-up argument size.  */
  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  gimplify_and_add (t, pre_p);

  if (container)
    {
      t = build1 (LABEL_EXPR, void_type_node, lab_over);
      append_to_statement_list (t, pre_p);
    }

  ptrtype = build_pointer_type (type);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}

/* Return nonzero if OPNUM's MEM should be matched
   in movabs* patterns.
*/

/* Return nonzero if operand OPNUM of INSN is a MEM that may be matched
   by movabs* patterns: the operand (stripped of SUBREGs) must be a MEM,
   and volatile MEMs are only accepted when volatile_ok is set.  */
int
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (GET_CODE (mem) == MEM);
  return (volatile_ok || !MEM_VOLATILE_P (mem));
}

/* Initialize the table of extra 80387 mathematical constants
   (ext_80387_constants_table), one entry per special load insn
   fldlg2/fldln2/fldl2e/fldl2t/fldpi, and set
   ext_80387_constants_init so this runs only once.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}

/* Return true if the constant is something that can be loaded with
   a special instruction.

   Returns -1 if X is not a floating-point CONST_DOUBLE, 0 if no
   special insn applies, 1 for fldz (0.0), 2 for fld1 (1.0), and
   3..7 for the extra XFmode constants in the table above (these are
   only considered when optimizing for size or when tuning says the
   extended constants are a win).  The encoding must stay in sync
   with standard_80387_constant_opcode/rtx below.  */

int
standard_80387_constant_p (rtx x)
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;

  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (GET_MODE (x) == XFmode
      && (optimize_size || x86_ext_80387_constants & TUNEMASK))
    {
      REAL_VALUE_TYPE r;
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  Aborts if X is not one of the constants
   recognized by standard_80387_constant_p.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    default:
      gcc_unreachable ();
    }
}

/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p (only
   indices 3..7, the table-backed constants, are valid here).  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
				       XFmode);
}

/* Return 1 if mode is a valid mode for sse.  */
static int
standard_sse_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      return 1;

    default:
      return 0;
    }
}

/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
*/

/* Return nonzero if X is a constant loadable into an SSE register
   without a memory reference: 1 for all-zeros (xorps/xorpd/pxor),
   2 for all-ones when SSE2's pcmpeqd is available, -1 for all-ones
   without SSE2, 0 otherwise.  Kept in sync with
   standard_sse_constant_opcode below.  */
int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode)
      && standard_sse_mode_p (mode))
    return TARGET_SSE2 ? 2 : -1;

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X into the output operand of INSN.  For all-zeros the
   xor variant is chosen from INSN's mode attribute; aborts if X is
   not a loadable SSE constant.  */

const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      if (get_attr_mode (insn) == MODE_V4SF)
	return "xorps\t%0, %0";
      else if (get_attr_mode (insn) == MODE_V2DF)
	return "xorpd\t%0, %0";
      else
	return "pxor\t%0, %0";
    case 2:
      return "pcmpeqd\t%0, %0";
    }
  gcc_unreachable ();
}

/* Returns 1 if OP contains a symbol reference (SYMBOL_REF or
   LABEL_REF anywhere in the RTX), walking all 'e' and 'E' operands
   recursively.  */

int
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.  */

int
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32 pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}

/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return 1;

  if (current_function_profile)
    return 1;

  return 0;
}

/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Bitmask of registers for which a get-pc thunk label has been
   emitted; bit N set means register N's thunk is needed.  */
static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.
*/

/* Fill NAME (at least 32 bytes) with the label to use for the pc
   thunk of register REGNO: a hidden link-once symbol when supported,
   otherwise an internal "LPR" label.  32-bit only.  */
static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}


/* At end of file, emit the -fpic get-pc thunks: for every register
   recorded in pic_labels_used, output a tiny function that loads that
   register with the return address of the caller and then returns.  */

void
ix86_file_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  switch_to_section (darwin_sections[text_coal_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n", asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  tree decl;

	  /* Emit the thunk as a hidden COMDAT function so multiple
	     objects can share one copy.  */
	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      /* Thunk body: mov (%esp), %reg; ret -- i.e. copy the return
	 address into the requested register.  */
      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}

/* Emit code for the SET_GOT patterns: load DEST with the address of
   the GOT, either via an inline call/pop sequence or via the shared
   pc thunk (recording its use in pic_labels_used).  Returns the
   (empty) template string expected by output_asm_insn callers.  */

const char *
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
{
  rtx xops[3];

  xops[0] = dest;
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

      if (!flag_pic)
	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
	output_asm_insn ("call\t%a2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
         is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif

      (*targetm.asm_out.internal_label) (asm_out_file, "L",
				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      if (flag_pic)
	output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
         is what will be referenced by the Mach-O PIC subsystem.  */
#if TARGET_MACHO
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
      else
        targetm.asm_out.internal_label (asm_out_file, "L",
					   CODE_LABEL_NUMBER (label));
#endif
    }

  if (TARGET_MACHO)
    return "";

  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);

  return "";
}

/* Generate an "push" pattern for input ARG: a SET storing ARG through
   a PRE_DEC of the stack pointer.  */

static rtx
gen_push (rtx arg)
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (Pmode,
				   gen_rtx_PRE_DEC (Pmode,
						    stack_pointer_rtx)),
		      arg);
}

/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  Scans eax/ecx/edx (regnos 2..0) in a leaf,
   non-profiled function; returns INVALID_REGNUM otherwise.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (current_function_is_leaf && !current_function_profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i;
      for (i = 2; i >= 0; --i)
        if (!regs_ever_live[i])
	  return i;
    }

  return INVALID_REGNUM;
}

/* Return 1 if we need to save REGNO.
*/

/* Return 1 if register REGNO must be saved in the prologue.  When
   MAYBE_EH_RETURN is set, the EH return data registers are also
   considered live and thus saved.  */
static int
ix86_save_reg (unsigned int regno, int maybe_eh_return)
{
  /* The PIC register needs saving when it is actually used -- unless
     an unused call-clobbered register can carry the GOT pointer
     instead (see ix86_select_alt_pic_regnum).  */
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile
	  || current_function_calls_eh_return
	  || current_function_uses_const_pool))
    {
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
	return 0;
      return 1;
    }

  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return 1;
	}
    }

  /* The register used to realign the stack must survive the call.  */
  if (cfun->machine->force_align_arg_pointer
      && regno == REGNO (cfun->machine->force_align_arg_pointer))
    return 1;

  /* Otherwise: live call-saved registers, except the hard frame
     pointer when it is already preserved by the frame setup.  */
  return (regs_ever_live[regno]
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}

/* Return number of registers to be saved on the stack
   (per ix86_save_reg with maybe_eh_return set).  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.
*/ 5377 5378HOST_WIDE_INT 5379ix86_initial_elimination_offset (int from, int to) 5380{ 5381 struct ix86_frame frame; 5382 ix86_compute_frame_layout (&frame); 5383 5384 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 5385 return frame.hard_frame_pointer_offset; 5386 else if (from == FRAME_POINTER_REGNUM 5387 && to == HARD_FRAME_POINTER_REGNUM) 5388 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; 5389 else 5390 { 5391 gcc_assert (to == STACK_POINTER_REGNUM); 5392 5393 if (from == ARG_POINTER_REGNUM) 5394 return frame.stack_pointer_offset; 5395 5396 gcc_assert (from == FRAME_POINTER_REGNUM); 5397 return frame.stack_pointer_offset - frame.frame_pointer_offset; 5398 } 5399} 5400 5401/* Fill structure ix86_frame about frame of currently computed function. */ 5402 5403static void 5404ix86_compute_frame_layout (struct ix86_frame *frame) 5405{ 5406 HOST_WIDE_INT total_size; 5407 unsigned int stack_alignment_needed; 5408 HOST_WIDE_INT offset; 5409 unsigned int preferred_alignment; 5410 HOST_WIDE_INT size = get_frame_size (); 5411 5412 frame->nregs = ix86_nsaved_regs (); 5413 total_size = size; 5414 5415 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT; 5416 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT; 5417 5418 /* During reload iteration the amount of registers saved can change. 5419 Recompute the value as needed. Do not recompute when amount of registers 5420 didn't change as reload does multiple calls to the function and does not 5421 expect the decision to change within single iteration. */ 5422 if (!optimize_size 5423 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs) 5424 { 5425 int count = frame->nregs; 5426 5427 cfun->machine->use_fast_prologue_epilogue_nregs = count; 5428 /* The fast prologue uses move instead of push to save registers. 
This 5429 is significantly longer, but also executes faster as modern hardware 5430 can execute the moves in parallel, but can't do that for push/pop. 5431 5432 Be careful about choosing what prologue to emit: When function takes 5433 many instructions to execute we may use slow version as well as in 5434 case function is known to be outside hot spot (this is known with 5435 feedback only). Weight the size of function by number of registers 5436 to save as it is cheap to use one or two push instructions but very 5437 slow to use many of them. */ 5438 if (count) 5439 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; 5440 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL 5441 || (flag_branch_probabilities 5442 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT)) 5443 cfun->machine->use_fast_prologue_epilogue = false; 5444 else 5445 cfun->machine->use_fast_prologue_epilogue 5446 = !expensive_function_p (count); 5447 } 5448 if (TARGET_PROLOGUE_USING_MOVE 5449 && cfun->machine->use_fast_prologue_epilogue) 5450 frame->save_regs_using_mov = true; 5451 else 5452 frame->save_regs_using_mov = false; 5453 5454 5455 /* Skip return address and saved base pointer. */ 5456 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD; 5457 5458 frame->hard_frame_pointer_offset = offset; 5459 5460 /* Do some sanity checking of stack_alignment_needed and 5461 preferred_alignment, since i386 port is the only using those features 5462 that may break easily. 
*/ 5463 5464 gcc_assert (!size || stack_alignment_needed); 5465 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); 5466 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT); 5467 gcc_assert (stack_alignment_needed 5468 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT); 5469 5470 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT) 5471 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT; 5472 5473 /* Register save area */ 5474 offset += frame->nregs * UNITS_PER_WORD; 5475 5476 /* Va-arg area */ 5477 if (ix86_save_varrargs_registers) 5478 { 5479 offset += X86_64_VARARGS_SIZE; 5480 frame->va_arg_size = X86_64_VARARGS_SIZE; 5481 } 5482 else 5483 frame->va_arg_size = 0; 5484 5485 /* Align start of frame for local function. */ 5486 frame->padding1 = ((offset + stack_alignment_needed - 1) 5487 & -stack_alignment_needed) - offset; 5488 5489 offset += frame->padding1; 5490 5491 /* Frame pointer points here. */ 5492 frame->frame_pointer_offset = offset; 5493 5494 offset += size; 5495 5496 /* Add outgoing arguments area. Can be skipped if we eliminated 5497 all the function calls as dead code. 5498 Skipping is however impossible when function calls alloca. Alloca 5499 expander assumes that last current_function_outgoing_args_size 5500 of stack frame are unused. */ 5501 if (ACCUMULATE_OUTGOING_ARGS 5502 && (!current_function_is_leaf || current_function_calls_alloca 5503 || ix86_current_function_calls_tls_descriptor)) 5504 { 5505 offset += current_function_outgoing_args_size; 5506 frame->outgoing_arguments_size = current_function_outgoing_args_size; 5507 } 5508 else 5509 frame->outgoing_arguments_size = 0; 5510 5511 /* Align stack boundary. Only needed if we're calling another function 5512 or using alloca. 
*/ 5513 if (!current_function_is_leaf || current_function_calls_alloca 5514 || ix86_current_function_calls_tls_descriptor) 5515 frame->padding2 = ((offset + preferred_alignment - 1) 5516 & -preferred_alignment) - offset; 5517 else 5518 frame->padding2 = 0; 5519 5520 offset += frame->padding2; 5521 5522 /* We've reached end of stack frame. */ 5523 frame->stack_pointer_offset = offset; 5524 5525 /* Size prologue needs to allocate. */ 5526 frame->to_allocate = 5527 (size + frame->padding1 + frame->padding2 5528 + frame->outgoing_arguments_size + frame->va_arg_size); 5529 5530 if ((!frame->to_allocate && frame->nregs <= 1) 5531 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000)) 5532 frame->save_regs_using_mov = false; 5533 5534 if (TARGET_RED_ZONE && current_function_sp_is_unchanging 5535 && current_function_is_leaf 5536 && !ix86_current_function_calls_tls_descriptor) 5537 { 5538 frame->red_zone_size = frame->to_allocate; 5539 if (frame->save_regs_using_mov) 5540 frame->red_zone_size += frame->nregs * UNITS_PER_WORD; 5541 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) 5542 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; 5543 } 5544 else 5545 frame->red_zone_size = 0; 5546 frame->to_allocate -= frame->red_zone_size; 5547 frame->stack_pointer_offset -= frame->red_zone_size; 5548#if 0 5549 fprintf (stderr, "nregs: %i\n", frame->nregs); 5550 fprintf (stderr, "size: %i\n", size); 5551 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed); 5552 fprintf (stderr, "padding1: %i\n", frame->padding1); 5553 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size); 5554 fprintf (stderr, "padding2: %i\n", frame->padding2); 5555 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate); 5556 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size); 5557 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset); 5558 fprintf (stderr, "hard_frame_pointer_offset: %i\n", 5559 frame->hard_frame_pointer_offset); 5560 
fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset); 5561#endif 5562} 5563 5564/* Emit code to save registers in the prologue. */ 5565 5566static void 5567ix86_emit_save_regs (void) 5568{ 5569 unsigned int regno; 5570 rtx insn; 5571 5572 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; ) 5573 if (ix86_save_reg (regno, true)) 5574 { 5575 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno))); 5576 RTX_FRAME_RELATED_P (insn) = 1; 5577 } 5578} 5579 5580/* Emit code to save registers using MOV insns. First register 5581 is restored from POINTER + OFFSET. */ 5582static void 5583ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset) 5584{ 5585 unsigned int regno; 5586 rtx insn; 5587 5588 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 5589 if (ix86_save_reg (regno, true)) 5590 { 5591 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer), 5592 Pmode, offset), 5593 gen_rtx_REG (Pmode, regno)); 5594 RTX_FRAME_RELATED_P (insn) = 1; 5595 offset += UNITS_PER_WORD; 5596 } 5597} 5598 5599/* Expand prologue or epilogue stack adjustment. 5600 The pattern exist to put a dependency on all ebp-based memory accesses. 5601 STYLE should be negative if instructions should be marked as frame related, 5602 zero if %r11 register is live and cannot be freely used and positive 5603 otherwise. */ 5604 5605static void 5606pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style) 5607{ 5608 rtx insn; 5609 5610 if (! TARGET_64BIT) 5611 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset)); 5612 else if (x86_64_immediate_operand (offset, DImode)) 5613 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset)); 5614 else 5615 { 5616 rtx r11; 5617 /* r11 is used by indirect sibcall return as well, set before the 5618 epilogue and used after the epilogue. ATM indirect sibcall 5619 shouldn't be used together with huge frame sizes in one 5620 function because of the frame_size check in sibcall.c. 
*/ 5621 gcc_assert (style); 5622 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */); 5623 insn = emit_insn (gen_rtx_SET (DImode, r11, offset)); 5624 if (style < 0) 5625 RTX_FRAME_RELATED_P (insn) = 1; 5626 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11, 5627 offset)); 5628 } 5629 if (style < 0) 5630 RTX_FRAME_RELATED_P (insn) = 1; 5631} 5632 5633/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */ 5634 5635static rtx 5636ix86_internal_arg_pointer (void) 5637{ 5638 bool has_force_align_arg_pointer = 5639 (0 != lookup_attribute (ix86_force_align_arg_pointer_string, 5640 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))); 5641 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN 5642 && DECL_NAME (current_function_decl) 5643 && MAIN_NAME_P (DECL_NAME (current_function_decl)) 5644 && DECL_FILE_SCOPE_P (current_function_decl)) 5645 || ix86_force_align_arg_pointer 5646 || has_force_align_arg_pointer) 5647 { 5648 /* Nested functions can't realign the stack due to a register 5649 conflict. */ 5650 if (DECL_CONTEXT (current_function_decl) 5651 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL) 5652 { 5653 if (ix86_force_align_arg_pointer) 5654 warning (0, "-mstackrealign ignored for nested functions"); 5655 if (has_force_align_arg_pointer) 5656 error ("%s not supported for nested functions", 5657 ix86_force_align_arg_pointer_string); 5658 return virtual_incoming_args_rtx; 5659 } 5660 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2); 5661 return copy_to_reg (cfun->machine->force_align_arg_pointer); 5662 } 5663 else 5664 return virtual_incoming_args_rtx; 5665} 5666 5667/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook. 5668 This is called from dwarf2out.c to emit call frame instructions 5669 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. 
*/

/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook: translate the
   UNSPEC in PATTERN's SET_SRC (selected by INDEX) into the matching
   dwarf2out call-frame note at LABEL.  */
static void
ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

  switch (index)
    {
    case UNSPEC_REG_SAVE:
      dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
			      SET_DEST (pattern));
      break;
    case UNSPEC_DEF_CFA:
      dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
			 INTVAL (XVECEXP (unspec, 0, 0)));
      break;
    default:
      gcc_unreachable ();
    }
}

/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  if (cfun->machine->force_align_arg_pointer)
    {
      rtx x, y;

      /* Grab the argument pointer.  */
      x = plus_constant (stack_pointer_rtx, 4);
      y = cfun->machine->force_align_arg_pointer;
      insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* The unwind info consists of two parts: install the fafp as the cfa,
	 and record the fafp as the "save register" of the stack pointer.
	 The later is there in order that the unwinder can see where it
	 should restore the stack pointer across the and insn.  */
      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
      x = gen_rtx_SET (VOIDmode, y, x);
      RTX_FRAME_RELATED_P (x) = 1;
      y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
			  UNSPEC_REG_SAVE);
      y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
      RTX_FRAME_RELATED_P (y) = 1;
      x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
      REG_NOTES (insn) = x;

      /* Align the stack.
*/ 5729 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx, 5730 GEN_INT (-16))); 5731 5732 /* And here we cheat like madmen with the unwind info. We force the 5733 cfa register back to sp+4, which is exactly what it was at the 5734 start of the function. Re-pushing the return address results in 5735 the return at the same spot relative to the cfa, and thus is 5736 correct wrt the unwind info. */ 5737 x = cfun->machine->force_align_arg_pointer; 5738 x = gen_frame_mem (Pmode, plus_constant (x, -4)); 5739 insn = emit_insn (gen_push (x)); 5740 RTX_FRAME_RELATED_P (insn) = 1; 5741 5742 x = GEN_INT (4); 5743 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA); 5744 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x); 5745 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL); 5746 REG_NOTES (insn) = x; 5747 } 5748 5749 /* Note: AT&T enter does NOT have reversed args. Enter is probably 5750 slower on all targets. Also sdb doesn't like it. */ 5751 5752 if (frame_pointer_needed) 5753 { 5754 insn = emit_insn (gen_push (hard_frame_pointer_rtx)); 5755 RTX_FRAME_RELATED_P (insn) = 1; 5756 5757 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); 5758 RTX_FRAME_RELATED_P (insn) = 1; 5759 } 5760 5761 allocate = frame.to_allocate; 5762 5763 if (!frame.save_regs_using_mov) 5764 ix86_emit_save_regs (); 5765 else 5766 allocate += frame.nregs * UNITS_PER_WORD; 5767 5768 /* When using red zone we may start register saving before allocating 5769 the stack frame saving one cycle of the prologue. */ 5770 if (TARGET_RED_ZONE && frame.save_regs_using_mov) 5771 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx 5772 : stack_pointer_rtx, 5773 -frame.nregs * UNITS_PER_WORD); 5774 5775 if (allocate == 0) 5776 ; 5777 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT) 5778 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 5779 GEN_INT (-allocate), -1); 5780 else 5781 { 5782 /* Only valid for Win32. 
*/ 5783 rtx eax = gen_rtx_REG (SImode, 0); 5784 bool eax_live = ix86_eax_live_at_start_p (); 5785 rtx t; 5786 5787 gcc_assert (!TARGET_64BIT); 5788 5789 if (eax_live) 5790 { 5791 emit_insn (gen_push (eax)); 5792 allocate -= 4; 5793 } 5794 5795 emit_move_insn (eax, GEN_INT (allocate)); 5796 5797 insn = emit_insn (gen_allocate_stack_worker (eax)); 5798 RTX_FRAME_RELATED_P (insn) = 1; 5799 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate)); 5800 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t); 5801 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, 5802 t, REG_NOTES (insn)); 5803 5804 if (eax_live) 5805 { 5806 if (frame_pointer_needed) 5807 t = plus_constant (hard_frame_pointer_rtx, 5808 allocate 5809 - frame.to_allocate 5810 - frame.nregs * UNITS_PER_WORD); 5811 else 5812 t = plus_constant (stack_pointer_rtx, allocate); 5813 emit_move_insn (eax, gen_rtx_MEM (SImode, t)); 5814 } 5815 } 5816 5817 if (frame.save_regs_using_mov && !TARGET_RED_ZONE) 5818 { 5819 if (!frame_pointer_needed || !frame.to_allocate) 5820 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate); 5821 else 5822 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx, 5823 -frame.nregs * UNITS_PER_WORD); 5824 } 5825 5826 pic_reg_used = false; 5827 if (pic_offset_table_rtx 5828 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM] 5829 || current_function_profile)) 5830 { 5831 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum (); 5832 5833 if (alt_pic_reg_used != INVALID_REGNUM) 5834 REGNO (pic_offset_table_rtx) = alt_pic_reg_used; 5835 5836 pic_reg_used = true; 5837 } 5838 5839 if (pic_reg_used) 5840 { 5841 if (TARGET_64BIT) 5842 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); 5843 else 5844 insn = emit_insn (gen_set_got (pic_offset_table_rtx)); 5845 5846 /* Even with accurate pre-reload life analysis, we can wind up 5847 deleting all references to the pic register after reload. 
5848 Consider if cross-jumping unifies two sides of a branch 5849 controlled by a comparison vs the only read from a global. 5850 In which case, allow the set_got to be deleted, though we're 5851 too late to do anything about the ebx save in the prologue. */ 5852 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL); 5853 } 5854 5855 /* Prevent function calls from be scheduled before the call to mcount. 5856 In the pic_reg_used case, make sure that the got load isn't deleted. */ 5857 if (current_function_profile) 5858 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx)); 5859} 5860 5861/* Emit code to restore saved registers using MOV insns. First register 5862 is restored from POINTER + OFFSET. */ 5863static void 5864ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, 5865 int maybe_eh_return) 5866{ 5867 int regno; 5868 rtx base_address = gen_rtx_MEM (Pmode, pointer); 5869 5870 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 5871 if (ix86_save_reg (regno, maybe_eh_return)) 5872 { 5873 /* Ensure that adjust_address won't be forced to produce pointer 5874 out of range allowed by x86-64 instruction set. */ 5875 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode)) 5876 { 5877 rtx r11; 5878 5879 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */); 5880 emit_move_insn (r11, GEN_INT (offset)); 5881 emit_insn (gen_adddi3 (r11, r11, pointer)); 5882 base_address = gen_rtx_MEM (Pmode, r11); 5883 offset = 0; 5884 } 5885 emit_move_insn (gen_rtx_REG (Pmode, regno), 5886 adjust_address (base_address, Pmode, offset)); 5887 offset += UNITS_PER_WORD; 5888 } 5889} 5890 5891/* Restore function stack, frame, and registers. 
 */

/* STYLE: 0 = sibcall epilogue (no return insn), 2 = eh_return epilogue,
   otherwise a normal return.  */
void
ix86_expand_epilogue (int style)
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.

     The default code results in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style);
	    }
	  else
	    {
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate
					    + frame.nregs * UNITS_PER_WORD),
				   style);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
	       || !cfun->machine->use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style);
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  gcc_assert (frame_pointer_needed);
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     GEN_INT (offset), style);
	}
      else if (frame.to_allocate)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate), style);

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  if (cfun->machine->force_align_arg_pointer)
    {
      /* Undo the stack realignment: sp = fafp - 4 (fafp was sp+4 at entry).  */
      emit_insn (gen_addsi3 (stack_pointer_rtx,
			     cfun->machine->force_align_arg_pointer,
			     GEN_INT (-4)));
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in 64bit ABI.  */
	  gcc_assert (!TARGET_64BIT);

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}

/* Reset from the function's potential modifications.  Called at the
   end of assembly output for each function; undoes any renaming of
   the PIC register done in the prologue.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx insn = get_last_insn ();
    while (insn
	   && NOTE_P (insn)
	   && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
      insn = PREV_INSN (insn);
    if (insn
	&& (LABEL_P (insn)
	    || (NOTE_P (insn)
		&& NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
  }
#endif

}

/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.
 */

int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      /* Flatten a left-leaning chain of at most four PLUS addends.  */
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case UNSPEC:
	      /* A thread-pointer unspec selects the %fs/%gs segment.  */
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
	      else
		return 0;
	      break;

	    case REG:
	    case SUBREG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Allow arg pointer and stack pointer as index if there is no scaling
     (%esp cannot be encoded as an index, so swap it into the base slot).  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
	  || index_reg == frame_pointer_rtx
	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      rtx tmp;
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base_reg == hard_frame_pointer_rtx
       || base_reg == frame_pointer_rtx
       || base_reg == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base_reg && !index_reg && !disp
      && REG_P (base_reg)
      && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}

/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;
  if (parts.seg != SEG_DEFAULT)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}

/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      /* Peel CONST (PLUS (UNSPEC_GOTPCREL, const)) down to the symbol.  */
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || XINT (term, 1) != UNSPEC_GOTPCREL)
	return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
	  && GET_CODE (term) != LABEL_REF)
	return x;

      return term;
    }

  term = ix86_delegitimize_address (x);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}

/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   use.
 */

static bool
darwin_local_data_pic (rtx disp)
{
  if (GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
	  || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
	  {
	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
	    if (! strcmp (sym_name, "<pic base>"))
	      return true;
	  }
    }

  return false;
}

/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

bool
legitimate_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      /* Strip an outer PLUS with a constant-int addend before looking
	 at the symbolic part.  */
      if (GET_CODE (x) == PLUS)
	{
	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
	return true;
      if (GET_CODE (x) != SYMBOL_REF)
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
	  && x != CONST0_RTX (TImode)
	  && !TARGET_64BIT)
	return false;
      break;

    case CONST_VECTOR:
      if (x == CONST0_RTX (GET_MODE (x)))
	return true;
      return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}

/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !legitimate_constant_p (x);
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
}

/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && GET_CODE (XEXP (inner, 1)) == CONST_INT)
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    x = XVECEXP (inner, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}

/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
	{
	case LABEL_REF:
	  return true;

	case CONST:
	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
	    break;
	  op0 = XEXP (XEXP (disp, 0), 0);
	  op1 = XEXP (XEXP (disp, 0), 1);
	  /* Offsets must stay within +/- 16MB of the symbol.  */
	  if (GET_CODE (op1) != CONST_INT
	      || INTVAL (op1) >= 16*1024*1024
	      || INTVAL (op1) < -16*1024*1024)
	    break;
	  if (GET_CODE (op0) == LABEL_REF)
	    return true;
	  if (GET_CODE (op0) != SYMBOL_REF)
	    break;
	  /* FALLTHRU */

	case SYMBOL_REF:
	  /* TLS references should always be enclosed in UNSPEC.  */
	  if (SYMBOL_REF_TLS_MODEL (op0))
	    return false;
	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
	    return true;
	  break;

	default:
	  break;
	}
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* We are unsafe to allow PLUS expressions.  This limits allowed
	 distance of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
	      && XINT (disp, 1) != UNSPEC_GOTOFF))
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return 1;

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI specifies also a 32bit relocation, we don't produce
	 it in small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	  && !TARGET_64BIT)
	return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return 0;
}

/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.
   The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's that span more than a word here.  It can lead to spill
     failures when the base is one word out of a two word structure, which is
     represented internally as a DImode int.  */

  if (base)
    {
      rtx reg;
      reason_rtx = base;

      if (REG_P (base))
	reg = base;
      else if (GET_CODE (base) == SUBREG
	       && REG_P (SUBREG_REG (base))
	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
		  <= UNITS_PER_WORD)
	reg = SUBREG_REG (base);
      else
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's that span more than a word here -- same as above.  */

  if (index)
    {
      rtx reg;
      reason_rtx = index;

      if (REG_P (index))
	reg = index;
      else if (GET_CODE (index) == SUBREG
	       && REG_P (SUBREG_REG (index))
	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
		  <= UNITS_PER_WORD)
	reg = SUBREG_REG (index);
      else
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
	     used.  While ABI specify also 32bit relocations, we don't produce
	     them at all and use IP relative instead.  */
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    if (!TARGET_64BIT)
	      goto is_legitimate_pic;
	    reason = "64bit address unspec";
	    goto report_error;

	  case UNSPEC_GOTPCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  default:
	    reason = "invalid address unspec";
	    goto report_error;
	  }

      else if (SYMBOLIC_CONST (disp)
	       && (flag_pic
		   || (TARGET_MACHO
#if TARGET_MACHO
		       && MACHOPIC_INDIRECT
		       && !machopic_operand_p (disp)
#endif
	       )))
	{

	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		{
		  reason = "non-constant pic memory reference";
		  goto report_error;
		}
	    }
	  else if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && GET_CODE (disp) != CONST_INT
	       && (GET_CODE (disp) != CONST
		   || !legitimate_constant_p (disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !legitimate_constant_p (disp)))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	{
	  reason = "displacement is out of range";
	  goto report_error;
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}

/* Return a unique alias set for the GOT.  Lazily created on first use
   and cached for the rest of the compilation.  */

static HOST_WIDE_INT
ix86_GOT_alias_set (void)
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.
*/

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  /* In 64-bit mode a displacement that already passes
     legitimate_pic_address_disp_p needs no rewriting.  */
  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new = addr;
  else if (TARGET_64BIT
	   && ix86_cmodel != CM_SMALL_PIC
	   && local_symbolic_operand (addr, Pmode))
    {
      rtx tmpreg;
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  /* Wrap only the symbol part in the @GOTOFF unspec and keep the
	     constant offset outside it.  */
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
	}
      else
	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      if (!reg)
	tmpreg = gen_reg_rtx (Pmode);
      else
	tmpreg = reg;
      emit_move_insn (tmpreg, new);

      if (reg != 0)
	{
	  new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
				     tmpreg, 1, OPTAB_DIRECT);
	  new = reg;
	}
      else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
	}
      else
	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
    {
      if (TARGET_64BIT)
	{
	  /* 64-bit global data: load the address from the GOT via a
	     @GOTPCREL (RIP-relative) memory reference.  */
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_const_mem (Pmode, new);
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use directly gen_movsi, otherwise the address is loaded
	     into register for CSE.  We don't want to CSE this addresses,
	     instead we CSE addresses from the GOT table, so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_const_mem (Pmode, new);
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST_INT
	  && !x86_64_immediate_operand (addr, VOIDmode))
	{
	  /* A constant address that does not fit in a sign-extended
	     32-bit immediate must live in a register.  */
	  if (reg)
	    {
	      emit_move_insn (reg, addr);
	      new = reg;
	    }
	  else
	    new = force_reg (Pmode, addr);
	}
      else if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  gcc_assert (GET_CODE (addr) == PLUS);
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					UNSPEC_GOTOFF);
		  new = gen_rtx_PLUS (Pmode, new, op1);
		  new = gen_rtx_CONST (Pmode, new);
		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new);
		      new = reg;
		    }
		}
	      else
		{
		  /* 64-bit: offsets beyond +-16MB of the symbol get the
		     symbol forced into a register; NB: in-range offsets
		     fall through leaving NEW == ORIG.  */
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);
		      new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
		    }
		}
	    }
	  else
	    {
	      /* Legitimize each operand of the PLUS separately, then
	         recombine, folding a constant part back in if possible.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new = legitimize_pic_address (XEXP (addr, 1),
					    base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}

/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (int to_reg)
{
  rtx tp, reg, insn;

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  if (!to_reg)
    return tp;

  reg = gen_reg_rtx (Pmode);
  insn = gen_rtx_SET (VOIDmode, reg, tp);
  insn = emit_insn (insn);

  return reg;
}

/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.
*/

static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
  rtx dest, base, off, pic, tp;
  int type;

  /* Dispatch on the TLS access model recorded for symbol X.  */
  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      /* Fully dynamic: the address is computed at run time by the
	 tls_global_dynamic pattern (a libcall block in 64-bit
	 non-GNU2 mode, with the result in hard reg 0).  */
      dest = gen_reg_rtx (Pmode);
      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;

      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
	{
	  rtx rax = gen_rtx_REG (Pmode, 0), insns;

	  start_sequence ();
	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
	  insns = get_insns ();
	  end_sequence ();

	  emit_libcall_block (insns, dest, rax, x);
	}
      else if (TARGET_64BIT && TARGET_GNU2_TLS)
	emit_insn (gen_tls_global_dynamic_64 (dest, x));
      else
	emit_insn (gen_tls_global_dynamic_32 (dest, x));

      if (TARGET_GNU2_TLS)
	{
	  /* GNU2 (TLS descriptor) scheme: the pattern yields an offset
	     from the thread pointer; add TP and note the equivalence.  */
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
	}
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* One base lookup for the module, then a link-time @DTPOFF
	 displacement for this particular symbol.  */
      base = gen_reg_rtx (Pmode);
      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;

      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
	{
	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

	  start_sequence ();
	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
	  insns = get_insns ();
	  end_sequence ();

	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
	  emit_libcall_block (insns, base, rax, note);
	}
      else if (TARGET_64BIT && TARGET_GNU2_TLS)
	emit_insn (gen_tls_local_dynamic_base_64 (base));
      else
	emit_insn (gen_tls_local_dynamic_base_32 (base));

      if (TARGET_GNU2_TLS)
	{
	  rtx x = ix86_tls_module_base ();

	  set_unique_reg_note (get_last_insn (), REG_EQUIV,
			       gen_rtx_MINUS (Pmode, x, tp));
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
	}

      break;

    case TLS_MODEL_INITIAL_EXEC:
      /* Load the TP offset from the GOT; the relocation type depends on
	 bitness, PIC-ness, and whether GNU TLS sequences are in use.  */
      if (TARGET_64BIT)
	{
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  /* Non-PIC Sun-style sequence still needs a GOT pointer.  */
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
	off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_const_mem (Pmode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (Pmode, off);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      /* Offset from the thread pointer is a link-time constant.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.
   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  /* TLS symbols get dedicated legitimization; LOG doubles as the TLS
     model here (nonzero means a TLS reference).  */
  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Break remaining multiplies out into registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Last resort: force the non-register operand of the PLUS into a
	 fresh pseudo so the result is REG+REG.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}

/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (! TARGET_MACHO || TARGET_64BIT)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.
	     */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      /* Print the wrapped operand, then the relocation suffix that the
	 unspec number stands for.  */
      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs ("@GOTPCREL(%rip)", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@GOTTPOFF", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@TPOFF", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@TPOFF", file);
	  else
	    fputs ("@NTPOFF", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@DTPOFF", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@GOTTPOFF(%rip)", file);
	  else
	    fputs ("@GOTNTPOFF", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@INDNTPOFF", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}

/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  SIZE is the byte size of
   the relocation (4 or 8); an 8-byte request emits the 4-byte @DTPOFF
   value followed by a zero upper word.  */

static void
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@DTPOFF", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
    }
}

/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.  */

static rtx
ix86_delegitimize_address (rtx orig_x)
{
  rtx x = orig_x;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.
     */
  rtx result = NULL_RTX;

  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      /* 64-bit only delegitimizes (mem (const (unspec @GOTPCREL))).  */
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	  || GET_CODE (orig_x) != MEM)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (GET_CODE (XEXP (reg_addend, 0)) == REG
	  && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
	reg_addend = XEXP (reg_addend, 1);
      else if (GET_CODE (XEXP (reg_addend, 1)) == REG
	       && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
	reg_addend = XEXP (reg_addend, 0);
      else
	return orig_x;
      if (GET_CODE (reg_addend) != REG
	  && GET_CODE (reg_addend) != MULT
	  && GET_CODE (reg_addend) != ASHIFT)
	return orig_x;
    }
  else
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT)
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    result = XVECEXP (x, 0, 0);

  if (TARGET_MACHO && darwin_local_data_pic (x)
      && GET_CODE (orig_x) != MEM)
    result = XEXP (x, 0);

  if (! result)
    return orig_x;

  /* Re-attach any stripped addends to the bare symbol.  */
  if (const_addend)
    result = gen_rtx_PLUS (Pmode, result, const_addend);
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  return result;
}

/* Write to FILE the x86 condition suffix (e.g. "e", "ne", "g") that
   tests comparison CODE of a value in condition-code mode MODE.
   REVERSE means print the suffix for the reversed comparison; FP
   selects the alternate spellings ("nbe"/"nb"/"u"/"nu") used for
   fcmov because of the assembler issue noted below.  */

static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
		    int fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      /* Reduce an FP comparison to the equivalent integer condition;
	 it must not need a bypass or second comparison here.  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      gcc_assert (mode == CCmode);
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      switch (mode)
	{
	case CCNOmode:
	case CCGOCmode:
	  suffix = "s";
	  break;

	case CCmode:
	case CCGCmode:
	  suffix = "l";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case LTU:
      gcc_assert (mode == CCmode);
      suffix = "b";
      break;
    case GE:
      switch (mode)
	{
	case CCNOmode:
	case CCGOCmode:
	  suffix = "ns";
	  break;

	case CCmode:
	case CCGCmode:
	  suffix = "ge";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case GEU:
      /* ??? As above.  */
      gcc_assert (mode == CCmode);
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      gcc_assert (mode == CCmode);
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}

/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */

void
print_reg (rtx x, int code, FILE *file)
{
  gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
	      && REGNO (x) != FRAME_POINTER_REGNUM
	      && REGNO (x) != FLAGS_REG
	      && REGNO (x) != FPSR_REG);

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  /* Fold the print code down to an operand byte size (3 and 0 are the
     special cases for 'y' and the high-byte registers).  */
  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.
     */
  if (REX_INT_REG_P (x))
    {
      gcc_assert (TARGET_64BIT);
      /* r8..r15 are spelled r<N>b/w/d for the narrower widths.  */
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      /* Integer registers of 4+ bytes get the 'e' (or 'r' in 64-bit
	 mode for 8 bytes) prefix before the 16-bit name.  */
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      gcc_unreachable ();
    }
}

/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  Caches the name in cfun->machine->some_ld_name; aborts
   if no such symbol exists in the current function's insns.  */

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  gcc_unreachable ();
}

/* for_each_rtx callback for get_some_local_dynamic_name: if *PX is a
   SYMBOL_REF using the local-dynamic TLS model, record its name in
   cfun->machine->some_ld_name and return 1 to stop the walk.  */

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
7992 X -- don't print any sort of PIC '@' suffix for a symbol. 7993 & -- print some in-use local-dynamic symbol name. 7994 H -- print a memory address offset by 8; used for sse high-parts 7995 */ 7996 7997void 7998print_operand (FILE *file, rtx x, int code) 7999{ 8000 if (code) 8001 { 8002 switch (code) 8003 { 8004 case '*': 8005 if (ASSEMBLER_DIALECT == ASM_ATT) 8006 putc ('*', file); 8007 return; 8008 8009 case '&': 8010 assemble_name (file, get_some_local_dynamic_name ()); 8011 return; 8012 8013 case 'A': 8014 switch (ASSEMBLER_DIALECT) 8015 { 8016 case ASM_ATT: 8017 putc ('*', file); 8018 break; 8019 8020 case ASM_INTEL: 8021 /* Intel syntax. For absolute addresses, registers should not 8022 be surrounded by braces. */ 8023 if (GET_CODE (x) != REG) 8024 { 8025 putc ('[', file); 8026 PRINT_OPERAND (file, x, 0); 8027 putc (']', file); 8028 return; 8029 } 8030 break; 8031 8032 default: 8033 gcc_unreachable (); 8034 } 8035 8036 PRINT_OPERAND (file, x, 0); 8037 return; 8038 8039 8040 case 'L': 8041 if (ASSEMBLER_DIALECT == ASM_ATT) 8042 putc ('l', file); 8043 return; 8044 8045 case 'W': 8046 if (ASSEMBLER_DIALECT == ASM_ATT) 8047 putc ('w', file); 8048 return; 8049 8050 case 'B': 8051 if (ASSEMBLER_DIALECT == ASM_ATT) 8052 putc ('b', file); 8053 return; 8054 8055 case 'Q': 8056 if (ASSEMBLER_DIALECT == ASM_ATT) 8057 putc ('l', file); 8058 return; 8059 8060 case 'S': 8061 if (ASSEMBLER_DIALECT == ASM_ATT) 8062 putc ('s', file); 8063 return; 8064 8065 case 'T': 8066 if (ASSEMBLER_DIALECT == ASM_ATT) 8067 putc ('t', file); 8068 return; 8069 8070 case 'z': 8071 /* 387 opcodes don't get size suffixes if the operands are 8072 registers. */ 8073 if (STACK_REG_P (x)) 8074 return; 8075 8076 /* Likewise if using Intel opcodes. */ 8077 if (ASSEMBLER_DIALECT == ASM_INTEL) 8078 return; 8079 8080 /* This is the size of op from size of operand. 
*/ 8081 switch (GET_MODE_SIZE (GET_MODE (x))) 8082 { 8083 case 2: 8084#ifdef HAVE_GAS_FILDS_FISTS 8085 putc ('s', file); 8086#endif 8087 return; 8088 8089 case 4: 8090 if (GET_MODE (x) == SFmode) 8091 { 8092 putc ('s', file); 8093 return; 8094 } 8095 else 8096 putc ('l', file); 8097 return; 8098 8099 case 12: 8100 case 16: 8101 putc ('t', file); 8102 return; 8103 8104 case 8: 8105 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) 8106 { 8107#ifdef GAS_MNEMONICS 8108 putc ('q', file); 8109#else 8110 putc ('l', file); 8111 putc ('l', file); 8112#endif 8113 } 8114 else 8115 putc ('l', file); 8116 return; 8117 8118 default: 8119 gcc_unreachable (); 8120 } 8121 8122 case 'b': 8123 case 'w': 8124 case 'k': 8125 case 'q': 8126 case 'h': 8127 case 'y': 8128 case 'X': 8129 case 'P': 8130 break; 8131 8132 case 's': 8133 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT) 8134 { 8135 PRINT_OPERAND (file, x, 0); 8136 putc (',', file); 8137 } 8138 return; 8139 8140 case 'D': 8141 /* Little bit of braindamage here. The SSE compare instructions 8142 does use completely different names for the comparisons that the 8143 fp conditional moves. 
*/ 8144 switch (GET_CODE (x)) 8145 { 8146 case EQ: 8147 case UNEQ: 8148 fputs ("eq", file); 8149 break; 8150 case LT: 8151 case UNLT: 8152 fputs ("lt", file); 8153 break; 8154 case LE: 8155 case UNLE: 8156 fputs ("le", file); 8157 break; 8158 case UNORDERED: 8159 fputs ("unord", file); 8160 break; 8161 case NE: 8162 case LTGT: 8163 fputs ("neq", file); 8164 break; 8165 case UNGE: 8166 case GE: 8167 fputs ("nlt", file); 8168 break; 8169 case UNGT: 8170 case GT: 8171 fputs ("nle", file); 8172 break; 8173 case ORDERED: 8174 fputs ("ord", file); 8175 break; 8176 default: 8177 gcc_unreachable (); 8178 } 8179 return; 8180 case 'O': 8181#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 8182 if (ASSEMBLER_DIALECT == ASM_ATT) 8183 { 8184 switch (GET_MODE (x)) 8185 { 8186 case HImode: putc ('w', file); break; 8187 case SImode: 8188 case SFmode: putc ('l', file); break; 8189 case DImode: 8190 case DFmode: putc ('q', file); break; 8191 default: gcc_unreachable (); 8192 } 8193 putc ('.', file); 8194 } 8195#endif 8196 return; 8197 case 'C': 8198 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file); 8199 return; 8200 case 'F': 8201#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 8202 if (ASSEMBLER_DIALECT == ASM_ATT) 8203 putc ('.', file); 8204#endif 8205 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file); 8206 return; 8207 8208 /* Like above, but reverse condition */ 8209 case 'c': 8210 /* Check to see if argument to %c is really a constant 8211 and not a condition code which needs to be reversed. 
*/ 8212 if (!COMPARISON_P (x)) 8213 { 8214 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'"); 8215 return; 8216 } 8217 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file); 8218 return; 8219 case 'f': 8220#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 8221 if (ASSEMBLER_DIALECT == ASM_ATT) 8222 putc ('.', file); 8223#endif 8224 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file); 8225 return; 8226 8227 case 'H': 8228 /* It doesn't actually matter what mode we use here, as we're 8229 only going to use this for printing. */ 8230 x = adjust_address_nv (x, DImode, 8); 8231 break; 8232 8233 case '+': 8234 { 8235 rtx x; 8236 8237 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS) 8238 return; 8239 8240 x = find_reg_note (current_output_insn, REG_BR_PROB, 0); 8241 if (x) 8242 { 8243 int pred_val = INTVAL (XEXP (x, 0)); 8244 8245 if (pred_val < REG_BR_PROB_BASE * 45 / 100 8246 || pred_val > REG_BR_PROB_BASE * 55 / 100) 8247 { 8248 int taken = pred_val > REG_BR_PROB_BASE / 2; 8249 int cputaken = final_forward_branch_p (current_output_insn) == 0; 8250 8251 /* Emit hints only in the case default branch prediction 8252 heuristics would fail. */ 8253 if (taken != cputaken) 8254 { 8255 /* We use 3e (DS) prefix for taken branches and 8256 2e (CS) prefix for not taken branches. */ 8257 if (taken) 8258 fputs ("ds ; ", file); 8259 else 8260 fputs ("cs ; ", file); 8261 } 8262 } 8263 } 8264 return; 8265 } 8266 default: 8267 output_operand_lossage ("invalid operand code '%c'", code); 8268 } 8269 } 8270 8271 if (GET_CODE (x) == REG) 8272 print_reg (x, code, file); 8273 8274 else if (GET_CODE (x) == MEM) 8275 { 8276 /* No `byte ptr' prefix for call instructions. 
*/ 8277 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P') 8278 { 8279 const char * size; 8280 switch (GET_MODE_SIZE (GET_MODE (x))) 8281 { 8282 case 1: size = "BYTE"; break; 8283 case 2: size = "WORD"; break; 8284 case 4: size = "DWORD"; break; 8285 case 8: size = "QWORD"; break; 8286 case 12: size = "XWORD"; break; 8287 case 16: size = "XMMWORD"; break; 8288 default: 8289 gcc_unreachable (); 8290 } 8291 8292 /* Check for explicit size override (codes 'b', 'w' and 'k') */ 8293 if (code == 'b') 8294 size = "BYTE"; 8295 else if (code == 'w') 8296 size = "WORD"; 8297 else if (code == 'k') 8298 size = "DWORD"; 8299 8300 fputs (size, file); 8301 fputs (" PTR ", file); 8302 } 8303 8304 x = XEXP (x, 0); 8305 /* Avoid (%rip) for call operands. */ 8306 if (CONSTANT_ADDRESS_P (x) && code == 'P' 8307 && GET_CODE (x) != CONST_INT) 8308 output_addr_const (file, x); 8309 else if (this_is_asm_operands && ! address_operand (x, VOIDmode)) 8310 output_operand_lossage ("invalid constraints for operand"); 8311 else 8312 output_address (x); 8313 } 8314 8315 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode) 8316 { 8317 REAL_VALUE_TYPE r; 8318 long l; 8319 8320 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 8321 REAL_VALUE_TO_TARGET_SINGLE (r, l); 8322 8323 if (ASSEMBLER_DIALECT == ASM_ATT) 8324 putc ('$', file); 8325 fprintf (file, "0x%08lx", l); 8326 } 8327 8328 /* These float cases don't actually occur as immediate operands. */ 8329 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode) 8330 { 8331 char dstr[30]; 8332 8333 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); 8334 fprintf (file, "%s", dstr); 8335 } 8336 8337 else if (GET_CODE (x) == CONST_DOUBLE 8338 && GET_MODE (x) == XFmode) 8339 { 8340 char dstr[30]; 8341 8342 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); 8343 fprintf (file, "%s", dstr); 8344 } 8345 8346 else 8347 { 8348 /* We have patterns that allow zero sets of memory, for instance. 
8349 In 64-bit mode, we should probably support all 8-byte vectors, 8350 since we can in fact encode that into an immediate. */ 8351 if (GET_CODE (x) == CONST_VECTOR) 8352 { 8353 gcc_assert (x == CONST0_RTX (GET_MODE (x))); 8354 x = const0_rtx; 8355 } 8356 8357 if (code != 'P') 8358 { 8359 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE) 8360 { 8361 if (ASSEMBLER_DIALECT == ASM_ATT) 8362 putc ('$', file); 8363 } 8364 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF 8365 || GET_CODE (x) == LABEL_REF) 8366 { 8367 if (ASSEMBLER_DIALECT == ASM_ATT) 8368 putc ('$', file); 8369 else 8370 fputs ("OFFSET FLAT:", file); 8371 } 8372 } 8373 if (GET_CODE (x) == CONST_INT) 8374 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 8375 else if (flag_pic) 8376 output_pic_addr_const (file, x, code); 8377 else 8378 output_addr_const (file, x); 8379 } 8380} 8381 8382/* Print a memory operand whose address is ADDR. */ 8383 8384void 8385print_operand_address (FILE *file, rtx addr) 8386{ 8387 struct ix86_address parts; 8388 rtx base, index, disp; 8389 int scale; 8390 int ok = ix86_decompose_address (addr, &parts); 8391 8392 gcc_assert (ok); 8393 8394 base = parts.base; 8395 index = parts.index; 8396 disp = parts.disp; 8397 scale = parts.scale; 8398 8399 switch (parts.seg) 8400 { 8401 case SEG_DEFAULT: 8402 break; 8403 case SEG_FS: 8404 case SEG_GS: 8405 if (USER_LABEL_PREFIX[0] == 0) 8406 putc ('%', file); 8407 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file); 8408 break; 8409 default: 8410 gcc_unreachable (); 8411 } 8412 8413 if (!base && !index) 8414 { 8415 /* Displacement only requires special attention. 
*/ 8416 8417 if (GET_CODE (disp) == CONST_INT) 8418 { 8419 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT) 8420 { 8421 if (USER_LABEL_PREFIX[0] == 0) 8422 putc ('%', file); 8423 fputs ("ds:", file); 8424 } 8425 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp)); 8426 } 8427 else if (flag_pic) 8428 output_pic_addr_const (file, disp, 0); 8429 else 8430 output_addr_const (file, disp); 8431 8432 /* Use one byte shorter RIP relative addressing for 64bit mode. */ 8433 if (TARGET_64BIT) 8434 { 8435 if (GET_CODE (disp) == CONST 8436 && GET_CODE (XEXP (disp, 0)) == PLUS 8437 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT) 8438 disp = XEXP (XEXP (disp, 0), 0); 8439 if (GET_CODE (disp) == LABEL_REF 8440 || (GET_CODE (disp) == SYMBOL_REF 8441 && SYMBOL_REF_TLS_MODEL (disp) == 0)) 8442 fputs ("(%rip)", file); 8443 } 8444 } 8445 else 8446 { 8447 if (ASSEMBLER_DIALECT == ASM_ATT) 8448 { 8449 if (disp) 8450 { 8451 if (flag_pic) 8452 output_pic_addr_const (file, disp, 0); 8453 else if (GET_CODE (disp) == LABEL_REF) 8454 output_asm_label (disp); 8455 else 8456 output_addr_const (file, disp); 8457 } 8458 8459 putc ('(', file); 8460 if (base) 8461 print_reg (base, 0, file); 8462 if (index) 8463 { 8464 putc (',', file); 8465 print_reg (index, 0, file); 8466 if (scale != 1) 8467 fprintf (file, ",%d", scale); 8468 } 8469 putc (')', file); 8470 } 8471 else 8472 { 8473 rtx offset = NULL_RTX; 8474 8475 if (disp) 8476 { 8477 /* Pull out the offset of a symbol; print any symbol itself. 
*/ 8478 if (GET_CODE (disp) == CONST 8479 && GET_CODE (XEXP (disp, 0)) == PLUS 8480 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT) 8481 { 8482 offset = XEXP (XEXP (disp, 0), 1); 8483 disp = gen_rtx_CONST (VOIDmode, 8484 XEXP (XEXP (disp, 0), 0)); 8485 } 8486 8487 if (flag_pic) 8488 output_pic_addr_const (file, disp, 0); 8489 else if (GET_CODE (disp) == LABEL_REF) 8490 output_asm_label (disp); 8491 else if (GET_CODE (disp) == CONST_INT) 8492 offset = disp; 8493 else 8494 output_addr_const (file, disp); 8495 } 8496 8497 putc ('[', file); 8498 if (base) 8499 { 8500 print_reg (base, 0, file); 8501 if (offset) 8502 { 8503 if (INTVAL (offset) >= 0) 8504 putc ('+', file); 8505 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 8506 } 8507 } 8508 else if (offset) 8509 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 8510 else 8511 putc ('0', file); 8512 8513 if (index) 8514 { 8515 putc ('+', file); 8516 print_reg (index, 0, file); 8517 if (scale != 1) 8518 fprintf (file, "*%d", scale); 8519 } 8520 putc (']', file); 8521 } 8522 } 8523} 8524 8525bool 8526output_addr_const_extra (FILE *file, rtx x) 8527{ 8528 rtx op; 8529 8530 if (GET_CODE (x) != UNSPEC) 8531 return false; 8532 8533 op = XVECEXP (x, 0, 0); 8534 switch (XINT (x, 1)) 8535 { 8536 case UNSPEC_GOTTPOFF: 8537 output_addr_const (file, op); 8538 /* FIXME: This might be @TPOFF in Sun ld. 
*/ 8539 fputs ("@GOTTPOFF", file); 8540 break; 8541 case UNSPEC_TPOFF: 8542 output_addr_const (file, op); 8543 fputs ("@TPOFF", file); 8544 break; 8545 case UNSPEC_NTPOFF: 8546 output_addr_const (file, op); 8547 if (TARGET_64BIT) 8548 fputs ("@TPOFF", file); 8549 else 8550 fputs ("@NTPOFF", file); 8551 break; 8552 case UNSPEC_DTPOFF: 8553 output_addr_const (file, op); 8554 fputs ("@DTPOFF", file); 8555 break; 8556 case UNSPEC_GOTNTPOFF: 8557 output_addr_const (file, op); 8558 if (TARGET_64BIT) 8559 fputs ("@GOTTPOFF(%rip)", file); 8560 else 8561 fputs ("@GOTNTPOFF", file); 8562 break; 8563 case UNSPEC_INDNTPOFF: 8564 output_addr_const (file, op); 8565 fputs ("@INDNTPOFF", file); 8566 break; 8567 8568 default: 8569 return false; 8570 } 8571 8572 return true; 8573} 8574 8575/* Split one or more DImode RTL references into pairs of SImode 8576 references. The RTL can be REG, offsettable MEM, integer constant, or 8577 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to 8578 split and "num" is its length. lo_half and hi_half are output arrays 8579 that parallel "operands". */ 8580 8581void 8582split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) 8583{ 8584 while (num--) 8585 { 8586 rtx op = operands[num]; 8587 8588 /* simplify_subreg refuse to split volatile memory addresses, 8589 but we still have to handle it. */ 8590 if (GET_CODE (op) == MEM) 8591 { 8592 lo_half[num] = adjust_address (op, SImode, 0); 8593 hi_half[num] = adjust_address (op, SImode, 4); 8594 } 8595 else 8596 { 8597 lo_half[num] = simplify_gen_subreg (SImode, op, 8598 GET_MODE (op) == VOIDmode 8599 ? DImode : GET_MODE (op), 0); 8600 hi_half[num] = simplify_gen_subreg (SImode, op, 8601 GET_MODE (op) == VOIDmode 8602 ? DImode : GET_MODE (op), 4); 8603 } 8604 } 8605} 8606/* Split one or more TImode RTL references into pairs of DImode 8607 references. The RTL can be REG, offsettable MEM, integer constant, or 8608 CONST_DOUBLE. 
"operands" is a pointer to an array of DImode RTL to 8609 split and "num" is its length. lo_half and hi_half are output arrays 8610 that parallel "operands". */ 8611 8612void 8613split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) 8614{ 8615 while (num--) 8616 { 8617 rtx op = operands[num]; 8618 8619 /* simplify_subreg refuse to split volatile memory addresses, but we 8620 still have to handle it. */ 8621 if (GET_CODE (op) == MEM) 8622 { 8623 lo_half[num] = adjust_address (op, DImode, 0); 8624 hi_half[num] = adjust_address (op, DImode, 8); 8625 } 8626 else 8627 { 8628 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0); 8629 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8); 8630 } 8631 } 8632} 8633 8634/* Output code to perform a 387 binary operation in INSN, one of PLUS, 8635 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] 8636 is the expression of the binary operation. The output may either be 8637 emitted here, or returned to the caller, like all output_* functions. 8638 8639 There is no guarantee that the operands are the same mode, as they 8640 might be within FLOAT or FLOAT_EXTEND expressions. */ 8641 8642#ifndef SYSV386_COMPAT 8643/* Set to 1 for compatibility with brain-damaged assemblers. No-one 8644 wants to fix the assemblers because that causes incompatibility 8645 with gcc. No-one wants to fix gcc because that causes 8646 incompatibility with assemblers... You can use the option of 8647 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */ 8648#define SYSV386_COMPAT 1 8649#endif 8650 8651const char * 8652output_387_binary_op (rtx insn, rtx *operands) 8653{ 8654 static char buf[30]; 8655 const char *p; 8656 const char *ssep; 8657 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]); 8658 8659#ifdef ENABLE_CHECKING 8660 /* Even if we do not want to check the inputs, this documents input 8661 constraints. Which helps in understanding the following code. 
*/ 8662 if (STACK_REG_P (operands[0]) 8663 && ((REG_P (operands[1]) 8664 && REGNO (operands[0]) == REGNO (operands[1]) 8665 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM)) 8666 || (REG_P (operands[2]) 8667 && REGNO (operands[0]) == REGNO (operands[2]) 8668 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM))) 8669 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2]))) 8670 ; /* ok */ 8671 else 8672 gcc_assert (is_sse); 8673#endif 8674 8675 switch (GET_CODE (operands[3])) 8676 { 8677 case PLUS: 8678 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 8679 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 8680 p = "fiadd"; 8681 else 8682 p = "fadd"; 8683 ssep = "add"; 8684 break; 8685 8686 case MINUS: 8687 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 8688 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 8689 p = "fisub"; 8690 else 8691 p = "fsub"; 8692 ssep = "sub"; 8693 break; 8694 8695 case MULT: 8696 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 8697 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 8698 p = "fimul"; 8699 else 8700 p = "fmul"; 8701 ssep = "mul"; 8702 break; 8703 8704 case DIV: 8705 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 8706 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 8707 p = "fidiv"; 8708 else 8709 p = "fdiv"; 8710 ssep = "div"; 8711 break; 8712 8713 default: 8714 gcc_unreachable (); 8715 } 8716 8717 if (is_sse) 8718 { 8719 strcpy (buf, ssep); 8720 if (GET_MODE (operands[0]) == SFmode) 8721 strcat (buf, "ss\t{%2, %0|%0, %2}"); 8722 else 8723 strcat (buf, "sd\t{%2, %0|%0, %2}"); 8724 return buf; 8725 } 8726 strcpy (buf, p); 8727 8728 switch (GET_CODE (operands[3])) 8729 { 8730 case MULT: 8731 case PLUS: 8732 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) 8733 { 8734 rtx temp = operands[2]; 8735 operands[2] = operands[1]; 8736 operands[1] = temp; 8737 } 8738 8739 /* know operands[0] == operands[1]. 
*/ 8740 8741 if (GET_CODE (operands[2]) == MEM) 8742 { 8743 p = "%z2\t%2"; 8744 break; 8745 } 8746 8747 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) 8748 { 8749 if (STACK_TOP_P (operands[0])) 8750 /* How is it that we are storing to a dead operand[2]? 8751 Well, presumably operands[1] is dead too. We can't 8752 store the result to st(0) as st(0) gets popped on this 8753 instruction. Instead store to operands[2] (which I 8754 think has to be st(1)). st(1) will be popped later. 8755 gcc <= 2.8.1 didn't have this check and generated 8756 assembly code that the Unixware assembler rejected. */ 8757 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ 8758 else 8759 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ 8760 break; 8761 } 8762 8763 if (STACK_TOP_P (operands[0])) 8764 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ 8765 else 8766 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ 8767 break; 8768 8769 case MINUS: 8770 case DIV: 8771 if (GET_CODE (operands[1]) == MEM) 8772 { 8773 p = "r%z1\t%1"; 8774 break; 8775 } 8776 8777 if (GET_CODE (operands[2]) == MEM) 8778 { 8779 p = "%z2\t%2"; 8780 break; 8781 } 8782 8783 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) 8784 { 8785#if SYSV386_COMPAT 8786 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T 8787 derived assemblers, confusingly reverse the direction of 8788 the operation for fsub{r} and fdiv{r} when the 8789 destination register is not st(0). The Intel assembler 8790 doesn't have this brain damage. Read !SYSV386_COMPAT to 8791 figure out what the hardware really does. */ 8792 if (STACK_TOP_P (operands[0])) 8793 p = "{p\t%0, %2|rp\t%2, %0}"; 8794 else 8795 p = "{rp\t%2, %0|p\t%0, %2}"; 8796#else 8797 if (STACK_TOP_P (operands[0])) 8798 /* As above for fmul/fadd, we can't store to st(0). 
*/ 8799 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ 8800 else 8801 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ 8802#endif 8803 break; 8804 } 8805 8806 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) 8807 { 8808#if SYSV386_COMPAT 8809 if (STACK_TOP_P (operands[0])) 8810 p = "{rp\t%0, %1|p\t%1, %0}"; 8811 else 8812 p = "{p\t%1, %0|rp\t%0, %1}"; 8813#else 8814 if (STACK_TOP_P (operands[0])) 8815 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */ 8816 else 8817 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */ 8818#endif 8819 break; 8820 } 8821 8822 if (STACK_TOP_P (operands[0])) 8823 { 8824 if (STACK_TOP_P (operands[1])) 8825 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ 8826 else 8827 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */ 8828 break; 8829 } 8830 else if (STACK_TOP_P (operands[1])) 8831 { 8832#if SYSV386_COMPAT 8833 p = "{\t%1, %0|r\t%0, %1}"; 8834#else 8835 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */ 8836#endif 8837 } 8838 else 8839 { 8840#if SYSV386_COMPAT 8841 p = "{r\t%2, %0|\t%0, %2}"; 8842#else 8843 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ 8844#endif 8845 } 8846 break; 8847 8848 default: 8849 gcc_unreachable (); 8850 } 8851 8852 strcat (buf, p); 8853 return buf; 8854} 8855 8856/* Return needed mode for entity in optimize_mode_switching pass. */ 8857 8858int 8859ix86_mode_needed (int entity, rtx insn) 8860{ 8861 enum attr_i387_cw mode; 8862 8863 /* The mode UNINITIALIZED is used to store control word after a 8864 function call or ASM pattern. The mode ANY specify that function 8865 has no requirements on the control word and make no changes in the 8866 bits we are interested in. 
*/ 8867 8868 if (CALL_P (insn) 8869 || (NONJUMP_INSN_P (insn) 8870 && (asm_noperands (PATTERN (insn)) >= 0 8871 || GET_CODE (PATTERN (insn)) == ASM_INPUT))) 8872 return I387_CW_UNINITIALIZED; 8873 8874 if (recog_memoized (insn) < 0) 8875 return I387_CW_ANY; 8876 8877 mode = get_attr_i387_cw (insn); 8878 8879 switch (entity) 8880 { 8881 case I387_TRUNC: 8882 if (mode == I387_CW_TRUNC) 8883 return mode; 8884 break; 8885 8886 case I387_FLOOR: 8887 if (mode == I387_CW_FLOOR) 8888 return mode; 8889 break; 8890 8891 case I387_CEIL: 8892 if (mode == I387_CW_CEIL) 8893 return mode; 8894 break; 8895 8896 case I387_MASK_PM: 8897 if (mode == I387_CW_MASK_PM) 8898 return mode; 8899 break; 8900 8901 default: 8902 gcc_unreachable (); 8903 } 8904 8905 return I387_CW_ANY; 8906} 8907 8908/* Output code to initialize control word copies used by trunc?f?i and 8909 rounding patterns. CURRENT_MODE is set to current control word, 8910 while NEW_MODE is set to new control word. */ 8911 8912void 8913emit_i387_cw_initialization (int mode) 8914{ 8915 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED); 8916 rtx new_mode; 8917 8918 int slot; 8919 8920 rtx reg = gen_reg_rtx (HImode); 8921 8922 emit_insn (gen_x86_fnstcw_1 (stored_mode)); 8923 emit_move_insn (reg, stored_mode); 8924 8925 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size) 8926 { 8927 switch (mode) 8928 { 8929 case I387_CW_TRUNC: 8930 /* round toward zero (truncate) */ 8931 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00))); 8932 slot = SLOT_CW_TRUNC; 8933 break; 8934 8935 case I387_CW_FLOOR: 8936 /* round down toward -oo */ 8937 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); 8938 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400))); 8939 slot = SLOT_CW_FLOOR; 8940 break; 8941 8942 case I387_CW_CEIL: 8943 /* round up toward +oo */ 8944 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); 8945 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800))); 8946 slot = SLOT_CW_CEIL; 8947 break; 8948 8949 case 
I387_CW_MASK_PM: 8950 /* mask precision exception for nearbyint() */ 8951 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020))); 8952 slot = SLOT_CW_MASK_PM; 8953 break; 8954 8955 default: 8956 gcc_unreachable (); 8957 } 8958 } 8959 else 8960 { 8961 switch (mode) 8962 { 8963 case I387_CW_TRUNC: 8964 /* round toward zero (truncate) */ 8965 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc))); 8966 slot = SLOT_CW_TRUNC; 8967 break; 8968 8969 case I387_CW_FLOOR: 8970 /* round down toward -oo */ 8971 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4))); 8972 slot = SLOT_CW_FLOOR; 8973 break; 8974 8975 case I387_CW_CEIL: 8976 /* round up toward +oo */ 8977 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8))); 8978 slot = SLOT_CW_CEIL; 8979 break; 8980 8981 case I387_CW_MASK_PM: 8982 /* mask precision exception for nearbyint() */ 8983 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020))); 8984 slot = SLOT_CW_MASK_PM; 8985 break; 8986 8987 default: 8988 gcc_unreachable (); 8989 } 8990 } 8991 8992 gcc_assert (slot < MAX_386_STACK_LOCALS); 8993 8994 new_mode = assign_386_stack_local (HImode, slot); 8995 emit_move_insn (new_mode, reg); 8996} 8997 8998/* Output code for INSN to convert a float to a signed int. OPERANDS 8999 are the insn operands. The output may be [HSD]Imode and the input 9000 operand may be [SDX]Fmode. */ 9001 9002const char * 9003output_fix_trunc (rtx insn, rtx *operands, int fisttp) 9004{ 9005 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; 9006 int dimode_p = GET_MODE (operands[0]) == DImode; 9007 int round_mode = get_attr_i387_cw (insn); 9008 9009 /* Jump through a hoop or two for DImode, since the hardware has no 9010 non-popping instruction. We used to do this a different way, but 9011 that was somewhat fragile and broke with post-reload splitters. 
*/ 9012 if ((dimode_p || fisttp) && !stack_top_dies) 9013 output_asm_insn ("fld\t%y1", operands); 9014 9015 gcc_assert (STACK_TOP_P (operands[1])); 9016 gcc_assert (GET_CODE (operands[0]) == MEM); 9017 9018 if (fisttp) 9019 output_asm_insn ("fisttp%z0\t%0", operands); 9020 else 9021 { 9022 if (round_mode != I387_CW_ANY) 9023 output_asm_insn ("fldcw\t%3", operands); 9024 if (stack_top_dies || dimode_p) 9025 output_asm_insn ("fistp%z0\t%0", operands); 9026 else 9027 output_asm_insn ("fist%z0\t%0", operands); 9028 if (round_mode != I387_CW_ANY) 9029 output_asm_insn ("fldcw\t%2", operands); 9030 } 9031 9032 return ""; 9033} 9034 9035/* Output code for x87 ffreep insn. The OPNO argument, which may only 9036 have the values zero or one, indicates the ffreep insn's operand 9037 from the OPERANDS array. */ 9038 9039static const char * 9040output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno) 9041{ 9042 if (TARGET_USE_FFREEP) 9043#if HAVE_AS_IX86_FFREEP 9044 return opno ? "ffreep\t%y1" : "ffreep\t%y0"; 9045#else 9046 switch (REGNO (operands[opno])) 9047 { 9048 case FIRST_STACK_REG + 0: return ".word\t0xc0df"; 9049 case FIRST_STACK_REG + 1: return ".word\t0xc1df"; 9050 case FIRST_STACK_REG + 2: return ".word\t0xc2df"; 9051 case FIRST_STACK_REG + 3: return ".word\t0xc3df"; 9052 case FIRST_STACK_REG + 4: return ".word\t0xc4df"; 9053 case FIRST_STACK_REG + 5: return ".word\t0xc5df"; 9054 case FIRST_STACK_REG + 6: return ".word\t0xc6df"; 9055 case FIRST_STACK_REG + 7: return ".word\t0xc7df"; 9056 } 9057#endif 9058 9059 return opno ? "fstp\t%y1" : "fstp\t%y0"; 9060} 9061 9062 9063/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi 9064 should be used. UNORDERED_P is true when fucom should be used. 
*/ 9065 9066const char * 9067output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p) 9068{ 9069 int stack_top_dies; 9070 rtx cmp_op0, cmp_op1; 9071 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]); 9072 9073 if (eflags_p) 9074 { 9075 cmp_op0 = operands[0]; 9076 cmp_op1 = operands[1]; 9077 } 9078 else 9079 { 9080 cmp_op0 = operands[1]; 9081 cmp_op1 = operands[2]; 9082 } 9083 9084 if (is_sse) 9085 { 9086 if (GET_MODE (operands[0]) == SFmode) 9087 if (unordered_p) 9088 return "ucomiss\t{%1, %0|%0, %1}"; 9089 else 9090 return "comiss\t{%1, %0|%0, %1}"; 9091 else 9092 if (unordered_p) 9093 return "ucomisd\t{%1, %0|%0, %1}"; 9094 else 9095 return "comisd\t{%1, %0|%0, %1}"; 9096 } 9097 9098 gcc_assert (STACK_TOP_P (cmp_op0)); 9099 9100 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; 9101 9102 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1))) 9103 { 9104 if (stack_top_dies) 9105 { 9106 output_asm_insn ("ftst\n\tfnstsw\t%0", operands); 9107 return output_387_ffreep (operands, 1); 9108 } 9109 else 9110 return "ftst\n\tfnstsw\t%0"; 9111 } 9112 9113 if (STACK_REG_P (cmp_op1) 9114 && stack_top_dies 9115 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1)) 9116 && REGNO (cmp_op1) != FIRST_STACK_REG) 9117 { 9118 /* If both the top of the 387 stack dies, and the other operand 9119 is also a stack register that dies, then this must be a 9120 `fcompp' float compare */ 9121 9122 if (eflags_p) 9123 { 9124 /* There is no double popping fcomi variant. Fortunately, 9125 eflags is immune from the fstp's cc clobbering. 
*/ 9126 if (unordered_p) 9127 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands); 9128 else 9129 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands); 9130 return output_387_ffreep (operands, 0); 9131 } 9132 else 9133 { 9134 if (unordered_p) 9135 return "fucompp\n\tfnstsw\t%0"; 9136 else 9137 return "fcompp\n\tfnstsw\t%0"; 9138 } 9139 } 9140 else 9141 { 9142 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */ 9143 9144 static const char * const alt[16] = 9145 { 9146 "fcom%z2\t%y2\n\tfnstsw\t%0", 9147 "fcomp%z2\t%y2\n\tfnstsw\t%0", 9148 "fucom%z2\t%y2\n\tfnstsw\t%0", 9149 "fucomp%z2\t%y2\n\tfnstsw\t%0", 9150 9151 "ficom%z2\t%y2\n\tfnstsw\t%0", 9152 "ficomp%z2\t%y2\n\tfnstsw\t%0", 9153 NULL, 9154 NULL, 9155 9156 "fcomi\t{%y1, %0|%0, %y1}", 9157 "fcomip\t{%y1, %0|%0, %y1}", 9158 "fucomi\t{%y1, %0|%0, %y1}", 9159 "fucomip\t{%y1, %0|%0, %y1}", 9160 9161 NULL, 9162 NULL, 9163 NULL, 9164 NULL 9165 }; 9166 9167 int mask; 9168 const char *ret; 9169 9170 mask = eflags_p << 3; 9171 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2; 9172 mask |= unordered_p << 1; 9173 mask |= stack_top_dies; 9174 9175 gcc_assert (mask < 16); 9176 ret = alt[mask]; 9177 gcc_assert (ret); 9178 9179 return ret; 9180 } 9181} 9182 9183void 9184ix86_output_addr_vec_elt (FILE *file, int value) 9185{ 9186 const char *directive = ASM_LONG; 9187 9188#ifdef ASM_QUAD 9189 if (TARGET_64BIT) 9190 directive = ASM_QUAD; 9191#else 9192 gcc_assert (!TARGET_64BIT); 9193#endif 9194 9195 fprintf (file, "%s%s%d\n", directive, LPREFIX, value); 9196} 9197 9198void 9199ix86_output_addr_diff_elt (FILE *file, int value, int rel) 9200{ 9201 if (TARGET_64BIT) 9202 fprintf (file, "%s%s%d-%s%d\n", 9203 ASM_LONG, LPREFIX, value, LPREFIX, rel); 9204 else if (HAVE_AS_GOTOFF_IN_DATA) 9205 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value); 9206#if TARGET_MACHO 9207 else if (TARGET_MACHO) 9208 { 9209 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value); 9210 
machopic_output_function_base_name (file); 9211 fprintf(file, "\n"); 9212 } 9213#endif 9214 else 9215 asm_fprintf (file, "%s%U%s+[.-%s%d]\n", 9216 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value); 9217} 9218 9219/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate 9220 for the target. */ 9221 9222void 9223ix86_expand_clear (rtx dest) 9224{ 9225 rtx tmp; 9226 9227 /* We play register width games, which are only valid after reload. */ 9228 gcc_assert (reload_completed); 9229 9230 /* Avoid HImode and its attendant prefix byte. */ 9231 if (GET_MODE_SIZE (GET_MODE (dest)) < 4) 9232 dest = gen_rtx_REG (SImode, REGNO (dest)); 9233 9234 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx); 9235 9236 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */ 9237 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size)) 9238 { 9239 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17)); 9240 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); 9241 } 9242 9243 emit_insn (tmp); 9244} 9245 9246/* X is an unchanging MEM. If it is a constant pool reference, return 9247 the constant pool rtx, else NULL. 
*/

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}

/* Expand a move of operands[1] into operands[0] in MODE, legitimizing
   TLS symbols, PIC references, pushes, large 64-bit immediates and FP
   constants along the way.  Emits the final SET (or returns early when
   a helper already emitted the full move).  */

void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  int strict = (reload_in_progress || reload_completed);
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
	{
	  op1 = legitimize_tls_address (op1, model, true);
	  op1 = force_operand (op1, op0);
	  /* force_operand may have stored straight into op0.  */
	  if (op1 == op0)
	    return;
	}
    }
  else if (GET_CODE (op1) == CONST
	   && GET_CODE (XEXP (op1, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      /* (const (plus (symbol_ref tls) (const_int))) — legitimize the
	 symbol, then add the addend back on.  */
      model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
      if (model)
	{
	  rtx addend = XEXP (XEXP (op1, 0), 1);
	  op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
	  op1 = force_operand (op1, NULL);
	  op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
				     op0, 1, OPTAB_DIRECT);
	  if (op1 == op0)
	    return;
	}
    }

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
	{
#if TARGET_MACHO
	  if (MACHOPIC_PURE)
	    {
	      rtx temp = ((reload_in_progress
			   || ((op0 && GET_CODE (op0) == REG)
			       && mode == Pmode))
			  ? op0 : gen_reg_rtx (Pmode));
	      op1 = machopic_indirect_data_reference (op1, temp);
	      op1 = machopic_legitimize_pic_address (op1, mode,
						     temp == op1 ? 0 : temp);
	    }
	  else if (MACHOPIC_INDIRECT)
	    op1 = machopic_indirect_data_reference (op1, 0);
	  if (op0 == op1)
	    return;
#endif
	}
      else
	{
	  if (GET_CODE (op0) == MEM)
	    op1 = force_reg (Pmode, op1);
	  else
	    op1 = legitimize_address (op1, op1, Pmode);
	}
    }
  else
    {
      /* mem-to-mem moves (other than pushes) need an intermediate reg.  */
      if (GET_CODE (op0) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && GET_CODE (op1) == MEM)
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (op1, mode)
	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
	  && !register_operand (op0, mode)
	  && optimize && !reload_completed && !reload_in_progress)
	op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (op1) == CONST_DOUBLE)
	    {
	      op1 = validize_mem (force_const_mem (mode, op1));
	      if (!register_operand (op0, mode))
		{
		  rtx temp = gen_reg_rtx (mode);
		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
		  emit_move_insn (op0, temp);
		  return;
		}
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}

/* Expand a vector-mode move of operands[1] into operands[0].  */

void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];

  /* Force constants other than zero into memory.
We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (op0, mode)
      && CONSTANT_P (op1)
      && standard_sse_constant_p (op1) <= 0)
    op1 = validize_mem (force_const_mem (mode, op1));

  /* Make operand1 a register if it isn't already.  */
  if (!no_new_pseudos
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}

/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (MEM_P (op1))
    {
      /* Unaligned load.  */
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer type.
*/
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  rtx zero;

	  if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
	    {
	      op0 = gen_lowpart (V2DFmode, op0);
	      op1 = gen_lowpart (V2DFmode, op1);
	      emit_insn (gen_sse2_movupd (op0, op1));
	      return;
	    }

	  /* When SSE registers are split into halves, we can avoid
	     writing to the top half twice.  */
	  if (TARGET_SSE_SPLIT_REGS)
	    {
	      emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
	      zero = op0;
	    }
	  else
	    {
	      /* ??? Not sure about the best option for the Intel chips.
		 The following would seem to satisfy; the register is
		 entirely cleared, breaking the dependency chain.  We
		 then store to the upper half, with a dependency depth
		 of one.  A rumor has it that Intel recommends two movsd
		 followed by an unpacklpd, but this is unconfirmed.  And
		 given that the dependency depth of the unpacklpd would
		 still be one, I'm not sure why this would be better.
*/
	      zero = CONST0_RTX (V2DFmode);
	    }

	  /* Load the two doubleword halves separately.  */
	  m = adjust_address (op1, DFmode, 0);
	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
	  m = adjust_address (op1, DFmode, 8);
	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
	}
      else
	{
	  if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      op1 = gen_lowpart (V4SFmode, op1);
	      emit_insn (gen_sse_movups (op0, op1));
	      return;
	    }

	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
	    emit_move_insn (op0, CONST0_RTX (mode));
	  else
	    emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));

	  if (mode != V4SFmode)
	    op0 = gen_lowpart (V4SFmode, op0);
	  m = adjust_address (op1, V2SFmode, 0);
	  emit_insn (gen_sse_loadlps (op0, op0, m));
	  m = adjust_address (op1, V2SFmode, 8);
	  emit_insn (gen_sse_loadhps (op0, op0, m));
	}
    }
  else if (MEM_P (op0))
    {
      /* Unaligned store.  */
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? Similar to above, only less clear because of quote
	 typeless stores unquote.
*/
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  /* Store the two doubleword halves separately.  */
	  m = adjust_address (op0, DFmode, 0);
	  emit_insn (gen_sse2_storelpd (m, op1));
	  m = adjust_address (op0, DFmode, 8);
	  emit_insn (gen_sse2_storehpd (m, op1));
	}
      else
	{
	  if (mode != V4SFmode)
	    op1 = gen_lowpart (V4SFmode, op1);
	  m = adjust_address (op0, V2SFmode, 0);
	  emit_insn (gen_sse_storelps (m, op1));
	  m = adjust_address (op0, V2SFmode, 8);
	  emit_insn (gen_sse_storehps (m, op1));
	}
    }
  else
    gcc_unreachable ();
}

/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  /* Decrement the stack pointer explicitly, then store to the new top.  */
  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
			     GEN_INT (-GET_MODE_SIZE (mode)),
			     stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
  emit_move_insn (tmp, x);
}

/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.
*/

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src1, src2, dst;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    src1 = force_reg (mode, src1);

  src1 = operands[1] = src1;
  src2 = operands[2] = src2;
  return dst;
}

/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}

/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (enum rtx_code code,
			 enum machine_mode mode ATTRIBUTE_UNUSED,
			 rtx operands[3])
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
	    || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
		&& rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and the source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}

/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.
*/
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}

/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
   Create a mask for the sign bit in MODE for an SSE register.  If VECT is
   true, then replicate the mask for all elements of the vector register.
   If INVERT is true, then create a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtvec v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  if (mode == SFmode)
    lo = 0x80000000, hi = lo < 0;
  else if (HOST_BITS_PER_WIDE_INT >= 64)
    lo = (HOST_WIDE_INT)1 << shift, hi = -1;
  else
    lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
  mask = gen_lowpart (mode, mask);

  if (mode == SFmode)
    {
      if (vect)
	v = gen_rtvec (4, mask, mask, mask, mask);
      else
	v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
      vec_mode = V4SFmode;
    }
  else
    {
      if (vect)
	v = gen_rtvec (2, mask, mask);
      else
	v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
      vec_mode = V2DFmode;
    }

  return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
}

/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
				rtx operands[])
{
  rtx mask, set, use, clob, dst, src;
  bool matching_memory;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode elt_mode = mode;

  if (vector_mode)
    {
      elt_mode = GET_MODE_INNER (mode);
      use_sse = true;
    }
  else if (TARGET_SSE_MATH)
    use_sse = SSE_FLOAT_MODE_P (mode);

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we don't have matching source
     operands or we're using the x87, do things in registers.  */
  matching_memory = false;
  if (MEM_P (dst))
    {
      if (use_sse && rtx_equal_p (dst, src))
	matching_memory = true;
      else
	dst = gen_reg_rtx (mode);
    }
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  if (vector_mode)
    {
      /* NEG flips the sign bit (XOR); ABS clears it (AND with the
	 inverted mask built above).  */
      set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
      set = gen_rtx_SET (VOIDmode, dst, set);
      emit_insn (set);
    }
  else
    {
      set = gen_rtx_fmt_e (code, mode, src);
      set = gen_rtx_SET (VOIDmode, dst, set);
      if (mask)
	{
	  use = gen_rtx_USE (VOIDmode, mask);
	  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
	  emit_insn (gen_rtx_PARALLEL (VOIDmode,
				       gen_rtvec (3, set, use, clob)));
	}
      else
	emit_insn (set);
    }

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);
  vmode = mode == SFmode ? V4SFmode : V2DFmode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtvec v;

      /* Only the magnitude of op0 matters; drop its sign.  */
      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
	op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (op0 == CONST0_RTX (mode))
	op0 = CONST0_RTX (vmode);
      else
	{
	  if (mode == SFmode)
	    v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
			   CONST0_RTX (SFmode), CONST0_RTX (SFmode));
	  else
	    v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
	  op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
	}

      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
	emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
      else
	emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
    }
  else
    {
      nmask = ix86_build_signbit_mask (mode, 0, 1);
      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
	emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
      else
	emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, x;

  dest = operands[0];
  op0 = operands[1];
  /* NOTE(review): op1 is fetched but not used below; the sign source
     appears to be folded in via MASK/dest by the pattern — confirm.  */
  op1 = operands[2];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  /* dest &= mask (keeps only the sign bit), then OR in the magnitude.  */
  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
	 we'd generate incorrect code if we continue below.
*/

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.
*/

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

/* Return the condition-code mode needed to compare OP0 against OP1
   with comparison CODE.  */

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
	 mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}

/* Return the fixed registers used for condition codes.
*/

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCZmode:
	  /* Plain CCmode is the most constrained common mode.  */
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}

/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
{
  enum rtx_code swapped_code = swap_condition (code);
  /* Use fcomi whenever it is no more costly than the cheapest
     alternative, for the code or its swapped form.  */
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
	  || (ix86_fp_comparison_cost (swapped_code)
	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.
*/

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || (op_mode == XFmode
	      && ! (standard_80387_constant_p (op0) == 1
		    || standard_80387_constant_p (op1) == 1)
	      && GET_CODE (op1) != FLOAT)
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  int tmp = standard_80387_constant_p (op1);
	  if (tmp == 0)
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	  else if (tmp == 1)
	    {
	      if (TARGET_CMOVE)
		op1 = force_reg (op_mode, op1);
	    }
	  else
	    op1 = force_reg (op_mode, op1);
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
      break;
    case UNEQ:
      return EQ;
      break;
    case UNLT:
      return LTU;
      break;
    case UNLE:
      return LEU;
      break;
    case LTGT:
      return NE;
      break;
    default:
      return UNKNOWN;
    }
}

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If some of branches
   is not required, set value to UNKNOWN.
   We never require more than two branches.
*/

void
ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
			  enum rtx_code *first_code,
			  enum rtx_code *second_code)
{
  /* Default: a single branch on CODE itself suffices.  */
  *first_code = code;
  *bypass_code = UNKNOWN;
  *second_code = UNKNOWN;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
    /* These codes are already directly expressible as one flag test
       (see the table above), so the defaults set above stand.  */
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
    /* The remaining codes give the wrong answer for unordered inputs,
       so they need either a bypass branch taken on UNORDERED (skipping
       the main test) or a second branch also taken on UNORDERED.  */
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      gcc_unreachable ();
    }
  /* Without IEEE semantics we need not distinguish the unordered case,
     so a single branch always suffices.  */
  if (!TARGET_IEEE_FP)
    {
      *second_code = UNKNOWN;
      *bypass_code = UNKNOWN;
    }
}

/* Return cost of comparison done fcom + arithmetics operations on AX.
   All following functions do use number of instructions as a cost metrics.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.
*/ 10410static int 10411ix86_fp_comparison_arithmetics_cost (enum rtx_code code) 10412{ 10413 if (!TARGET_IEEE_FP) 10414 return 4; 10415 /* The cost of code output by ix86_expand_fp_compare. */ 10416 switch (code) 10417 { 10418 case UNLE: 10419 case UNLT: 10420 case LTGT: 10421 case GT: 10422 case GE: 10423 case UNORDERED: 10424 case ORDERED: 10425 case UNEQ: 10426 return 4; 10427 break; 10428 case LT: 10429 case NE: 10430 case EQ: 10431 case UNGE: 10432 return 5; 10433 break; 10434 case LE: 10435 case UNGT: 10436 return 6; 10437 break; 10438 default: 10439 gcc_unreachable (); 10440 } 10441} 10442 10443/* Return cost of comparison done using fcomi operation. 10444 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 10445static int 10446ix86_fp_comparison_fcomi_cost (enum rtx_code code) 10447{ 10448 enum rtx_code bypass_code, first_code, second_code; 10449 /* Return arbitrarily high cost when instruction is not supported - this 10450 prevents gcc from using it. */ 10451 if (!TARGET_CMOVE) 10452 return 1024; 10453 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 10454 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2; 10455} 10456 10457/* Return cost of comparison done using sahf operation. 10458 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 10459static int 10460ix86_fp_comparison_sahf_cost (enum rtx_code code) 10461{ 10462 enum rtx_code bypass_code, first_code, second_code; 10463 /* Return arbitrarily high cost when instruction is not preferred - this 10464 avoids gcc from using it. */ 10465 if (!TARGET_USE_SAHF && !optimize_size) 10466 return 1024; 10467 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 10468 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3; 10469} 10470 10471/* Compute cost of the comparison done using any method. 10472 See ix86_fp_comparison_arithmetics_cost for the metrics. 
*/
static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  /* NOTE(review): the 1024 initializer is immediately overwritten below;
     it is dead but harmless.  */
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
  int min;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  /* Take the minimum of the three candidate strategies.  */
  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
    min = sahf_cost;
  if (min > fcomi_cost)
    min = fcomi_cost;
  return min;
}

/* Generate insn patterns to do a floating point compare of OPERANDS.
   SCRATCH, if non-null, is an HImode register used to receive the FPU
   status word; SECOND_TEST/BYPASS_TEST, if non-null, receive extra flag
   tests required for IEEE semantics (see ix86_fp_comparison_codes).
   Returns the rtx comparison to place in the flags user.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
			rtx *second_test, rtx *bypass_test)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  /* Cost is taken BEFORE operand canonicalization, which may swap
     operands and change CODE.  */
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  */
  if ((bypass_code == UNKNOWN || bypass_test)
      && (second_code == UNKNOWN || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  /* fcomi: compare straight into the flags register.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  /* fnstsw + sahf: copy the FPU status word into AH, then into
	     the flags register.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
	  if (!scratch)
	    scratch = gen_reg_rtx (HImode);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != UNKNOWN)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != UNKNOWN)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.

	 The masks below select FPU status bits in AH: 0x45 = C3|C2|C0,
	 0x40 = C3, 0x04 = C2 (NaN), 0x05 = C2|C0, 0x01 = C0.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn
	      (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      /* NOTE(review): this inner break duplicates the one below;
		 harmless.  */
	      break;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}

/* Expand the comparison of ix86_compare_op0/op1 (or a previously emitted
   compare, when ix86_compare_emitted is set) under CODE.  FP compares go
   through ix86_expand_fp_compare; everything else through
   ix86_expand_int_compare.  */

rtx
ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (ix86_compare_emitted)
    {
      /* A compare was already emitted; just build the flags test and
	 consume the pending flags register.  */
      ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
      ix86_compare_emitted = NULL_RTX;
    }
  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
				  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if the CODE will result in nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Without cmove we cannot use fcomi, so the sequence is never the
     trivial single-jump one.  */
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != UNKNOWN || second_code != UNKNOWN;
}

/* Emit a conditional jump to LABEL on comparison CODE of
   ix86_compare_op0/op1.  */

void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  /* If we have emitted a compare insn, go straight to simple.
     ix86_expand_compare won't emit anything if ix86_compare_emitted
     is non NULL.
*/
  if (ix86_compare_emitted)
    goto simple;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      /* Integer modes handled by a single compare + jcc.  */
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
      {
	rtvec vec;
	int use_fcomi;
	enum rtx_code bypass_code, first_code, second_code;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand jump early.  Otherwise delay expansion by
	   creating compound insn to not confuse optimizers.  */
	if (bypass_code == UNKNOWN && second_code == UNKNOWN
	    && TARGET_CMOVE)
	  {
	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx, NULL_RTX, NULL_RTX);
	  }
	else
	  {
	    /* Emit one compound jump insn carrying the whole FP compare;
	       it will be split later (after reload).  */
	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
				  ix86_compare_op0, ix86_compare_op1);
	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx);
	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	    use_fcomi = ix86_use_fcomi_compare (code);
	    vec = rtvec_alloc (3 + !use_fcomi);
	    RTVEC_ELT (vec, 0) = tmp;
	    /* Clobber the flags registers (hard regs 18 and 17); without
	       fcomi we additionally need an HImode scratch for fnstsw.  */
	    RTVEC_ELT (vec, 1)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	    RTVEC_ELT (vec, 2)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	    if (!use_fcomi)
	      RTVEC_ELT (vec, 3)
		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	  }
	return;
      }

    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* fall through: 32-bit DImode handled like TImode below.  */
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;
	enum machine_mode submode;

	/* Canonicalize so any constant is operand 1.  */
	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	/* Split each operand into a low and high word.  */
	if (GET_MODE (ix86_compare_op0) == DImode)
	  {
	    split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	    split_di (&ix86_compare_op1, 1, lo+1, hi+1);
	    submode = SImode;
	  }
	else
	  {
	    split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
	    split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
	    submode = DImode;
	  }

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse on the single-word compare against zero.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
	  case NE:   code2 = UNKNOWN; break;

	  default:
	    gcc_unreachable ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != UNKNOWN)
	  ix86_expand_branch (code1, label);
	if (code2 != UNKNOWN)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != UNKNOWN)
	  emit_label (label2);
	return;
      }

    default:
      gcc_unreachable ();
    }
}

/* Split branch based on floating point condition.  Emits the actual
   jump insns for comparison CODE of OP1/OP2 with branch targets
   TARGET1 (taken) and TARGET2 (fallthrough), attaching branch
   probability notes.  TMP is an optional HImode scratch; PUSHED, if
   set, is an operand to pop back off the stack afterwards.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  /* Canonicalize so TARGET2 is the fallthrough (pc).  */
  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume the BYPASS and SECOND to be always test
	 for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }
  /* Bypass test: jump around the main branch (e.g. on UNORDERED).  */
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }
  /* The main conditional jump.  */
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));
  /* Second test: an additional jump to TARGET1 (e.g. on UNORDERED).  */
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 target2)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}

/* Expand setcc of DEST (a QImode register) from comparison CODE of
   ix86_compare_op0/op1.  Returns 1 on success, 0 on failure (double-word
   modes are punted to the caller).  */

int
ix86_expand_setcc (enum rtx_code code, rtx dest)
{
  rtx ret, tmp, tmpreg, equiv;
  rtx second_test, bypass_test;

  /* Double-word compares take a different path; FAIL here.  */
  if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ?
TImode : DImode))
    return 0; /* FAIL */

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  /* With IEEE FP compares we may get an extra flag test; combine the
     two setcc results with AND (bypass) or OR (second).  */
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  gcc_assert (!second_test);
	  test = bypass_test;
	  bypass = 1;
	  /* The bypass condition is taken to skip the main test, so its
	     setcc form must be inverted before combining.  */
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  /* Attach a REG_EQUAL note describing the comparison result.  */
  if (ix86_compare_op0 && ix86_compare_op1)
    {
      equiv = simplify_gen_relational (code, QImode,
				       GET_MODE (ix86_compare_op0),
				       ix86_compare_op0, ix86_compare_op1);
      set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
    }

  return 1; /* DONE */
}

/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle DImode compares that go through special path.  Also we can't
     deal with FP compares yet.  This is possible to add.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;
  if (FLOAT_MODE_P (mode))
    {
      rtx second_test = NULL, bypass_test = NULL;
      rtx compare_op, compare_seq;

      /* Shortcut:  following common codes never translate into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
	  || code == ORDERED || code == UNORDERED)
	return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
	  && !TARGET_IEEE_FP)
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = swap_condition (code);
	}

      /* Try to expand the comparison and verify that we end up with carry flag
	 based comparison.  This is fails to be true only when we decide to expand
	 comparison using arithmetic that is not too common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
					   &second_test, &bypass_test);
      compare_seq = get_insns ();
      end_sequence ();

      if (second_test || bypass_test)
	return false;
      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
	code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
	code = GET_CODE (compare_op);
      /* Only carry-flag conditions qualify.  */
      if (code != LTU && code != GEU)
	return false;
      /* Commit the trial sequence only once we know it qualifies.  */
      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }
  if (!INTEGRAL_MODE_P (mode))
    return false;
  /* Rewrite CODE into an equivalent LTU/GEU form where possible.  */
  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
	return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (GET_CODE (op1) == CONST_INT)
	{
	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We still can swap operands but that
	     would force loading of the constant into register.  */
	  if (op1 == const0_rtx
	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
	    return false;
	  code = (code == GTU ? GEU : LTU);
	}
      else
	{
	  rtx tmp = op1;
	  op1 = op0;
	  op0 = tmp;
	  code = (code == GTU ? LTU : GEU);
	}
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (no_new_pseudos)
	return false;
      op0 = force_reg (mode, op0);
    }
  ix86_compare_op0 = op0;
  ix86_compare_op1 = op1;
  *pop = ix86_expand_compare (code, NULL, NULL);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}

/* Expand a conditional move (cmov) of integer OPERANDS:
   operands[0] = operands[1] (a comparison) ? operands[2] : operands[3].
   Returns 1 when done, 0 on failure.  */

int
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;;

  /* Expand the compare into a trial sequence so we can inspect it.  */
  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  /* x<0 / x>=0 (and x>-1 / x<=-1) test only the sign bit.  */
  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than we do by using
	 sbb.  */
      if (sign_bit_compare_p
	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
					     ix86_compare_op1, &compare_op))
	{
	  /* Detect overlap between destination and compare sources.
*/ 11216 rtx tmp = out; 11217 11218 if (!sign_bit_compare_p) 11219 { 11220 bool fpcmp = false; 11221 11222 compare_code = GET_CODE (compare_op); 11223 11224 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode 11225 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) 11226 { 11227 fpcmp = true; 11228 compare_code = ix86_fp_compare_code_to_integer (compare_code); 11229 } 11230 11231 /* To simplify rest of code, restrict to the GEU case. */ 11232 if (compare_code == LTU) 11233 { 11234 HOST_WIDE_INT tmp = ct; 11235 ct = cf; 11236 cf = tmp; 11237 compare_code = reverse_condition (compare_code); 11238 code = reverse_condition (code); 11239 } 11240 else 11241 { 11242 if (fpcmp) 11243 PUT_CODE (compare_op, 11244 reverse_condition_maybe_unordered 11245 (GET_CODE (compare_op))); 11246 else 11247 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); 11248 } 11249 diff = ct - cf; 11250 11251 if (reg_overlap_mentioned_p (out, ix86_compare_op0) 11252 || reg_overlap_mentioned_p (out, ix86_compare_op1)) 11253 tmp = gen_reg_rtx (mode); 11254 11255 if (mode == DImode) 11256 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op)); 11257 else 11258 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op)); 11259 } 11260 else 11261 { 11262 if (code == GT || code == GE) 11263 code = reverse_condition (code); 11264 else 11265 { 11266 HOST_WIDE_INT tmp = ct; 11267 ct = cf; 11268 cf = tmp; 11269 diff = ct - cf; 11270 } 11271 tmp = emit_store_flag (tmp, code, ix86_compare_op0, 11272 ix86_compare_op1, VOIDmode, 0, -1); 11273 } 11274 11275 if (diff == 1) 11276 { 11277 /* 11278 * cmpl op0,op1 11279 * sbbl dest,dest 11280 * [addl dest, ct] 11281 * 11282 * Size 5 - 8. 11283 */ 11284 if (ct) 11285 tmp = expand_simple_binop (mode, PLUS, 11286 tmp, GEN_INT (ct), 11287 copy_rtx (tmp), 1, OPTAB_DIRECT); 11288 } 11289 else if (cf == -1) 11290 { 11291 /* 11292 * cmpl op0,op1 11293 * sbbl dest,dest 11294 * orl $ct, dest 11295 * 11296 * Size 8. 
11297 */ 11298 tmp = expand_simple_binop (mode, IOR, 11299 tmp, GEN_INT (ct), 11300 copy_rtx (tmp), 1, OPTAB_DIRECT); 11301 } 11302 else if (diff == -1 && ct) 11303 { 11304 /* 11305 * cmpl op0,op1 11306 * sbbl dest,dest 11307 * notl dest 11308 * [addl dest, cf] 11309 * 11310 * Size 8 - 11. 11311 */ 11312 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); 11313 if (cf) 11314 tmp = expand_simple_binop (mode, PLUS, 11315 copy_rtx (tmp), GEN_INT (cf), 11316 copy_rtx (tmp), 1, OPTAB_DIRECT); 11317 } 11318 else 11319 { 11320 /* 11321 * cmpl op0,op1 11322 * sbbl dest,dest 11323 * [notl dest] 11324 * andl cf - ct, dest 11325 * [addl dest, ct] 11326 * 11327 * Size 8 - 11. 11328 */ 11329 11330 if (cf == 0) 11331 { 11332 cf = ct; 11333 ct = 0; 11334 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); 11335 } 11336 11337 tmp = expand_simple_binop (mode, AND, 11338 copy_rtx (tmp), 11339 gen_int_mode (cf - ct, mode), 11340 copy_rtx (tmp), 1, OPTAB_DIRECT); 11341 if (ct) 11342 tmp = expand_simple_binop (mode, PLUS, 11343 copy_rtx (tmp), GEN_INT (ct), 11344 copy_rtx (tmp), 1, OPTAB_DIRECT); 11345 } 11346 11347 if (!rtx_equal_p (tmp, out)) 11348 emit_move_insn (copy_rtx (out), copy_rtx (tmp)); 11349 11350 return 1; /* DONE */ 11351 } 11352 11353 if (diff < 0) 11354 { 11355 HOST_WIDE_INT tmp; 11356 tmp = ct, ct = cf, cf = tmp; 11357 diff = -diff; 11358 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0))) 11359 { 11360 /* We may be reversing unordered compare to normal compare, that 11361 is not valid in general (we may convert non-trapping condition 11362 to trapping one), however on i386 we currently emit all 11363 comparisons unordered. 
*/ 11364 compare_code = reverse_condition_maybe_unordered (compare_code); 11365 code = reverse_condition_maybe_unordered (code); 11366 } 11367 else 11368 { 11369 compare_code = reverse_condition (compare_code); 11370 code = reverse_condition (code); 11371 } 11372 } 11373 11374 compare_code = UNKNOWN; 11375 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT 11376 && GET_CODE (ix86_compare_op1) == CONST_INT) 11377 { 11378 if (ix86_compare_op1 == const0_rtx 11379 && (code == LT || code == GE)) 11380 compare_code = code; 11381 else if (ix86_compare_op1 == constm1_rtx) 11382 { 11383 if (code == LE) 11384 compare_code = LT; 11385 else if (code == GT) 11386 compare_code = GE; 11387 } 11388 } 11389 11390 /* Optimize dest = (op0 < 0) ? -1 : cf. */ 11391 if (compare_code != UNKNOWN 11392 && GET_MODE (ix86_compare_op0) == GET_MODE (out) 11393 && (cf == -1 || ct == -1)) 11394 { 11395 /* If lea code below could be used, only optimize 11396 if it results in a 2 insn sequence. */ 11397 11398 if (! 
(diff == 1 || diff == 2 || diff == 4 || diff == 8 11399 || diff == 3 || diff == 5 || diff == 9) 11400 || (compare_code == LT && ct == -1) 11401 || (compare_code == GE && cf == -1)) 11402 { 11403 /* 11404 * notl op1 (if necessary) 11405 * sarl $31, op1 11406 * orl cf, op1 11407 */ 11408 if (ct != -1) 11409 { 11410 cf = ct; 11411 ct = -1; 11412 code = reverse_condition (code); 11413 } 11414 11415 out = emit_store_flag (out, code, ix86_compare_op0, 11416 ix86_compare_op1, VOIDmode, 0, -1); 11417 11418 out = expand_simple_binop (mode, IOR, 11419 out, GEN_INT (cf), 11420 out, 1, OPTAB_DIRECT); 11421 if (out != operands[0]) 11422 emit_move_insn (operands[0], out); 11423 11424 return 1; /* DONE */ 11425 } 11426 } 11427 11428 11429 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8 11430 || diff == 3 || diff == 5 || diff == 9) 11431 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL) 11432 && (mode != DImode 11433 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode))) 11434 { 11435 /* 11436 * xorl dest,dest 11437 * cmpl op1,op2 11438 * setcc dest 11439 * lea cf(dest*(ct-cf)),dest 11440 * 11441 * Size 14. 11442 * 11443 * This also catches the degenerate setcc-only case. 11444 */ 11445 11446 rtx tmp; 11447 int nops; 11448 11449 out = emit_store_flag (out, code, ix86_compare_op0, 11450 ix86_compare_op1, VOIDmode, 0, 1); 11451 11452 nops = 0; 11453 /* On x86_64 the lea instruction operates on Pmode, so we need 11454 to get arithmetics done in proper mode to match. 
*/ 11455 if (diff == 1) 11456 tmp = copy_rtx (out); 11457 else 11458 { 11459 rtx out1; 11460 out1 = copy_rtx (out); 11461 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); 11462 nops++; 11463 if (diff & 1) 11464 { 11465 tmp = gen_rtx_PLUS (mode, tmp, out1); 11466 nops++; 11467 } 11468 } 11469 if (cf != 0) 11470 { 11471 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf)); 11472 nops++; 11473 } 11474 if (!rtx_equal_p (tmp, out)) 11475 { 11476 if (nops == 1) 11477 out = force_operand (tmp, copy_rtx (out)); 11478 else 11479 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp))); 11480 } 11481 if (!rtx_equal_p (out, operands[0])) 11482 emit_move_insn (operands[0], copy_rtx (out)); 11483 11484 return 1; /* DONE */ 11485 } 11486 11487 /* 11488 * General case: Jumpful: 11489 * xorl dest,dest cmpl op1, op2 11490 * cmpl op1, op2 movl ct, dest 11491 * setcc dest jcc 1f 11492 * decl dest movl cf, dest 11493 * andl (cf-ct),dest 1: 11494 * addl ct,dest 11495 * 11496 * Size 20. Size 14. 11497 * 11498 * This is reasonably steep, but branch mispredict costs are 11499 * high on modern cpus, so consider failing only if optimizing 11500 * for space. 11501 */ 11502 11503 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) 11504 && BRANCH_COST >= 2) 11505 { 11506 if (cf == 0) 11507 { 11508 cf = ct; 11509 ct = 0; 11510 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0))) 11511 /* We may be reversing unordered compare to normal compare, 11512 that is not valid in general (we may convert non-trapping 11513 condition to trapping one), however on i386 we currently 11514 emit all comparisons unordered. 
*/ 11515 code = reverse_condition_maybe_unordered (code); 11516 else 11517 { 11518 code = reverse_condition (code); 11519 if (compare_code != UNKNOWN) 11520 compare_code = reverse_condition (compare_code); 11521 } 11522 } 11523 11524 if (compare_code != UNKNOWN) 11525 { 11526 /* notl op1 (if needed) 11527 sarl $31, op1 11528 andl (cf-ct), op1 11529 addl ct, op1 11530 11531 For x < 0 (resp. x <= -1) there will be no notl, 11532 so if possible swap the constants to get rid of the 11533 complement. 11534 True/false will be -1/0 while code below (store flag 11535 followed by decrement) is 0/-1, so the constants need 11536 to be exchanged once more. */ 11537 11538 if (compare_code == GE || !cf) 11539 { 11540 code = reverse_condition (code); 11541 compare_code = LT; 11542 } 11543 else 11544 { 11545 HOST_WIDE_INT tmp = cf; 11546 cf = ct; 11547 ct = tmp; 11548 } 11549 11550 out = emit_store_flag (out, code, ix86_compare_op0, 11551 ix86_compare_op1, VOIDmode, 0, -1); 11552 } 11553 else 11554 { 11555 out = emit_store_flag (out, code, ix86_compare_op0, 11556 ix86_compare_op1, VOIDmode, 0, 1); 11557 11558 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx, 11559 copy_rtx (out), 1, OPTAB_DIRECT); 11560 } 11561 11562 out = expand_simple_binop (mode, AND, copy_rtx (out), 11563 gen_int_mode (cf - ct, mode), 11564 copy_rtx (out), 1, OPTAB_DIRECT); 11565 if (ct) 11566 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct), 11567 copy_rtx (out), 1, OPTAB_DIRECT); 11568 if (!rtx_equal_p (out, operands[0])) 11569 emit_move_insn (operands[0], copy_rtx (out)); 11570 11571 return 1; /* DONE */ 11572 } 11573 } 11574 11575 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) 11576 { 11577 /* Try a few things more with specific constants and a variable. 
*/ 11578 11579 optab op; 11580 rtx var, orig_out, out, tmp; 11581 11582 if (BRANCH_COST <= 2) 11583 return 0; /* FAIL */ 11584 11585 /* If one of the two operands is an interesting constant, load a 11586 constant with the above and mask it in with a logical operation. */ 11587 11588 if (GET_CODE (operands[2]) == CONST_INT) 11589 { 11590 var = operands[3]; 11591 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx) 11592 operands[3] = constm1_rtx, op = and_optab; 11593 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx) 11594 operands[3] = const0_rtx, op = ior_optab; 11595 else 11596 return 0; /* FAIL */ 11597 } 11598 else if (GET_CODE (operands[3]) == CONST_INT) 11599 { 11600 var = operands[2]; 11601 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx) 11602 operands[2] = constm1_rtx, op = and_optab; 11603 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx) 11604 operands[2] = const0_rtx, op = ior_optab; 11605 else 11606 return 0; /* FAIL */ 11607 } 11608 else 11609 return 0; /* FAIL */ 11610 11611 orig_out = operands[0]; 11612 tmp = gen_reg_rtx (mode); 11613 operands[0] = tmp; 11614 11615 /* Recurse to get the constant loaded. */ 11616 if (ix86_expand_int_movcc (operands) == 0) 11617 return 0; /* FAIL */ 11618 11619 /* Mask in the interesting variable. */ 11620 out = expand_binop (mode, op, var, tmp, orig_out, 0, 11621 OPTAB_WIDEN); 11622 if (!rtx_equal_p (out, orig_out)) 11623 emit_move_insn (copy_rtx (orig_out), copy_rtx (out)); 11624 11625 return 1; /* DONE */ 11626 } 11627 11628 /* 11629 * For comparison with above, 11630 * 11631 * movl cf,dest 11632 * movl ct,tmp 11633 * cmpl op1,op2 11634 * cmovcc tmp,dest 11635 * 11636 * Size 15. 11637 */ 11638 11639 if (! nonimmediate_operand (operands[2], mode)) 11640 operands[2] = force_reg (mode, operands[2]); 11641 if (! 
nonimmediate_operand (operands[3], mode)) 11642 operands[3] = force_reg (mode, operands[3]); 11643 11644 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) 11645 { 11646 rtx tmp = gen_reg_rtx (mode); 11647 emit_move_insn (tmp, operands[3]); 11648 operands[3] = tmp; 11649 } 11650 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) 11651 { 11652 rtx tmp = gen_reg_rtx (mode); 11653 emit_move_insn (tmp, operands[2]); 11654 operands[2] = tmp; 11655 } 11656 11657 if (! register_operand (operands[2], VOIDmode) 11658 && (mode == QImode 11659 || ! register_operand (operands[3], VOIDmode))) 11660 operands[2] = force_reg (mode, operands[2]); 11661 11662 if (mode == QImode 11663 && ! register_operand (operands[3], VOIDmode)) 11664 operands[3] = force_reg (mode, operands[3]); 11665 11666 emit_insn (compare_seq); 11667 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 11668 gen_rtx_IF_THEN_ELSE (mode, 11669 compare_op, operands[2], 11670 operands[3]))); 11671 if (bypass_test) 11672 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), 11673 gen_rtx_IF_THEN_ELSE (mode, 11674 bypass_test, 11675 copy_rtx (operands[3]), 11676 copy_rtx (operands[0])))); 11677 if (second_test) 11678 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), 11679 gen_rtx_IF_THEN_ELSE (mode, 11680 second_test, 11681 copy_rtx (operands[2]), 11682 copy_rtx (operands[0])))); 11683 11684 return 1; /* DONE */ 11685} 11686 11687/* Swap, force into registers, or otherwise massage the two operands 11688 to an sse comparison with a mask result. Thus we differ a bit from 11689 ix86_prepare_fp_compare_args which expects to produce a flags result. 11690 11691 The DEST operand exists to help determine whether to commute commutative 11692 operators. The POP0/POP1 operands are updated in place. The new 11693 comparison code is returned, or UNKNOWN if not implementable. 
*/

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
				  rtx *pop0, rtx *pop1)
{
  rtx tmp;

  switch (code)
    {
    case LTGT:
    case UNEQ:
      /* We have no LTGT as an operator.  We could implement it with
	 NE & ORDERED, but this requires an extra temporary.  It's
	 not clear that it's worth it.  */
      return UNKNOWN;

    case LT:
    case LE:
    case UNGT:
    case UNGE:
      /* These are supported directly by the SSE compare instructions.  */
      break;

    case EQ:
    case NE:
    case UNORDERED:
    case ORDERED:
      /* For commutative operators, try to canonicalize the destination
	 operand to be first in the comparison - this helps reload to
	 avoid extra moves.  Only swap (fall through below) when DEST
	 matches the second operand.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
	break;
      /* FALLTHRU */

    case GE:
    case GT:
    case UNLE:
    case UNLT:
      /* These are not supported directly.  Swap the comparison operands
	 to transform into something that is supported.  */
      tmp = *pop0;
      *pop0 = *pop1;
      *pop1 = tmp;
      code = swap_condition (code);
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}

/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.
*/

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
			   rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  /* Only LT matches min/max directly; UNGE is the same test with the
     arms exchanged, so normalize it to the LT shape.  Anything else
     is not a min/max.  */
  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  /* x < y ? x : y is MIN; x < y ? y : x is MAX.  Any other operand
     pairing is not a min/max at all.  */
  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.
     Without -ffinite-math-only -funsafe-math-optimizations, wrap the
     operation in an UNSPEC so the IEEE-exact operand order of the
     hardware min/max instruction is preserved.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}

/* Expand an sse vector comparison.  Return the register with the result.
*/

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
		     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx x;

  /* The first compare operand must be a register; the second may be
     memory.  */
  cmp_op0 = force_reg (mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, mode))
    cmp_op1 = force_reg (mode, cmp_op1);

  /* Use a fresh pseudo when optimizing (helps CSE/regalloc) or when
     DEST overlaps one of the arms and would be clobbered early.  */
  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}

/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx t2, t3, x;

  if (op_false == CONST0_RTX (mode))
    {
      /* CMP ? t : 0  ==>  CMP & t.  */
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      /* CMP ? 0 : f  ==>  ~CMP & f.  */
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      /* General case:  (CMP & t) | (~CMP & f).  */
      op_true = force_reg (mode, op_true);
      op_false = force_reg (mode, op_false);

      t2 = gen_reg_rtx (mode);
      if (optimize)
	t3 = gen_reg_rtx (mode);
      else
	t3 = dest;

      x = gen_rtx_AND (mode, op_true, cmp);
      emit_insn (gen_rtx_SET (VOIDmode, t2, x));

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, t3, x));

      x = gen_rtx_IOR (mode, t3, t2);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
/* Expand a floating-point conditional move.  Return true if successful.  */

int
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op, second_test, bypass_test;

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
	 allocation just to gain access to it.  Deny movcc when the
	 comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (ix86_compare_op0);
      if (cmode == VOIDmode)
	cmode = GET_MODE (ix86_compare_op1);
      if (cmode != mode)
	return 0;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					       &ix86_compare_op0,
					       &ix86_compare_op1);
      if (code == UNKNOWN)
	return 0;

      /* Prefer the dedicated min/max patterns when the movcc matches
	 them exactly.  */
      if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
				     ix86_compare_op1, operands[2],
				     operands[3]))
	return 1;

      /* Otherwise materialize the compare mask and blend with logical
	 operations.  */
      tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
				 ix86_compare_op1, operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return 1;
    }

  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison;
     synthesize a flag value with setcc and test that against zero.  */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      gcc_assert (!second_test && !bypass_test);
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }

  /* Copy an arm into a temporary when the extra bypass/second fcmov
     would otherwise read a value the first fcmov already clobbered.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
						operands[2], operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode, bypass_test,
						  operands[3], operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode, second_test,
						  operands[2], operands[0])));

  return 1;
}

/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.
*/

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					   &operands[4], &operands[5]);
  if (code == UNKNOWN)
    return false;

  /* Prefer the dedicated min/max patterns when the vcond matches
     them exactly.  */
  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
				 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
			     operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}

/* Expand a signed integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Canonicalize the comparison to EQ, GT, GTU.  NEGATE records that
     the arms must be exchanged (done below via operands[1+negate] /
     operands[2-negate]) when the canonical code is the reverse of the
     requested one.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GTU:
      break;

    case NE:
    case LE:
    case LEU:
      code = reverse_condition (code);
      negate = true;
      break;

    case GE:
    case GEU:
      code = reverse_condition (code);
      negate = true;
      /* FALLTHRU */

    case LT:
    case LTU:
      code = swap_condition (code);
      x = cop0, cop0 = cop1, cop1 = x;
      break;

    default:
      gcc_unreachable ();
    }

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
  if (code == GTU)
    {
      cop0 = force_reg (mode, cop0);

      switch (mode)
	{
	case V4SImode:
	  {
	    rtx t1, t2, mask;

	    /* Perform a parallel modulo subtraction.  */
	    t1 = gen_reg_rtx (mode);
	    emit_insn (gen_subv4si3 (t1, cop0, cop1));

	    /* Extract the original sign bit of op0.
	       NOTE(review): -0x80000000 relies on the host's wrap-around
	       of the unsigned literal 0x80000000 to yield the sign-bit
	       mask — confirm it produces 0xffffffff80000000-free SImode
	       constants on all hosts.  */
	    mask = GEN_INT (-0x80000000);
	    mask = gen_rtx_CONST_VECTOR (mode,
					 gen_rtvec (4, mask, mask, mask, mask));
	    mask = force_reg (mode, mask);
	    t2 = gen_reg_rtx (mode);
	    emit_insn (gen_andv4si3 (t2, cop0, mask));

	    /* XOR it back into the result of the subtraction.  This results
	       in the sign bit set iff we saw unsigned underflow.  */
	    x = gen_reg_rtx (mode);
	    emit_insn (gen_xorv4si3 (x, t1, t2));

	    code = GT;
	  }
	  break;

	case V16QImode:
	case V8HImode:
	  /* Perform a parallel unsigned saturating subtraction.
	     The result is zero exactly when op0 <=u op1, so compare
	     against 0 with EQ and flip NEGATE.  */
	  x = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, x,
				  gen_rtx_US_MINUS (mode, cop0, cop1)));

	  code = EQ;
	  negate = !negate;
	  break;

	default:
	  gcc_unreachable ();
	}

      cop0 = x;
      cop1 = CONST0_RTX (mode);
    }

  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
			   operands[1+negate], operands[2-negate]);

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
			 operands[2-negate]);
  return true;
}

/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.
*/
int
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Only +1 / -1 increments can be folded into adc/sbb.  */
  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
				       ix86_compare_op1, &compare_op))
     return 0;
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  /* adc/sbb consume the carry flag, i.e. an LTU condition; for the
     opposite condition reverse the compare and add -1 via carry
     instead.  */
  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
	{
	case QImode:
	  emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case HImode:
	  emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case SImode:
	  emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case DImode:
	  emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (GET_MODE (operands[0]))
	{
	case QImode:
	  emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case HImode:
	  emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case SImode:
	  emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case DImode:
	  emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  return 1; /* DONE */
}


/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating pointer parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  /* Number of word-sized parts: 32-bit target splits into 4-byte
     pieces (XFmode takes 3), 64-bit target into 8-byte pieces.  */
  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 3);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
	 the operand may actually have a different mode now.  That's
	 ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Hard registers only: consecutive regnos form the parts.  */
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  gcc_unreachable ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);

	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
	        parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
	        parts[0] = immed_double_const (l[0], l[1], DImode);

	      if (upper_mode == SImode)
	        parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
	        parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
	        parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }

  return size;
}

/* Emit insns to perform a move or push of DI, DF, and XF values.
   Operands 2-4 contain the input values in the correct order;
   operands 5-7 contain the output values (the splitters in i386.md
   read them back after this function assigns them).  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (GET_CODE (operands[0]) != MEM
		|| offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  part[1][1] = replace_equiv_address (part[1][1],
					      plus_constant (base, UNITS_PER_WORD));
	  if (nparts == 3)
	    part[1][2] = replace_equiv_address (part[1][2],
						plus_constant (base, 8));
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      switch (GET_CODE (part[1][1]))
		{
		case MEM:
		  part[1][1] = adjust_address (part[1][1], DImode, 0);
		  break;

		case REG:
		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
		  break;

		default:
		  gcc_unreachable ();
		}

	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Copy high-to-low.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      /* Copy low-to-high.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_size)
    {
      if (GET_CODE (operands[5]) == CONST_INT
	  && operands[5] != const0_rtx
	  && REG_P (operands[2]))
	{
	  if (GET_CODE (operands[6]) == CONST_INT
	      && INTVAL (operands[6]) == INTVAL (operands[5]))
	    operands[6] = operands[2];

	  if (nparts == 3
	      && GET_CODE (operands[7]) == CONST_INT
	      && INTVAL (operands[7]) == INTVAL (operands[5]))
	    operands[7] = operands[2];
	}

      if (nparts == 3
	  && GET_CODE (operands[6]) == CONST_INT
	  && operands[6] != const0_rtx
	  && REG_P (operands[3])
	  && GET_CODE (operands[7]) == CONST_INT
	  && INTVAL (operands[7]) == INTVAL (operands[6]))
	operands[7] = operands[3];
    }

  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}

/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.

   MODE here is the double-width mode being split, so the actual
   operation is emitted on the half-width part: DImode split ->
   SImode adds/shifts, TImode split -> DImode adds/shifts.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  if (count == 1)
    {
      /* Shift by one: a single add (reg+reg) is the cheapest form.  */
      emit_insn ((mode == DImode
		  ? gen_addsi3
		  : gen_adddi3) (operand, operand, operand));
    }
  else if (!optimize_size
	   && count * ix86_cost->add <= ix86_cost->shift_const)
    {
      /* A short run of adds can beat a constant shift on some CPUs.  */
      int i;
      for (i=0; i<count; i++)
	{
	  emit_insn ((mode == DImode
		      ? gen_addsi3
		      : gen_adddi3) (operand, operand, operand));
	}
    }
  else
    emit_insn ((mode == DImode
		? gen_ashlsi3
		: gen_ashldi3) (operand, operand, GEN_INT (count)));
}

void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ?
/* Split an MODE (DImode on 32-bit, TImode on 64-bit) left shift into
   operations on the two half-width registers.  OPERANDS[0] is the
   destination, OPERANDS[1] the source, OPERANDS[2] the shift count.
   SCRATCH, when non-NULL and cmove is available, is a spare register
   used to avoid a conditional branch for the variable-count case.  */

void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  /* Width in bits of one half of the split value.  */
  const int single_width = mode == DImode ? 32 : 64;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      /* Constant shift count: split both source and destination and
	 emit a fixed instruction sequence.  The count is reduced
	 modulo twice the half width, matching hardware behavior.  */
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
	{
	  /* The low half shifts entirely into the high half; the low
	     result half becomes zero.  */
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > single_width)
	    ix86_expand_ashl_const (high[0], count - single_width, mode);
	}
      else
	{
	  /* Small count: shld feeds low bits into the high half, then
	     the low half is shifted on its own.  */
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shld_1
		      : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
	  ix86_expand_ashl_const (low[0], count, mode);
	}
      return;
    }

  /* Variable shift count from here on.  */
  (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen a QImode capable registers, then 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
	{
	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

	  ix86_expand_clear (low[0]);
	  ix86_expand_clear (high[0]);
	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));

	  /* Set the low half's low byte to (count & single_width) == 0.  */
	  d = gen_lowpart (QImode, low[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));

	  /* And the high half's low byte to the complement.  */
	  d = gen_lowpart (QImode, high[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_NE (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
	}

      /* Otherwise, we can get the same results by manually performing
	 a bit extract operation on bit 5/6, and then performing the two
	 shifts.  The two methods of getting 0/1 into low/high are exactly
	 the same size.  Avoiding the shift in the bit extract case helps
	 pentium4 a bit; no one else seems to care much either way.  */
      else
	{
	  rtx x;

	  if (TARGET_PARTIAL_REG_STALL && !optimize_size)
	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
	  else
	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

	  /* Extract bit 5 (DImode) or bit 6 (TImode) of the count: it
	     selects which half receives the single one bit.  */
	  emit_insn ((mode == DImode
		      ? gen_lshrsi3
		      : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
	  emit_insn ((mode == DImode
		      ? gen_andsi3
		      : gen_anddi3) (high[0], high[0], GEN_INT (1)));
	  emit_move_insn (low[0], high[0]);
	  emit_insn ((mode == DImode
		      ? gen_xorsi3
		      : gen_xordi3) (low[0], low[0], GEN_INT (1)));
	}

      /* Both halves are shifted by the (masked-by-hardware) count.  */
      emit_insn ((mode == DImode
		  ? gen_ashlsi3
		  : gen_ashldi3) (low[0], low[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_ashlsi3
		  : gen_ashldi3) (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
	 know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_size)
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
      emit_insn ((mode == DImode
		  ? gen_x86_shld_1
		  : gen_x86_64_shld) (high[0], low[0], operands[2]));
    }

  emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));

  /* Fix up the case count >= single_width: with cmove and a scratch
     register this is branch-free, otherwise a conditional-jump pattern
     is used.  */
  if (TARGET_CMOVE && scratch)
    {
      ix86_expand_clear (scratch);
      emit_insn ((mode == DImode
		  ? gen_x86_shift_adj_1
		  : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
    }
  else
    emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
}
/* Split an MODE (DImode on 32-bit, TImode on 64-bit) arithmetic right
   shift into operations on the two half-width registers.  Layout of
   OPERANDS and the role of SCRATCH are as in ix86_split_ashl.  */

void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  /* Width in bits of one half of the split value.  */
  const int single_width = mode == DImode ? 32 : 64;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count == single_width * 2 - 1)
	{
	  /* Maximal count: both halves become a copy of the sign bit.  */
	  emit_move_insn (high[0], high[1]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
	  emit_move_insn (low[0], high[0]);

	}
      else if (count >= single_width)
	{
	  /* The high source half lands in the low result half; the high
	     result half is filled with sign bits.  */
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], low[0]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
	  if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_ashrsi3
			: gen_ashrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
	}
      else
	{
	  /* Small count: shrd feeds high bits into the low half, the
	     high half is shifted arithmetically on its own.  */
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd_1
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
		  ? gen_x86_shrd_1
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_ashrsi3
		  : gen_ashrdi3) (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  /* Precompute the all-sign-bits value in SCRATCH so the
	     count >= single_width fixup can be done with cmove.  */
	  emit_move_insn (scratch, high[0]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (scratch, scratch,
				      GEN_INT (single_width - 1)));
	  emit_insn ((mode == DImode
		      ? gen_x86_shift_adj_1
		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
					       scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}
/* Split an MODE (DImode on 32-bit, TImode on 64-bit) logical right
   shift into operations on the two half-width registers.  Layout of
   OPERANDS and the role of SCRATCH are as in ix86_split_ashl.  */

void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  /* Width in bits of one half of the split value.  */
  const int single_width = mode == DImode ? 32 : 64;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
	{
	  /* The high source half lands in the low result half; the
	     high result half becomes zero.  */
	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	  if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_lshrsi3
			: gen_lshrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
	}
      else
	{
	  /* Small count: shrd feeds high bits into the low half, the
	     high half is shifted logically on its own.  */
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd_1
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_lshrsi3
		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
		  ? gen_x86_shrd_1
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_lshrsi3
		  : gen_lshrdi3) (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && scratch)
	{
	  ix86_expand_clear (scratch);
	  emit_insn ((mode == DImode
		      ? gen_x86_shift_adj_1
		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
					       scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}
gen_x86_shrd_1 12830 : gen_x86_64_shrd) (low[0], high[0], operands[2])); 12831 emit_insn ((mode == DImode 12832 ? gen_lshrsi3 12833 : gen_lshrdi3) (high[0], high[0], operands[2])); 12834 12835 /* Heh. By reversing the arguments, we can reuse this pattern. */ 12836 if (TARGET_CMOVE && scratch) 12837 { 12838 ix86_expand_clear (scratch); 12839 emit_insn ((mode == DImode 12840 ? gen_x86_shift_adj_1 12841 : gen_x86_64_shift_adj) (low[0], high[0], operands[2], 12842 scratch)); 12843 } 12844 else 12845 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2])); 12846 } 12847} 12848 12849/* Helper function for the string operations below. Dest VARIABLE whether 12850 it is aligned to VALUE bytes. If true, jump to the label. */ 12851static rtx 12852ix86_expand_aligntest (rtx variable, int value) 12853{ 12854 rtx label = gen_label_rtx (); 12855 rtx tmpcount = gen_reg_rtx (GET_MODE (variable)); 12856 if (GET_MODE (variable) == DImode) 12857 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value))); 12858 else 12859 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value))); 12860 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable), 12861 1, label); 12862 return label; 12863} 12864 12865/* Adjust COUNTER by the VALUE. */ 12866static void 12867ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value) 12868{ 12869 if (GET_MODE (countreg) == DImode) 12870 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value))); 12871 else 12872 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value))); 12873} 12874 12875/* Zero extend possibly SImode EXP to Pmode register. */ 12876rtx 12877ix86_zero_extend_to_Pmode (rtx exp) 12878{ 12879 rtx r; 12880 if (GET_MODE (exp) == VOIDmode) 12881 return force_reg (Pmode, exp); 12882 if (GET_MODE (exp) == Pmode) 12883 return copy_to_mode_reg (Pmode, exp); 12884 r = gen_reg_rtx (Pmode); 12885 emit_insn (gen_zero_extendsidi2 (r, exp)); 12886 return r; 12887} 12888 12889/* Expand string move (memcpy) operation. 
/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrmem contains similar code.  Returns 1 when code
   was emitted, 0 to tell the caller to fall back to a library call.  */
int
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
{
  rtx srcreg, destreg, countreg, srcexp, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi or edi.  */
  if (global_regs[4] || global_regs[5])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }

  /* Figure out proper mode for counter. For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT
      || GET_MODE (count_exp) == SImode
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  gcc_assert (counter_mode == SImode || counter_mode == DImode);

  /* Force both addresses into registers so the rep/strmov patterns can
     use them directly.  */
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
  if (srcreg != XEXP (src, 0))
    src = replace_equiv_address_nv (src, srcreg);

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
     sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
     Size of (movsl;)*(movsw;)?(movsb;)? sequence is
     count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
     but we don't know whether upper 24 (resp. 56) bits of %ecx will be
     known to be zero or not.  The rep; movsb sequence causes higher
     register pressure though, so take that into account.  */

  if ((!optimize || optimize_size)
      && (count == 0
	  || ((count & 0x03)
	      && (!optimize_size
		  || count > 5 * 4
		  || (count & 3) + count / 4 > 6))))
    {
      /* Plain rep; movsb.  The PLUS expressions describe the final
	 pointer values for the rep_mov pattern.  */
      emit_insn (gen_cld ());
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
			      destexp, srcexp));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      unsigned HOST_WIDE_INT offset = 0;
      /* Word size of the bulk copy: 8 bytes only for 64-bit speed code.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      rtx srcmem, dstmem;

      emit_insn (gen_cld ());
      if (count & ~(size - 1))
	{
	  if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
	    {
	      /* Few words: emit individual movs instructions.  */
	      enum machine_mode movs_mode = size == 4 ? SImode : DImode;

	      while (offset < (count & ~(size - 1)))
		{
		  srcmem = adjust_automodify_address_nv (src, movs_mode,
							 srcreg, offset);
		  dstmem = adjust_automodify_address_nv (dst, movs_mode,
							 destreg, offset);
		  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
		  offset += size;
		}
	    }
	  else
	    {
	      /* Bulk copy with rep movsl / rep movsq.  */
	      countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
				  & (TARGET_64BIT ? -1 : 0x3fffffff));
	      countreg = copy_to_mode_reg (counter_mode, countreg);
	      countreg = ix86_zero_extend_to_Pmode (countreg);

	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
					GEN_INT (size == 4 ? 2 : 3));
	      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);

	      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
				      countreg, destexp, srcexp));
	      offset = count & ~(size - 1);
	    }
	}
      /* Copy the remaining (known at compile time) tail: at most one
	 4-byte, one 2-byte and one 1-byte move.  */
      if (size == 8 && (count & 0x04))
	{
	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  offset += 4;
	}
      if (count & 0x02)
	{
	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  offset += 2;
	}
      if (count & 0x01)
	{
	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
       allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      rtx srcmem, dstmem;
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);
      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);
      src = change_address (src, BLKmode, srcreg);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
	 than 4 bytes, because gcc is able to optimize such code better (in
	 the case the destination or the count really is aligned, gcc is often
	 able to predict the branches) and also it is friendlier to the
	 hardware branch prediction.

	 Using loops is beneficial for generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costly.  Maybe we can revisit this decision later or
	 add some customizability to this code.  */

      /* When the count is unknown and may be smaller than the alignment
	 prologue handles, branch around the prologue entirely.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      /* Alignment prologue: copy 1, 2 and possibly 4 bytes until the
	 destination reaches the desired alignment.  */
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}
      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      /* Main copy: rep movsq (64-bit) or rep movsl, count scaled down
	 by the word size.  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
	}
      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
			      countreg2, destexp, srcexp));

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      /* Epilogue: copy the remaining 0-7 bytes, testing the count
	 register at runtime where the count is not known.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	{
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if ((align <= 4 || count == 0) && TARGET_64BIT)
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	{
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	{
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }

  return 1;
}
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movmem contains similar code.  Returns 1 when code
   was emitted, 0 to tell the caller to fall back to a library call.  */
int
ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
{
  rtx destreg, zeroreg, countreg, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi.  */
  if (global_regs[4])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }
  /* Figure out proper mode for counter. For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT
      || GET_MODE (count_exp) == SImode
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);


  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
     sequence is 7 bytes long, so if optimizing for size and count is
     small enough that some stosl, stosw and stosb instructions without
     rep are shorter, fall back into the next if.  */

  if ((!optimize || optimize_size)
      && (count == 0
	  || ((count & 0x03)
	      && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
    {
      emit_insn (gen_cld ());

      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
    }
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      /* Constant-count path: bulk stosl/stosq followed by an unrolled
	 tail for the last few bytes.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      unsigned HOST_WIDE_INT offset = 0;

      emit_insn (gen_cld ());

      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
	{
	  unsigned HOST_WIDE_INT repcount;
	  unsigned int max_nonrep;

	  repcount = count >> (size == 4 ? 2 : 3);
	  if (!TARGET_64BIT)
	    repcount &= 0x3fffffff;

	  /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
	     movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
	     bytes.  In both cases the latter seems to be faster for small
	     values of N.  */
	  max_nonrep = size == 4 ? 7 : 4;
	  if (!optimize_size)
	    switch (ix86_tune)
	      {
	      case PROCESSOR_PENTIUM4:
	      case PROCESSOR_NOCONA:
		max_nonrep = 3;
		break;
	      default:
		break;
	      }

	  if (repcount <= max_nonrep)
	    while (repcount-- > 0)
	      {
		rtx mem = adjust_automodify_address_nv (dst,
							GET_MODE (zeroreg),
							destreg, offset);
		emit_insn (gen_strset (destreg, mem, zeroreg));
		offset += size;
	      }
	  else
	    {
	      countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
	      countreg = ix86_zero_extend_to_Pmode (countreg);
	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
					GEN_INT (size == 4 ? 2 : 3));
	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
	      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
				       destexp));
	      offset = count & ~(size - 1);
	    }
	}
      /* Store the remaining (known at compile time) tail: at most one
	 4-byte, one 2-byte and one 1-byte store.  */
      if (size == 8 && (count & 0x04))
	{
	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  offset += 4;
	}
      if (count & 0x02)
	{
	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  offset += 2;
	}
      if (count & 0x01)
	{
	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	}
    }
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
      /* Get rid of MEM_OFFSET, it won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);

      /* When the count is unknown and may be smaller than the alignment
	 prologue handles, branch around the prologue entirely.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      /* Alignment prologue: store 1, 2 and possibly 4 bytes until the
	 destination reaches the desired alignment.  */
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strset (destreg, dst,
				 (TARGET_64BIT
				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
				  : zeroreg)));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}

      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      /* Main clear: rep stosq (64-bit) or rep stosl, count scaled down
	 by the word size.  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
	}
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      /* Epilogue: store the remaining 0-7 bytes, testing the count
	 register at runtime where the count is not known.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  return 1;
}
/* Expand strlen.  OUT receives the length, SRC is the string MEM,
   EOSCHAR the terminator (const0_rtx for plain strlen), ALIGN the known
   alignment.  Returns 1 when code was emitted, 0 to tell the caller to
   use a library call instead.  */
int
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid it's
     expanding unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Unrolled word-at-a-time scan.  */

      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      /* repnz; scasb scan.  The scas pattern leaves the post-scan count
	 in scratch1; the length is then ~count - 1.  */
      rtx unspec;
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
						 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      if (TARGET_64BIT)
	{
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
    }
  return 1;
}
 */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  /* Dispatch on (address & 3): 0 -> already aligned, 2 -> two
	     bytes to check, 3 -> one byte, 1 -> three bytes.  */
	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.
 */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.
 */

  /* tmpreg = (scratch - 0x01010101) & ~scratch & 0x80808080:
     a byte's 0x80 bit survives iff that byte was zero.  */
  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  if (TARGET_CMOVE)
    {
       rtx reg = gen_reg_rtx (SImode);
       rtx reg2 = gen_reg_rtx (Pmode);
       emit_move_insn (reg, tmpreg);
       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

       /* If zero is not in the first two bytes, move two bytes forward.  */
       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
						     reg,
						     tmpreg)));
       /* Emit lea manually to avoid clobbering of flags.  */
       emit_insn (gen_rtx_SET (SImode, reg2,
			       gen_rtx_PLUS (Pmode, out, const2_rtx)));

       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, out,
			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						     reg2,
						     out)));

    }
  else
    {
       rtx end_2_label = gen_label_rtx ();
       /* Is zero in the first two bytes? */

       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			    gen_rtx_LABEL_REF (VOIDmode, end_2_label),
			    pc_rtx);
       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
       JUMP_LABEL (tmp) = end_2_label;

       /* Not in the first two.  Move two bytes forward.
 */
       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
       if (TARGET_64BIT)
	 emit_insn (gen_adddi3 (out, out, const2_rtx));
       else
	 emit_insn (gen_addsi3 (out, out, const2_rtx));

       emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  /* Doubling the low byte shifts its 0x80 "zero here" bit into the carry;
     subtract-with-borrow then corrects OUT without a branch.
     Register 17 is the flags register on this target.  */
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
}

/* Expand a call insn.  RETVAL is the value register (or NULL for a void
   call), FNADDR the MEM holding the callee address, CALLARG1 the argument
   bytes rtx, POP the amount the callee pops (NULL/const0_rtx for none)
   and SIBCALL nonzero for a sibling call.  */
void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
		  rtx callarg2 ATTRIBUTE_UNUSED,
		  rtx pop, int sibcall)
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  /* 64-bit ABI has no callee-pop calls.  */
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
#if TARGET_MACHO
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
	fnaddr = machopic_indirect_call_target (fnaddr);
#endif
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  */
      if (! TARGET_64BIT && flag_pic
	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
	use_reg (&use, pic_offset_table_rtx);
    }

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      /* Load CALLARG2 into AL and mark it used -- presumably the number
	 of vector registers for a varargs call per the x86-64 ABI;
	 confirm against the caller.  */
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (!
call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      /* Address is not directly callable; load it into a register.  */
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      /* 64-bit sibcalls go through R11, which is neither callee-saved
	 nor used for argument passing.  */
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      /* Model the callee-pop as a parallel stack-pointer adjustment.  */
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}


/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  /* ggc_alloc_cleared zeroes the structure; only set the fields whose
     default is not zero.  */
  f = ggc_alloc_cleared (sizeof (struct machine_function));
  f->use_fast_prologue_epilogue_nregs = -1;
  f->tls_descriptor_call_expanded_p = 0;

  return f;
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  /* Virtual slot is valid only before vregs are instantiated.
 */
  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);

  /* Reuse an already-allocated slot with the same mode and number.  */
  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->rtl;

  s = (struct stack_local_entry *)
    ggc_alloc (sizeof (struct stack_local_entry));
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  /* Prepend to the per-function list of remembered slots.  */
  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return s->rtl;
}

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;
rtx
ix86_tls_get_addr (void)
{
  /* Cached in a GC root; created on first use.  The GNU TLS variant on
     32-bit uses the triple-underscore entry point.  */
  if (!ix86_tls_symbol)
    {
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
					    (TARGET_ANY_GNU_TLS
					     && !TARGET_64BIT)
					    ? "___tls_get_addr"
					    : "__tls_get_addr");
    }

  return ix86_tls_symbol;
}

/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;
rtx
ix86_tls_module_base (void)
{
  /* Cached in a GC root; tagged with the global-dynamic TLS model.  */
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
							"_TLS_MODULE_BASE_");
      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}

/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.
 */

int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  /* Autoincrement forms contribute no extra address bytes.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  /* Look through paradoxical SUBREGs of the base/index registers.  */
  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* Constraint K is a signed 8-bit immediate -- the disp8 form.  */
	  if (base && satisfies_constraint_K (disp))
	    len = 1;
	  else
	    len = 4;
	}
      /* ebp always wants a displacement.  */
      else if (base == hard_frame_pointer_rtx)
	len = 1;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp, which always wants an index.  */
	  || base == stack_pointer_rtx
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx)
	len += 1;
    }

  return len;
}

/* Compute default value for "length_immediate" attribute.
When SHORTFORM 13956 is set, expect that insn have 8bit immediate alternative. */ 13957int 13958ix86_attr_length_immediate_default (rtx insn, int shortform) 13959{ 13960 int len = 0; 13961 int i; 13962 extract_insn_cached (insn); 13963 for (i = recog_data.n_operands - 1; i >= 0; --i) 13964 if (CONSTANT_P (recog_data.operand[i])) 13965 { 13966 gcc_assert (!len); 13967 if (shortform && satisfies_constraint_K (recog_data.operand[i])) 13968 len = 1; 13969 else 13970 { 13971 switch (get_attr_mode (insn)) 13972 { 13973 case MODE_QI: 13974 len+=1; 13975 break; 13976 case MODE_HI: 13977 len+=2; 13978 break; 13979 case MODE_SI: 13980 len+=4; 13981 break; 13982 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */ 13983 case MODE_DI: 13984 len+=4; 13985 break; 13986 default: 13987 fatal_insn ("unknown insn mode", insn); 13988 } 13989 } 13990 } 13991 return len; 13992} 13993/* Compute default value for "length_address" attribute. */ 13994int 13995ix86_attr_length_address_default (rtx insn) 13996{ 13997 int i; 13998 13999 if (get_attr_type (insn) == TYPE_LEA) 14000 { 14001 rtx set = PATTERN (insn); 14002 14003 if (GET_CODE (set) == PARALLEL) 14004 set = XVECEXP (set, 0, 0); 14005 14006 gcc_assert (GET_CODE (set) == SET); 14007 14008 return memory_address_length (SET_SRC (set)); 14009 } 14010 14011 extract_insn_cached (insn); 14012 for (i = recog_data.n_operands - 1; i >= 0; --i) 14013 if (GET_CODE (recog_data.operand[i]) == MEM) 14014 { 14015 return memory_address_length (XEXP (recog_data.operand[i], 0)); 14016 break; 14017 } 14018 return 0; 14019} 14020 14021/* Return the maximum number of instructions a cpu can issue. 
*/ 14022 14023static int 14024ix86_issue_rate (void) 14025{ 14026 switch (ix86_tune) 14027 { 14028 case PROCESSOR_PENTIUM: 14029 case PROCESSOR_K6: 14030 return 2; 14031 14032 case PROCESSOR_PENTIUMPRO: 14033 case PROCESSOR_PENTIUM4: 14034 case PROCESSOR_ATHLON: 14035 case PROCESSOR_K8: 14036 case PROCESSOR_AMDFAM10: 14037 case PROCESSOR_NOCONA: 14038 case PROCESSOR_GENERIC32: 14039 case PROCESSOR_GENERIC64: 14040 return 3; 14041 14042 case PROCESSOR_CORE2: 14043 return 4; 14044 14045 default: 14046 return 1; 14047 } 14048} 14049 14050/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set 14051 by DEP_INSN and nothing set by DEP_INSN. */ 14052 14053static int 14054ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) 14055{ 14056 rtx set, set2; 14057 14058 /* Simplify the test for uninteresting insns. */ 14059 if (insn_type != TYPE_SETCC 14060 && insn_type != TYPE_ICMOV 14061 && insn_type != TYPE_FCMOV 14062 && insn_type != TYPE_IBR) 14063 return 0; 14064 14065 if ((set = single_set (dep_insn)) != 0) 14066 { 14067 set = SET_DEST (set); 14068 set2 = NULL_RTX; 14069 } 14070 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL 14071 && XVECLEN (PATTERN (dep_insn), 0) == 2 14072 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET 14073 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET) 14074 { 14075 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 14076 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 14077 } 14078 else 14079 return 0; 14080 14081 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG) 14082 return 0; 14083 14084 /* This test is true if the dependent insn reads the flags but 14085 not any other potentially set register. 
 */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}

/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx addr;

  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      /* For lea the "memory" operand is really the address computation,
	 i.e. the SET_SRC of the pattern.  */
      addr = PATTERN (insn);

      if (GET_CODE (addr) == PARALLEL)
	addr = XVECEXP (addr, 0, 0);

      gcc_assert (GET_CODE (addr) == SET);

      addr = SET_SRC (addr);
    }
  else
    {
      int i;
      extract_insn_cached (insn);
      /* Use the first MEM operand's address, if any.  */
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}

/* Scheduler hook: adjust COST of the dependence LINK between INSN and
   DEP_INSN according to the tuned processor's pipeline quirks.  */
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependent (insn, dep_insn, insn_type))
	cost += 1;

      /* ???
 Compares pair with jump/setcc.  */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && GET_CODE (SET_DEST (set2)) == MEM)
	cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* INT->FP conversion is expensive.
 */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
      memory = get_attr_memory (insn);

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
	{
	  enum attr_unit unit = get_attr_unit (insn);
	  int loadcost = 3;

	  /* Because of the difference between the length of integer and
	     floating unit pipeline preparation stages, the memory operands
	     for floating point are cheaper.

	     ??? For Athlon it the difference is most probably 2.  */
	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
	    loadcost = 3;
	  else
	    loadcost = TARGET_ATHLON ? 2 : 0;

	  if (cost >= loadcost)
	    cost -= loadcost;
	  else
	    cost = 0;
	}
      /* FALLTHRU -- nothing further to do for these processors.  */

    default:
      break;
    }

  return cost;
}

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.
  Making this value too
   large results extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  if (ix86_tune == PROCESSOR_PENTIUM)
    return 2;

  if (ix86_tune == PROCESSOR_PENTIUMPRO
      || ix86_tune == PROCESSOR_K6)
    return 1;

  else
    return 0;
}


/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  /* Word-align long string constants unless tuned not to or optimizing
     for size.  */
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && !TARGET_NO_ALIGN_LONG_STRINGS
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  int max_align = optimize_size ? BITS_PER_WORD : 256;

  /* Large aggregates get the full maximum alignment.  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.
 */
  if (TARGET_64BIT)
    {
      /* TYPE_SIZE is in bits, hence the 128 (= 16 bytes) threshold.  */
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is considered here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (tree type, int align)
{
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.
 */
  if (TARGET_64BIT)
    {
      /* NOTE(review): unlike ix86_data_alignment this compares the
	 bit-size against 16, not 128 -- looks inconsistent; confirm
	 against the intended ABI rule.  */
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is considered here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.
 */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      /* 0xb9 = movl $imm32, %ecx (loads the static chain).  */
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      /* 0xe9 = jmp rel32 to the target function.  */
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but kernel does not use
	 trampolines at the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  /* 41 bb = movl $imm32, %r11d (stored little-endian as 0xbb41).  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* 49 bb = movabs $imm64, %r11.  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10.
 */
      /* 49 ba = movabs $imm64, %r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to the r11: 49 ff e3 = jmp *%r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      gcc_assert (offset <= TRAMPOLINE_SIZE);
    }

#ifdef ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}

/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
14546 IX86_BUILTIN_UCOMINEQSS, 14547 14548 IX86_BUILTIN_CVTPI2PS, 14549 IX86_BUILTIN_CVTPS2PI, 14550 IX86_BUILTIN_CVTSI2SS, 14551 IX86_BUILTIN_CVTSI642SS, 14552 IX86_BUILTIN_CVTSS2SI, 14553 IX86_BUILTIN_CVTSS2SI64, 14554 IX86_BUILTIN_CVTTPS2PI, 14555 IX86_BUILTIN_CVTTSS2SI, 14556 IX86_BUILTIN_CVTTSS2SI64, 14557 14558 IX86_BUILTIN_MAXPS, 14559 IX86_BUILTIN_MAXSS, 14560 IX86_BUILTIN_MINPS, 14561 IX86_BUILTIN_MINSS, 14562 14563 IX86_BUILTIN_LOADUPS, 14564 IX86_BUILTIN_STOREUPS, 14565 IX86_BUILTIN_MOVSS, 14566 14567 IX86_BUILTIN_MOVHLPS, 14568 IX86_BUILTIN_MOVLHPS, 14569 IX86_BUILTIN_LOADHPS, 14570 IX86_BUILTIN_LOADLPS, 14571 IX86_BUILTIN_STOREHPS, 14572 IX86_BUILTIN_STORELPS, 14573 14574 IX86_BUILTIN_MASKMOVQ, 14575 IX86_BUILTIN_MOVMSKPS, 14576 IX86_BUILTIN_PMOVMSKB, 14577 14578 IX86_BUILTIN_MOVNTPS, 14579 IX86_BUILTIN_MOVNTQ, 14580 14581 IX86_BUILTIN_LOADDQU, 14582 IX86_BUILTIN_STOREDQU, 14583 14584 IX86_BUILTIN_PACKSSWB, 14585 IX86_BUILTIN_PACKSSDW, 14586 IX86_BUILTIN_PACKUSWB, 14587 14588 IX86_BUILTIN_PADDB, 14589 IX86_BUILTIN_PADDW, 14590 IX86_BUILTIN_PADDD, 14591 IX86_BUILTIN_PADDQ, 14592 IX86_BUILTIN_PADDSB, 14593 IX86_BUILTIN_PADDSW, 14594 IX86_BUILTIN_PADDUSB, 14595 IX86_BUILTIN_PADDUSW, 14596 IX86_BUILTIN_PSUBB, 14597 IX86_BUILTIN_PSUBW, 14598 IX86_BUILTIN_PSUBD, 14599 IX86_BUILTIN_PSUBQ, 14600 IX86_BUILTIN_PSUBSB, 14601 IX86_BUILTIN_PSUBSW, 14602 IX86_BUILTIN_PSUBUSB, 14603 IX86_BUILTIN_PSUBUSW, 14604 14605 IX86_BUILTIN_PAND, 14606 IX86_BUILTIN_PANDN, 14607 IX86_BUILTIN_POR, 14608 IX86_BUILTIN_PXOR, 14609 14610 IX86_BUILTIN_PAVGB, 14611 IX86_BUILTIN_PAVGW, 14612 14613 IX86_BUILTIN_PCMPEQB, 14614 IX86_BUILTIN_PCMPEQW, 14615 IX86_BUILTIN_PCMPEQD, 14616 IX86_BUILTIN_PCMPGTB, 14617 IX86_BUILTIN_PCMPGTW, 14618 IX86_BUILTIN_PCMPGTD, 14619 14620 IX86_BUILTIN_PMADDWD, 14621 14622 IX86_BUILTIN_PMAXSW, 14623 IX86_BUILTIN_PMAXUB, 14624 IX86_BUILTIN_PMINSW, 14625 IX86_BUILTIN_PMINUB, 14626 14627 IX86_BUILTIN_PMULHUW, 14628 IX86_BUILTIN_PMULHW, 14629 IX86_BUILTIN_PMULLW, 
14630 14631 IX86_BUILTIN_PSADBW, 14632 IX86_BUILTIN_PSHUFW, 14633 14634 IX86_BUILTIN_PSLLW, 14635 IX86_BUILTIN_PSLLD, 14636 IX86_BUILTIN_PSLLQ, 14637 IX86_BUILTIN_PSRAW, 14638 IX86_BUILTIN_PSRAD, 14639 IX86_BUILTIN_PSRLW, 14640 IX86_BUILTIN_PSRLD, 14641 IX86_BUILTIN_PSRLQ, 14642 IX86_BUILTIN_PSLLWI, 14643 IX86_BUILTIN_PSLLDI, 14644 IX86_BUILTIN_PSLLQI, 14645 IX86_BUILTIN_PSRAWI, 14646 IX86_BUILTIN_PSRADI, 14647 IX86_BUILTIN_PSRLWI, 14648 IX86_BUILTIN_PSRLDI, 14649 IX86_BUILTIN_PSRLQI, 14650 14651 IX86_BUILTIN_PUNPCKHBW, 14652 IX86_BUILTIN_PUNPCKHWD, 14653 IX86_BUILTIN_PUNPCKHDQ, 14654 IX86_BUILTIN_PUNPCKLBW, 14655 IX86_BUILTIN_PUNPCKLWD, 14656 IX86_BUILTIN_PUNPCKLDQ, 14657 14658 IX86_BUILTIN_SHUFPS, 14659 14660 IX86_BUILTIN_RCPPS, 14661 IX86_BUILTIN_RCPSS, 14662 IX86_BUILTIN_RSQRTPS, 14663 IX86_BUILTIN_RSQRTSS, 14664 IX86_BUILTIN_SQRTPS, 14665 IX86_BUILTIN_SQRTSS, 14666 14667 IX86_BUILTIN_UNPCKHPS, 14668 IX86_BUILTIN_UNPCKLPS, 14669 14670 IX86_BUILTIN_ANDPS, 14671 IX86_BUILTIN_ANDNPS, 14672 IX86_BUILTIN_ORPS, 14673 IX86_BUILTIN_XORPS, 14674 14675 IX86_BUILTIN_EMMS, 14676 IX86_BUILTIN_LDMXCSR, 14677 IX86_BUILTIN_STMXCSR, 14678 IX86_BUILTIN_SFENCE, 14679 14680 /* 3DNow! Original */ 14681 IX86_BUILTIN_FEMMS, 14682 IX86_BUILTIN_PAVGUSB, 14683 IX86_BUILTIN_PF2ID, 14684 IX86_BUILTIN_PFACC, 14685 IX86_BUILTIN_PFADD, 14686 IX86_BUILTIN_PFCMPEQ, 14687 IX86_BUILTIN_PFCMPGE, 14688 IX86_BUILTIN_PFCMPGT, 14689 IX86_BUILTIN_PFMAX, 14690 IX86_BUILTIN_PFMIN, 14691 IX86_BUILTIN_PFMUL, 14692 IX86_BUILTIN_PFRCP, 14693 IX86_BUILTIN_PFRCPIT1, 14694 IX86_BUILTIN_PFRCPIT2, 14695 IX86_BUILTIN_PFRSQIT1, 14696 IX86_BUILTIN_PFRSQRT, 14697 IX86_BUILTIN_PFSUB, 14698 IX86_BUILTIN_PFSUBR, 14699 IX86_BUILTIN_PI2FD, 14700 IX86_BUILTIN_PMULHRW, 14701 14702 /* 3DNow! 
Athlon Extensions */ 14703 IX86_BUILTIN_PF2IW, 14704 IX86_BUILTIN_PFNACC, 14705 IX86_BUILTIN_PFPNACC, 14706 IX86_BUILTIN_PI2FW, 14707 IX86_BUILTIN_PSWAPDSI, 14708 IX86_BUILTIN_PSWAPDSF, 14709 14710 /* SSE2 */ 14711 IX86_BUILTIN_ADDPD, 14712 IX86_BUILTIN_ADDSD, 14713 IX86_BUILTIN_DIVPD, 14714 IX86_BUILTIN_DIVSD, 14715 IX86_BUILTIN_MULPD, 14716 IX86_BUILTIN_MULSD, 14717 IX86_BUILTIN_SUBPD, 14718 IX86_BUILTIN_SUBSD, 14719 14720 IX86_BUILTIN_CMPEQPD, 14721 IX86_BUILTIN_CMPLTPD, 14722 IX86_BUILTIN_CMPLEPD, 14723 IX86_BUILTIN_CMPGTPD, 14724 IX86_BUILTIN_CMPGEPD, 14725 IX86_BUILTIN_CMPNEQPD, 14726 IX86_BUILTIN_CMPNLTPD, 14727 IX86_BUILTIN_CMPNLEPD, 14728 IX86_BUILTIN_CMPNGTPD, 14729 IX86_BUILTIN_CMPNGEPD, 14730 IX86_BUILTIN_CMPORDPD, 14731 IX86_BUILTIN_CMPUNORDPD, 14732 IX86_BUILTIN_CMPNEPD, 14733 IX86_BUILTIN_CMPEQSD, 14734 IX86_BUILTIN_CMPLTSD, 14735 IX86_BUILTIN_CMPLESD, 14736 IX86_BUILTIN_CMPNEQSD, 14737 IX86_BUILTIN_CMPNLTSD, 14738 IX86_BUILTIN_CMPNLESD, 14739 IX86_BUILTIN_CMPORDSD, 14740 IX86_BUILTIN_CMPUNORDSD, 14741 IX86_BUILTIN_CMPNESD, 14742 14743 IX86_BUILTIN_COMIEQSD, 14744 IX86_BUILTIN_COMILTSD, 14745 IX86_BUILTIN_COMILESD, 14746 IX86_BUILTIN_COMIGTSD, 14747 IX86_BUILTIN_COMIGESD, 14748 IX86_BUILTIN_COMINEQSD, 14749 IX86_BUILTIN_UCOMIEQSD, 14750 IX86_BUILTIN_UCOMILTSD, 14751 IX86_BUILTIN_UCOMILESD, 14752 IX86_BUILTIN_UCOMIGTSD, 14753 IX86_BUILTIN_UCOMIGESD, 14754 IX86_BUILTIN_UCOMINEQSD, 14755 14756 IX86_BUILTIN_MAXPD, 14757 IX86_BUILTIN_MAXSD, 14758 IX86_BUILTIN_MINPD, 14759 IX86_BUILTIN_MINSD, 14760 14761 IX86_BUILTIN_ANDPD, 14762 IX86_BUILTIN_ANDNPD, 14763 IX86_BUILTIN_ORPD, 14764 IX86_BUILTIN_XORPD, 14765 14766 IX86_BUILTIN_SQRTPD, 14767 IX86_BUILTIN_SQRTSD, 14768 14769 IX86_BUILTIN_UNPCKHPD, 14770 IX86_BUILTIN_UNPCKLPD, 14771 14772 IX86_BUILTIN_SHUFPD, 14773 14774 IX86_BUILTIN_LOADUPD, 14775 IX86_BUILTIN_STOREUPD, 14776 IX86_BUILTIN_MOVSD, 14777 14778 IX86_BUILTIN_LOADHPD, 14779 IX86_BUILTIN_LOADLPD, 14780 14781 IX86_BUILTIN_CVTDQ2PD, 14782 
IX86_BUILTIN_CVTDQ2PS, 14783 14784 IX86_BUILTIN_CVTPD2DQ, 14785 IX86_BUILTIN_CVTPD2PI, 14786 IX86_BUILTIN_CVTPD2PS, 14787 IX86_BUILTIN_CVTTPD2DQ, 14788 IX86_BUILTIN_CVTTPD2PI, 14789 14790 IX86_BUILTIN_CVTPI2PD, 14791 IX86_BUILTIN_CVTSI2SD, 14792 IX86_BUILTIN_CVTSI642SD, 14793 14794 IX86_BUILTIN_CVTSD2SI, 14795 IX86_BUILTIN_CVTSD2SI64, 14796 IX86_BUILTIN_CVTSD2SS, 14797 IX86_BUILTIN_CVTSS2SD, 14798 IX86_BUILTIN_CVTTSD2SI, 14799 IX86_BUILTIN_CVTTSD2SI64, 14800 14801 IX86_BUILTIN_CVTPS2DQ, 14802 IX86_BUILTIN_CVTPS2PD, 14803 IX86_BUILTIN_CVTTPS2DQ, 14804 14805 IX86_BUILTIN_MOVNTI, 14806 IX86_BUILTIN_MOVNTPD, 14807 IX86_BUILTIN_MOVNTDQ, 14808 14809 /* SSE2 MMX */ 14810 IX86_BUILTIN_MASKMOVDQU, 14811 IX86_BUILTIN_MOVMSKPD, 14812 IX86_BUILTIN_PMOVMSKB128, 14813 14814 IX86_BUILTIN_PACKSSWB128, 14815 IX86_BUILTIN_PACKSSDW128, 14816 IX86_BUILTIN_PACKUSWB128, 14817 14818 IX86_BUILTIN_PADDB128, 14819 IX86_BUILTIN_PADDW128, 14820 IX86_BUILTIN_PADDD128, 14821 IX86_BUILTIN_PADDQ128, 14822 IX86_BUILTIN_PADDSB128, 14823 IX86_BUILTIN_PADDSW128, 14824 IX86_BUILTIN_PADDUSB128, 14825 IX86_BUILTIN_PADDUSW128, 14826 IX86_BUILTIN_PSUBB128, 14827 IX86_BUILTIN_PSUBW128, 14828 IX86_BUILTIN_PSUBD128, 14829 IX86_BUILTIN_PSUBQ128, 14830 IX86_BUILTIN_PSUBSB128, 14831 IX86_BUILTIN_PSUBSW128, 14832 IX86_BUILTIN_PSUBUSB128, 14833 IX86_BUILTIN_PSUBUSW128, 14834 14835 IX86_BUILTIN_PAND128, 14836 IX86_BUILTIN_PANDN128, 14837 IX86_BUILTIN_POR128, 14838 IX86_BUILTIN_PXOR128, 14839 14840 IX86_BUILTIN_PAVGB128, 14841 IX86_BUILTIN_PAVGW128, 14842 14843 IX86_BUILTIN_PCMPEQB128, 14844 IX86_BUILTIN_PCMPEQW128, 14845 IX86_BUILTIN_PCMPEQD128, 14846 IX86_BUILTIN_PCMPGTB128, 14847 IX86_BUILTIN_PCMPGTW128, 14848 IX86_BUILTIN_PCMPGTD128, 14849 14850 IX86_BUILTIN_PMADDWD128, 14851 14852 IX86_BUILTIN_PMAXSW128, 14853 IX86_BUILTIN_PMAXUB128, 14854 IX86_BUILTIN_PMINSW128, 14855 IX86_BUILTIN_PMINUB128, 14856 14857 IX86_BUILTIN_PMULUDQ, 14858 IX86_BUILTIN_PMULUDQ128, 14859 IX86_BUILTIN_PMULHUW128, 14860 
IX86_BUILTIN_PMULHW128, 14861 IX86_BUILTIN_PMULLW128, 14862 14863 IX86_BUILTIN_PSADBW128, 14864 IX86_BUILTIN_PSHUFHW, 14865 IX86_BUILTIN_PSHUFLW, 14866 IX86_BUILTIN_PSHUFD, 14867 14868 IX86_BUILTIN_PSLLW128, 14869 IX86_BUILTIN_PSLLD128, 14870 IX86_BUILTIN_PSLLQ128, 14871 IX86_BUILTIN_PSRAW128, 14872 IX86_BUILTIN_PSRAD128, 14873 IX86_BUILTIN_PSRLW128, 14874 IX86_BUILTIN_PSRLD128, 14875 IX86_BUILTIN_PSRLQ128, 14876 IX86_BUILTIN_PSLLDQI128, 14877 IX86_BUILTIN_PSLLWI128, 14878 IX86_BUILTIN_PSLLDI128, 14879 IX86_BUILTIN_PSLLQI128, 14880 IX86_BUILTIN_PSRAWI128, 14881 IX86_BUILTIN_PSRADI128, 14882 IX86_BUILTIN_PSRLDQI128, 14883 IX86_BUILTIN_PSRLWI128, 14884 IX86_BUILTIN_PSRLDI128, 14885 IX86_BUILTIN_PSRLQI128, 14886 14887 IX86_BUILTIN_PUNPCKHBW128, 14888 IX86_BUILTIN_PUNPCKHWD128, 14889 IX86_BUILTIN_PUNPCKHDQ128, 14890 IX86_BUILTIN_PUNPCKHQDQ128, 14891 IX86_BUILTIN_PUNPCKLBW128, 14892 IX86_BUILTIN_PUNPCKLWD128, 14893 IX86_BUILTIN_PUNPCKLDQ128, 14894 IX86_BUILTIN_PUNPCKLQDQ128, 14895 14896 IX86_BUILTIN_CLFLUSH, 14897 IX86_BUILTIN_MFENCE, 14898 IX86_BUILTIN_LFENCE, 14899 14900 /* Prescott New Instructions. */ 14901 IX86_BUILTIN_ADDSUBPS, 14902 IX86_BUILTIN_HADDPS, 14903 IX86_BUILTIN_HSUBPS, 14904 IX86_BUILTIN_MOVSHDUP, 14905 IX86_BUILTIN_MOVSLDUP, 14906 IX86_BUILTIN_ADDSUBPD, 14907 IX86_BUILTIN_HADDPD, 14908 IX86_BUILTIN_HSUBPD, 14909 IX86_BUILTIN_LDDQU, 14910 14911 IX86_BUILTIN_MONITOR, 14912 IX86_BUILTIN_MWAIT, 14913 14914 /* SSSE3. 
*/ 14915 IX86_BUILTIN_PHADDW, 14916 IX86_BUILTIN_PHADDD, 14917 IX86_BUILTIN_PHADDSW, 14918 IX86_BUILTIN_PHSUBW, 14919 IX86_BUILTIN_PHSUBD, 14920 IX86_BUILTIN_PHSUBSW, 14921 IX86_BUILTIN_PMADDUBSW, 14922 IX86_BUILTIN_PMULHRSW, 14923 IX86_BUILTIN_PSHUFB, 14924 IX86_BUILTIN_PSIGNB, 14925 IX86_BUILTIN_PSIGNW, 14926 IX86_BUILTIN_PSIGND, 14927 IX86_BUILTIN_PALIGNR, 14928 IX86_BUILTIN_PABSB, 14929 IX86_BUILTIN_PABSW, 14930 IX86_BUILTIN_PABSD, 14931 14932 IX86_BUILTIN_PHADDW128, 14933 IX86_BUILTIN_PHADDD128, 14934 IX86_BUILTIN_PHADDSW128, 14935 IX86_BUILTIN_PHSUBW128, 14936 IX86_BUILTIN_PHSUBD128, 14937 IX86_BUILTIN_PHSUBSW128, 14938 IX86_BUILTIN_PMADDUBSW128, 14939 IX86_BUILTIN_PMULHRSW128, 14940 IX86_BUILTIN_PSHUFB128, 14941 IX86_BUILTIN_PSIGNB128, 14942 IX86_BUILTIN_PSIGNW128, 14943 IX86_BUILTIN_PSIGND128, 14944 IX86_BUILTIN_PALIGNR128, 14945 IX86_BUILTIN_PABSB128, 14946 IX86_BUILTIN_PABSW128, 14947 IX86_BUILTIN_PABSD128, 14948 14949 /* AMDFAM10 - SSE4A New Instructions. */ 14950 IX86_BUILTIN_MOVNTSD, 14951 IX86_BUILTIN_MOVNTSS, 14952 IX86_BUILTIN_EXTRQI, 14953 IX86_BUILTIN_EXTRQ, 14954 IX86_BUILTIN_INSERTQI, 14955 IX86_BUILTIN_INSERTQ, 14956 14957 IX86_BUILTIN_VEC_INIT_V2SI, 14958 IX86_BUILTIN_VEC_INIT_V4HI, 14959 IX86_BUILTIN_VEC_INIT_V8QI, 14960 IX86_BUILTIN_VEC_EXT_V2DF, 14961 IX86_BUILTIN_VEC_EXT_V2DI, 14962 IX86_BUILTIN_VEC_EXT_V4SF, 14963 IX86_BUILTIN_VEC_EXT_V4SI, 14964 IX86_BUILTIN_VEC_EXT_V8HI, 14965 IX86_BUILTIN_VEC_EXT_V16QI, 14966 IX86_BUILTIN_VEC_EXT_V2SI, 14967 IX86_BUILTIN_VEC_EXT_V4HI, 14968 IX86_BUILTIN_VEC_SET_V8HI, 14969 IX86_BUILTIN_VEC_SET_V4HI, 14970 14971 IX86_BUILTIN_MAX 14972}; 14973 14974#define def_builtin(MASK, NAME, TYPE, CODE) \ 14975do { \ 14976 if ((MASK) & target_flags \ 14977 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \ 14978 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \ 14979 NULL, NULL_TREE); \ 14980} while (0) 14981 14982/* Bits for builtin_description.flag. 
*/ 14983 14984/* Set when we don't support the comparison natively, and should 14985 swap_comparison in order to support it. */ 14986#define BUILTIN_DESC_SWAP_OPERANDS 1 14987 14988struct builtin_description 14989{ 14990 const unsigned int mask; 14991 const enum insn_code icode; 14992 const char *const name; 14993 const enum ix86_builtins code; 14994 const enum rtx_code comparison; 14995 const unsigned int flag; 14996}; 14997 14998static const struct builtin_description bdesc_comi[] = 14999{ 15000 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 }, 15001 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 }, 15002 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 }, 15003 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 }, 15004 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 }, 15005 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 }, 15006 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 }, 15007 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 }, 15008 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 }, 15009 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 }, 15010 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 }, 15011 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 }, 15012 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 }, 15013 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 }, 15014 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 }, 15015 { MASK_SSE2, 
CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 }, 15016 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 }, 15017 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 }, 15018 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 }, 15019 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 }, 15020 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 }, 15021 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 }, 15022 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 }, 15023 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 }, 15024}; 15025 15026static const struct builtin_description bdesc_2arg[] = 15027{ 15028 /* SSE */ 15029 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }, 15030 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, 15031 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, 15032 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, 15033 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, 15034 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, 15035 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, 15036 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, 15037 15038 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, 15039 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, 15040 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, 
"__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, 15041 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 15042 BUILTIN_DESC_SWAP_OPERANDS }, 15043 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 15044 BUILTIN_DESC_SWAP_OPERANDS }, 15045 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, 15046 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 }, 15047 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 }, 15048 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 }, 15049 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, 15050 BUILTIN_DESC_SWAP_OPERANDS }, 15051 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, 15052 BUILTIN_DESC_SWAP_OPERANDS }, 15053 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 }, 15054 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, 15055 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, 15056 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, 15057 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, 15058 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 }, 15059 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 }, 15060 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 }, 15061 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, 15062 
BUILTIN_DESC_SWAP_OPERANDS }, 15063 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, 15064 BUILTIN_DESC_SWAP_OPERANDS }, 15065 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 }, 15066 15067 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, 15068 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, 15069 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, 15070 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, 15071 15072 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, 15073 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, 15074 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, 15075 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, 15076 15077 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, 15078 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, 15079 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, 15080 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, 15081 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, 15082 15083 /* MMX */ 15084 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, 15085 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 }, 15086 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 }, 15087 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 }, 15088 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 }, 
15089 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 }, 15090 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 }, 15091 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 }, 15092 15093 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 }, 15094 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 }, 15095 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 }, 15096 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 }, 15097 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 }, 15098 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 }, 15099 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 }, 15100 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 }, 15101 15102 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, 15103 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, 15104 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, 15105 15106 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 }, 15107 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, 15108 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, 15109 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 }, 15110 15111 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, 15112 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, 15113 
15114 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, 15115 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, 15116 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 }, 15117 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 }, 15118 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, 15119 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, 15120 15121 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, 15122 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, 15123 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, 15124 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, 15125 15126 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 }, 15127 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 }, 15128 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 }, 15129 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 }, 15130 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 }, 15131 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 }, 15132 15133 /* Special. 
*/ 15134 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 }, 15135 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 }, 15136 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 }, 15137 15138 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, 15139 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, 15140 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, 15141 15142 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, 15143 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, 15144 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 }, 15145 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 }, 15146 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 }, 15147 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 }, 15148 15149 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 }, 15150 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 }, 15151 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 }, 15152 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 }, 15153 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 }, 15154 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 }, 15155 15156 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 }, 15157 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 }, 15158 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, 15159 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, 15160 15161 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, 15162 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }, 15163 15164 /* SSE2 */ 15165 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 }, 15166 { MASK_SSE2, CODE_FOR_subv2df3, 
"__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 }, 15167 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 }, 15168 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 }, 15169 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 }, 15170 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 }, 15171 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 }, 15172 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 }, 15173 15174 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 }, 15175 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 }, 15176 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 }, 15177 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 15178 BUILTIN_DESC_SWAP_OPERANDS }, 15179 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 15180 BUILTIN_DESC_SWAP_OPERANDS }, 15181 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 }, 15182 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 }, 15183 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 }, 15184 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 }, 15185 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, 15186 BUILTIN_DESC_SWAP_OPERANDS }, 15187 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, 15188 BUILTIN_DESC_SWAP_OPERANDS }, 15189 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", 
IX86_BUILTIN_CMPORDPD, ORDERED, 0 }, 15190 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 }, 15191 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 }, 15192 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 }, 15193 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 }, 15194 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 }, 15195 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 }, 15196 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 }, 15197 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 }, 15198 15199 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 }, 15200 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 }, 15201 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 }, 15202 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 }, 15203 15204 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 }, 15205 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 }, 15206 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 }, 15207 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 }, 15208 15209 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 }, 15210 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 }, 15211 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 }, 15212 15213 /* SSE2 MMX */ 
15214 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 }, 15215 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 }, 15216 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 }, 15217 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 }, 15218 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 }, 15219 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 }, 15220 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 }, 15221 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 }, 15222 15223 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 }, 15224 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 }, 15225 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 }, 15226 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 }, 15227 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 }, 15228 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 }, 15229 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 }, 15230 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 }, 15231 15232 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 }, 15233 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 }, 15234 15235 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 }, 15236 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, 
"__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 }, 15237 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 }, 15238 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 }, 15239 15240 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 }, 15241 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 }, 15242 15243 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 }, 15244 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 }, 15245 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 }, 15246 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 }, 15247 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 }, 15248 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 }, 15249 15250 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 }, 15251 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 }, 15252 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 }, 15253 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 }, 15254 15255 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 }, 15256 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 }, 15257 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 }, 15258 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 }, 15259 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, 
"__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 }, 15260 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 }, 15261 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 }, 15262 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 }, 15263 15264 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 }, 15265 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 }, 15266 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 }, 15267 15268 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 }, 15269 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 }, 15270 15271 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 }, 15272 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 }, 15273 15274 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 }, 15275 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 }, 15276 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 }, 15277 15278 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 }, 15279 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 }, 15280 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 }, 15281 15282 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 }, 15283 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 }, 15284 15285 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 }, 15286 15287 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, 15288 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, 15289 { MASK_SSE2, 
CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 }, 15290 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }, 15291 15292 /* SSE3 MMX */ 15293 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 }, 15294 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 }, 15295 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 }, 15296 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 }, 15297 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 }, 15298 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }, 15299 15300 /* SSSE3 */ 15301 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 }, 15302 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 }, 15303 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 }, 15304 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 }, 15305 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 }, 15306 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 }, 15307 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 }, 15308 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 }, 15309 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 }, 15310 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 }, 15311 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 }, 15312 { MASK_SSSE3, 
CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 }, 15313 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 }, 15314 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 }, 15315 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 }, 15316 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 }, 15317 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 }, 15318 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 }, 15319 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 }, 15320 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 }, 15321 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 }, 15322 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 }, 15323 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 }, 15324 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 } 15325}; 15326 15327static const struct builtin_description bdesc_1arg[] = 15328{ 15329 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 }, 15330 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 }, 15331 15332 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }, 15333 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 }, 15334 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 }, 15335 15336 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, 15337 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, 
15338 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 }, 15339 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, 15340 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }, 15341 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 }, 15342 15343 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 }, 15344 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 }, 15345 15346 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 }, 15347 15348 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 }, 15349 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 }, 15350 15351 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 }, 15352 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 }, 15353 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 }, 15354 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 }, 15355 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 }, 15356 15357 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 }, 15358 15359 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 }, 15360 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 }, 15361 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 }, 15362 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 }, 15363 15364 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 }, 15365 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 }, 15366 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }, 15367 15368 /* SSE3 */ 15369 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 }, 15370 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 }, 15371 15372 /* 
SSSE3 */ 15373 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 }, 15374 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 }, 15375 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 }, 15376 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 }, 15377 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 }, 15378 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 }, 15379}; 15380 15381static void 15382ix86_init_builtins (void) 15383{ 15384 if (TARGET_MMX) 15385 ix86_init_mmx_sse_builtins (); 15386} 15387 15388/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX 15389 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX 15390 builtins. */ 15391static void 15392ix86_init_mmx_sse_builtins (void) 15393{ 15394 const struct builtin_description * d; 15395 size_t i; 15396 15397 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode); 15398 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); 15399 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode); 15400 tree V2DI_type_node 15401 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode); 15402 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode); 15403 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode); 15404 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode); 15405 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); 15406 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode); 15407 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode); 15408 15409 tree pchar_type_node = build_pointer_type (char_type_node); 15410 tree pcchar_type_node = build_pointer_type ( 15411 
build_type_variant (char_type_node, 1, 0)); 15412 tree pfloat_type_node = build_pointer_type (float_type_node); 15413 tree pcfloat_type_node = build_pointer_type ( 15414 build_type_variant (float_type_node, 1, 0)); 15415 tree pv2si_type_node = build_pointer_type (V2SI_type_node); 15416 tree pv2di_type_node = build_pointer_type (V2DI_type_node); 15417 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node); 15418 15419 /* Comparisons. */ 15420 tree int_ftype_v4sf_v4sf 15421 = build_function_type_list (integer_type_node, 15422 V4SF_type_node, V4SF_type_node, NULL_TREE); 15423 tree v4si_ftype_v4sf_v4sf 15424 = build_function_type_list (V4SI_type_node, 15425 V4SF_type_node, V4SF_type_node, NULL_TREE); 15426 /* MMX/SSE/integer conversions. */ 15427 tree int_ftype_v4sf 15428 = build_function_type_list (integer_type_node, 15429 V4SF_type_node, NULL_TREE); 15430 tree int64_ftype_v4sf 15431 = build_function_type_list (long_long_integer_type_node, 15432 V4SF_type_node, NULL_TREE); 15433 tree int_ftype_v8qi 15434 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE); 15435 tree v4sf_ftype_v4sf_int 15436 = build_function_type_list (V4SF_type_node, 15437 V4SF_type_node, integer_type_node, NULL_TREE); 15438 tree v4sf_ftype_v4sf_int64 15439 = build_function_type_list (V4SF_type_node, 15440 V4SF_type_node, long_long_integer_type_node, 15441 NULL_TREE); 15442 tree v4sf_ftype_v4sf_v2si 15443 = build_function_type_list (V4SF_type_node, 15444 V4SF_type_node, V2SI_type_node, NULL_TREE); 15445 15446 /* Miscellaneous. 
*/ 15447 tree v8qi_ftype_v4hi_v4hi 15448 = build_function_type_list (V8QI_type_node, 15449 V4HI_type_node, V4HI_type_node, NULL_TREE); 15450 tree v4hi_ftype_v2si_v2si 15451 = build_function_type_list (V4HI_type_node, 15452 V2SI_type_node, V2SI_type_node, NULL_TREE); 15453 tree v4sf_ftype_v4sf_v4sf_int 15454 = build_function_type_list (V4SF_type_node, 15455 V4SF_type_node, V4SF_type_node, 15456 integer_type_node, NULL_TREE); 15457 tree v2si_ftype_v4hi_v4hi 15458 = build_function_type_list (V2SI_type_node, 15459 V4HI_type_node, V4HI_type_node, NULL_TREE); 15460 tree v4hi_ftype_v4hi_int 15461 = build_function_type_list (V4HI_type_node, 15462 V4HI_type_node, integer_type_node, NULL_TREE); 15463 tree v4hi_ftype_v4hi_di 15464 = build_function_type_list (V4HI_type_node, 15465 V4HI_type_node, long_long_unsigned_type_node, 15466 NULL_TREE); 15467 tree v2si_ftype_v2si_di 15468 = build_function_type_list (V2SI_type_node, 15469 V2SI_type_node, long_long_unsigned_type_node, 15470 NULL_TREE); 15471 tree void_ftype_void 15472 = build_function_type (void_type_node, void_list_node); 15473 tree void_ftype_unsigned 15474 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); 15475 tree void_ftype_unsigned_unsigned 15476 = build_function_type_list (void_type_node, unsigned_type_node, 15477 unsigned_type_node, NULL_TREE); 15478 tree void_ftype_pcvoid_unsigned_unsigned 15479 = build_function_type_list (void_type_node, const_ptr_type_node, 15480 unsigned_type_node, unsigned_type_node, 15481 NULL_TREE); 15482 tree unsigned_ftype_void 15483 = build_function_type (unsigned_type_node, void_list_node); 15484 tree v2si_ftype_v4sf 15485 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE); 15486 /* Loads/stores. 
*/ 15487 tree void_ftype_v8qi_v8qi_pchar 15488 = build_function_type_list (void_type_node, 15489 V8QI_type_node, V8QI_type_node, 15490 pchar_type_node, NULL_TREE); 15491 tree v4sf_ftype_pcfloat 15492 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); 15493 /* @@@ the type is bogus */ 15494 tree v4sf_ftype_v4sf_pv2si 15495 = build_function_type_list (V4SF_type_node, 15496 V4SF_type_node, pv2si_type_node, NULL_TREE); 15497 tree void_ftype_pv2si_v4sf 15498 = build_function_type_list (void_type_node, 15499 pv2si_type_node, V4SF_type_node, NULL_TREE); 15500 tree void_ftype_pfloat_v4sf 15501 = build_function_type_list (void_type_node, 15502 pfloat_type_node, V4SF_type_node, NULL_TREE); 15503 tree void_ftype_pdi_di 15504 = build_function_type_list (void_type_node, 15505 pdi_type_node, long_long_unsigned_type_node, 15506 NULL_TREE); 15507 tree void_ftype_pv2di_v2di 15508 = build_function_type_list (void_type_node, 15509 pv2di_type_node, V2DI_type_node, NULL_TREE); 15510 /* Normal vector unops. */ 15511 tree v4sf_ftype_v4sf 15512 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); 15513 tree v16qi_ftype_v16qi 15514 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE); 15515 tree v8hi_ftype_v8hi 15516 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE); 15517 tree v4si_ftype_v4si 15518 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); 15519 tree v8qi_ftype_v8qi 15520 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE); 15521 tree v4hi_ftype_v4hi 15522 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE); 15523 15524 /* Normal vector binops. 
*/ 15525 tree v4sf_ftype_v4sf_v4sf 15526 = build_function_type_list (V4SF_type_node, 15527 V4SF_type_node, V4SF_type_node, NULL_TREE); 15528 tree v8qi_ftype_v8qi_v8qi 15529 = build_function_type_list (V8QI_type_node, 15530 V8QI_type_node, V8QI_type_node, NULL_TREE); 15531 tree v4hi_ftype_v4hi_v4hi 15532 = build_function_type_list (V4HI_type_node, 15533 V4HI_type_node, V4HI_type_node, NULL_TREE); 15534 tree v2si_ftype_v2si_v2si 15535 = build_function_type_list (V2SI_type_node, 15536 V2SI_type_node, V2SI_type_node, NULL_TREE); 15537 tree di_ftype_di_di 15538 = build_function_type_list (long_long_unsigned_type_node, 15539 long_long_unsigned_type_node, 15540 long_long_unsigned_type_node, NULL_TREE); 15541 15542 tree di_ftype_di_di_int 15543 = build_function_type_list (long_long_unsigned_type_node, 15544 long_long_unsigned_type_node, 15545 long_long_unsigned_type_node, 15546 integer_type_node, NULL_TREE); 15547 15548 tree v2si_ftype_v2sf 15549 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE); 15550 tree v2sf_ftype_v2si 15551 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE); 15552 tree v2si_ftype_v2si 15553 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE); 15554 tree v2sf_ftype_v2sf 15555 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE); 15556 tree v2sf_ftype_v2sf_v2sf 15557 = build_function_type_list (V2SF_type_node, 15558 V2SF_type_node, V2SF_type_node, NULL_TREE); 15559 tree v2si_ftype_v2sf_v2sf 15560 = build_function_type_list (V2SI_type_node, 15561 V2SF_type_node, V2SF_type_node, NULL_TREE); 15562 tree pint_type_node = build_pointer_type (integer_type_node); 15563 tree pdouble_type_node = build_pointer_type (double_type_node); 15564 tree pcdouble_type_node = build_pointer_type ( 15565 build_type_variant (double_type_node, 1, 0)); 15566 tree int_ftype_v2df_v2df 15567 = build_function_type_list (integer_type_node, 15568 V2DF_type_node, V2DF_type_node, NULL_TREE); 15569 15570 tree 
void_ftype_pcvoid 15571 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE); 15572 tree v4sf_ftype_v4si 15573 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE); 15574 tree v4si_ftype_v4sf 15575 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE); 15576 tree v2df_ftype_v4si 15577 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE); 15578 tree v4si_ftype_v2df 15579 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE); 15580 tree v2si_ftype_v2df 15581 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE); 15582 tree v4sf_ftype_v2df 15583 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE); 15584 tree v2df_ftype_v2si 15585 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE); 15586 tree v2df_ftype_v4sf 15587 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE); 15588 tree int_ftype_v2df 15589 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE); 15590 tree int64_ftype_v2df 15591 = build_function_type_list (long_long_integer_type_node, 15592 V2DF_type_node, NULL_TREE); 15593 tree v2df_ftype_v2df_int 15594 = build_function_type_list (V2DF_type_node, 15595 V2DF_type_node, integer_type_node, NULL_TREE); 15596 tree v2df_ftype_v2df_int64 15597 = build_function_type_list (V2DF_type_node, 15598 V2DF_type_node, long_long_integer_type_node, 15599 NULL_TREE); 15600 tree v4sf_ftype_v4sf_v2df 15601 = build_function_type_list (V4SF_type_node, 15602 V4SF_type_node, V2DF_type_node, NULL_TREE); 15603 tree v2df_ftype_v2df_v4sf 15604 = build_function_type_list (V2DF_type_node, 15605 V2DF_type_node, V4SF_type_node, NULL_TREE); 15606 tree v2df_ftype_v2df_v2df_int 15607 = build_function_type_list (V2DF_type_node, 15608 V2DF_type_node, V2DF_type_node, 15609 integer_type_node, 15610 NULL_TREE); 15611 tree v2df_ftype_v2df_pcdouble 15612 = build_function_type_list (V2DF_type_node, 15613 V2DF_type_node, 
pcdouble_type_node, NULL_TREE); 15614 tree void_ftype_pdouble_v2df 15615 = build_function_type_list (void_type_node, 15616 pdouble_type_node, V2DF_type_node, NULL_TREE); 15617 tree void_ftype_pint_int 15618 = build_function_type_list (void_type_node, 15619 pint_type_node, integer_type_node, NULL_TREE); 15620 tree void_ftype_v16qi_v16qi_pchar 15621 = build_function_type_list (void_type_node, 15622 V16QI_type_node, V16QI_type_node, 15623 pchar_type_node, NULL_TREE); 15624 tree v2df_ftype_pcdouble 15625 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE); 15626 tree v2df_ftype_v2df_v2df 15627 = build_function_type_list (V2DF_type_node, 15628 V2DF_type_node, V2DF_type_node, NULL_TREE); 15629 tree v16qi_ftype_v16qi_v16qi 15630 = build_function_type_list (V16QI_type_node, 15631 V16QI_type_node, V16QI_type_node, NULL_TREE); 15632 tree v8hi_ftype_v8hi_v8hi 15633 = build_function_type_list (V8HI_type_node, 15634 V8HI_type_node, V8HI_type_node, NULL_TREE); 15635 tree v4si_ftype_v4si_v4si 15636 = build_function_type_list (V4SI_type_node, 15637 V4SI_type_node, V4SI_type_node, NULL_TREE); 15638 tree v2di_ftype_v2di_v2di 15639 = build_function_type_list (V2DI_type_node, 15640 V2DI_type_node, V2DI_type_node, NULL_TREE); 15641 tree v2di_ftype_v2df_v2df 15642 = build_function_type_list (V2DI_type_node, 15643 V2DF_type_node, V2DF_type_node, NULL_TREE); 15644 tree v2df_ftype_v2df 15645 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); 15646 tree v2di_ftype_v2di_int 15647 = build_function_type_list (V2DI_type_node, 15648 V2DI_type_node, integer_type_node, NULL_TREE); 15649 tree v2di_ftype_v2di_v2di_int 15650 = build_function_type_list (V2DI_type_node, V2DI_type_node, 15651 V2DI_type_node, integer_type_node, NULL_TREE); 15652 tree v4si_ftype_v4si_int 15653 = build_function_type_list (V4SI_type_node, 15654 V4SI_type_node, integer_type_node, NULL_TREE); 15655 tree v8hi_ftype_v8hi_int 15656 = build_function_type_list (V8HI_type_node, 15657 
V8HI_type_node, integer_type_node, NULL_TREE); 15658 tree v4si_ftype_v8hi_v8hi 15659 = build_function_type_list (V4SI_type_node, 15660 V8HI_type_node, V8HI_type_node, NULL_TREE); 15661 tree di_ftype_v8qi_v8qi 15662 = build_function_type_list (long_long_unsigned_type_node, 15663 V8QI_type_node, V8QI_type_node, NULL_TREE); 15664 tree di_ftype_v2si_v2si 15665 = build_function_type_list (long_long_unsigned_type_node, 15666 V2SI_type_node, V2SI_type_node, NULL_TREE); 15667 tree v2di_ftype_v16qi_v16qi 15668 = build_function_type_list (V2DI_type_node, 15669 V16QI_type_node, V16QI_type_node, NULL_TREE); 15670 tree v2di_ftype_v4si_v4si 15671 = build_function_type_list (V2DI_type_node, 15672 V4SI_type_node, V4SI_type_node, NULL_TREE); 15673 tree int_ftype_v16qi 15674 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE); 15675 tree v16qi_ftype_pcchar 15676 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE); 15677 tree void_ftype_pchar_v16qi 15678 = build_function_type_list (void_type_node, 15679 pchar_type_node, V16QI_type_node, NULL_TREE); 15680 15681 tree v2di_ftype_v2di_unsigned_unsigned 15682 = build_function_type_list (V2DI_type_node, V2DI_type_node, 15683 unsigned_type_node, unsigned_type_node, 15684 NULL_TREE); 15685 tree v2di_ftype_v2di_v2di_unsigned_unsigned 15686 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node, 15687 unsigned_type_node, unsigned_type_node, 15688 NULL_TREE); 15689 tree v2di_ftype_v2di_v16qi 15690 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node, 15691 NULL_TREE); 15692 15693 tree float80_type; 15694 tree float128_type; 15695 tree ftype; 15696 15697 /* The __float80 type. */ 15698 if (TYPE_MODE (long_double_type_node) == XFmode) 15699 (*lang_hooks.types.register_builtin_type) (long_double_type_node, 15700 "__float80"); 15701 else 15702 { 15703 /* The __float80 type. 
*/ 15704 float80_type = make_node (REAL_TYPE); 15705 TYPE_PRECISION (float80_type) = 80; 15706 layout_type (float80_type); 15707 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80"); 15708 } 15709 15710 if (TARGET_64BIT) 15711 { 15712 float128_type = make_node (REAL_TYPE); 15713 TYPE_PRECISION (float128_type) = 128; 15714 layout_type (float128_type); 15715 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128"); 15716 } 15717 15718 /* Add all builtins that are more or less simple operations on two 15719 operands. */ 15720 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) 15721 { 15722 /* Use one of the operands; the target can have a different mode for 15723 mask-generating compares. */ 15724 enum machine_mode mode; 15725 tree type; 15726 15727 if (d->name == 0) 15728 continue; 15729 mode = insn_data[d->icode].operand[1].mode; 15730 15731 switch (mode) 15732 { 15733 case V16QImode: 15734 type = v16qi_ftype_v16qi_v16qi; 15735 break; 15736 case V8HImode: 15737 type = v8hi_ftype_v8hi_v8hi; 15738 break; 15739 case V4SImode: 15740 type = v4si_ftype_v4si_v4si; 15741 break; 15742 case V2DImode: 15743 type = v2di_ftype_v2di_v2di; 15744 break; 15745 case V2DFmode: 15746 type = v2df_ftype_v2df_v2df; 15747 break; 15748 case V4SFmode: 15749 type = v4sf_ftype_v4sf_v4sf; 15750 break; 15751 case V8QImode: 15752 type = v8qi_ftype_v8qi_v8qi; 15753 break; 15754 case V4HImode: 15755 type = v4hi_ftype_v4hi_v4hi; 15756 break; 15757 case V2SImode: 15758 type = v2si_ftype_v2si_v2si; 15759 break; 15760 case DImode: 15761 type = di_ftype_di_di; 15762 break; 15763 15764 default: 15765 gcc_unreachable (); 15766 } 15767 15768 /* Override for comparisons. 
*/ 15769 if (d->icode == CODE_FOR_sse_maskcmpv4sf3 15770 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3) 15771 type = v4si_ftype_v4sf_v4sf; 15772 15773 if (d->icode == CODE_FOR_sse2_maskcmpv2df3 15774 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3) 15775 type = v2di_ftype_v2df_v2df; 15776 15777 def_builtin (d->mask, d->name, type, d->code); 15778 } 15779 15780 /* Add all builtins that are more or less simple operations on 1 operand. */ 15781 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) 15782 { 15783 enum machine_mode mode; 15784 tree type; 15785 15786 if (d->name == 0) 15787 continue; 15788 mode = insn_data[d->icode].operand[1].mode; 15789 15790 switch (mode) 15791 { 15792 case V16QImode: 15793 type = v16qi_ftype_v16qi; 15794 break; 15795 case V8HImode: 15796 type = v8hi_ftype_v8hi; 15797 break; 15798 case V4SImode: 15799 type = v4si_ftype_v4si; 15800 break; 15801 case V2DFmode: 15802 type = v2df_ftype_v2df; 15803 break; 15804 case V4SFmode: 15805 type = v4sf_ftype_v4sf; 15806 break; 15807 case V8QImode: 15808 type = v8qi_ftype_v8qi; 15809 break; 15810 case V4HImode: 15811 type = v4hi_ftype_v4hi; 15812 break; 15813 case V2SImode: 15814 type = v2si_ftype_v2si; 15815 break; 15816 15817 default: 15818 abort (); 15819 } 15820 15821 def_builtin (d->mask, d->name, type, d->code); 15822 } 15823 15824 /* Add the remaining MMX insns with somewhat more complicated types. 
*/ 15825 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); 15826 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW); 15827 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD); 15828 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ); 15829 15830 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW); 15831 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD); 15832 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ); 15833 15834 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW); 15835 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD); 15836 15837 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW); 15838 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD); 15839 15840 /* comi/ucomi insns. 
*/ 15841 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) 15842 if (d->mask == MASK_SSE2) 15843 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code); 15844 else 15845 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); 15846 15847 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB); 15848 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW); 15849 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB); 15850 15851 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); 15852 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); 15853 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); 15854 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); 15855 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); 15856 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS); 15857 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); 15858 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64); 15859 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); 15860 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); 15861 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64); 15862 15863 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); 15864 15865 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS); 15866 def_builtin (MASK_SSE, 
"__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); 15867 15868 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); 15869 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); 15870 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); 15871 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); 15872 15873 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); 15874 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); 15875 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); 15876 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); 15877 15878 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); 15879 15880 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); 15881 15882 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); 15883 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); 15884 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); 15885 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); 15886 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); 15887 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); 15888 15889 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); 15890 15891 /* Original 3DNow! 
*/ 15892 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS); 15893 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB); 15894 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID); 15895 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC); 15896 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD); 15897 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ); 15898 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE); 15899 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT); 15900 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX); 15901 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN); 15902 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL); 15903 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP); 15904 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1); 15905 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2); 15906 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT); 15907 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1); 15908 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB); 15909 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR); 15910 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD); 15911 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW); 15912 15913 /* 
3DNow! extension as used in the Athlon CPU. */ 15914 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW); 15915 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC); 15916 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC); 15917 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW); 15918 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF); 15919 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI); 15920 15921 /* SSE2 */ 15922 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU); 15923 15924 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD); 15925 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD); 15926 15927 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD); 15928 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD); 15929 15930 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD); 15931 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128); 15932 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI); 15933 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD); 15934 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ); 15935 15936 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD); 15937 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW); 15938 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", 
v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW); 15939 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128); 15940 15941 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD); 15942 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD); 15943 15944 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD); 15945 15946 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD); 15947 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS); 15948 15949 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ); 15950 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI); 15951 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS); 15952 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ); 15953 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI); 15954 15955 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD); 15956 15957 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI); 15958 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI); 15959 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64); 15960 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64); 15961 15962 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ); 15963 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD); 15964 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ); 
15965 15966 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD); 15967 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD); 15968 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS); 15969 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD); 15970 15971 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH); 15972 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE); 15973 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); 15974 15975 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU); 15976 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU); 15977 15978 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ); 15979 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128); 15980 15981 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128); 15982 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128); 15983 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128); 15984 15985 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128); 15986 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128); 15987 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128); 15988 15989 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128); 15990 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128); 15991 
15992 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128); 15993 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128); 15994 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128); 15995 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128); 15996 15997 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128); 15998 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128); 15999 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128); 16000 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128); 16001 16002 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128); 16003 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128); 16004 16005 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128); 16006 16007 /* Prescott New Instructions. */ 16008 def_builtin (MASK_SSE3, "__builtin_ia32_monitor", 16009 void_ftype_pcvoid_unsigned_unsigned, 16010 IX86_BUILTIN_MONITOR); 16011 def_builtin (MASK_SSE3, "__builtin_ia32_mwait", 16012 void_ftype_unsigned_unsigned, 16013 IX86_BUILTIN_MWAIT); 16014 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup", 16015 v4sf_ftype_v4sf, 16016 IX86_BUILTIN_MOVSHDUP); 16017 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup", 16018 v4sf_ftype_v4sf, 16019 IX86_BUILTIN_MOVSLDUP); 16020 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu", 16021 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU); 16022 16023 /* SSSE3. 
*/ 16024 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128", 16025 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128); 16026 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, 16027 IX86_BUILTIN_PALIGNR); 16028 16029 /* AMDFAM10 SSE4A New built-ins */ 16030 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd", 16031 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD); 16032 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss", 16033 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS); 16034 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi", 16035 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI); 16036 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq", 16037 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ); 16038 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi", 16039 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI); 16040 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq", 16041 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ); 16042 16043 /* Access to the vec_init patterns. */ 16044 ftype = build_function_type_list (V2SI_type_node, integer_type_node, 16045 integer_type_node, NULL_TREE); 16046 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si", 16047 ftype, IX86_BUILTIN_VEC_INIT_V2SI); 16048 16049 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node, 16050 short_integer_type_node, 16051 short_integer_type_node, 16052 short_integer_type_node, NULL_TREE); 16053 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi", 16054 ftype, IX86_BUILTIN_VEC_INIT_V4HI); 16055 16056 ftype = build_function_type_list (V8QI_type_node, char_type_node, 16057 char_type_node, char_type_node, 16058 char_type_node, char_type_node, 16059 char_type_node, char_type_node, 16060 char_type_node, NULL_TREE); 16061 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi", 16062 ftype, IX86_BUILTIN_VEC_INIT_V8QI); 16063 16064 /* Access to the vec_extract patterns. 
*/ 16065 ftype = build_function_type_list (double_type_node, V2DF_type_node, 16066 integer_type_node, NULL_TREE); 16067 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df", 16068 ftype, IX86_BUILTIN_VEC_EXT_V2DF); 16069 16070 ftype = build_function_type_list (long_long_integer_type_node, 16071 V2DI_type_node, integer_type_node, 16072 NULL_TREE); 16073 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di", 16074 ftype, IX86_BUILTIN_VEC_EXT_V2DI); 16075 16076 ftype = build_function_type_list (float_type_node, V4SF_type_node, 16077 integer_type_node, NULL_TREE); 16078 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf", 16079 ftype, IX86_BUILTIN_VEC_EXT_V4SF); 16080 16081 ftype = build_function_type_list (intSI_type_node, V4SI_type_node, 16082 integer_type_node, NULL_TREE); 16083 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si", 16084 ftype, IX86_BUILTIN_VEC_EXT_V4SI); 16085 16086 ftype = build_function_type_list (intHI_type_node, V8HI_type_node, 16087 integer_type_node, NULL_TREE); 16088 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi", 16089 ftype, IX86_BUILTIN_VEC_EXT_V8HI); 16090 16091 ftype = build_function_type_list (intHI_type_node, V4HI_type_node, 16092 integer_type_node, NULL_TREE); 16093 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", 16094 ftype, IX86_BUILTIN_VEC_EXT_V4HI); 16095 16096 ftype = build_function_type_list (intSI_type_node, V2SI_type_node, 16097 integer_type_node, NULL_TREE); 16098 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si", 16099 ftype, IX86_BUILTIN_VEC_EXT_V2SI); 16100 16101 ftype = build_function_type_list (intQI_type_node, V16QI_type_node, 16102 integer_type_node, NULL_TREE); 16103 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI); 16104 16105 /* Access to the vec_set patterns. 
 */
  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
	       ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  /* const0_rtx is a scalar zero; substitute the all-zeros vector
     constant of MODE so that vector insn predicates still accept it.  */
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.
   ICODE is the insn pattern to emit, ARGLIST holds the builtin's two
   arguments, and TARGET is a suggested (possibly NULL) result register.
   Returns the rtx holding the result, or 0 if the pattern could not
   be generated.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* OP1 may arrive as an SImode scalar when the insn wants a TImode
     operand; load it into a V4SImode register and take the TImode
     lowpart of that register.  */
  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result.  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      /* All modes match: let the common binary-operator machinery
	 legitimize the operand pair.  */
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      /* Mismatched modes: fall back to forcing everything into
	 registers of the modes the insn actually wants.  */
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of stores.
*/ 16199 16200static rtx 16201ix86_expand_store_builtin (enum insn_code icode, tree arglist) 16202{ 16203 rtx pat; 16204 tree arg0 = TREE_VALUE (arglist); 16205 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16206 rtx op0 = expand_normal (arg0); 16207 rtx op1 = expand_normal (arg1); 16208 enum machine_mode mode0 = insn_data[icode].operand[0].mode; 16209 enum machine_mode mode1 = insn_data[icode].operand[1].mode; 16210 16211 if (VECTOR_MODE_P (mode1)) 16212 op1 = safe_vector_operand (op1, mode1); 16213 16214 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); 16215 op1 = copy_to_mode_reg (mode1, op1); 16216 16217 pat = GEN_FCN (icode) (op0, op1); 16218 if (pat) 16219 emit_insn (pat); 16220 return 0; 16221} 16222 16223/* Subroutine of ix86_expand_builtin to take care of unop insns. */ 16224 16225static rtx 16226ix86_expand_unop_builtin (enum insn_code icode, tree arglist, 16227 rtx target, int do_load) 16228{ 16229 rtx pat; 16230 tree arg0 = TREE_VALUE (arglist); 16231 rtx op0 = expand_normal (arg0); 16232 enum machine_mode tmode = insn_data[icode].operand[0].mode; 16233 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 16234 16235 if (optimize || !target 16236 || GET_MODE (target) != tmode 16237 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 16238 target = gen_reg_rtx (tmode); 16239 if (do_load) 16240 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); 16241 else 16242 { 16243 if (VECTOR_MODE_P (mode0)) 16244 op0 = safe_vector_operand (op0, mode0); 16245 16246 if ((optimize && !register_operand (op0, mode0)) 16247 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 16248 op0 = copy_to_mode_reg (mode0, op0); 16249 } 16250 16251 pat = GEN_FCN (icode) (target, op0); 16252 if (! pat) 16253 return 0; 16254 emit_insn (pat); 16255 return target; 16256} 16257 16258/* Subroutine of ix86_expand_builtin to take care of three special unop insns: 16259 sqrtss, rsqrtss, rcpss. 
*/ 16260 16261static rtx 16262ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target) 16263{ 16264 rtx pat; 16265 tree arg0 = TREE_VALUE (arglist); 16266 rtx op1, op0 = expand_normal (arg0); 16267 enum machine_mode tmode = insn_data[icode].operand[0].mode; 16268 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 16269 16270 if (optimize || !target 16271 || GET_MODE (target) != tmode 16272 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 16273 target = gen_reg_rtx (tmode); 16274 16275 if (VECTOR_MODE_P (mode0)) 16276 op0 = safe_vector_operand (op0, mode0); 16277 16278 if ((optimize && !register_operand (op0, mode0)) 16279 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 16280 op0 = copy_to_mode_reg (mode0, op0); 16281 16282 op1 = op0; 16283 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0)) 16284 op1 = copy_to_mode_reg (mode0, op1); 16285 16286 pat = GEN_FCN (icode) (target, op0, op1); 16287 if (! pat) 16288 return 0; 16289 emit_insn (pat); 16290 return target; 16291} 16292 16293/* Subroutine of ix86_expand_builtin to take care of comparison insns. */ 16294 16295static rtx 16296ix86_expand_sse_compare (const struct builtin_description *d, tree arglist, 16297 rtx target) 16298{ 16299 rtx pat; 16300 tree arg0 = TREE_VALUE (arglist); 16301 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16302 rtx op0 = expand_normal (arg0); 16303 rtx op1 = expand_normal (arg1); 16304 rtx op2; 16305 enum machine_mode tmode = insn_data[d->icode].operand[0].mode; 16306 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode; 16307 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode; 16308 enum rtx_code comparison = d->comparison; 16309 16310 if (VECTOR_MODE_P (mode0)) 16311 op0 = safe_vector_operand (op0, mode0); 16312 if (VECTOR_MODE_P (mode1)) 16313 op1 = safe_vector_operand (op1, mode1); 16314 16315 /* Swap operands if we have a comparison that isn't available in 16316 hardware. 
*/ 16317 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) 16318 { 16319 rtx tmp = gen_reg_rtx (mode1); 16320 emit_move_insn (tmp, op1); 16321 op1 = op0; 16322 op0 = tmp; 16323 } 16324 16325 if (optimize || !target 16326 || GET_MODE (target) != tmode 16327 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode)) 16328 target = gen_reg_rtx (tmode); 16329 16330 if ((optimize && !register_operand (op0, mode0)) 16331 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0)) 16332 op0 = copy_to_mode_reg (mode0, op0); 16333 if ((optimize && !register_operand (op1, mode1)) 16334 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1)) 16335 op1 = copy_to_mode_reg (mode1, op1); 16336 16337 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); 16338 pat = GEN_FCN (d->icode) (target, op0, op1, op2); 16339 if (! pat) 16340 return 0; 16341 emit_insn (pat); 16342 return target; 16343} 16344 16345/* Subroutine of ix86_expand_builtin to take care of comi insns. */ 16346 16347static rtx 16348ix86_expand_sse_comi (const struct builtin_description *d, tree arglist, 16349 rtx target) 16350{ 16351 rtx pat; 16352 tree arg0 = TREE_VALUE (arglist); 16353 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16354 rtx op0 = expand_normal (arg0); 16355 rtx op1 = expand_normal (arg1); 16356 rtx op2; 16357 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; 16358 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; 16359 enum rtx_code comparison = d->comparison; 16360 16361 if (VECTOR_MODE_P (mode0)) 16362 op0 = safe_vector_operand (op0, mode0); 16363 if (VECTOR_MODE_P (mode1)) 16364 op1 = safe_vector_operand (op1, mode1); 16365 16366 /* Swap operands if we have a comparison that isn't available in 16367 hardware. 
*/ 16368 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) 16369 { 16370 rtx tmp = op1; 16371 op1 = op0; 16372 op0 = tmp; 16373 } 16374 16375 target = gen_reg_rtx (SImode); 16376 emit_move_insn (target, const0_rtx); 16377 target = gen_rtx_SUBREG (QImode, target, 0); 16378 16379 if ((optimize && !register_operand (op0, mode0)) 16380 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0)) 16381 op0 = copy_to_mode_reg (mode0, op0); 16382 if ((optimize && !register_operand (op1, mode1)) 16383 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1)) 16384 op1 = copy_to_mode_reg (mode1, op1); 16385 16386 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); 16387 pat = GEN_FCN (d->icode) (op0, op1); 16388 if (! pat) 16389 return 0; 16390 emit_insn (pat); 16391 emit_insn (gen_rtx_SET (VOIDmode, 16392 gen_rtx_STRICT_LOW_PART (VOIDmode, target), 16393 gen_rtx_fmt_ee (comparison, QImode, 16394 SET_DEST (pat), 16395 const0_rtx))); 16396 16397 return SUBREG_REG (target); 16398} 16399 16400/* Return the integer constant in ARG. Constrain it to be in the range 16401 of the subparts of VEC_TYPE; issue an error if not. */ 16402 16403static int 16404get_element_number (tree vec_type, tree arg) 16405{ 16406 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; 16407 16408 if (!host_integerp (arg, 1) 16409 || (elt = tree_low_cst (arg, 1), elt > max)) 16410 { 16411 error ("selector must be an integer constant in the range 0..%wi", max); 16412 return 0; 16413 } 16414 16415 return elt; 16416} 16417 16418/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around 16419 ix86_expand_vector_init. We DO have language-level syntax for this, in 16420 the form of (type){ init-list }. Except that since we can't place emms 16421 instructions from inside the compiler, we can't allow the use of MMX 16422 registers unless the user explicitly asks for it. So we do *not* define 16423 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. 
Instead 16424 we have builtins invoked by mmintrin.h that gives us license to emit 16425 these sorts of instructions. */ 16426 16427static rtx 16428ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target) 16429{ 16430 enum machine_mode tmode = TYPE_MODE (type); 16431 enum machine_mode inner_mode = GET_MODE_INNER (tmode); 16432 int i, n_elt = GET_MODE_NUNITS (tmode); 16433 rtvec v = rtvec_alloc (n_elt); 16434 16435 gcc_assert (VECTOR_MODE_P (tmode)); 16436 16437 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist)) 16438 { 16439 rtx x = expand_normal (TREE_VALUE (arglist)); 16440 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); 16441 } 16442 16443 gcc_assert (arglist == NULL); 16444 16445 if (!target || !register_operand (target, tmode)) 16446 target = gen_reg_rtx (tmode); 16447 16448 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v)); 16449 return target; 16450} 16451 16452/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around 16453 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we 16454 had a language-level syntax for referencing vector elements. */ 16455 16456static rtx 16457ix86_expand_vec_ext_builtin (tree arglist, rtx target) 16458{ 16459 enum machine_mode tmode, mode0; 16460 tree arg0, arg1; 16461 int elt; 16462 rtx op0; 16463 16464 arg0 = TREE_VALUE (arglist); 16465 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16466 16467 op0 = expand_normal (arg0); 16468 elt = get_element_number (TREE_TYPE (arg0), arg1); 16469 16470 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); 16471 mode0 = TYPE_MODE (TREE_TYPE (arg0)); 16472 gcc_assert (VECTOR_MODE_P (mode0)); 16473 16474 op0 = force_reg (mode0, op0); 16475 16476 if (optimize || !target || !register_operand (target, tmode)) 16477 target = gen_reg_rtx (tmode); 16478 16479 ix86_expand_vector_extract (true, target, op0, elt); 16480 16481 return target; 16482} 16483 16484/* A subroutine of ix86_expand_builtin. 
   These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree arglist)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  /* ARG0 is the vector, ARG1 the new element value, ARG2 the selector.  */
  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));

  /* TMODE is the vector mode, MODE1 its element mode.  */
  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  /* The element value may have been expanded in a wider mode;
     narrow/convert it to the element mode.  */
  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.
*/ 16528 16529static rtx 16530ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, 16531 enum machine_mode mode ATTRIBUTE_UNUSED, 16532 int ignore ATTRIBUTE_UNUSED) 16533{ 16534 const struct builtin_description *d; 16535 size_t i; 16536 enum insn_code icode; 16537 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); 16538 tree arglist = TREE_OPERAND (exp, 1); 16539 tree arg0, arg1, arg2, arg3; 16540 rtx op0, op1, op2, op3, pat; 16541 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4; 16542 unsigned int fcode = DECL_FUNCTION_CODE (fndecl); 16543 16544 switch (fcode) 16545 { 16546 case IX86_BUILTIN_EMMS: 16547 emit_insn (gen_mmx_emms ()); 16548 return 0; 16549 16550 case IX86_BUILTIN_SFENCE: 16551 emit_insn (gen_sse_sfence ()); 16552 return 0; 16553 16554 case IX86_BUILTIN_MASKMOVQ: 16555 case IX86_BUILTIN_MASKMOVDQU: 16556 icode = (fcode == IX86_BUILTIN_MASKMOVQ 16557 ? CODE_FOR_mmx_maskmovq 16558 : CODE_FOR_sse2_maskmovdqu); 16559 /* Note the arg order is different from the operand order. */ 16560 arg1 = TREE_VALUE (arglist); 16561 arg2 = TREE_VALUE (TREE_CHAIN (arglist)); 16562 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 16563 op0 = expand_normal (arg0); 16564 op1 = expand_normal (arg1); 16565 op2 = expand_normal (arg2); 16566 mode0 = insn_data[icode].operand[0].mode; 16567 mode1 = insn_data[icode].operand[1].mode; 16568 mode2 = insn_data[icode].operand[2].mode; 16569 16570 op0 = force_reg (Pmode, op0); 16571 op0 = gen_rtx_MEM (mode1, op0); 16572 16573 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) 16574 op0 = copy_to_mode_reg (mode0, op0); 16575 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) 16576 op1 = copy_to_mode_reg (mode1, op1); 16577 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2)) 16578 op2 = copy_to_mode_reg (mode2, op2); 16579 pat = GEN_FCN (icode) (op0, op1, op2); 16580 if (! 
pat) 16581 return 0; 16582 emit_insn (pat); 16583 return 0; 16584 16585 case IX86_BUILTIN_SQRTSS: 16586 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target); 16587 case IX86_BUILTIN_RSQRTSS: 16588 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target); 16589 case IX86_BUILTIN_RCPSS: 16590 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target); 16591 16592 case IX86_BUILTIN_LOADUPS: 16593 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1); 16594 16595 case IX86_BUILTIN_STOREUPS: 16596 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist); 16597 16598 case IX86_BUILTIN_LOADHPS: 16599 case IX86_BUILTIN_LOADLPS: 16600 case IX86_BUILTIN_LOADHPD: 16601 case IX86_BUILTIN_LOADLPD: 16602 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps 16603 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps 16604 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd 16605 : CODE_FOR_sse2_loadlpd); 16606 arg0 = TREE_VALUE (arglist); 16607 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16608 op0 = expand_normal (arg0); 16609 op1 = expand_normal (arg1); 16610 tmode = insn_data[icode].operand[0].mode; 16611 mode0 = insn_data[icode].operand[1].mode; 16612 mode1 = insn_data[icode].operand[2].mode; 16613 16614 op0 = force_reg (mode0, op0); 16615 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1)); 16616 if (optimize || target == 0 16617 || GET_MODE (target) != tmode 16618 || !register_operand (target, tmode)) 16619 target = gen_reg_rtx (tmode); 16620 pat = GEN_FCN (icode) (target, op0, op1); 16621 if (! pat) 16622 return 0; 16623 emit_insn (pat); 16624 return target; 16625 16626 case IX86_BUILTIN_STOREHPS: 16627 case IX86_BUILTIN_STORELPS: 16628 icode = (fcode == IX86_BUILTIN_STOREHPS ? 
CODE_FOR_sse_storehps 16629 : CODE_FOR_sse_storelps); 16630 arg0 = TREE_VALUE (arglist); 16631 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16632 op0 = expand_normal (arg0); 16633 op1 = expand_normal (arg1); 16634 mode0 = insn_data[icode].operand[0].mode; 16635 mode1 = insn_data[icode].operand[1].mode; 16636 16637 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); 16638 op1 = force_reg (mode1, op1); 16639 16640 pat = GEN_FCN (icode) (op0, op1); 16641 if (! pat) 16642 return 0; 16643 emit_insn (pat); 16644 return const0_rtx; 16645 16646 case IX86_BUILTIN_MOVNTPS: 16647 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist); 16648 case IX86_BUILTIN_MOVNTQ: 16649 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist); 16650 16651 case IX86_BUILTIN_LDMXCSR: 16652 op0 = expand_normal (TREE_VALUE (arglist)); 16653 target = assign_386_stack_local (SImode, SLOT_VIRTUAL); 16654 emit_move_insn (target, op0); 16655 emit_insn (gen_sse_ldmxcsr (target)); 16656 return 0; 16657 16658 case IX86_BUILTIN_STMXCSR: 16659 target = assign_386_stack_local (SImode, SLOT_VIRTUAL); 16660 emit_insn (gen_sse_stmxcsr (target)); 16661 return copy_to_mode_reg (SImode, target); 16662 16663 case IX86_BUILTIN_SHUFPS: 16664 case IX86_BUILTIN_SHUFPD: 16665 icode = (fcode == IX86_BUILTIN_SHUFPS 16666 ? CODE_FOR_sse_shufps 16667 : CODE_FOR_sse2_shufpd); 16668 arg0 = TREE_VALUE (arglist); 16669 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16670 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 16671 op0 = expand_normal (arg0); 16672 op1 = expand_normal (arg1); 16673 op2 = expand_normal (arg2); 16674 tmode = insn_data[icode].operand[0].mode; 16675 mode0 = insn_data[icode].operand[1].mode; 16676 mode1 = insn_data[icode].operand[2].mode; 16677 mode2 = insn_data[icode].operand[3].mode; 16678 16679 if (! 
(*insn_data[icode].operand[1].predicate) (op0, mode0)) 16680 op0 = copy_to_mode_reg (mode0, op0); 16681 if ((optimize && !register_operand (op1, mode1)) 16682 || !(*insn_data[icode].operand[2].predicate) (op1, mode1)) 16683 op1 = copy_to_mode_reg (mode1, op1); 16684 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) 16685 { 16686 /* @@@ better error message */ 16687 error ("mask must be an immediate"); 16688 return gen_reg_rtx (tmode); 16689 } 16690 if (optimize || target == 0 16691 || GET_MODE (target) != tmode 16692 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 16693 target = gen_reg_rtx (tmode); 16694 pat = GEN_FCN (icode) (target, op0, op1, op2); 16695 if (! pat) 16696 return 0; 16697 emit_insn (pat); 16698 return target; 16699 16700 case IX86_BUILTIN_PSHUFW: 16701 case IX86_BUILTIN_PSHUFD: 16702 case IX86_BUILTIN_PSHUFHW: 16703 case IX86_BUILTIN_PSHUFLW: 16704 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw 16705 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw 16706 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd 16707 : CODE_FOR_mmx_pshufw); 16708 arg0 = TREE_VALUE (arglist); 16709 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16710 op0 = expand_normal (arg0); 16711 op1 = expand_normal (arg1); 16712 tmode = insn_data[icode].operand[0].mode; 16713 mode1 = insn_data[icode].operand[1].mode; 16714 mode2 = insn_data[icode].operand[2].mode; 16715 16716 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) 16717 op0 = copy_to_mode_reg (mode1, op0); 16718 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) 16719 { 16720 /* @@@ better error message */ 16721 error ("mask must be an immediate"); 16722 return const0_rtx; 16723 } 16724 if (target == 0 16725 || GET_MODE (target) != tmode 16726 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 16727 target = gen_reg_rtx (tmode); 16728 pat = GEN_FCN (icode) (target, op0, op1); 16729 if (! 
pat) 16730 return 0; 16731 emit_insn (pat); 16732 return target; 16733 16734 case IX86_BUILTIN_PSLLWI128: 16735 icode = CODE_FOR_ashlv8hi3; 16736 goto do_pshifti; 16737 case IX86_BUILTIN_PSLLDI128: 16738 icode = CODE_FOR_ashlv4si3; 16739 goto do_pshifti; 16740 case IX86_BUILTIN_PSLLQI128: 16741 icode = CODE_FOR_ashlv2di3; 16742 goto do_pshifti; 16743 case IX86_BUILTIN_PSRAWI128: 16744 icode = CODE_FOR_ashrv8hi3; 16745 goto do_pshifti; 16746 case IX86_BUILTIN_PSRADI128: 16747 icode = CODE_FOR_ashrv4si3; 16748 goto do_pshifti; 16749 case IX86_BUILTIN_PSRLWI128: 16750 icode = CODE_FOR_lshrv8hi3; 16751 goto do_pshifti; 16752 case IX86_BUILTIN_PSRLDI128: 16753 icode = CODE_FOR_lshrv4si3; 16754 goto do_pshifti; 16755 case IX86_BUILTIN_PSRLQI128: 16756 icode = CODE_FOR_lshrv2di3; 16757 goto do_pshifti; 16758 do_pshifti: 16759 arg0 = TREE_VALUE (arglist); 16760 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16761 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 16762 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 16763 16764 if (GET_CODE (op1) != CONST_INT) 16765 { 16766 error ("shift must be an immediate"); 16767 return const0_rtx; 16768 } 16769 if (INTVAL (op1) < 0 || INTVAL (op1) > 255) 16770 op1 = GEN_INT (255); 16771 16772 tmode = insn_data[icode].operand[0].mode; 16773 mode1 = insn_data[icode].operand[1].mode; 16774 if (! 
(*insn_data[icode].operand[1].predicate) (op0, mode1)) 16775 op0 = copy_to_reg (op0); 16776 16777 target = gen_reg_rtx (tmode); 16778 pat = GEN_FCN (icode) (target, op0, op1); 16779 if (!pat) 16780 return 0; 16781 emit_insn (pat); 16782 return target; 16783 16784 case IX86_BUILTIN_PSLLW128: 16785 icode = CODE_FOR_ashlv8hi3; 16786 goto do_pshift; 16787 case IX86_BUILTIN_PSLLD128: 16788 icode = CODE_FOR_ashlv4si3; 16789 goto do_pshift; 16790 case IX86_BUILTIN_PSLLQ128: 16791 icode = CODE_FOR_ashlv2di3; 16792 goto do_pshift; 16793 case IX86_BUILTIN_PSRAW128: 16794 icode = CODE_FOR_ashrv8hi3; 16795 goto do_pshift; 16796 case IX86_BUILTIN_PSRAD128: 16797 icode = CODE_FOR_ashrv4si3; 16798 goto do_pshift; 16799 case IX86_BUILTIN_PSRLW128: 16800 icode = CODE_FOR_lshrv8hi3; 16801 goto do_pshift; 16802 case IX86_BUILTIN_PSRLD128: 16803 icode = CODE_FOR_lshrv4si3; 16804 goto do_pshift; 16805 case IX86_BUILTIN_PSRLQ128: 16806 icode = CODE_FOR_lshrv2di3; 16807 goto do_pshift; 16808 do_pshift: 16809 arg0 = TREE_VALUE (arglist); 16810 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16811 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 16812 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 16813 16814 tmode = insn_data[icode].operand[0].mode; 16815 mode1 = insn_data[icode].operand[1].mode; 16816 16817 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) 16818 op0 = copy_to_reg (op0); 16819 16820 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0); 16821 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode)) 16822 op1 = copy_to_reg (op1); 16823 16824 target = gen_reg_rtx (tmode); 16825 pat = GEN_FCN (icode) (target, op0, op1); 16826 if (!pat) 16827 return 0; 16828 emit_insn (pat); 16829 return target; 16830 16831 case IX86_BUILTIN_PSLLDQI128: 16832 case IX86_BUILTIN_PSRLDQI128: 16833 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? 
CODE_FOR_sse2_ashlti3 16834 : CODE_FOR_sse2_lshrti3); 16835 arg0 = TREE_VALUE (arglist); 16836 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16837 op0 = expand_normal (arg0); 16838 op1 = expand_normal (arg1); 16839 tmode = insn_data[icode].operand[0].mode; 16840 mode1 = insn_data[icode].operand[1].mode; 16841 mode2 = insn_data[icode].operand[2].mode; 16842 16843 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) 16844 { 16845 op0 = copy_to_reg (op0); 16846 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0); 16847 } 16848 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) 16849 { 16850 error ("shift must be an immediate"); 16851 return const0_rtx; 16852 } 16853 target = gen_reg_rtx (V2DImode); 16854 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), 16855 op0, op1); 16856 if (! pat) 16857 return 0; 16858 emit_insn (pat); 16859 return target; 16860 16861 case IX86_BUILTIN_FEMMS: 16862 emit_insn (gen_mmx_femms ()); 16863 return NULL_RTX; 16864 16865 case IX86_BUILTIN_PAVGUSB: 16866 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target); 16867 16868 case IX86_BUILTIN_PF2ID: 16869 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0); 16870 16871 case IX86_BUILTIN_PFACC: 16872 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target); 16873 16874 case IX86_BUILTIN_PFADD: 16875 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target); 16876 16877 case IX86_BUILTIN_PFCMPEQ: 16878 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target); 16879 16880 case IX86_BUILTIN_PFCMPGE: 16881 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target); 16882 16883 case IX86_BUILTIN_PFCMPGT: 16884 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target); 16885 16886 case IX86_BUILTIN_PFMAX: 16887 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target); 16888 16889 case IX86_BUILTIN_PFMIN: 16890 return 
ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target); 16891 16892 case IX86_BUILTIN_PFMUL: 16893 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target); 16894 16895 case IX86_BUILTIN_PFRCP: 16896 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0); 16897 16898 case IX86_BUILTIN_PFRCPIT1: 16899 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target); 16900 16901 case IX86_BUILTIN_PFRCPIT2: 16902 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target); 16903 16904 case IX86_BUILTIN_PFRSQIT1: 16905 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target); 16906 16907 case IX86_BUILTIN_PFRSQRT: 16908 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0); 16909 16910 case IX86_BUILTIN_PFSUB: 16911 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target); 16912 16913 case IX86_BUILTIN_PFSUBR: 16914 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target); 16915 16916 case IX86_BUILTIN_PI2FD: 16917 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0); 16918 16919 case IX86_BUILTIN_PMULHRW: 16920 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target); 16921 16922 case IX86_BUILTIN_PF2IW: 16923 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0); 16924 16925 case IX86_BUILTIN_PFNACC: 16926 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target); 16927 16928 case IX86_BUILTIN_PFPNACC: 16929 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target); 16930 16931 case IX86_BUILTIN_PI2FW: 16932 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0); 16933 16934 case IX86_BUILTIN_PSWAPDSI: 16935 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0); 16936 16937 case IX86_BUILTIN_PSWAPDSF: 16938 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, 
arglist, target, 0); 16939 16940 case IX86_BUILTIN_SQRTSD: 16941 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target); 16942 case IX86_BUILTIN_LOADUPD: 16943 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1); 16944 case IX86_BUILTIN_STOREUPD: 16945 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist); 16946 16947 case IX86_BUILTIN_MFENCE: 16948 emit_insn (gen_sse2_mfence ()); 16949 return 0; 16950 case IX86_BUILTIN_LFENCE: 16951 emit_insn (gen_sse2_lfence ()); 16952 return 0; 16953 16954 case IX86_BUILTIN_CLFLUSH: 16955 arg0 = TREE_VALUE (arglist); 16956 op0 = expand_normal (arg0); 16957 icode = CODE_FOR_sse2_clflush; 16958 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode)) 16959 op0 = copy_to_mode_reg (Pmode, op0); 16960 16961 emit_insn (gen_sse2_clflush (op0)); 16962 return 0; 16963 16964 case IX86_BUILTIN_MOVNTPD: 16965 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist); 16966 case IX86_BUILTIN_MOVNTDQ: 16967 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist); 16968 case IX86_BUILTIN_MOVNTI: 16969 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist); 16970 16971 case IX86_BUILTIN_LOADDQU: 16972 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1); 16973 case IX86_BUILTIN_STOREDQU: 16974 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist); 16975 16976 case IX86_BUILTIN_MONITOR: 16977 arg0 = TREE_VALUE (arglist); 16978 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16979 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 16980 op0 = expand_normal (arg0); 16981 op1 = expand_normal (arg1); 16982 op2 = expand_normal (arg2); 16983 if (!REG_P (op0)) 16984 op0 = copy_to_mode_reg (Pmode, op0); 16985 if (!REG_P (op1)) 16986 op1 = copy_to_mode_reg (SImode, op1); 16987 if (!REG_P (op2)) 16988 op2 = copy_to_mode_reg (SImode, op2); 16989 if (!TARGET_64BIT) 16990 emit_insn (gen_sse3_monitor (op0, op1, op2)); 16991 else 16992 
emit_insn (gen_sse3_monitor64 (op0, op1, op2)); 16993 return 0; 16994 16995 case IX86_BUILTIN_MWAIT: 16996 arg0 = TREE_VALUE (arglist); 16997 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 16998 op0 = expand_normal (arg0); 16999 op1 = expand_normal (arg1); 17000 if (!REG_P (op0)) 17001 op0 = copy_to_mode_reg (SImode, op0); 17002 if (!REG_P (op1)) 17003 op1 = copy_to_mode_reg (SImode, op1); 17004 emit_insn (gen_sse3_mwait (op0, op1)); 17005 return 0; 17006 17007 case IX86_BUILTIN_LDDQU: 17008 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist, 17009 target, 1); 17010 17011 case IX86_BUILTIN_PALIGNR: 17012 case IX86_BUILTIN_PALIGNR128: 17013 if (fcode == IX86_BUILTIN_PALIGNR) 17014 { 17015 icode = CODE_FOR_ssse3_palignrdi; 17016 mode = DImode; 17017 } 17018 else 17019 { 17020 icode = CODE_FOR_ssse3_palignrti; 17021 mode = V2DImode; 17022 } 17023 arg0 = TREE_VALUE (arglist); 17024 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 17025 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 17026 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 17027 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 17028 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); 17029 tmode = insn_data[icode].operand[0].mode; 17030 mode1 = insn_data[icode].operand[1].mode; 17031 mode2 = insn_data[icode].operand[2].mode; 17032 mode3 = insn_data[icode].operand[3].mode; 17033 17034 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) 17035 { 17036 op0 = copy_to_reg (op0); 17037 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0); 17038 } 17039 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) 17040 { 17041 op1 = copy_to_reg (op1); 17042 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0); 17043 } 17044 if (! 
(*insn_data[icode].operand[3].predicate) (op2, mode3)) 17045 { 17046 error ("shift must be an immediate"); 17047 return const0_rtx; 17048 } 17049 target = gen_reg_rtx (mode); 17050 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0), 17051 op0, op1, op2); 17052 if (! pat) 17053 return 0; 17054 emit_insn (pat); 17055 return target; 17056 17057 case IX86_BUILTIN_MOVNTSD: 17058 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, arglist); 17059 17060 case IX86_BUILTIN_MOVNTSS: 17061 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, arglist); 17062 17063 case IX86_BUILTIN_INSERTQ: 17064 case IX86_BUILTIN_EXTRQ: 17065 icode = (fcode == IX86_BUILTIN_EXTRQ 17066 ? CODE_FOR_sse4a_extrq 17067 : CODE_FOR_sse4a_insertq); 17068 arg0 = TREE_VALUE (arglist); 17069 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 17070 op0 = expand_normal (arg0); 17071 op1 = expand_normal (arg1); 17072 tmode = insn_data[icode].operand[0].mode; 17073 mode1 = insn_data[icode].operand[1].mode; 17074 mode2 = insn_data[icode].operand[2].mode; 17075 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) 17076 op0 = copy_to_mode_reg (mode1, op0); 17077 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) 17078 op1 = copy_to_mode_reg (mode2, op1); 17079 if (optimize || target == 0 17080 || GET_MODE (target) != tmode 17081 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 17082 target = gen_reg_rtx (tmode); 17083 pat = GEN_FCN (icode) (target, op0, op1); 17084 if (! 
pat) 17085 return NULL_RTX; 17086 emit_insn (pat); 17087 return target; 17088 17089 case IX86_BUILTIN_EXTRQI: 17090 icode = CODE_FOR_sse4a_extrqi; 17091 arg0 = TREE_VALUE (arglist); 17092 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 17093 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 17094 op0 = expand_normal (arg0); 17095 op1 = expand_normal (arg1); 17096 op2 = expand_normal (arg2); 17097 tmode = insn_data[icode].operand[0].mode; 17098 mode1 = insn_data[icode].operand[1].mode; 17099 mode2 = insn_data[icode].operand[2].mode; 17100 mode3 = insn_data[icode].operand[3].mode; 17101 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) 17102 op0 = copy_to_mode_reg (mode1, op0); 17103 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) 17104 { 17105 error ("index mask must be an immediate"); 17106 return gen_reg_rtx (tmode); 17107 } 17108 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3)) 17109 { 17110 error ("length mask must be an immediate"); 17111 return gen_reg_rtx (tmode); 17112 } 17113 if (optimize || target == 0 17114 || GET_MODE (target) != tmode 17115 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 17116 target = gen_reg_rtx (tmode); 17117 pat = GEN_FCN (icode) (target, op0, op1, op2); 17118 if (! 
pat) 17119 return NULL_RTX; 17120 emit_insn (pat); 17121 return target; 17122 17123 case IX86_BUILTIN_INSERTQI: 17124 icode = CODE_FOR_sse4a_insertqi; 17125 arg0 = TREE_VALUE (arglist); 17126 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 17127 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 17128 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist)))); 17129 op0 = expand_normal (arg0); 17130 op1 = expand_normal (arg1); 17131 op2 = expand_normal (arg2); 17132 op3 = expand_normal (arg3); 17133 tmode = insn_data[icode].operand[0].mode; 17134 mode1 = insn_data[icode].operand[1].mode; 17135 mode2 = insn_data[icode].operand[2].mode; 17136 mode3 = insn_data[icode].operand[3].mode; 17137 mode4 = insn_data[icode].operand[4].mode; 17138 17139 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) 17140 op0 = copy_to_mode_reg (mode1, op0); 17141 17142 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) 17143 op1 = copy_to_mode_reg (mode2, op1); 17144 17145 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3)) 17146 { 17147 error ("index mask must be an immediate"); 17148 return gen_reg_rtx (tmode); 17149 } 17150 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4)) 17151 { 17152 error ("length mask must be an immediate"); 17153 return gen_reg_rtx (tmode); 17154 } 17155 if (optimize || target == 0 17156 || GET_MODE (target) != tmode 17157 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 17158 target = gen_reg_rtx (tmode); 17159 pat = GEN_FCN (icode) (target, op0, op1, op2, op3); 17160 if (! 
pat) 17161 return NULL_RTX; 17162 emit_insn (pat); 17163 return target; 17164 17165 case IX86_BUILTIN_VEC_INIT_V2SI: 17166 case IX86_BUILTIN_VEC_INIT_V4HI: 17167 case IX86_BUILTIN_VEC_INIT_V8QI: 17168 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target); 17169 17170 case IX86_BUILTIN_VEC_EXT_V2DF: 17171 case IX86_BUILTIN_VEC_EXT_V2DI: 17172 case IX86_BUILTIN_VEC_EXT_V4SF: 17173 case IX86_BUILTIN_VEC_EXT_V4SI: 17174 case IX86_BUILTIN_VEC_EXT_V8HI: 17175 case IX86_BUILTIN_VEC_EXT_V16QI: 17176 case IX86_BUILTIN_VEC_EXT_V2SI: 17177 case IX86_BUILTIN_VEC_EXT_V4HI: 17178 return ix86_expand_vec_ext_builtin (arglist, target); 17179 17180 case IX86_BUILTIN_VEC_SET_V8HI: 17181 case IX86_BUILTIN_VEC_SET_V4HI: 17182 return ix86_expand_vec_set_builtin (arglist); 17183 17184 default: 17185 break; 17186 } 17187 17188 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) 17189 if (d->code == fcode) 17190 { 17191 /* Compares are treated specially. */ 17192 if (d->icode == CODE_FOR_sse_maskcmpv4sf3 17193 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3 17194 || d->icode == CODE_FOR_sse2_maskcmpv2df3 17195 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3) 17196 return ix86_expand_sse_compare (d, arglist, target); 17197 17198 return ix86_expand_binop_builtin (d->icode, arglist, target); 17199 } 17200 17201 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) 17202 if (d->code == fcode) 17203 return ix86_expand_unop_builtin (d->icode, arglist, target, 0); 17204 17205 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) 17206 if (d->code == fcode) 17207 return ix86_expand_sse_comi (d, arglist, target); 17208 17209 gcc_unreachable (); 17210} 17211 17212/* Store OPERAND to the memory after reload is completed. This means 17213 that we can't easily use assign_stack_local. 
*/ 17214rtx 17215ix86_force_to_memory (enum machine_mode mode, rtx operand) 17216{ 17217 rtx result; 17218 17219 gcc_assert (reload_completed); 17220 if (TARGET_RED_ZONE) 17221 { 17222 result = gen_rtx_MEM (mode, 17223 gen_rtx_PLUS (Pmode, 17224 stack_pointer_rtx, 17225 GEN_INT (-RED_ZONE_SIZE))); 17226 emit_move_insn (result, operand); 17227 } 17228 else if (!TARGET_RED_ZONE && TARGET_64BIT) 17229 { 17230 switch (mode) 17231 { 17232 case HImode: 17233 case SImode: 17234 operand = gen_lowpart (DImode, operand); 17235 /* FALLTHRU */ 17236 case DImode: 17237 emit_insn ( 17238 gen_rtx_SET (VOIDmode, 17239 gen_rtx_MEM (DImode, 17240 gen_rtx_PRE_DEC (DImode, 17241 stack_pointer_rtx)), 17242 operand)); 17243 break; 17244 default: 17245 gcc_unreachable (); 17246 } 17247 result = gen_rtx_MEM (mode, stack_pointer_rtx); 17248 } 17249 else 17250 { 17251 switch (mode) 17252 { 17253 case DImode: 17254 { 17255 rtx operands[2]; 17256 split_di (&operand, 1, operands, operands + 1); 17257 emit_insn ( 17258 gen_rtx_SET (VOIDmode, 17259 gen_rtx_MEM (SImode, 17260 gen_rtx_PRE_DEC (Pmode, 17261 stack_pointer_rtx)), 17262 operands[1])); 17263 emit_insn ( 17264 gen_rtx_SET (VOIDmode, 17265 gen_rtx_MEM (SImode, 17266 gen_rtx_PRE_DEC (Pmode, 17267 stack_pointer_rtx)), 17268 operands[0])); 17269 } 17270 break; 17271 case HImode: 17272 /* Store HImodes as SImodes. */ 17273 operand = gen_lowpart (SImode, operand); 17274 /* FALLTHRU */ 17275 case SImode: 17276 emit_insn ( 17277 gen_rtx_SET (VOIDmode, 17278 gen_rtx_MEM (GET_MODE (operand), 17279 gen_rtx_PRE_DEC (SImode, 17280 stack_pointer_rtx)), 17281 operand)); 17282 break; 17283 default: 17284 gcc_unreachable (); 17285 } 17286 result = gen_rtx_MEM (mode, stack_pointer_rtx); 17287 } 17288 return result; 17289} 17290 17291/* Free operand from the memory. 
*/ 17292void 17293ix86_free_from_memory (enum machine_mode mode) 17294{ 17295 if (!TARGET_RED_ZONE) 17296 { 17297 int size; 17298 17299 if (mode == DImode || TARGET_64BIT) 17300 size = 8; 17301 else 17302 size = 4; 17303 /* Use LEA to deallocate stack space. In peephole2 it will be converted 17304 to pop or add instruction if registers are available. */ 17305 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, 17306 gen_rtx_PLUS (Pmode, stack_pointer_rtx, 17307 GEN_INT (size)))); 17308 } 17309} 17310 17311/* Put float CONST_DOUBLE in the constant pool instead of fp regs. 17312 QImode must go into class Q_REGS. 17313 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and 17314 movdf to do mem-to-mem moves through integer regs. */ 17315enum reg_class 17316ix86_preferred_reload_class (rtx x, enum reg_class class) 17317{ 17318 enum machine_mode mode = GET_MODE (x); 17319 17320 /* We're only allowed to return a subclass of CLASS. Many of the 17321 following checks fail for NO_REGS, so eliminate that early. */ 17322 if (class == NO_REGS) 17323 return NO_REGS; 17324 17325 /* All classes can load zeros. */ 17326 if (x == CONST0_RTX (mode)) 17327 return class; 17328 17329 /* Force constants into memory if we are loading a (nonzero) constant into 17330 an MMX or SSE register. This is because there are no MMX/SSE instructions 17331 to load from a constant. */ 17332 if (CONSTANT_P (x) 17333 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))) 17334 return NO_REGS; 17335 17336 /* Prefer SSE regs only, if we can use them for math. */ 17337 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode)) 17338 return SSE_CLASS_P (class) ? class : NO_REGS; 17339 17340 /* Floating-point constants need more complex checks. */ 17341 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode) 17342 { 17343 /* General regs can load everything. 
*/ 17344 if (reg_class_subset_p (class, GENERAL_REGS)) 17345 return class; 17346 17347 /* Floats can load 0 and 1 plus some others. Note that we eliminated 17348 zero above. We only want to wind up preferring 80387 registers if 17349 we plan on doing computation with them. */ 17350 if (TARGET_80387 17351 && standard_80387_constant_p (x)) 17352 { 17353 /* Limit class to non-sse. */ 17354 if (class == FLOAT_SSE_REGS) 17355 return FLOAT_REGS; 17356 if (class == FP_TOP_SSE_REGS) 17357 return FP_TOP_REG; 17358 if (class == FP_SECOND_SSE_REGS) 17359 return FP_SECOND_REG; 17360 if (class == FLOAT_INT_REGS || class == FLOAT_REGS) 17361 return class; 17362 } 17363 17364 return NO_REGS; 17365 } 17366 17367 /* Generally when we see PLUS here, it's the function invariant 17368 (plus soft-fp const_int). Which can only be computed into general 17369 regs. */ 17370 if (GET_CODE (x) == PLUS) 17371 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS; 17372 17373 /* QImode constants are easy to load, but non-constant QImode data 17374 must go into Q_REGS. */ 17375 if (GET_MODE (x) == QImode && !CONSTANT_P (x)) 17376 { 17377 if (reg_class_subset_p (class, Q_REGS)) 17378 return class; 17379 if (reg_class_subset_p (Q_REGS, class)) 17380 return Q_REGS; 17381 return NO_REGS; 17382 } 17383 17384 return class; 17385} 17386 17387/* Discourage putting floating-point values in SSE registers unless 17388 SSE math is being used, and likewise for the 387 registers. */ 17389enum reg_class 17390ix86_preferred_output_reload_class (rtx x, enum reg_class class) 17391{ 17392 enum machine_mode mode = GET_MODE (x); 17393 17394 /* Restrict the output reload class to the register bank that we are doing 17395 math on. If we would like not to return a subset of CLASS, reject this 17396 alternative: if reload cannot do this, it will still use its choice. */ 17397 mode = GET_MODE (x); 17398 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) 17399 return MAYBE_SSE_CLASS_P (class) ? 
SSE_REGS : NO_REGS; 17400 17401 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode)) 17402 { 17403 if (class == FP_TOP_SSE_REGS) 17404 return FP_TOP_REG; 17405 else if (class == FP_SECOND_SSE_REGS) 17406 return FP_SECOND_REG; 17407 else 17408 return FLOAT_CLASS_P (class) ? class : NO_REGS; 17409 } 17410 17411 return class; 17412} 17413 17414/* If we are copying between general and FP registers, we need a memory 17415 location. The same is true for SSE and MMX registers. 17416 17417 The macro can't work reliably when one of the CLASSES is class containing 17418 registers from multiple units (SSE, MMX, integer). We avoid this by never 17419 combining those units in single alternative in the machine description. 17420 Ensure that this constraint holds to avoid unexpected surprises. 17421 17422 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not 17423 enforce these sanity checks. */ 17424 17425int 17426ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2, 17427 enum machine_mode mode, int strict) 17428{ 17429 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) 17430 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) 17431 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) 17432 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2) 17433 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1) 17434 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)) 17435 { 17436 gcc_assert (!strict); 17437 return true; 17438 } 17439 17440 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)) 17441 return true; 17442 17443 /* ??? This is a lie. We do have moves between mmx/general, and for 17444 mmx/sse2. But by saying we need secondary memory we discourage the 17445 register allocator from using the mmx registers unless needed. 
*/ 17446 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) 17447 return true; 17448 17449 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) 17450 { 17451 /* SSE1 doesn't have any direct moves from other classes. */ 17452 if (!TARGET_SSE2) 17453 return true; 17454 17455 /* If the target says that inter-unit moves are more expensive 17456 than moving through memory, then don't generate them. */ 17457 if (!TARGET_INTER_UNIT_MOVES && !optimize_size) 17458 return true; 17459 17460 /* Between SSE and general, we have moves no larger than word size. */ 17461 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) 17462 return true; 17463 17464 /* ??? For the cost of one register reformat penalty, we could use 17465 the same instructions to move SFmode and DFmode data, but the 17466 relevant move patterns don't support those alternatives. */ 17467 if (mode == SFmode || mode == DFmode) 17468 return true; 17469 } 17470 17471 return false; 17472} 17473 17474/* Return true if the registers in CLASS cannot represent the change from 17475 modes FROM to TO. */ 17476 17477bool 17478ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to, 17479 enum reg_class class) 17480{ 17481 if (from == to) 17482 return false; 17483 17484 /* x87 registers can't do subreg at all, as all values are reformatted 17485 to extended precision. */ 17486 if (MAYBE_FLOAT_CLASS_P (class)) 17487 return true; 17488 17489 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class)) 17490 { 17491 /* Vector registers do not support QI or HImode loads. If we don't 17492 disallow a change to these modes, reload will assume it's ok to 17493 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects 17494 the vec_dupv4hi pattern. */ 17495 if (GET_MODE_SIZE (from) < 4) 17496 return true; 17497 17498 /* Vector registers do not support subreg with nonzero offsets, which 17499 are otherwise valid for integer registers. 
Since we can't see 17500 whether we have a nonzero offset from here, prohibit all 17501 nonparadoxical subregs changing size. */ 17502 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from)) 17503 return true; 17504 } 17505 17506 return false; 17507} 17508 17509/* Return the cost of moving data from a register in class CLASS1 to 17510 one in class CLASS2. 17511 17512 It is not required that the cost always equal 2 when FROM is the same as TO; 17513 on some machines it is expensive to move between registers if they are not 17514 general registers. */ 17515 17516int 17517ix86_register_move_cost (enum machine_mode mode, enum reg_class class1, 17518 enum reg_class class2) 17519{ 17520 /* In case we require secondary memory, compute cost of the store followed 17521 by load. In order to avoid bad register allocation choices, we need 17522 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */ 17523 17524 if (ix86_secondary_memory_needed (class1, class2, mode, 0)) 17525 { 17526 int cost = 1; 17527 17528 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0), 17529 MEMORY_MOVE_COST (mode, class1, 1)); 17530 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0), 17531 MEMORY_MOVE_COST (mode, class2, 1)); 17532 17533 /* In case of copying from general_purpose_register we may emit multiple 17534 stores followed by single load causing memory size mismatch stall. 17535 Count this as arbitrarily high cost of 20. */ 17536 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode)) 17537 cost += 20; 17538 17539 /* In the case of FP/MMX moves, the registers actually overlap, and we 17540 have to switch modes in order to treat them differently. */ 17541 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2)) 17542 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1))) 17543 cost += 20; 17544 17545 return cost; 17546 } 17547 17548 /* Moves between SSE/MMX and integer unit are expensive. 
*/ 17549 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2) 17550 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) 17551 return ix86_cost->mmxsse_to_integer; 17552 if (MAYBE_FLOAT_CLASS_P (class1)) 17553 return ix86_cost->fp_move; 17554 if (MAYBE_SSE_CLASS_P (class1)) 17555 return ix86_cost->sse_move; 17556 if (MAYBE_MMX_CLASS_P (class1)) 17557 return ix86_cost->mmx_move; 17558 return 2; 17559} 17560 17561/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */ 17562 17563bool 17564ix86_hard_regno_mode_ok (int regno, enum machine_mode mode) 17565{ 17566 /* Flags and only flags can only hold CCmode values. */ 17567 if (CC_REGNO_P (regno)) 17568 return GET_MODE_CLASS (mode) == MODE_CC; 17569 if (GET_MODE_CLASS (mode) == MODE_CC 17570 || GET_MODE_CLASS (mode) == MODE_RANDOM 17571 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) 17572 return 0; 17573 if (FP_REGNO_P (regno)) 17574 return VALID_FP_MODE_P (mode); 17575 if (SSE_REGNO_P (regno)) 17576 { 17577 /* We implement the move patterns for all vector modes into and 17578 out of SSE registers, even when no operation instructions 17579 are available. */ 17580 return (VALID_SSE_REG_MODE (mode) 17581 || VALID_SSE2_REG_MODE (mode) 17582 || VALID_MMX_REG_MODE (mode) 17583 || VALID_MMX_REG_MODE_3DNOW (mode)); 17584 } 17585 if (MMX_REGNO_P (regno)) 17586 { 17587 /* We implement the move patterns for 3DNOW modes even in MMX mode, 17588 so if the register is available at all, then we can move data of 17589 the given mode into or out of it. */ 17590 return (VALID_MMX_REG_MODE (mode) 17591 || VALID_MMX_REG_MODE_3DNOW (mode)); 17592 } 17593 17594 if (mode == QImode) 17595 { 17596 /* Take care for QImode values - they can be in non-QI regs, 17597 but then they do cause partial register stalls. 
*/ 17598 if (regno < 4 || TARGET_64BIT) 17599 return 1; 17600 if (!TARGET_PARTIAL_REG_STALL) 17601 return 1; 17602 return reload_in_progress || reload_completed; 17603 } 17604 /* We handle both integer and floats in the general purpose registers. */ 17605 else if (VALID_INT_MODE_P (mode)) 17606 return 1; 17607 else if (VALID_FP_MODE_P (mode)) 17608 return 1; 17609 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go 17610 on to use that value in smaller contexts, this can easily force a 17611 pseudo to be allocated to GENERAL_REGS. Since this is no worse than 17612 supporting DImode, allow it. */ 17613 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode)) 17614 return 1; 17615 17616 return 0; 17617} 17618 17619/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a 17620 tieable integer mode. */ 17621 17622static bool 17623ix86_tieable_integer_mode_p (enum machine_mode mode) 17624{ 17625 switch (mode) 17626 { 17627 case HImode: 17628 case SImode: 17629 return true; 17630 17631 case QImode: 17632 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL; 17633 17634 case DImode: 17635 return TARGET_64BIT; 17636 17637 default: 17638 return false; 17639 } 17640} 17641 17642/* Return true if MODE1 is accessible in a register that can hold MODE2 17643 without copying. That is, all register classes that can hold MODE2 17644 can also hold MODE1. */ 17645 17646bool 17647ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) 17648{ 17649 if (mode1 == mode2) 17650 return true; 17651 17652 if (ix86_tieable_integer_mode_p (mode1) 17653 && ix86_tieable_integer_mode_p (mode2)) 17654 return true; 17655 17656 /* MODE2 being XFmode implies fp stack or general regs, which means we 17657 can tie any smaller floating point modes to it. Note that we do not 17658 tie this with TFmode. 
*/ 17659 if (mode2 == XFmode) 17660 return mode1 == SFmode || mode1 == DFmode; 17661 17662 /* MODE2 being DFmode implies fp stack, general or sse regs, which means 17663 that we can tie it with SFmode. */ 17664 if (mode2 == DFmode) 17665 return mode1 == SFmode; 17666 17667 /* If MODE2 is only appropriate for an SSE register, then tie with 17668 any other mode acceptable to SSE registers. */ 17669 if (GET_MODE_SIZE (mode2) >= 8 17670 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) 17671 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1); 17672 17673 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie 17674 with any other mode acceptable to MMX registers. */ 17675 if (GET_MODE_SIZE (mode2) == 8 17676 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2)) 17677 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1); 17678 17679 return false; 17680} 17681 17682/* Return the cost of moving data of mode M between a 17683 register and memory. A value of 2 is the default; this cost is 17684 relative to those in `REGISTER_MOVE_COST'. 17685 17686 If moving between registers and memory is more expensive than 17687 between two registers, you should define this macro to express the 17688 relative cost. 17689 17690 Model also increased moving costs of QImode registers in non 17691 Q_REGS classes. 17692 */ 17693int 17694ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in) 17695{ 17696 if (FLOAT_CLASS_P (class)) 17697 { 17698 int index; 17699 switch (mode) 17700 { 17701 case SFmode: 17702 index = 0; 17703 break; 17704 case DFmode: 17705 index = 1; 17706 break; 17707 case XFmode: 17708 index = 2; 17709 break; 17710 default: 17711 return 100; 17712 } 17713 return in ? 
ix86_cost->fp_load [index] : ix86_cost->fp_store [index]; 17714 } 17715 if (SSE_CLASS_P (class)) 17716 { 17717 int index; 17718 switch (GET_MODE_SIZE (mode)) 17719 { 17720 case 4: 17721 index = 0; 17722 break; 17723 case 8: 17724 index = 1; 17725 break; 17726 case 16: 17727 index = 2; 17728 break; 17729 default: 17730 return 100; 17731 } 17732 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index]; 17733 } 17734 if (MMX_CLASS_P (class)) 17735 { 17736 int index; 17737 switch (GET_MODE_SIZE (mode)) 17738 { 17739 case 4: 17740 index = 0; 17741 break; 17742 case 8: 17743 index = 1; 17744 break; 17745 default: 17746 return 100; 17747 } 17748 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index]; 17749 } 17750 switch (GET_MODE_SIZE (mode)) 17751 { 17752 case 1: 17753 if (in) 17754 return (Q_CLASS_P (class) ? ix86_cost->int_load[0] 17755 : ix86_cost->movzbl_load); 17756 else 17757 return (Q_CLASS_P (class) ? ix86_cost->int_store[0] 17758 : ix86_cost->int_store[0] + 4); 17759 break; 17760 case 2: 17761 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1]; 17762 default: 17763 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */ 17764 if (mode == TFmode) 17765 mode = XFmode; 17766 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2]) 17767 * (((int) GET_MODE_SIZE (mode) 17768 + UNITS_PER_WORD - 1) / UNITS_PER_WORD)); 17769 } 17770} 17771 17772/* Compute a (partial) cost for rtx X. Return true if the complete 17773 cost has been computed, and false if subexpressions should be 17774 scanned. In either case, *TOTAL contains the cost result. 
*/ 17775 17776static bool 17777ix86_rtx_costs (rtx x, int code, int outer_code, int *total) 17778{ 17779 enum machine_mode mode = GET_MODE (x); 17780 17781 switch (code) 17782 { 17783 case CONST_INT: 17784 case CONST: 17785 case LABEL_REF: 17786 case SYMBOL_REF: 17787 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode)) 17788 *total = 3; 17789 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode)) 17790 *total = 2; 17791 else if (flag_pic && SYMBOLIC_CONST (x) 17792 && (!TARGET_64BIT 17793 || (!GET_CODE (x) != LABEL_REF 17794 && (GET_CODE (x) != SYMBOL_REF 17795 || !SYMBOL_REF_LOCAL_P (x))))) 17796 *total = 1; 17797 else 17798 *total = 0; 17799 return true; 17800 17801 case CONST_DOUBLE: 17802 if (mode == VOIDmode) 17803 *total = 0; 17804 else 17805 switch (standard_80387_constant_p (x)) 17806 { 17807 case 1: /* 0.0 */ 17808 *total = 1; 17809 break; 17810 default: /* Other constants */ 17811 *total = 2; 17812 break; 17813 case 0: 17814 case -1: 17815 /* Start with (MEM (SYMBOL_REF)), since that's where 17816 it'll probably end up. Add a penalty for size. */ 17817 *total = (COSTS_N_INSNS (1) 17818 + (flag_pic != 0 && !TARGET_64BIT) 17819 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2)); 17820 break; 17821 } 17822 return true; 17823 17824 case ZERO_EXTEND: 17825 /* The zero extensions is often completely free on x86_64, so make 17826 it as cheap as possible. 
*/ 17827 if (TARGET_64BIT && mode == DImode 17828 && GET_MODE (XEXP (x, 0)) == SImode) 17829 *total = 1; 17830 else if (TARGET_ZERO_EXTEND_WITH_AND) 17831 *total = ix86_cost->add; 17832 else 17833 *total = ix86_cost->movzx; 17834 return false; 17835 17836 case SIGN_EXTEND: 17837 *total = ix86_cost->movsx; 17838 return false; 17839 17840 case ASHIFT: 17841 if (GET_CODE (XEXP (x, 1)) == CONST_INT 17842 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT)) 17843 { 17844 HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 17845 if (value == 1) 17846 { 17847 *total = ix86_cost->add; 17848 return false; 17849 } 17850 if ((value == 2 || value == 3) 17851 && ix86_cost->lea <= ix86_cost->shift_const) 17852 { 17853 *total = ix86_cost->lea; 17854 return false; 17855 } 17856 } 17857 /* FALLTHRU */ 17858 17859 case ROTATE: 17860 case ASHIFTRT: 17861 case LSHIFTRT: 17862 case ROTATERT: 17863 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode) 17864 { 17865 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 17866 { 17867 if (INTVAL (XEXP (x, 1)) > 32) 17868 *total = ix86_cost->shift_const + COSTS_N_INSNS (2); 17869 else 17870 *total = ix86_cost->shift_const * 2; 17871 } 17872 else 17873 { 17874 if (GET_CODE (XEXP (x, 1)) == AND) 17875 *total = ix86_cost->shift_var * 2; 17876 else 17877 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2); 17878 } 17879 } 17880 else 17881 { 17882 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 17883 *total = ix86_cost->shift_const; 17884 else 17885 *total = ix86_cost->shift_var; 17886 } 17887 return false; 17888 17889 case MULT: 17890 if (FLOAT_MODE_P (mode)) 17891 { 17892 *total = ix86_cost->fmul; 17893 return false; 17894 } 17895 else 17896 { 17897 rtx op0 = XEXP (x, 0); 17898 rtx op1 = XEXP (x, 1); 17899 int nbits; 17900 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 17901 { 17902 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 17903 for (nbits = 0; value != 0; value &= value - 1) 17904 nbits++; 17905 } 17906 else 17907 /* This is arbitrary. 
*/ 17908 nbits = 7; 17909 17910 /* Compute costs correctly for widening multiplication. */ 17911 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND) 17912 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2 17913 == GET_MODE_SIZE (mode)) 17914 { 17915 int is_mulwiden = 0; 17916 enum machine_mode inner_mode = GET_MODE (op0); 17917 17918 if (GET_CODE (op0) == GET_CODE (op1)) 17919 is_mulwiden = 1, op1 = XEXP (op1, 0); 17920 else if (GET_CODE (op1) == CONST_INT) 17921 { 17922 if (GET_CODE (op0) == SIGN_EXTEND) 17923 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode) 17924 == INTVAL (op1); 17925 else 17926 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode)); 17927 } 17928 17929 if (is_mulwiden) 17930 op0 = XEXP (op0, 0), mode = GET_MODE (op0); 17931 } 17932 17933 *total = (ix86_cost->mult_init[MODE_INDEX (mode)] 17934 + nbits * ix86_cost->mult_bit 17935 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code)); 17936 17937 return true; 17938 } 17939 17940 case DIV: 17941 case UDIV: 17942 case MOD: 17943 case UMOD: 17944 if (FLOAT_MODE_P (mode)) 17945 *total = ix86_cost->fdiv; 17946 else 17947 *total = ix86_cost->divide[MODE_INDEX (mode)]; 17948 return false; 17949 17950 case PLUS: 17951 if (FLOAT_MODE_P (mode)) 17952 *total = ix86_cost->fadd; 17953 else if (GET_MODE_CLASS (mode) == MODE_INT 17954 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode)) 17955 { 17956 if (GET_CODE (XEXP (x, 0)) == PLUS 17957 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 17958 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT 17959 && CONSTANT_P (XEXP (x, 1))) 17960 { 17961 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); 17962 if (val == 2 || val == 4 || val == 8) 17963 { 17964 *total = ix86_cost->lea; 17965 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code); 17966 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), 17967 outer_code); 17968 *total += rtx_cost (XEXP (x, 1), outer_code); 17969 return true; 17970 } 17971 } 17972 else if 
(GET_CODE (XEXP (x, 0)) == MULT 17973 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT) 17974 { 17975 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); 17976 if (val == 2 || val == 4 || val == 8) 17977 { 17978 *total = ix86_cost->lea; 17979 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code); 17980 *total += rtx_cost (XEXP (x, 1), outer_code); 17981 return true; 17982 } 17983 } 17984 else if (GET_CODE (XEXP (x, 0)) == PLUS) 17985 { 17986 *total = ix86_cost->lea; 17987 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code); 17988 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code); 17989 *total += rtx_cost (XEXP (x, 1), outer_code); 17990 return true; 17991 } 17992 } 17993 /* FALLTHRU */ 17994 17995 case MINUS: 17996 if (FLOAT_MODE_P (mode)) 17997 { 17998 *total = ix86_cost->fadd; 17999 return false; 18000 } 18001 /* FALLTHRU */ 18002 18003 case AND: 18004 case IOR: 18005 case XOR: 18006 if (!TARGET_64BIT && mode == DImode) 18007 { 18008 *total = (ix86_cost->add * 2 18009 + (rtx_cost (XEXP (x, 0), outer_code) 18010 << (GET_MODE (XEXP (x, 0)) != DImode)) 18011 + (rtx_cost (XEXP (x, 1), outer_code) 18012 << (GET_MODE (XEXP (x, 1)) != DImode))); 18013 return true; 18014 } 18015 /* FALLTHRU */ 18016 18017 case NEG: 18018 if (FLOAT_MODE_P (mode)) 18019 { 18020 *total = ix86_cost->fchs; 18021 return false; 18022 } 18023 /* FALLTHRU */ 18024 18025 case NOT: 18026 if (!TARGET_64BIT && mode == DImode) 18027 *total = ix86_cost->add * 2; 18028 else 18029 *total = ix86_cost->add; 18030 return false; 18031 18032 case COMPARE: 18033 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT 18034 && XEXP (XEXP (x, 0), 1) == const1_rtx 18035 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT 18036 && XEXP (x, 1) == const0_rtx) 18037 { 18038 /* This kind of construct is implemented using test[bwl]. 18039 Treat it as if we had an AND. 
*/ 18040 *total = (ix86_cost->add 18041 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code) 18042 + rtx_cost (const1_rtx, outer_code)); 18043 return true; 18044 } 18045 return false; 18046 18047 case FLOAT_EXTEND: 18048 if (!TARGET_SSE_MATH 18049 || mode == XFmode 18050 || (mode == DFmode && !TARGET_SSE2)) 18051 /* For standard 80387 constants, raise the cost to prevent 18052 compress_float_constant() to generate load from memory. */ 18053 switch (standard_80387_constant_p (XEXP (x, 0))) 18054 { 18055 case -1: 18056 case 0: 18057 *total = 0; 18058 break; 18059 case 1: /* 0.0 */ 18060 *total = 1; 18061 break; 18062 default: 18063 *total = (x86_ext_80387_constants & TUNEMASK 18064 || optimize_size 18065 ? 1 : 0); 18066 } 18067 return false; 18068 18069 case ABS: 18070 if (FLOAT_MODE_P (mode)) 18071 *total = ix86_cost->fabs; 18072 return false; 18073 18074 case SQRT: 18075 if (FLOAT_MODE_P (mode)) 18076 *total = ix86_cost->fsqrt; 18077 return false; 18078 18079 case UNSPEC: 18080 if (XINT (x, 1) == UNSPEC_TP) 18081 *total = 0; 18082 return false; 18083 18084 default: 18085 return false; 18086 } 18087} 18088 18089#if TARGET_MACHO 18090 18091static int current_machopic_label_num; 18092 18093/* Given a symbol name and its associated stub, write out the 18094 definition of the stub. */ 18095 18096void 18097machopic_output_stub (FILE *file, const char *symb, const char *stub) 18098{ 18099 unsigned int length; 18100 char *binder_name, *symbol_name, lazy_ptr_name[32]; 18101 int label = ++current_machopic_label_num; 18102 18103 /* For 64-bit we shouldn't get here. */ 18104 gcc_assert (!TARGET_64BIT); 18105 18106 /* Lose our funky encoding stuff so it doesn't contaminate the stub. 
*/ 18107 symb = (*targetm.strip_name_encoding) (symb); 18108 18109 length = strlen (stub); 18110 binder_name = alloca (length + 32); 18111 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length); 18112 18113 length = strlen (symb); 18114 symbol_name = alloca (length + 32); 18115 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); 18116 18117 sprintf (lazy_ptr_name, "L%d$lz", label); 18118 18119 if (MACHOPIC_PURE) 18120 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]); 18121 else 18122 switch_to_section (darwin_sections[machopic_symbol_stub_section]); 18123 18124 fprintf (file, "%s:\n", stub); 18125 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 18126 18127 if (MACHOPIC_PURE) 18128 { 18129 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label); 18130 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label); 18131 fprintf (file, "\tjmp\t*%%edx\n"); 18132 } 18133 else 18134 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name); 18135 18136 fprintf (file, "%s:\n", binder_name); 18137 18138 if (MACHOPIC_PURE) 18139 { 18140 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label); 18141 fprintf (file, "\tpushl\t%%eax\n"); 18142 } 18143 else 18144 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name); 18145 18146 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n"); 18147 18148 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]); 18149 fprintf (file, "%s:\n", lazy_ptr_name); 18150 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 18151 fprintf (file, "\t.long %s\n", binder_name); 18152} 18153 18154void 18155darwin_x86_file_end (void) 18156{ 18157 darwin_file_end (); 18158 ix86_file_end (); 18159} 18160#endif /* TARGET_MACHO */ 18161 18162/* Order the registers for register allocator. */ 18163 18164void 18165x86_order_regs_for_local_alloc (void) 18166{ 18167 int pos = 0; 18168 int i; 18169 18170 /* First allocate the local general purpose registers. 
*/ 18171 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 18172 if (GENERAL_REGNO_P (i) && call_used_regs[i]) 18173 reg_alloc_order [pos++] = i; 18174 18175 /* Global general purpose registers. */ 18176 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 18177 if (GENERAL_REGNO_P (i) && !call_used_regs[i]) 18178 reg_alloc_order [pos++] = i; 18179 18180 /* x87 registers come first in case we are doing FP math 18181 using them. */ 18182 if (!TARGET_SSE_MATH) 18183 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) 18184 reg_alloc_order [pos++] = i; 18185 18186 /* SSE registers. */ 18187 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) 18188 reg_alloc_order [pos++] = i; 18189 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) 18190 reg_alloc_order [pos++] = i; 18191 18192 /* x87 registers. */ 18193 if (TARGET_SSE_MATH) 18194 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) 18195 reg_alloc_order [pos++] = i; 18196 18197 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) 18198 reg_alloc_order [pos++] = i; 18199 18200 /* Initialize the rest of array as we do not allocate some registers 18201 at all. */ 18202 while (pos < FIRST_PSEUDO_REGISTER) 18203 reg_alloc_order [pos++] = 0; 18204} 18205 18206/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in 18207 struct attribute_spec.handler. 
*/ 18208static tree 18209ix86_handle_struct_attribute (tree *node, tree name, 18210 tree args ATTRIBUTE_UNUSED, 18211 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 18212{ 18213 tree *type = NULL; 18214 if (DECL_P (*node)) 18215 { 18216 if (TREE_CODE (*node) == TYPE_DECL) 18217 type = &TREE_TYPE (*node); 18218 } 18219 else 18220 type = node; 18221 18222 if (!(type && (TREE_CODE (*type) == RECORD_TYPE 18223 || TREE_CODE (*type) == UNION_TYPE))) 18224 { 18225 warning (OPT_Wattributes, "%qs attribute ignored", 18226 IDENTIFIER_POINTER (name)); 18227 *no_add_attrs = true; 18228 } 18229 18230 else if ((is_attribute_p ("ms_struct", name) 18231 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) 18232 || ((is_attribute_p ("gcc_struct", name) 18233 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) 18234 { 18235 warning (OPT_Wattributes, "%qs incompatible attribute ignored", 18236 IDENTIFIER_POINTER (name)); 18237 *no_add_attrs = true; 18238 } 18239 18240 return NULL_TREE; 18241} 18242 18243static bool 18244ix86_ms_bitfield_layout_p (tree record_type) 18245{ 18246 return (TARGET_MS_BITFIELD_LAYOUT && 18247 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) 18248 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)); 18249} 18250 18251/* Returns an expression indicating where the this parameter is 18252 located on entry to the FUNCTION. */ 18253 18254static rtx 18255x86_this_parameter (tree function) 18256{ 18257 tree type = TREE_TYPE (function); 18258 18259 if (TARGET_64BIT) 18260 { 18261 int n = aggregate_value_p (TREE_TYPE (type), type) != 0; 18262 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]); 18263 } 18264 18265 if (ix86_function_regparm (type, function) > 0) 18266 { 18267 tree parm; 18268 18269 parm = TYPE_ARG_TYPES (type); 18270 /* Figure out whether or not the function has a variable number of 18271 arguments. 
*/ 18272 for (; parm; parm = TREE_CHAIN (parm)) 18273 if (TREE_VALUE (parm) == void_type_node) 18274 break; 18275 /* If not, the this parameter is in the first argument. */ 18276 if (parm) 18277 { 18278 int regno = 0; 18279 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) 18280 regno = 2; 18281 return gen_rtx_REG (SImode, regno); 18282 } 18283 } 18284 18285 if (aggregate_value_p (TREE_TYPE (type), type)) 18286 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8)); 18287 else 18288 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4)); 18289} 18290 18291/* Determine whether x86_output_mi_thunk can succeed. */ 18292 18293static bool 18294x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED, 18295 HOST_WIDE_INT delta ATTRIBUTE_UNUSED, 18296 HOST_WIDE_INT vcall_offset, tree function) 18297{ 18298 /* 64-bit can handle anything. */ 18299 if (TARGET_64BIT) 18300 return true; 18301 18302 /* For 32-bit, everything's fine if we have one free register. */ 18303 if (ix86_function_regparm (TREE_TYPE (function), function) < 3) 18304 return true; 18305 18306 /* Need a free register for vcall_offset. */ 18307 if (vcall_offset) 18308 return false; 18309 18310 /* Need a free register for GOT references. */ 18311 if (flag_pic && !(*targetm.binds_local_p) (function)) 18312 return false; 18313 18314 /* Otherwise ok. */ 18315 return true; 18316} 18317 18318/* Output the assembler code for a thunk function. THUNK_DECL is the 18319 declaration for the thunk function itself, FUNCTION is the decl for 18320 the target function. DELTA is an immediate constant offset to be 18321 added to THIS. If VCALL_OFFSET is nonzero, the word at 18322 *(*this + vcall_offset) should be added to THIS. 
*/ 18323 18324static void 18325x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, 18326 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, 18327 HOST_WIDE_INT vcall_offset, tree function) 18328{ 18329 rtx xops[3]; 18330 rtx this = x86_this_parameter (function); 18331 rtx this_reg, tmp; 18332 18333 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well 18334 pull it in now and let DELTA benefit. */ 18335 if (REG_P (this)) 18336 this_reg = this; 18337 else if (vcall_offset) 18338 { 18339 /* Put the this parameter into %eax. */ 18340 xops[0] = this; 18341 xops[1] = this_reg = gen_rtx_REG (Pmode, 0); 18342 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); 18343 } 18344 else 18345 this_reg = NULL_RTX; 18346 18347 /* Adjust the this parameter by a fixed constant. */ 18348 if (delta) 18349 { 18350 xops[0] = GEN_INT (delta); 18351 xops[1] = this_reg ? this_reg : this; 18352 if (TARGET_64BIT) 18353 { 18354 if (!x86_64_general_operand (xops[0], DImode)) 18355 { 18356 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */); 18357 xops[1] = tmp; 18358 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops); 18359 xops[0] = tmp; 18360 xops[1] = this; 18361 } 18362 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); 18363 } 18364 else 18365 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); 18366 } 18367 18368 /* Adjust the this parameter by a value stored in the vtable. 
*/ 18369 if (vcall_offset) 18370 { 18371 if (TARGET_64BIT) 18372 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */); 18373 else 18374 { 18375 int tmp_regno = 2 /* ECX */; 18376 if (lookup_attribute ("fastcall", 18377 TYPE_ATTRIBUTES (TREE_TYPE (function)))) 18378 tmp_regno = 0 /* EAX */; 18379 tmp = gen_rtx_REG (SImode, tmp_regno); 18380 } 18381 18382 xops[0] = gen_rtx_MEM (Pmode, this_reg); 18383 xops[1] = tmp; 18384 if (TARGET_64BIT) 18385 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); 18386 else 18387 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); 18388 18389 /* Adjust the this parameter. */ 18390 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset)); 18391 if (TARGET_64BIT && !memory_operand (xops[0], Pmode)) 18392 { 18393 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */); 18394 xops[0] = GEN_INT (vcall_offset); 18395 xops[1] = tmp2; 18396 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); 18397 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2)); 18398 } 18399 xops[1] = this_reg; 18400 if (TARGET_64BIT) 18401 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); 18402 else 18403 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); 18404 } 18405 18406 /* If necessary, drop THIS back to its stack slot. 
*/ 18407 if (this_reg && this_reg != this) 18408 { 18409 xops[0] = this_reg; 18410 xops[1] = this; 18411 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); 18412 } 18413 18414 xops[0] = XEXP (DECL_RTL (function), 0); 18415 if (TARGET_64BIT) 18416 { 18417 if (!flag_pic || (*targetm.binds_local_p) (function)) 18418 output_asm_insn ("jmp\t%P0", xops); 18419 else 18420 { 18421 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL); 18422 tmp = gen_rtx_CONST (Pmode, tmp); 18423 tmp = gen_rtx_MEM (QImode, tmp); 18424 xops[0] = tmp; 18425 output_asm_insn ("jmp\t%A0", xops); 18426 } 18427 } 18428 else 18429 { 18430 if (!flag_pic || (*targetm.binds_local_p) (function)) 18431 output_asm_insn ("jmp\t%P0", xops); 18432 else 18433#if TARGET_MACHO 18434 if (TARGET_MACHO) 18435 { 18436 rtx sym_ref = XEXP (DECL_RTL (function), 0); 18437 tmp = (gen_rtx_SYMBOL_REF 18438 (Pmode, 18439 machopic_indirection_name (sym_ref, /*stub_p=*/true))); 18440 tmp = gen_rtx_MEM (QImode, tmp); 18441 xops[0] = tmp; 18442 output_asm_insn ("jmp\t%0", xops); 18443 } 18444 else 18445#endif /* TARGET_MACHO */ 18446 { 18447 tmp = gen_rtx_REG (SImode, 2 /* ECX */); 18448 output_set_got (tmp, NULL_RTX); 18449 18450 xops[1] = tmp; 18451 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops); 18452 output_asm_insn ("jmp\t{*}%1", xops); 18453 } 18454 } 18455} 18456 18457static void 18458x86_file_start (void) 18459{ 18460 default_file_start (); 18461#if TARGET_MACHO 18462 darwin_file_start (); 18463#endif 18464 if (X86_FILE_START_VERSION_DIRECTIVE) 18465 fputs ("\t.version\t\"01.01\"\n", asm_out_file); 18466 if (X86_FILE_START_FLTUSED) 18467 fputs ("\t.global\t__fltused\n", asm_out_file); 18468 if (ix86_asm_dialect == ASM_INTEL) 18469 fputs ("\t.intel_syntax\n", asm_out_file); 18470} 18471 18472int 18473x86_field_alignment (tree field, int computed) 18474{ 18475 enum machine_mode mode; 18476 tree type = TREE_TYPE (field); 18477 18478 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) 18479 return 
computed; 18480 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE 18481 ? get_inner_array_type (type) : type); 18482 if (mode == DFmode || mode == DCmode 18483 || GET_MODE_CLASS (mode) == MODE_INT 18484 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) 18485 return MIN (32, computed); 18486 return computed; 18487} 18488 18489/* Output assembler code to FILE to increment profiler label # LABELNO 18490 for profiling a function entry. */ 18491void 18492x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) 18493{ 18494 if (TARGET_64BIT) 18495 if (flag_pic) 18496 { 18497#ifndef NO_PROFILE_COUNTERS 18498 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno); 18499#endif 18500 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME); 18501 } 18502 else 18503 { 18504#ifndef NO_PROFILE_COUNTERS 18505 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno); 18506#endif 18507 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); 18508 } 18509 else if (flag_pic) 18510 { 18511#ifndef NO_PROFILE_COUNTERS 18512 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n", 18513 LPREFIX, labelno, PROFILE_COUNT_REGISTER); 18514#endif 18515 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME); 18516 } 18517 else 18518 { 18519#ifndef NO_PROFILE_COUNTERS 18520 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno, 18521 PROFILE_COUNT_REGISTER); 18522#endif 18523 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); 18524 } 18525} 18526 18527/* We don't have exact information about the insn sizes, but we may assume 18528 quite safely that we are informed about all 1 byte insns and memory 18529 address sizes. This is enough to eliminate unnecessary padding in 18530 99% of cases. */ 18531 18532static int 18533min_insn_size (rtx insn) 18534{ 18535 int l = 0; 18536 18537 if (!INSN_P (insn) || !active_insn_p (insn)) 18538 return 0; 18539 18540 /* Discard alignments we've emit and jump instructions. 
*/ 18541 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE 18542 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) 18543 return 0; 18544 if (GET_CODE (insn) == JUMP_INSN 18545 && (GET_CODE (PATTERN (insn)) == ADDR_VEC 18546 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)) 18547 return 0; 18548 18549 /* Important case - calls are always 5 bytes. 18550 It is common to have many calls in the row. */ 18551 if (GET_CODE (insn) == CALL_INSN 18552 && symbolic_reference_mentioned_p (PATTERN (insn)) 18553 && !SIBLING_CALL_P (insn)) 18554 return 5; 18555 if (get_attr_length (insn) <= 1) 18556 return 1; 18557 18558 /* For normal instructions we may rely on the sizes of addresses 18559 and the presence of symbol to require 4 bytes of encoding. 18560 This is not the case for jumps where references are PC relative. */ 18561 if (GET_CODE (insn) != JUMP_INSN) 18562 { 18563 l = get_attr_length_address (insn); 18564 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn))) 18565 l = 4; 18566 } 18567 if (l) 18568 return 1+l; 18569 else 18570 return 2; 18571} 18572 18573/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte 18574 window. */ 18575 18576static void 18577ix86_avoid_jump_misspredicts (void) 18578{ 18579 rtx insn, start = get_insns (); 18580 int nbytes = 0, njumps = 0; 18581 int isjump = 0; 18582 18583 /* Look for all minimal intervals of instructions containing 4 jumps. 18584 The intervals are bounded by START and INSN. NBYTES is the total 18585 size of instructions in the interval including INSN and not including 18586 START. When the NBYTES is smaller than 16 bytes, it is possible 18587 that the end of START and INSN ends up in the same 16byte page. 18588 18589 The smallest offset in the page INSN can start is the case where START 18590 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN). 18591 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN). 
18592 */ 18593 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 18594 { 18595 18596 nbytes += min_insn_size (insn); 18597 if (dump_file) 18598 fprintf(dump_file, "Insn %i estimated to %i bytes\n", 18599 INSN_UID (insn), min_insn_size (insn)); 18600 if ((GET_CODE (insn) == JUMP_INSN 18601 && GET_CODE (PATTERN (insn)) != ADDR_VEC 18602 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) 18603 || GET_CODE (insn) == CALL_INSN) 18604 njumps++; 18605 else 18606 continue; 18607 18608 while (njumps > 3) 18609 { 18610 start = NEXT_INSN (start); 18611 if ((GET_CODE (start) == JUMP_INSN 18612 && GET_CODE (PATTERN (start)) != ADDR_VEC 18613 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC) 18614 || GET_CODE (start) == CALL_INSN) 18615 njumps--, isjump = 1; 18616 else 18617 isjump = 0; 18618 nbytes -= min_insn_size (start); 18619 } 18620 gcc_assert (njumps >= 0); 18621 if (dump_file) 18622 fprintf (dump_file, "Interval %i to %i has %i bytes\n", 18623 INSN_UID (start), INSN_UID (insn), nbytes); 18624 18625 if (njumps == 3 && isjump && nbytes < 16) 18626 { 18627 int padsize = 15 - nbytes + min_insn_size (insn); 18628 18629 if (dump_file) 18630 fprintf (dump_file, "Padding insn %i by %i bytes!\n", 18631 INSN_UID (insn), padsize); 18632 emit_insn_before (gen_align (GEN_INT (padsize)), insn); 18633 } 18634 } 18635} 18636 18637/* AMD Athlon works faster 18638 when RET is not destination of conditional jump or directly preceded 18639 by other jump instruction. We avoid the penalty by inserting NOP just 18640 before the RET instructions in such cases. 
*/
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  /* Walk every predecessor edge of the exit block; each candidate
     basic block BB ends in the function's return insn.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      /* Only consider blocks that actually end in a bare RETURN and are
	 likely to be executed; cold blocks are not worth padding.  */
      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
	  || !maybe_hot_bb_p (bb))
	continue;
      /* Scan backwards for the nearest active insn or code label.  */
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
	  break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
	{
	  edge e;
	  edge_iterator ei;

	  /* The RET immediately follows a label: if any real (non-entry,
	     non-fallthru) edge jumps to that label, the RET can be a
	     direct branch target, which triggers the mispredict.  */
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      replace = true;
	}
      if (!replace)
	{
	  /* Also pad when the RET directly follows a conditional jump
	     or a call.  */
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
		  || GET_CODE (prev) == CALL_INSN))
	    replace = true;
	  /* Empty functions get branch mispredict even when the jump destination
	     is not visible to us.  */
	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
	    replace = true;
	}
      if (replace)
	{
	  /* Emit the padded (long) return form and drop the plain RET.  */
	  emit_insn_before (gen_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}

/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* Both passes are tuning transformations only; skip them when not
     optimizing or when optimizing for size.  */
  if (TARGET_PAD_RETURNS && optimize && !optimize_size)
    ix86_pad_returns ();
  if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
    ix86_avoid_jump_misspredicts ();
}

/* Return nonzero when QImode register that must be represented via REX prefix
   is used.
*/
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  /* Use the cached recog data for INSN's operands; regno >= 4 in QImode
     means a register whose byte form needs a REX prefix (per the comment
     above — TODO confirm this matches the SPL/BPL/SIL/DIL encoding rule).  */
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
	&& REGNO (recog_data.operand[i]) >= 4)
      return true;
  return false;
}

/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  /* R8-R15 and XMM8-XMM15 are only reachable with a REX prefix.  */
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  /* for_each_rtx returns nonzero as soon as the callback does, so this
     is a short-circuit scan over INSN's whole pattern.  */
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}

/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.
*/

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  /* operands[0] is the FP destination, operands[1] the unsigned integer
     source (SImode or DImode only).  */
  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  /* If the sign bit is clear, a plain signed FLOAT conversion is exact.  */
  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* Sign bit set: halve the value with the low bit folded back in
     ((in >> 1) | (in & 1)) so it fits the signed range, convert, then
     double the result.  The OR keeps rounding correct for odd values.  */
  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.
*/

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  enum machine_mode smode, wsmode, wvmode;
  rtx x;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These modes have a native VEC_DUPLICATE pattern.  */
      val = force_reg (GET_MODE_INNER (mode), val);
      x = gen_rtx_VEC_DUPLICATE (mode, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  /* pshufw-capable targets can broadcast the low HImode part
	     of an SImode register directly.  */
	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      else
	{
	  smode = HImode;
	  wsmode = SImode;
	  wvmode = V2SImode;
	  goto widen;
	}

    case V8QImode:
      if (!mmx_ok)
	return false;
      smode = QImode;
      wsmode = HImode;
      wvmode = V4HImode;
      goto widen;
    case V8HImode:
      if (TARGET_SSE2)
	{
	  rtx tmp1, tmp2;
	  /* Extend HImode to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
	  /* Insert the SImode value as low element of V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
				    CONST0_RTX (V4SImode),
				    const1_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
	  /* Cast the V4SImode vector back to a V8HImode vector.  */
	  tmp1 = gen_reg_rtx (V8HImode);
	  emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
	  /* Duplicate the low short through the whole low SImode word.  */
	  emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
	  /* Cast the V8HImode vector back to a V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
	  /* Replicate the low element of the V4SImode vector.  */
	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
	  /* Cast the V4SImode back to V8HImode, and store in target.  */
	  emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
	  return true;
	}
      smode = HImode;
      wsmode = SImode;
      wvmode = V4SImode;
      goto widen;
    case V16QImode:
      if (TARGET_SSE2)
	{
	  rtx tmp1, tmp2;
	  /* Extend QImode to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
	  /* Insert the SImode value as low element of V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
				    CONST0_RTX (V4SImode),
				    const1_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
	  /* Cast the V4SImode vector back to a V16QImode vector.  */
	  tmp1 = gen_reg_rtx (V16QImode);
	  emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
	  /* Duplicate the low byte through the whole low SImode word.  */
	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
	  /* Cast the V16QImode vector back to a V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
	  /* Replicate the low element of the V4SImode vector.  */
	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
	  /* Cast the V4SImode back to V16QImode, and store in target.  */
	  emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
	  return true;
	}
      smode = QImode;
      wsmode = HImode;
      wvmode = V8HImode;
      goto widen;
    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
			       GEN_INT (GET_MODE_BITSIZE (smode)),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
	gcc_unreachable ();
      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      /* Only the low element can be set this way.  */
      if (one_var != 0)
	return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      /* The shuffles below need a pseudo to work on; don't operate on a
	 hard register directly.  */
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
	new_target = gen_reg_rtx (mode);
      else
	new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
	{
	  /* We need to shuffle the value to the correct position, so
	     create a new pseudo to store the intermediate result.  */

	  /* With SSE2, we can use the integer shuffle insns.  */
	  if (mode != V4SFmode && TARGET_SSE2)
	    {
	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
					    GEN_INT (1),
					    GEN_INT (one_var == 1 ? 0 : 1),
					    GEN_INT (one_var == 2 ? 0 : 1),
					    GEN_INT (one_var == 3 ? 0 : 1)));
	      if (target != new_target)
		emit_move_insn (target, new_target);
	      return true;
	    }

	  /* Otherwise convert the intermediate result to V4SFmode and
	     use the SSE1 shuffle instructions.  */
	  if (mode != V4SFmode)
	    {
	      tmp = gen_reg_rtx (V4SFmode);
	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
	    }
	  else
	    tmp = new_target;

	  emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
				       GEN_INT (1),
				       GEN_INT (one_var == 1 ? 0 : 1),
				       GEN_INT (one_var == 2 ? 0+4 : 1+4),
				       GEN_INT (one_var == 3 ? 0+4 : 1+4)));

	  if (mode != V4SFmode)
	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
	  else if (tmp != target)
	    emit_move_insn (target, tmp);
	}
      else if (target != new_target)
	emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
	return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      /* For the narrow element modes, only the low element is cheap.  */
      if (one_var != 0)
	return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
						var, one_var))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  /* Build the constant vector with the variable slot zeroed; it will be
     loaded from the pool and the variable element inserted afterwards.  */
  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.
	 */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  /* Variable byte is the high half: shift it up and mask the
	     constant neighbor into the low byte.  */
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  /* Variable byte is the low half: the constant neighbor goes up.  */
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}

/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
{
  enum machine_mode half_mode = GET_MODE_INNER (mode);
  rtx op0 = NULL, op1 = NULL;
  bool use_vec_concat = false;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      /* For the two element vectors, we always implement VEC_CONCAT.  */
      op0 = XVECEXP (vals, 0, 0);
      op1 = XVECEXP (vals, 0, 1);
      use_vec_concat = true;
      break;

    case V4SFmode:
      half_mode = V2SFmode;
      goto half;
    case V4SImode:
      half_mode = V2SImode;
      goto half;
    half:
      {
	rtvec v;

	/* For V4SF and V4SI, we implement a concat of two V2 vectors.
	   Recurse to load the two halves.  */

	op0 = gen_reg_rtx (half_mode);
	v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
	ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

	op1 = gen_reg_rtx (half_mode);
	v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
	ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));

	use_vec_concat = true;
      }
      break;

    case V8HImode:
    case V16QImode:
    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

  if (use_vec_concat)
    {
      if (!register_operand (op0, half_mode))
	op0 = force_reg (half_mode, op0);
      if (!register_operand (op1, half_mode))
	op1 = force_reg (half_mode, op1);

      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
    }
  else
    {
      /* Narrow-element case: pack the elements into word_mode integers
	 by shift-and-or, then assemble the words into the vector.  */
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
	{
	  rtx word = NULL_RTX;

	  /* Elements are folded in from the highest slot downwards so
	     that element 0 ends up in the low bits of the word.  */
	  for (j = 0; j < n_elt_per_word; ++j)
	    {
	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	      elt = convert_modes (word_mode, inner_mode, elt, true);

	      if (j == 0)
		word = elt;
	      else
		{
		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					      word, 1, OPTAB_LIB_WIDEN);
		  word = expand_simple_binop (word_mode, IOR, word, elt,
					      word, 1, OPTAB_LIB_WIDEN);
		}
	    }

	  words[i] = word;
	}

      if (n_words == 1)
	emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
	{
	  rtx tmp = gen_reg_rtx (mode);
	  /* The CLOBBER tells the register allocator the whole vector is
	     about to be written, so the partial stores need no merge.  */
	  emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	  emit_move_insn (target, tmp);
	}
      else if (n_words == 4)
	{
	  rtx tmp = gen_reg_rtx (V4SImode);
	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	  emit_move_insn (target, gen_lowpart (mode, tmp));
	}
      else
	gcc_unreachable ();
    }
}

/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  /* Classify the initializer: count non-constant elements (remembering
     the last one) and check for all-identical / all-zero patterns.  */
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.
     */
  if (n_var == 1)
    {
      if (all_const_zero
	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, one_var),
						  one_var))
	return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
	return;
    }

  /* Fall back: everything variable, or the special cases failed.  */
  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}

/* Store scalar VAL into element ELT of vector TARGET, leaving the
   other elements unchanged.  Suppress the use of MMX instructions
   unless MMX_OK is true.  */

void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;
  rtx tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  /* Extract the untouched element and re-concatenate.  */
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt == 0)
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DFmode:
    case V2DImode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_sse_unpcklps (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (1), GEN_INT (0),
				       GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B C X
	     (selector picks elements 0,1 of target and 2,0 of tmp).  */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;

    case V4SImode:
      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.
	     */

	  rtx order[4];

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  /* The same permutation swaps 0 and ELT in both directions,
	     so it is applied before and after the element-0 store.  */
	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);
	}
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      /* Fallback: spill the vector to the stack, store the element in
	 memory, and reload the whole vector.  */
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}

/* Extract element ELT of vector VEC into scalar TARGET.  Suppress the
   use of MMX instructions unless MMX_OK is true.  */

void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      /* Shuffle the wanted element into slot 0 first, then extract
	 element 0 below.  */
      switch (elt)
	{
	case 0:
	  tmp = vec;
	  break;

	case 1:
	case 3:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
				       GEN_INT (elt), GEN_INT (elt),
				       GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;

	case 2:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));
	  break;

	default:
	  gcc_unreachable ();
	}
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      if (TARGET_SSE2)
	{
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;

	    case 1:
	    case 3:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;

	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      /* Fallback: spill the vector to the stack and load the element
	 from memory.  */
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}

/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
   pattern to reduce; DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx tmp1, tmp2, tmp3;

  tmp1 = gen_reg_rtx (V4SFmode);
  tmp2 = gen_reg_rtx (V4SFmode);
  tmp3 = gen_reg_rtx (V4SFmode);

  /* Fold the high half onto the low half, then fold the remaining
     two elements together.  */
  emit_insn (gen_sse_movhlps (tmp1, in, in));
  emit_insn (fn (tmp2, tmp1, in));

  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
			       GEN_INT (1), GEN_INT (1),
			       GEN_INT (1+4), GEN_INT (1+4)));
  emit_insn (fn (dest, tmp2, tmp3));
}

/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  /* Decimal float is supported in software on all i386 targets.  */
  if (DECIMAL_FLOAT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}

/* Worker function for TARGET_MD_ASM_CLOBBERS.
   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
		      tree clobbers)
{
  /* Every asm is assumed to clobber the flags, the FP status word and
     the direction flag.  The string lengths passed to build_string
     exclude the terminating NUL.  */
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
			clobbers);
  return clobbers;
}

/* Return true if EXP goes in large data/bss (the medium code model's
   .ldata/.lbss sections).  */

static bool
ix86_in_large_data_p (tree exp)
{
  /* Only the medium code model distinguishes large data.  */
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      /* An explicit section attribute decides directly.  */
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}

/* Worker for TARGET_ENCODE_SECTION_INFO: in addition to the default
   processing, mark symbols living in large data so addressing code
   knows they may be out of 32-bit range.  */
static void
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}

/* Worker function for REVERSE_CONDITION.
*/ 19677 19678enum rtx_code 19679ix86_reverse_condition (enum rtx_code code, enum machine_mode mode) 19680{ 19681 return (mode != CCFPmode && mode != CCFPUmode 19682 ? reverse_condition (code) 19683 : reverse_condition_maybe_unordered (code)); 19684} 19685 19686/* Output code to perform an x87 FP register move, from OPERANDS[1] 19687 to OPERANDS[0]. */ 19688 19689const char * 19690output_387_reg_move (rtx insn, rtx *operands) 19691{ 19692 if (REG_P (operands[1]) 19693 && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) 19694 { 19695 if (REGNO (operands[0]) == FIRST_STACK_REG) 19696 return output_387_ffreep (operands, 0); 19697 return "fstp\t%y0"; 19698 } 19699 if (STACK_TOP_P (operands[0])) 19700 return "fld%z1\t%y1"; 19701 return "fst\t%y0"; 19702} 19703 19704/* Output code to perform a conditional jump to LABEL, if C2 flag in 19705 FP status register is set. */ 19706 19707void 19708ix86_emit_fp_unordered_jump (rtx label) 19709{ 19710 rtx reg = gen_reg_rtx (HImode); 19711 rtx temp; 19712 19713 emit_insn (gen_x86_fnstsw_1 (reg)); 19714 19715 if (TARGET_USE_SAHF) 19716 { 19717 emit_insn (gen_x86_sahf_1 (reg)); 19718 19719 temp = gen_rtx_REG (CCmode, FLAGS_REG); 19720 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx); 19721 } 19722 else 19723 { 19724 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04))); 19725 19726 temp = gen_rtx_REG (CCNOmode, FLAGS_REG); 19727 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx); 19728 } 19729 19730 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp, 19731 gen_rtx_LABEL_REF (VOIDmode, label), 19732 pc_rtx); 19733 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp); 19734 emit_jump_insn (temp); 19735} 19736 19737/* Output code to perform a log1p XFmode calculation. 
   */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  /* fyl2xp1 is only valid for |x| < 1 - sqrt(2)/2 ~= 0.2928932...;
     inside that range use it directly, otherwise fall back to
     fyl2x on 1 + x.  */
  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}

/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_fundamental_type (tree type)
{
  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      /* Not an extended fundamental type; use the default mangling.  */
      return NULL;
    }
}

/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}

/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      /* PIC code uses pc-relative encodings; indirect via the GOT when
	 the symbol may be dynamically relocated.  */
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}

#include "gt-i386.h"